#!/usr/bin/env python3
# /// script
# dependencies = [
#     "httpx",
#     "click",
#     "pandas",
# ]
# ///
"""
Kalshi Market Search Script
Search for markets by keyword across market titles, subtitles, and tickers (including series-level titles and tickers).
Uses a local cache for fast, comprehensive searches across all markets.
The cache is built on first run (takes 2-5 minutes) and refreshed every 6 hours.
Subsequent searches read from the local cache and complete almost immediately.
Usage:
uv run search.py bitcoin
uv run search.py "election"
uv run search.py keyword --limit 5
uv run search.py keyword --json
uv run search.py --rebuild-cache # Force cache rebuild
"""
import json
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
import click
import httpx
import pandas as pd
# Configuration
API_BASE_URL = "https://api.elections.kalshi.com/trade-api/v2"
API_TIMEOUT = 30.0 # seconds
USER_AGENT = "Kalshi-CLI/1.0"
# Use user cache directory for portable installation
# This works regardless of where the script is installed (marketplace, manual, development)
CACHE_DIR = Path.home() / ".cache" / "kalshi-markets"
CACHE_TTL_HOURS = 6
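# Cache snapshots are plain CSV files named by build time, e.g. (illustrative
# timestamp, not a real file):
#   ~/.cache/kalshi-markets/kalshi_markets_20250101_1200.csv
# The newest file wins; older snapshots are only deleted by --rebuild-cache.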
class KalshiSearchCache:
"""Embedded search cache functionality for fast market searches"""
def __init__(self):
"""Initialize cache manager"""
self.cache_dir = CACHE_DIR
self.cache_ttl = timedelta(hours=CACHE_TTL_HOURS)
self.df_cache = None
def get_cache_file(self) -> Path | None:
"""Get the most recent cache file"""
if not self.cache_dir.exists():
return None
# Look for CSV cache files with pattern kalshi_markets_*.csv
cache_files = list(self.cache_dir.glob("kalshi_markets_*.csv"))
if not cache_files:
return None
# Return the most recent file
return max(cache_files, key=lambda f: f.stat().st_mtime)
def is_cache_valid(self) -> bool:
"""Check if cache exists and is still valid"""
cache_file = self.get_cache_file()
if not cache_file:
return False
# Check cache age
cache_age = datetime.now() - datetime.fromtimestamp(cache_file.stat().st_mtime)
return cache_age < self.cache_ttl
def load_cache(self, quiet: bool = False) -> pd.DataFrame | None:
"""Load cache from disk if valid"""
if not self.is_cache_valid():
return None
cache_file = self.get_cache_file()
if not cache_file:
return None
try:
df = pd.read_csv(cache_file)
if not quiet:
print(f"[CACHE] Loaded {len(df)} markets from cache")
return df
except Exception as e:
if not quiet:
print(f"[CACHE] Failed to load cache: {e}")
return None
def save_cache(self, df: pd.DataFrame, quiet: bool = False):
"""Save DataFrame to cache"""
try:
self.cache_dir.mkdir(parents=True, exist_ok=True)
# Use timestamp in filename like the CLI does
timestamp = datetime.now().strftime("%Y%m%d_%H%M")
cache_file = self.cache_dir / f"kalshi_markets_{timestamp}.csv"
df.to_csv(cache_file, index=False)
if not quiet:
print(f"[CACHE] Saved {len(df)} markets to cache")
except Exception as e:
if not quiet:
print(f"[CACHE] Failed to save cache: {e}")
def build_cache(self, client: "KalshiClient", quiet: bool = False) -> pd.DataFrame:
"""Build complete market cache by fetching all series and their markets"""
if not quiet:
print("[CACHE BUILD] Starting market data collection...")
print("[CACHE BUILD] This may take 2-5 minutes on first run...")
print("[CACHE BUILD] Fetching all series and their markets...")
# Step 1: Fetch all series
if not quiet:
print("[CACHE BUILD] Step 1/2: Fetching series list...")
series_data = client.get_series_list()
all_series = series_data.get("series", [])
if not quiet:
print(f"[CACHE BUILD] Found {len(all_series)} series to process")
# Step 2: Fetch markets for each series
if not quiet:
print("[CACHE BUILD] Step 2/2: Fetching markets from each series...")
print("[CACHE BUILD] Filter: status='open' (active tradeable markets only)")
all_markets = []
series_with_markets = 0
errors = 0
for i, series in enumerate(all_series):
series_ticker = series.get("ticker")
series_title = series.get("title", "")
series_category = series.get("category", "")
if not quiet and (i + 1) % 100 == 0:
print(
f"[CACHE BUILD] Progress: {i + 1}/{len(all_series)} series ({100*(i+1)/len(all_series):.1f}%)"
)
print(f"[CACHE BUILD] Markets collected: {len(all_markets)}")
try:
                # Fetch open markets for this series (a single page of up to 100)
markets_data = client.get_markets(
limit=100, status="open", series_ticker=series_ticker
)
series_markets = markets_data.get("markets", [])
if series_markets:
series_with_markets += 1
# Add series info to each market
for market in series_markets:
market["series_ticker"] = series_ticker
market["series_title"] = series_title
market["series_category"] = series_category
all_markets.append(market)
except Exception:
errors += 1
if errors > 50:
if not quiet:
print(f"[CACHE BUILD] Too many errors ({errors}), stopping")
break
if not quiet:
print("[CACHE BUILD] Collection complete!")
print(f"[CACHE BUILD] Total markets: {len(all_markets)}")
print(f"[CACHE BUILD] Series with markets: {series_with_markets}")
# Convert to DataFrame
df = pd.DataFrame(all_markets) if all_markets else pd.DataFrame()
# Save to cache
self.save_cache(df, quiet=quiet)
return df
def search(self, keyword: str, limit: int = 10, quiet: bool = False) -> list[dict[str, Any]]:
"""Search markets using cache"""
# Load or build cache
if self.df_cache is None:
self.df_cache = self.load_cache(quiet=quiet)
if self.df_cache is None:
# Need to build cache
if not quiet:
print("[CACHE] No valid cache found, building...")
with KalshiClient() as client:
self.df_cache = self.build_cache(client, quiet=quiet)
        # Guard against an empty cache: a build that collected no markets
        # produces a DataFrame with no columns to search
        if self.df_cache.empty:
            return []
        # Perform search
keyword_lower = keyword.lower()
# Create mask for matching rows (include series fields for better search)
mask = (
self.df_cache["title"].str.lower().str.contains(keyword_lower, na=False)
| self.df_cache["subtitle"].str.lower().str.contains(keyword_lower, na=False)
| self.df_cache["ticker"].str.lower().str.contains(keyword_lower, na=False)
)
# Add series fields if they exist in the DataFrame
if "series_title" in self.df_cache.columns:
mask = mask | self.df_cache["series_title"].str.lower().str.contains(
keyword_lower, na=False
)
if "series_ticker" in self.df_cache.columns:
mask = mask | self.df_cache["series_ticker"].str.lower().str.contains(
keyword_lower, na=False
)
# Get matching markets
matches = self.df_cache[mask]
# Sort by volume and limit results
if "volume_24h" in matches.columns:
matches = matches.sort_values("volume_24h", ascending=False)
matches = matches.head(limit)
        # Convert back to list of dicts, round-tripping through JSON so that
        # pandas NaN values become None (json.dumps would otherwise emit
        # invalid JSON for missing fields)
        return json.loads(matches.to_json(orient="records"))
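# A minimal sketch of the substring matching used in search() above, on
# made-up rows (illustrative data, not real markets):
#
#   df = pd.DataFrame([
#       {"title": "Bitcoin above $100k?", "subtitle": "", "ticker": "EXAMPLE-BTC"},
#       {"title": "Rain in NYC tomorrow?", "subtitle": "", "ticker": "EXAMPLE-NYC"},
#   ])
#   mask = df["title"].str.lower().str.contains("bitcoin", na=False)
#   df[mask]  # -> only the Bitcoin row; na=False treats missing text as no match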
class KalshiClient:
"""Minimal HTTP client for Kalshi API - search functionality"""
def __init__(self):
"""Initialize HTTP client"""
self.client = httpx.Client(
base_url=API_BASE_URL,
timeout=API_TIMEOUT,
headers={"User-Agent": USER_AGENT},
)
def __enter__(self):
"""Context manager entry"""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit - cleanup"""
self.client.close()
def get_series_list(self) -> dict[str, Any]:
"""
Get list of all series.
Returns:
Dict with 'series' array
Raises:
Exception if API call fails
"""
try:
response = self.client.get("/series")
response.raise_for_status()
return response.json()
        except httpx.HTTPStatusError as e:
            raise Exception(f"API error: {e.response.status_code} - {e.response.text}") from e
        except httpx.RequestError as e:
            raise Exception(f"Network error: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Unexpected error: {str(e)}") from e
def get_markets(
self,
limit: int = 100,
status: str | None = "open",
        cursor: str | None = None,
series_ticker: str | None = None,
) -> dict[str, Any]:
"""
Get markets for searching.
Args:
limit: Number of markets to fetch
status: Market status filter (None for all statuses)
cursor: Pagination cursor
series_ticker: Filter by series ticker
Returns:
Dict with markets array and cursor
Raises:
Exception if API call fails
"""
params = {"limit": str(min(limit, 1000))}
if status is not None:
params["status"] = status
if cursor:
params["cursor"] = cursor
if series_ticker:
params["series_ticker"] = series_ticker
try:
response = self.client.get("/markets", params=params)
response.raise_for_status()
return response.json()
        except httpx.HTTPStatusError as e:
            raise Exception(f"API error: {e.response.status_code} - {e.response.text}") from e
        except httpx.RequestError as e:
            raise Exception(f"Network error: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Unexpected error: {str(e)}") from e
def format_search_result(market: dict[str, Any], index: int) -> str:
"""Format a single search result"""
ticker = market.get("ticker", "N/A")
title = market.get("title", "N/A")
yes_bid = market.get("yes_bid", 0)
yes_ask = market.get("yes_ask", 0)
last_price = market.get("last_price", 0)
volume_24h = market.get("volume_24h", 0)
status = market.get("status", "unknown")
status_icon = "🟢" if status == "active" else "🔴"
lines = []
lines.append(f"{index}. {status_icon} {ticker}")
lines.append(f" {title[:70]}{'...' if len(title) > 70 else ''}")
if yes_bid or yes_ask:
lines.append(f" Price: Bid {yes_bid}¢ | Ask {yes_ask}¢ | Last {last_price}¢")
if volume_24h > 0:
lines.append(f" 24h Volume: ${volume_24h/100:,.2f}")
return "\n".join(lines)
def format_search_results(keyword: str, results: list[dict[str, Any]]) -> str:
"""
Format search results for human-readable output.
Args:
keyword: Search keyword
results: List of matching markets
Returns:
Formatted string for display
"""
lines = []
lines.append("\n" + "=" * 60)
lines.append(f"🔍 Search Results for '{keyword}'")
lines.append("=" * 60)
if not results:
lines.append("\nNo markets found matching your search.")
lines.append("\nTip: Try broader keywords.")
else:
lines.append(f"Found {len(results)} matching markets:\n")
for i, market in enumerate(results, 1):
lines.append(format_search_result(market, i))
lines.append("")
lines.append("" * 60)
lines.append("Note: Searches across all ~6900 markets using local cache.")
lines.append("Cache refreshes automatically every 6 hours.")
lines.append("=" * 60)
return "\n".join(lines)
@click.command()
@click.argument("keyword", required=False)
@click.option("--limit", default=10, type=int, help="Maximum number of results to return")
@click.option(
"--json",
"output_json",
is_flag=True,
help="Output as JSON instead of human-readable format",
)
@click.option("--rebuild-cache", is_flag=True, help="Force rebuild of the market cache")
def main(keyword: str | None, limit: int, output_json: bool, rebuild_cache: bool):
"""
Search for markets by keyword using cached data.
    KEYWORD is the search term matched against market titles, subtitles, and tickers (including series-level fields).
Uses a local cache for fast searches across all ~6900 markets.
Cache is built on first run (2-5 minutes) and refreshed every 6 hours.
No authentication required.
"""
try:
# Initialize cache
cache = KalshiSearchCache()
# Handle cache rebuild
if rebuild_cache:
if not output_json:
click.echo("Rebuilding market cache...")
# Delete existing cache files
if cache.cache_dir.exists():
for old_cache in cache.cache_dir.glob("kalshi_markets_*.csv"):
old_cache.unlink()
# Rebuild
with KalshiClient() as client:
cache.build_cache(client, quiet=output_json)
if not output_json:
click.echo("✅ Cache rebuilt successfully!")
if not keyword:
sys.exit(0)
# Require keyword for search
if not keyword:
raise ValueError("Keyword is required for search (or use --rebuild-cache)")
if not keyword.strip():
raise ValueError("Keyword cannot be empty")
# Search using cache
results = cache.search(keyword, limit=limit, quiet=output_json)
# Output results
if output_json:
# JSON output for automation/MCP
click.echo(json.dumps(results, indent=2))
else:
# Human-readable output
formatted = format_search_results(keyword, results)
click.echo(formatted)
sys.exit(0)
except Exception as e:
if output_json:
# JSON error format
error_data = {"error": str(e)}
click.echo(json.dumps(error_data, indent=2))
else:
# Human-readable error
click.echo(f"❌ Error: {e}", err=True)
sys.exit(1)
if __name__ == "__main__":
main()