Initial commit

skills/kalshi-markets/scripts/search.py (new file, 449 lines)

@@ -0,0 +1,449 @@
#!/usr/bin/env python3
# /// script
# dependencies = [
#     "httpx",
#     "click",
#     "pandas",
# ]
# ///

"""
Kalshi Market Search Script

Search for markets by keyword across titles, subtitles, and tickers.
Uses a local cache for fast, comprehensive searches across all markets.

The cache is built on first run (takes 2-5 minutes) and refreshed every 6 hours.
Subsequent searches are instant.

Usage:
    uv run search.py bitcoin
    uv run search.py "election"
    uv run search.py keyword --limit 5
    uv run search.py keyword --json
    uv run search.py --rebuild-cache  # Force cache rebuild
"""
import json
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any

import click
import httpx
import pandas as pd

# Configuration
API_BASE_URL = "https://api.elections.kalshi.com/trade-api/v2"
API_TIMEOUT = 30.0  # seconds
USER_AGENT = "Kalshi-CLI/1.0"
# Use user cache directory for portable installation
# This works regardless of where the script is installed (marketplace, manual, development)
CACHE_DIR = Path.home() / ".cache" / "kalshi-markets"
CACHE_TTL_HOURS = 6
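# Cache layout (illustrative): each refresh writes a timestamped CSV snapshot,
# e.g. ~/.cache/kalshi-markets/kalshi_markets_20250101_1200.csv. The newest
# file is used, and anything older than CACHE_TTL_HOURS counts as stale.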


class KalshiSearchCache:
    """Embedded search cache functionality for fast market searches"""

    def __init__(self):
        """Initialize cache manager"""
        self.cache_dir = CACHE_DIR
        self.cache_ttl = timedelta(hours=CACHE_TTL_HOURS)
        self.df_cache = None

    def get_cache_file(self) -> Path | None:
        """Get the most recent cache file"""
        if not self.cache_dir.exists():
            return None

        # Look for CSV cache files with pattern kalshi_markets_*.csv
        cache_files = list(self.cache_dir.glob("kalshi_markets_*.csv"))
        if not cache_files:
            return None

        # Return the most recent file
        return max(cache_files, key=lambda f: f.stat().st_mtime)

    def is_cache_valid(self) -> bool:
        """Check if cache exists and is still valid"""
        cache_file = self.get_cache_file()
        if not cache_file:
            return False

        # Check cache age
        cache_age = datetime.now() - datetime.fromtimestamp(cache_file.stat().st_mtime)
        return cache_age < self.cache_ttl

    def load_cache(self, quiet: bool = False) -> pd.DataFrame | None:
        """Load cache from disk if valid"""
        if not self.is_cache_valid():
            return None

        cache_file = self.get_cache_file()
        if not cache_file:
            return None

        try:
            df = pd.read_csv(cache_file)
            if not quiet:
                print(f"[CACHE] Loaded {len(df)} markets from cache")
            return df
        except Exception as e:
            if not quiet:
                print(f"[CACHE] Failed to load cache: {e}")
            return None

    def save_cache(self, df: pd.DataFrame, quiet: bool = False):
        """Save DataFrame to cache"""
        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            # Use timestamp in filename like the CLI does
            timestamp = datetime.now().strftime("%Y%m%d_%H%M")
            cache_file = self.cache_dir / f"kalshi_markets_{timestamp}.csv"
            df.to_csv(cache_file, index=False)
            if not quiet:
                print(f"[CACHE] Saved {len(df)} markets to cache")
        except Exception as e:
            if not quiet:
                print(f"[CACHE] Failed to save cache: {e}")

    def build_cache(self, client: "KalshiClient", quiet: bool = False) -> pd.DataFrame:
        """Build complete market cache by fetching all series and their markets"""
        if not quiet:
            print("[CACHE BUILD] Starting market data collection...")
            print("[CACHE BUILD] This may take 2-5 minutes on first run...")
            print("[CACHE BUILD] Fetching all series and their markets...")

        # Step 1: Fetch all series
        if not quiet:
            print("[CACHE BUILD] Step 1/2: Fetching series list...")
        series_data = client.get_series_list()
        all_series = series_data.get("series", [])
        if not quiet:
            print(f"[CACHE BUILD] Found {len(all_series)} series to process")

        # Step 2: Fetch markets for each series
        if not quiet:
            print("[CACHE BUILD] Step 2/2: Fetching markets from each series...")
            print("[CACHE BUILD] Filter: status='open' (active tradeable markets only)")

        all_markets = []
        series_with_markets = 0
        errors = 0

        for i, series in enumerate(all_series):
            series_ticker = series.get("ticker")
            series_title = series.get("title", "")
            series_category = series.get("category", "")

            if not quiet and (i + 1) % 100 == 0:
                print(
                    f"[CACHE BUILD] Progress: {i + 1}/{len(all_series)} series ({100*(i+1)/len(all_series):.1f}%)"
                )
                print(f"[CACHE BUILD] Markets collected: {len(all_markets)}")

            try:
                # Fetch markets for this series (open markets only)
                markets_data = client.get_markets(
                    limit=100, status="open", series_ticker=series_ticker
                )
                series_markets = markets_data.get("markets", [])

                if series_markets:
                    series_with_markets += 1

                # Add series info to each market
                for market in series_markets:
                    market["series_ticker"] = series_ticker
                    market["series_title"] = series_title
                    market["series_category"] = series_category
                    all_markets.append(market)

            except Exception:
                errors += 1
                if errors > 50:
                    if not quiet:
                        print(f"[CACHE BUILD] Too many errors ({errors}), stopping")
                    break
                continue

        if not quiet:
            print("[CACHE BUILD] Collection complete!")
            print(f"[CACHE BUILD] Total markets: {len(all_markets)}")
            print(f"[CACHE BUILD] Series with markets: {series_with_markets}")

        # Convert to DataFrame
        df = pd.DataFrame(all_markets) if all_markets else pd.DataFrame()

        # Save to cache
        self.save_cache(df, quiet=quiet)

        return df
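
    # Note on pagination: build_cache requests only the first page (up to 100
    # open markets) per series and does not follow cursors. A hedged sketch of
    # following them, assuming the `cursor` field works as documented in
    # KalshiClient.get_markets below:
    #
    #     cursor = None
    #     while True:
    #         page = client.get_markets(limit=100, status="open",
    #                                   series_ticker=series_ticker, cursor=cursor)
    #         series_markets.extend(page.get("markets", []))
    #         cursor = page.get("cursor")
    #         if not cursor:
    #             break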

    def search(self, keyword: str, limit: int = 10, quiet: bool = False) -> list[dict[str, Any]]:
        """Search markets using cache"""
        # Load or build cache
        if self.df_cache is None:
            self.df_cache = self.load_cache(quiet=quiet)

        if self.df_cache is None:
            # Need to build cache
            if not quiet:
                print("[CACHE] No valid cache found, building...")
            with KalshiClient() as client:
                self.df_cache = self.build_cache(client, quiet=quiet)

        # Perform search
        keyword_lower = keyword.lower()

        # Create mask for matching rows (include series fields for better search)
        mask = (
            self.df_cache["title"].str.lower().str.contains(keyword_lower, na=False)
            | self.df_cache["subtitle"].str.lower().str.contains(keyword_lower, na=False)
            | self.df_cache["ticker"].str.lower().str.contains(keyword_lower, na=False)
        )

        # Add series fields if they exist in the DataFrame
        if "series_title" in self.df_cache.columns:
            mask = mask | self.df_cache["series_title"].str.lower().str.contains(
                keyword_lower, na=False
            )
        if "series_ticker" in self.df_cache.columns:
            mask = mask | self.df_cache["series_ticker"].str.lower().str.contains(
                keyword_lower, na=False
            )

        # Get matching markets
        matches = self.df_cache[mask]

        # Sort by volume and limit results
        if "volume_24h" in matches.columns:
            matches = matches.sort_values("volume_24h", ascending=False)

        matches = matches.head(limit)

        # Convert back to list of dicts
        return matches.to_dict("records")
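
# Example (illustrative) of programmatic use, bypassing the CLI:
#
#     cache = KalshiSearchCache()
#     for market in cache.search("bitcoin", limit=5, quiet=True):
#         print(market["ticker"], market["title"])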


class KalshiClient:
    """Minimal HTTP client for Kalshi API - search functionality"""

    def __init__(self):
        """Initialize HTTP client"""
        self.client = httpx.Client(
            base_url=API_BASE_URL,
            timeout=API_TIMEOUT,
            headers={"User-Agent": USER_AGENT},
        )

    def __enter__(self):
        """Context manager entry"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - cleanup"""
        self.client.close()

    def get_series_list(self) -> dict[str, Any]:
        """
        Get list of all series.

        Returns:
            Dict with 'series' array

        Raises:
            Exception if API call fails
        """
        try:
            response = self.client.get("/series")
            response.raise_for_status()
            return response.json()
        except httpx.HTTPStatusError as e:
            raise Exception(f"API error: {e.response.status_code} - {e.response.text}") from e
        except httpx.RequestError as e:
            raise Exception(f"Network error: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Unexpected error: {str(e)}") from e

    def get_markets(
        self,
        limit: int = 100,
        status: str | None = "open",
        cursor: str | None = None,
        series_ticker: str | None = None,
    ) -> dict[str, Any]:
        """
        Get markets for searching.

        Args:
            limit: Number of markets to fetch
            status: Market status filter (None for all statuses)
            cursor: Pagination cursor
            series_ticker: Filter by series ticker

        Returns:
            Dict with markets array and cursor

        Raises:
            Exception if API call fails
        """
        params = {"limit": str(min(limit, 1000))}
        if status is not None:
            params["status"] = status
        if cursor:
            params["cursor"] = cursor
        if series_ticker:
            params["series_ticker"] = series_ticker

        try:
            response = self.client.get("/markets", params=params)
            response.raise_for_status()
            return response.json()
        except httpx.HTTPStatusError as e:
            raise Exception(f"API error: {e.response.status_code} - {e.response.text}") from e
        except httpx.RequestError as e:
            raise Exception(f"Network error: {str(e)}") from e
        except Exception as e:
            raise Exception(f"Unexpected error: {str(e)}") from e
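
# Example (illustrative): KalshiClient is a context manager, so the underlying
# httpx connection pool is closed deterministically:
#
#     with KalshiClient() as client:
#         page = client.get_markets(limit=10, status="open")
#         print(len(page.get("markets", [])))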


def format_search_result(market: dict[str, Any], index: int) -> str:
    """Format a single search result"""
    ticker = market.get("ticker", "N/A")
    title = market.get("title", "N/A")
    yes_bid = market.get("yes_bid", 0)
    yes_ask = market.get("yes_ask", 0)
    last_price = market.get("last_price", 0)
    volume_24h = market.get("volume_24h", 0)
    status = market.get("status", "unknown")

    status_icon = "🟢" if status == "active" else "🔴"

    lines = []
    lines.append(f"{index}. {status_icon} {ticker}")
    lines.append(f"   {title[:70]}{'...' if len(title) > 70 else ''}")

    if yes_bid or yes_ask:
        lines.append(f"   Price: Bid {yes_bid}¢ | Ask {yes_ask}¢ | Last {last_price}¢")

    if volume_24h > 0:
        lines.append(f"   24h Volume: ${volume_24h/100:,.2f}")

    return "\n".join(lines)


def format_search_results(keyword: str, results: list[dict[str, Any]]) -> str:
    """
    Format search results for human-readable output.

    Args:
        keyword: Search keyword
        results: List of matching markets

    Returns:
        Formatted string for display
    """
    lines = []
    lines.append("\n" + "=" * 60)
    lines.append(f"🔍 Search Results for '{keyword}'")
    lines.append("=" * 60)

    if not results:
        lines.append("\nNo markets found matching your search.")
        lines.append("\nTip: Try broader keywords.")
    else:
        lines.append(f"Found {len(results)} matching markets:\n")

        for i, market in enumerate(results, 1):
            lines.append(format_search_result(market, i))
            lines.append("")

        lines.append("─" * 60)
        lines.append("Note: Searches across all ~6900 markets using local cache.")
        lines.append("Cache refreshes automatically every 6 hours.")

    lines.append("=" * 60)
    return "\n".join(lines)


@click.command()
@click.argument("keyword", required=False)
@click.option("--limit", default=10, type=int, help="Maximum number of results to return")
@click.option(
    "--json",
    "output_json",
    is_flag=True,
    help="Output as JSON instead of human-readable format",
)
@click.option("--rebuild-cache", is_flag=True, help="Force rebuild of the market cache")
def main(keyword: str, limit: int, output_json: bool, rebuild_cache: bool):
    """
    Search for markets by keyword using cached data.

    KEYWORD is the search term to look for in market titles, subtitles, and tickers.

    Uses a local cache for fast searches across all ~6900 markets.
    Cache is built on first run (2-5 minutes) and refreshed every 6 hours.
    No authentication required.
    """
    try:
        # Initialize cache
        cache = KalshiSearchCache()

        # Handle cache rebuild
        if rebuild_cache:
            if not output_json:
                click.echo("Rebuilding market cache...")

            # Delete existing cache files
            if cache.cache_dir.exists():
                for old_cache in cache.cache_dir.glob("kalshi_markets_*.csv"):
                    old_cache.unlink()

            # Rebuild
            with KalshiClient() as client:
                cache.build_cache(client, quiet=output_json)

            if not output_json:
                click.echo("✅ Cache rebuilt successfully!")

            if not keyword:
                sys.exit(0)

        # Require keyword for search
        if not keyword:
            raise ValueError("Keyword is required for search (or use --rebuild-cache)")

        if not keyword.strip():
            raise ValueError("Keyword cannot be empty")

        # Search using cache
        results = cache.search(keyword, limit=limit, quiet=output_json)

        # Output results
        if output_json:
            # JSON output for automation/MCP
            click.echo(json.dumps(results, indent=2))
        else:
            # Human-readable output
            formatted = format_search_results(keyword, results)
            click.echo(formatted)

        sys.exit(0)

    except Exception as e:
        if output_json:
            # JSON error format
            error_data = {"error": str(e)}
            click.echo(json.dumps(error_data, indent=2))
        else:
            # Human-readable error
            click.echo(f"❌ Error: {e}", err=True)
        sys.exit(1)


if __name__ == "__main__":
    main()