#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
#     "litellm",
#     "requests>=2.31.0",
#     "tenacity",
#     "markitdown>=0.1.0",
# ]
# ///
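# The block above is PEP 723 inline script metadata; running the file with
# "uv run" resolves and installs these dependencies before execution.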
"""
Consultant CLI - LiteLLM-powered LLM consultation tool
Supports async invocation, custom base URLs, and flexible model selection.

Run with: uv run consultant_cli.py [args]
This automatically installs/updates the declared dependencies
(litellm, requests, tenacity, markitdown) on first run.
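
Example:
    uv run consultant_cli.py -p "Review this code for bugs" -f src/main.py -s code-review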
"""

import argparse
import json
import sys
from pathlib import Path

# Add scripts directory to path
SCRIPTS_DIR = Path(__file__).parent
sys.path.insert(0, str(SCRIPTS_DIR))
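
# With the scripts directory on sys.path, the sibling helper modules below
# (config, file_handler, litellm_client, model_selector, session_manager)
# resolve regardless of the caller's working directory.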
import config
from file_handler import (
    FileHandler,
    build_multimodal_content,
    build_prompt_with_references,
    has_images,
    validate_vision_support,
)
from litellm_client import LiteLLMClient
from model_selector import ModelSelector
from session_manager import SessionManager


def validate_context_size(
    full_prompt: str, model: str, client: LiteLLMClient, num_files: int
) -> bool:
    """
    Validate that the full prompt fits in the model's context window.
    Returns True if it fits; raises ValueError if it exceeds the limit.
    """

    # Count tokens for the complete prompt
    total_tokens = client.count_tokens(full_prompt, model)

    # Get limit
    max_tokens = client.get_max_tokens(model)

    # Reserve for response
    available_tokens = int(max_tokens * (1 - config.CONTEXT_RESERVE_RATIO))
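    # CONTEXT_RESERVE_RATIO comes from config; it is presumably the fraction of
    # the context window held back so the model has room to respond.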

    # Print summary
    print("\n📊 Token Usage:")
    print(f"- Input: {total_tokens:,} tokens ({num_files} files)")
    print(f"- Limit: {max_tokens:,} tokens")
    print(
        f"- Available: {available_tokens:,} tokens ({int((available_tokens/max_tokens)*100)}%)\n"
    )

    if total_tokens > max_tokens:
        raise ValueError(
            f"Input exceeds context limit!\n"
            f"  Input: {total_tokens:,} tokens\n"
            f"  Limit: {max_tokens:,} tokens\n"
            f"  Overage: {total_tokens - max_tokens:,} tokens\n\n"
            f"Suggestions:\n"
            f"1. Reduce number of files (currently {num_files})\n"
            f"2. Use a model with larger context\n"
            f"3. Shorten the prompt"
        )

    if total_tokens > available_tokens:
        print(f"⚠️ WARNING: Using {int((total_tokens/max_tokens)*100)}% of context")
        print("   Consider reducing input size for better response quality\n")

    return True


def handle_invocation(args: argparse.Namespace) -> int:
    """Handle main invocation command"""

    # Determine base URL: --base-url flag > OPENAI_BASE_URL env var > None
    base_url = args.base_url
    if not base_url:
        base_url = config.get_base_url()
        if base_url:
            print(f"Using base URL from OPENAI_BASE_URL: {base_url}")

    # Initialize components
    session_mgr = SessionManager()
    client = LiteLLMClient(base_url=base_url, api_key=args.api_key)

    # Process files using FileHandler
    file_handler = FileHandler()
    processed_files = []
    multimodal_content = None

    if args.files:
        processed_files, file_errors = file_handler.process_files(args.files)
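        # process_files returns the successfully processed files plus a list of
        # per-file errors, so every problem can be reported in one pass below.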

        # If any files failed, report errors and exit
        if file_errors:
            print("\nERROR: Some files could not be processed:", file=sys.stderr)
            for err in file_errors:
                print(f"  - {err.path}: {err.reason}", file=sys.stderr)
            print(
                "\nPlease fix or remove the problematic files and try again.",
                file=sys.stderr,
            )
            return 1

        # Validate vision support if images present
        if has_images(processed_files):
            validate_vision_support(args.model, has_images=True)

        # Print file processing summary
        text_count = sum(1 for f in processed_files if f.category.value == "text")
        office_count = sum(1 for f in processed_files if f.category.value == "office")
        image_count = sum(1 for f in processed_files if f.category.value == "image")

        print("\nFile Processing Summary:")
        print(f"  - Text files: {text_count}")
        print(f"  - Office documents (converted): {office_count}")
        print(f"  - Images: {image_count}")

    # Log model being used
    print(f"Using model: {args.model}")

    # Validate environment variables (only if no custom base URL)
    if not base_url:
        env_status = client.validate_environment(args.model)
        if not env_status.get("keys_in_environment", False):
            missing = env_status.get("missing_keys", [])
            error = env_status.get("error", "")

            print(
                f"\n❌ ERROR: Missing required environment variables for model '{args.model}'",
                file=sys.stderr,
            )
            print(f"\nMissing keys: {', '.join(missing)}", file=sys.stderr)

            if error:
                print(f"\nDetails: {error}", file=sys.stderr)

            print("\n💡 To fix this:", file=sys.stderr)
            print("  1. Set the required environment variable(s):", file=sys.stderr)
            for key in missing:
                print(f"     export {key}=your-api-key", file=sys.stderr)
            print(
                "  2. Or use --base-url to specify a custom LiteLLM endpoint",
                file=sys.stderr,
            )
            print(
                "  3. Or use --model to specify a different model\n", file=sys.stderr
            )

            return 1

    # Build full prompt with reference files section
    full_prompt = build_prompt_with_references(args.prompt, processed_files)

    # Build multimodal content if we have images
    if has_images(processed_files):
        multimodal_content = build_multimodal_content(full_prompt, processed_files)

    # Check context limits on the full prompt
    try:
        validate_context_size(full_prompt, args.model, client, len(processed_files))
    except ValueError as e:
        print(f"ERROR: {e}", file=sys.stderr)
        return 1

    # Create and start session
    session_id = session_mgr.create_session(
        slug=args.slug,
        prompt=full_prompt,
        model=args.model,
        base_url=base_url,
        api_key=args.api_key,
        reasoning_effort=args.reasoning_effort,
        multimodal_content=multimodal_content,
    )

    print(f"Session created: {session_id}")
    print(f"Reattach via: python3 {__file__} session {args.slug}")
    print("Waiting for completion...")
    try:
        result = session_mgr.wait_for_completion(session_id)

        if result.get("status") == "completed":
            print("\n" + "=" * 80)
            print("RESPONSE:")
            print("=" * 80)
            print(result.get("output", "No output available"))
            print("=" * 80)

            # Print metadata section (model, reasoning effort, tokens, cost)
            print("\n" + "=" * 80)
            print("METADATA:")
            print("=" * 80)

            # Model info
            print(f"model: {result.get('model', args.model)}")
            print(
                f"reasoning_effort: {result.get('reasoning_effort', args.reasoning_effort)}"
            )

            # Token usage and cost
            usage = result.get("usage")
            cost_info = result.get("cost_info")
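
            # Prefer cost_info (token counts plus computed USD costs) when the
            # session recorded it; otherwise fall back to the raw usage object.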
            if cost_info:
                print(f"input_tokens: {cost_info.get('input_tokens', 0)}")
                print(f"output_tokens: {cost_info.get('output_tokens', 0)}")
                print(
                    f"total_tokens: {cost_info.get('input_tokens', 0) + cost_info.get('output_tokens', 0)}"
                )
                print(f"input_cost_usd: {cost_info.get('input_cost', 0):.6f}")
                print(f"output_cost_usd: {cost_info.get('output_cost', 0):.6f}")
                print(f"total_cost_usd: {cost_info.get('total_cost', 0):.6f}")
            elif usage:
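                # Provider responses name these fields differently: OpenAI-style
                # usage reports prompt_tokens/completion_tokens, while
                # Anthropic-style usage reports input_tokens/output_tokens.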
                input_tokens = usage.get("prompt_tokens") or usage.get(
                    "input_tokens", 0
                )
                output_tokens = usage.get("completion_tokens") or usage.get(
                    "output_tokens", 0
                )
                print(f"input_tokens: {input_tokens}")
                print(f"output_tokens: {output_tokens}")
                print(f"total_tokens: {input_tokens + output_tokens}")

            print("=" * 80)

            return 0
        else:
            print(f"\nSession ended with status: {result.get('status')}")
            if "error" in result:
                print(f"Error: {result['error']}")
            return 1

    except TimeoutError as e:
        print(f"\nERROR: {e}", file=sys.stderr)
        return 1


def handle_session_status(args: argparse.Namespace) -> int:
    """Handle session status check"""

    session_mgr = SessionManager()
    status = session_mgr.get_session_status(args.slug)

    if "error" in status and "No session found" in status["error"]:
        print(f"ERROR: {status['error']}", file=sys.stderr)
        return 1

    # Pretty print status
    print(json.dumps(status, indent=2))
    return 0


def handle_list_sessions(args: argparse.Namespace) -> int:
    """Handle list sessions command"""

    session_mgr = SessionManager()
    sessions = session_mgr.list_sessions()

    if not sessions:
        print("No sessions found.")
        return 0

    print(f"\nFound {len(sessions)} session(s):\n")
    for s in sessions:
        status_icon = {
            "running": "🔄",
            "completed": "✅",
            "error": "❌",
            "calling_llm": "📞",
        }.get(s.get("status", ""), "❓")

        print(
            f"{status_icon} {s.get('slug', 'unknown')} - {s.get('status', 'unknown')}"
        )
        print(f"   Created: {s.get('created_at', 'unknown')}")
        print(f"   Model: {s.get('model', 'unknown')}")
        if s.get("error"):
            print(f"   Error: {s['error'][:100]}...")
        print()

    return 0


def handle_list_models(args: argparse.Namespace) -> int:
    """Handle list models command"""

    # Determine base URL: --base-url flag > OPENAI_BASE_URL env var > None
    base_url = args.base_url
    if not base_url:
        base_url = config.get_base_url()
        if base_url:
            print(f"Using base URL from OPENAI_BASE_URL: {base_url}")

    LiteLLMClient(base_url=base_url)
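    # With a base URL set, ModelSelector queries the proxy's /v1/models
    # endpoint; otherwise it falls back to LiteLLM's known-model registry
    # (see the "models" subcommand description below).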
    models = ModelSelector.list_models(base_url)

    print(json.dumps(models, indent=2))
    return 0


def main() -> int:
    parser = argparse.ArgumentParser(
        description="""
Consultant CLI - LiteLLM-powered LLM consultation tool

This CLI tool lets you consult powerful LLMs for code analysis, reviews,
architectural decisions, and complex technical questions. It supports
100+ LLM providers via LiteLLM with custom base URLs.

CORE WORKFLOW:
  1. Provide a prompt describing your analysis task
  2. Attach relevant files for context
  3. The CLI sends everything to the LLM and waits for completion
  4. Results are printed with full metadata (model, tokens, cost)

OUTPUT FORMAT:
  The CLI prints structured output with clear sections:
  - RESPONSE: The LLM's analysis/response
  - METADATA: Model used, reasoning effort, token counts, costs

ENVIRONMENT VARIABLES:
  LITELLM_API_KEY      Primary API key (checked first)
  OPENAI_API_KEY       OpenAI API key (fallback)
  ANTHROPIC_API_KEY    Anthropic API key (fallback)
  OPENAI_BASE_URL      Default base URL for a custom LiteLLM proxy
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
EXAMPLES:

  Basic consultation with prompt and files:
    %(prog)s -p "Review this code for bugs" -f src/main.py -s code-review

  Multiple files:
    %(prog)s -p "Analyze architecture" -f src/api.py -f src/db.py -f src/models.py -s arch-review

  Specify model explicitly:
    %(prog)s -p "Security audit" -f auth.py -s security -m claude-3-5-sonnet-20241022

  Use custom LiteLLM proxy:
    %(prog)s -p "Code review" -f app.py -s review --base-url http://localhost:8000

  Lower reasoning effort (faster, cheaper):
    %(prog)s -p "Quick check" -f code.py -s quick --reasoning-effort low

  Check session status:
    %(prog)s session my-review

  List all sessions:
    %(prog)s list

  List available models from proxy:
    %(prog)s models --base-url http://localhost:8000

SUBCOMMANDS:
  session <slug>    Check status of a session by its slug
  list              List all sessions with their status
  models            List available models (from proxy or known models)

For more information, see the consultant plugin documentation.
""",
    )

    # Subcommands
    subparsers = parser.add_subparsers(dest="command", help="Available subcommands")
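    # The consultation flags below live on the top-level parser, so running
    # the CLI without a subcommand falls through to handle_invocation() at
    # the bottom of this function.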

    # Main invocation arguments
    parser.add_argument(
        "-p",
        "--prompt",
        metavar="TEXT",
        help="""The analysis prompt to send to the LLM. This should describe
        what you want the model to analyze or review. The prompt will
        be combined with any attached files to form the full request.
        REQUIRED for main invocation.""",
    )
    parser.add_argument(
        "-f",
        "--file",
        action="append",
        dest="files",
        metavar="PATH",
        help="""File to attach for analysis. Can be specified multiple times
        to attach multiple files. Each file's contents will be included
        in the prompt sent to the LLM. Supports any text file format.
        Example: -f src/main.py -f src/utils.py -f README.md""",
    )
    parser.add_argument(
        "-s",
        "--slug",
        metavar="NAME",
        help="""Unique identifier for this session. Used to track and retrieve
        session results. Should be descriptive (e.g., "pr-review-123",
        "security-audit", "arch-analysis"). REQUIRED for main invocation.""",
    )
    parser.add_argument(
        "-m",
        "--model",
        metavar="MODEL_ID",
        default="gpt-5-pro",
        help="""Specific LLM model to use. Default: gpt-5-pro. Examples:
        "gpt-5.1", "claude-sonnet-4-5", "gemini/gemini-2.5-flash".
        Use the "models" subcommand to see available models.""",
    )
    parser.add_argument(
        "--base-url",
        metavar="URL",
        help="""Custom base URL for a LiteLLM proxy server (e.g., "http://localhost:8000").
        When set, all API calls go through this proxy, and the proxy's /v1/models
        endpoint is queried for available models. If not set, the CLI calls
        provider APIs directly based on the model prefix.""",
    )
    parser.add_argument(
        "--api-key",
        metavar="KEY",
        help="""API key for the LLM provider. If not provided, the CLI looks
        for keys in environment variables: LITELLM_API_KEY, OPENAI_API_KEY,
        or ANTHROPIC_API_KEY (in that order).""",
    )
    parser.add_argument(
        "--reasoning-effort",
        choices=["low", "medium", "high"],
        default="high",
        metavar="LEVEL",
        help="""Reasoning effort level for the LLM. Higher effort means more
        thorough analysis but is slower and more expensive. Choices: low,
        medium, high. Default: high. Use "low" for quick checks, "high"
        for thorough reviews.""",
    )

    # Session status subcommand
    session_parser = subparsers.add_parser(
        "session",
        help="Check the status of a session",
        description="""Check the current status of a consultation session.
        Returns JSON with session metadata, status, and output if completed.""",
    )
    session_parser.add_argument(
        "slug", help="Session slug/identifier to check (the value passed to -s/--slug)"
    )

    # List sessions subcommand
    subparsers.add_parser(
        "list",
        help="List all consultation sessions",
        description="""List all consultation sessions with their status.
        Shows session slug, status, creation time, model used, and any errors.""",
    )

    # List models subcommand
    models_parser = subparsers.add_parser(
        "models",
        help="List available LLM models",
        description="""List available LLM models. If --base-url is provided, queries
        the proxy's /v1/models endpoint. Otherwise, returns known models
        from LiteLLM's model registry.""",
    )
    models_parser.add_argument(
        "--base-url",
        metavar="URL",
        help="Base URL of the LiteLLM proxy to query for available models",
    )

    args = parser.parse_args()

    # Handle commands
    if args.command == "session":
        return handle_session_status(args)

    elif args.command == "list":
        return handle_list_sessions(args)

    elif args.command == "models":
        return handle_list_models(args)

    else:
        # Main invocation
        if not args.prompt or not args.slug:
            parser.print_help()
            print("\nERROR: --prompt and --slug are required", file=sys.stderr)
            return 1

        return handle_invocation(args)


if __name__ == "__main__":
    sys.exit(main())