Initial commit

Zhongwei Li committed 2025-11-29 18:23:41 +08:00
Commit 016e36f3f3
20 changed files with 4365 additions and 0 deletions

consultant_cli.py

@@ -0,0 +1,501 @@
#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "litellm",
# "requests>=2.31.0",
# "tenacity",
# "markitdown>=0.1.0",
# ]
# ///
"""
Consultant CLI - LiteLLM-powered LLM consultation tool
Supports async invocation, custom base URLs, and flexible model selection
Run with: uv run consultant_cli.py [args]
This automatically installs/updates the declared dependencies (litellm, requests, tenacity, markitdown) on first run.
"""
import argparse
import json
import sys
from pathlib import Path

# Add scripts directory to path
SCRIPTS_DIR = Path(__file__).parent
sys.path.insert(0, str(SCRIPTS_DIR))

import config
from file_handler import (
    FileHandler,
    build_multimodal_content,
    build_prompt_with_references,
    has_images,
    validate_vision_support,
)
from litellm_client import LiteLLMClient
from model_selector import ModelSelector
from session_manager import SessionManager


def validate_context_size(
    full_prompt: str, model: str, client: LiteLLMClient, num_files: int
) -> bool:
    """
    Validate that the full prompt fits in the model's context window.
    Returns True if OK, raises ValueError if the limit is exceeded.
    """
    # Count tokens for the complete prompt
    total_tokens = client.count_tokens(full_prompt, model)
    # Get limit
    max_tokens = client.get_max_tokens(model)
    # Reserve for response
    available_tokens = int(max_tokens * (1 - config.CONTEXT_RESERVE_RATIO))
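    # Illustrative budget (assuming, for example, CONTEXT_RESERVE_RATIO = 0.2 in config):
    #   max_tokens = 128_000  ->  available_tokens = int(128_000 * 0.8) = 102_400,
    #   i.e. roughly 20% of the window stays reserved for the model's response.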
    # Print summary
    print("\n📊 Token Usage:")
    print(f"- Input: {total_tokens:,} tokens ({num_files} files)")
    print(f"- Limit: {max_tokens:,} tokens")
    print(
        f"- Available: {available_tokens:,} tokens ({int((available_tokens/max_tokens)*100)}%)\n"
    )

    if total_tokens > max_tokens:
        raise ValueError(
            f"Input exceeds context limit!\n"
            f" Input: {total_tokens:,} tokens\n"
            f" Limit: {max_tokens:,} tokens\n"
            f" Overage: {total_tokens - max_tokens:,} tokens\n\n"
            f"Suggestions:\n"
            f"1. Reduce number of files (currently {num_files})\n"
            f"2. Use a model with larger context\n"
            f"3. Shorten the prompt"
        )

    if total_tokens > available_tokens:
        print(f"⚠️ WARNING: Using {int((total_tokens/max_tokens)*100)}% of context")
        print(" Consider reducing input size for better response quality\n")

    return True


def handle_invocation(args: argparse.Namespace) -> int:
    """Handle main invocation command"""
    # Determine base URL: --base-url flag > OPENAI_BASE_URL env var > None
    base_url = args.base_url
    if not base_url:
        base_url = config.get_base_url()
        if base_url:
            print(f"Using base URL from OPENAI_BASE_URL: {base_url}")

    # Initialize components
    session_mgr = SessionManager()
    client = LiteLLMClient(base_url=base_url, api_key=args.api_key)

    # Process files using FileHandler
    file_handler = FileHandler()
    processed_files = []
    multimodal_content = None
    if args.files:
        processed_files, file_errors = file_handler.process_files(args.files)

        # If any files failed, report errors and exit
        if file_errors:
            print("\nERROR: Some files could not be processed:", file=sys.stderr)
            for err in file_errors:
                print(f" - {err.path}: {err.reason}", file=sys.stderr)
            print(
                "\nPlease fix or remove the problematic files and try again.",
                file=sys.stderr,
            )
            return 1

        # Validate vision support if images present
        if has_images(processed_files):
            validate_vision_support(args.model, has_images=True)

        # Print file processing summary
        text_count = sum(1 for f in processed_files if f.category.value == "text")
        office_count = sum(1 for f in processed_files if f.category.value == "office")
        image_count = sum(1 for f in processed_files if f.category.value == "image")
        print("\nFile Processing Summary:")
        print(f" - Text files: {text_count}")
        print(f" - Office documents (converted): {office_count}")
        print(f" - Images: {image_count}")

    # Log model being used
    print(f"Using model: {args.model}")

    # Validate environment variables (only if no custom base URL)
    if not base_url:
        env_status = client.validate_environment(args.model)
        if not env_status.get("keys_in_environment", False):
            missing = env_status.get("missing_keys", [])
            error = env_status.get("error", "")
            print(
                f"\n❌ ERROR: Missing required environment variables for model '{args.model}'",
                file=sys.stderr,
            )
            print(f"\nMissing keys: {', '.join(missing)}", file=sys.stderr)
            if error:
                print(f"\nDetails: {error}", file=sys.stderr)
            print("\n💡 To fix this:", file=sys.stderr)
            print(" 1. Set the required environment variable(s):", file=sys.stderr)
            for key in missing:
                print(f" export {key}=your-api-key", file=sys.stderr)
            print(
                " 2. Or use --base-url to specify a custom LiteLLM endpoint",
                file=sys.stderr,
            )
            print(
                " 3. Or use --model to specify a different model\n", file=sys.stderr
            )
            return 1

    # Build full prompt with reference files section
    full_prompt = build_prompt_with_references(args.prompt, processed_files)

    # Build multimodal content if we have images
    if has_images(processed_files):
        multimodal_content = build_multimodal_content(full_prompt, processed_files)

    # Check context limits on the full prompt
    try:
        validate_context_size(full_prompt, args.model, client, len(processed_files))
    except ValueError as e:
        print(f"ERROR: {e}", file=sys.stderr)
        return 1

    # Create and start session
    session_id = session_mgr.create_session(
        slug=args.slug,
        prompt=full_prompt,
        model=args.model,
        base_url=base_url,
        api_key=args.api_key,
        reasoning_effort=args.reasoning_effort,
        multimodal_content=multimodal_content,
    )

    print(f"Session created: {session_id}")
    print(f"Reattach via: python3 {__file__} session {args.slug}")
    print("Waiting for completion...")
    try:
        result = session_mgr.wait_for_completion(session_id)

        if result.get("status") == "completed":
            print("\n" + "=" * 80)
            print("RESPONSE:")
            print("=" * 80)
            print(result.get("output", "No output available"))
            print("=" * 80)

            # Print metadata section (model, reasoning effort, tokens, cost)
            print("\n" + "=" * 80)
            print("METADATA:")
            print("=" * 80)

            # Model info
            print(f"model: {result.get('model', args.model)}")
            print(
                f"reasoning_effort: {result.get('reasoning_effort', args.reasoning_effort)}"
            )

            # Token usage and cost
            usage = result.get("usage")
            cost_info = result.get("cost_info")
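            # Prefer cost_info when present: it includes USD cost estimates alongside
            # token counts. Otherwise fall back to the raw usage dict, whose field
            # names vary by provider (prompt/completion vs. input/output tokens).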
            if cost_info:
                print(f"input_tokens: {cost_info.get('input_tokens', 0)}")
                print(f"output_tokens: {cost_info.get('output_tokens', 0)}")
                print(
                    f"total_tokens: {cost_info.get('input_tokens', 0) + cost_info.get('output_tokens', 0)}"
                )
                print(f"input_cost_usd: {cost_info.get('input_cost', 0):.6f}")
                print(f"output_cost_usd: {cost_info.get('output_cost', 0):.6f}")
                print(f"total_cost_usd: {cost_info.get('total_cost', 0):.6f}")
            elif usage:
                input_tokens = usage.get("prompt_tokens") or usage.get(
                    "input_tokens", 0
                )
                output_tokens = usage.get("completion_tokens") or usage.get(
                    "output_tokens", 0
                )
                print(f"input_tokens: {input_tokens}")
                print(f"output_tokens: {output_tokens}")
                print(f"total_tokens: {input_tokens + output_tokens}")

            print("=" * 80)
            return 0
        else:
            print(f"\nSession ended with status: {result.get('status')}")
            if "error" in result:
                print(f"Error: {result['error']}")
            return 1
    except TimeoutError as e:
        print(f"\nERROR: {e}", file=sys.stderr)
        return 1


def handle_session_status(args: argparse.Namespace) -> int:
    """Handle session status check"""
    session_mgr = SessionManager()
    status = session_mgr.get_session_status(args.slug)

    if "error" in status and "No session found" in status["error"]:
        print(f"ERROR: {status['error']}", file=sys.stderr)
        return 1

    # Pretty print status
    print(json.dumps(status, indent=2))
    return 0


def handle_list_sessions(args: argparse.Namespace) -> int:
    """Handle list sessions command"""
    session_mgr = SessionManager()
    sessions = session_mgr.list_sessions()

    if not sessions:
        print("No sessions found.")
        return 0

    print(f"\nFound {len(sessions)} session(s):\n")
    for s in sessions:
        status_icon = {
            "running": "🔄",
            "completed": "✅",
            "error": "❌",
            "calling_llm": "📞",
        }.get(s.get("status", ""), "")
        print(
            f"{status_icon} {s.get('slug', 'unknown')} - {s.get('status', 'unknown')}"
        )
        print(f" Created: {s.get('created_at', 'unknown')}")
        print(f" Model: {s.get('model', 'unknown')}")
        if s.get("error"):
            print(f" Error: {s['error'][:100]}...")
        print()
    return 0


def handle_list_models(args: argparse.Namespace) -> int:
    """Handle list models command"""
    # Determine base URL: --base-url flag > OPENAI_BASE_URL env var > None
    base_url = args.base_url
    if not base_url:
        base_url = config.get_base_url()
        if base_url:
            print(f"Using base URL from OPENAI_BASE_URL: {base_url}")

    LiteLLMClient(base_url=base_url)
    models = ModelSelector.list_models(base_url)
    print(json.dumps(models, indent=2))
    return 0


def main() -> int:
    parser = argparse.ArgumentParser(
        description="""
Consultant CLI - LiteLLM-powered LLM consultation tool

This CLI tool allows you to consult powerful LLM models for code analysis,
reviews, architectural decisions, and complex technical questions. It supports
100+ LLM providers via LiteLLM with custom base URLs.

CORE WORKFLOW:
  1. Provide a prompt describing your analysis task
  2. Attach relevant files for context
  3. The CLI sends everything to the LLM and waits for completion
  4. Results are printed with full metadata (model, tokens, cost)

OUTPUT FORMAT:
  The CLI prints structured output with clear sections:
  - RESPONSE: The LLM's analysis/response
  - METADATA: Model used, reasoning effort, token counts, costs

ENVIRONMENT VARIABLES:
  LITELLM_API_KEY     Primary API key (checked first)
  OPENAI_API_KEY      OpenAI API key (fallback)
  ANTHROPIC_API_KEY   Anthropic API key (fallback)
  OPENAI_BASE_URL     Default base URL for custom LiteLLM proxy
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
EXAMPLES:
  Basic consultation with prompt and files:
    %(prog)s -p "Review this code for bugs" -f src/main.py -s code-review

  Multiple files:
    %(prog)s -p "Analyze architecture" -f src/api.py -f src/db.py -f src/models.py -s arch-review

  Specify model explicitly:
    %(prog)s -p "Security audit" -f auth.py -s security -m claude-3-5-sonnet-20241022

  Use custom LiteLLM proxy:
    %(prog)s -p "Code review" -f app.py -s review --base-url http://localhost:8000

  Lower reasoning effort (faster, cheaper):
    %(prog)s -p "Quick check" -f code.py -s quick --reasoning-effort low

  Check session status:
    %(prog)s session my-review

  List all sessions:
    %(prog)s list

  List available models from proxy:
    %(prog)s models --base-url http://localhost:8000

SUBCOMMANDS:
  session <slug>   Check status of a session by its slug
  list             List all sessions with their status
  models           List available models (from proxy or known models)

For more information, see the consultant plugin documentation.
""",
    )

    # Subcommands
    subparsers = parser.add_subparsers(dest="command", help="Available subcommands")
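    # NOTE: the main invocation flags (-p/-f/-s/-m, --base-url, ...) are defined on
    # the top-level parser; "session", "list" and "models" are separate subcommands.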

    # Main invocation arguments
    parser.add_argument(
        "-p",
        "--prompt",
        metavar="TEXT",
        help="""The analysis prompt to send to the LLM. This should describe
        what you want the model to analyze or review. The prompt will
        be combined with any attached files to form the full request.
        REQUIRED for main invocation.""",
    )
    parser.add_argument(
        "-f",
        "--file",
        action="append",
        dest="files",
        metavar="PATH",
        help="""File to attach for analysis. Can be specified multiple times
        to attach multiple files. Each file's contents will be included
        in the prompt sent to the LLM. Supports any text file format.
        Example: -f src/main.py -f src/utils.py -f README.md""",
    )
    parser.add_argument(
        "-s",
        "--slug",
        metavar="NAME",
        help="""Unique identifier for this session. Used to track and retrieve
        session results. Should be descriptive (e.g., "pr-review-123",
        "security-audit", "arch-analysis"). REQUIRED for main invocation.""",
    )
    parser.add_argument(
        "-m",
        "--model",
        metavar="MODEL_ID",
        default="gpt-5-pro",
        help="""Specific LLM model to use. Default: gpt-5-pro. Examples:
        "gpt-5.1", "claude-sonnet-4-5", "gemini/gemini-2.5-flash".
        Use the "models" subcommand to see available models.""",
    )
    parser.add_argument(
        "--base-url",
        metavar="URL",
        help="""Custom base URL for LiteLLM proxy server (e.g., "http://localhost:8000").
        When set, all API calls go through this proxy. The proxy's /v1/models
        endpoint will be queried for available models. If not set, uses
        direct provider APIs based on the model prefix.""",
    )
    parser.add_argument(
        "--api-key",
        metavar="KEY",
        help="""API key for the LLM provider. If not provided, the CLI will look
        for keys in environment variables: LITELLM_API_KEY, OPENAI_API_KEY,
        or ANTHROPIC_API_KEY (in that order).""",
    )
    parser.add_argument(
        "--reasoning-effort",
        choices=["low", "medium", "high"],
        default="high",
        metavar="LEVEL",
        help="""Reasoning effort level for the LLM. Higher effort = more thorough
        analysis but slower and more expensive. Choices: low, medium, high.
        Default: high. Use "low" for quick checks, "high" for thorough reviews.""",
    )

    # Session status subcommand
    session_parser = subparsers.add_parser(
        "session",
        help="Check the status of a session",
        description="""Check the current status of a consultation session.
        Returns JSON with session metadata, status, and output if completed.""",
    )
    session_parser.add_argument(
        "slug", help="Session slug/identifier to check (the value passed to -s/--slug)"
    )

    # List sessions subcommand
    subparsers.add_parser(
        "list",
        help="List all consultation sessions",
        description="""List all consultation sessions with their status.
        Shows session slug, status, creation time, model used, and any errors.""",
    )

    # List models subcommand
    models_parser = subparsers.add_parser(
        "models",
        help="List available LLM models",
        description="""List available LLM models. If --base-url is provided, queries
        the proxy's /v1/models endpoint. Otherwise, returns known models
        from LiteLLM's model registry.""",
    )
    models_parser.add_argument(
        "--base-url",
        metavar="URL",
        help="Base URL of LiteLLM proxy to query for available models",
    )

    args = parser.parse_args()

    # Handle commands
    if args.command == "session":
        return handle_session_status(args)
    elif args.command == "list":
        return handle_list_sessions(args)
    elif args.command == "models":
        return handle_list_models(args)
    else:
        # Main invocation
        if not args.prompt or not args.slug:
            parser.print_help()
            print("\nERROR: --prompt and --slug are required", file=sys.stderr)
            return 1
        return handle_invocation(args)


if __name__ == "__main__":
    sys.exit(main())