commit 0d5e9e52d9e915dc491314ae68b12d74f27aa929 Author: Zhongwei Li Date: Sun Nov 30 09:06:44 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..2f9f3bf --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "multi-ai-consult", + "description": "Consult Gemini, Codex, Qwen, and OpenCode AIs in parallel and synthesize responses", + "version": "1.0.0", + "author": { + "name": "Walter Levan", + "github": "wjlevan" + }, + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..b24cae0 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# multi-ai-consult + +Consult Gemini, Codex, Qwen, and OpenCode AIs in parallel and synthesize responses diff --git a/commands/consult.md b/commands/consult.md new file mode 100644 index 0000000..959e3c5 --- /dev/null +++ b/commands/consult.md @@ -0,0 +1,588 @@ +--- +description: Consult Gemini, Codex, Qwen, and OpenCode AIs in parallel and synthesize responses +--- + +# Multi-AI Consultation + +Consult multiple AIs in parallel, then synthesize their responses into a unified answer. 
+ +## Configuration + +Models and timeouts can be overridden via environment variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `CONSULT_GEMINI_MODEL` | `gemini-3-pro-preview` | Gemini model | +| `CONSULT_CODEX_MODEL` | `gpt-5.1-codex-max` | Codex model | +| `CONSULT_QWEN_MODEL` | (CLI default) | Qwen model | +| `CONSULT_OPENCODE_MODEL` | `anthropic/claude-opus-4-5` | OpenCode model | +| `CONSULT_TIMEOUT` | `120` | Timeout in seconds per CLI | +| `CONSULT_MAX_RESPONSE_CHARS` | `20000` | Max chars per response (~5K tokens) | + +## Arguments + +Parse these from the query if present: +- `--only=gemini,codex` - Consult only specified AIs (comma-separated) +- `--exclude=opencode` - Exclude specified AIs (comma-separated) +- `--verbose` - Show individual AI responses before synthesis +- `--dry-run` - Show prepared prompt without executing + +## Instructions + +### Step 1: Verify Dependencies + +Check required CLIs and tools exist: + +```bash +# Platform detection and timeout command selection +# CRITICAL: macOS timeout behaves differently and may not inherit PATH +if [[ "$(uname)" == "Darwin" ]]; then + # macOS: prefer gtimeout from GNU coreutils (brew install coreutils) + if command -v gtimeout >/dev/null 2>&1; then + TIMEOUT_CMD="$(command -v gtimeout)" + elif command -v timeout >/dev/null 2>&1; then + TIMEOUT_CMD="$(command -v timeout)" + echo "Warning: Using macOS timeout - consider 'brew install coreutils' for gtimeout" + else + echo "Error: No timeout command found. 
Install with: brew install coreutils" + exit 11 + fi +else + # Linux: use standard timeout + TIMEOUT_CMD="$(command -v timeout)" +fi + +echo "Platform: $(uname), using timeout: $TIMEOUT_CMD" + +# Check jq for JSON parsing +command -v jq >/dev/null 2>&1 || { echo "Warning: jq not found, JSON parsing may fail"; } + +# Resolve FULL PATHS to CLI binaries +# CRITICAL: timeout subprocess may not inherit shell PATH +AVAILABLE_CLIS="" +GEMINI_BIN="" +CODEX_BIN="" +QWEN_BIN="" +OPENCODE_BIN="" + +if command -v gemini >/dev/null 2>&1; then + GEMINI_BIN="$(command -v gemini)" + AVAILABLE_CLIS="$AVAILABLE_CLIS gemini" + echo " Gemini: $GEMINI_BIN" +fi + +if command -v codex >/dev/null 2>&1; then + CODEX_BIN="$(command -v codex)" + AVAILABLE_CLIS="$AVAILABLE_CLIS codex" + echo " Codex: $CODEX_BIN" +fi + +if command -v qwen >/dev/null 2>&1; then + QWEN_BIN="$(command -v qwen)" + AVAILABLE_CLIS="$AVAILABLE_CLIS qwen" + echo " Qwen: $QWEN_BIN" +fi + +if command -v opencode >/dev/null 2>&1; then + OPENCODE_BIN="$(command -v opencode)" + AVAILABLE_CLIS="$AVAILABLE_CLIS opencode" + echo " OpenCode: $OPENCODE_BIN" +fi +``` + +If no AI CLIs are available, stop and provide installation guidance. Otherwise, proceed with available CLIs (graceful degradation). + +### CLI Command Reference + +| CLI | Command | Key Flags | +|-----|---------|-----------| +| Gemini | `gemini -m "$MODEL" -o json` | `-m`: model, `-o json`: JSON output | +| Codex | `codex exec --json -m "$MODEL" --skip-git-repo-check` | `--json`: NDJSON output | +| Qwen | `qwen -p "" -o json` | `-p ""`: stdin prompt, `-o json`: JSON output | +| OpenCode | `opencode run -m "$MODEL" --format json` | `--format json`: NDJSON events | + +### Step 2: Prepare the Prompt + +Construct a clear prompt: + +1. **Reword** the user's query for clarity +2. **Include file contents** if files are being discussed (read them first) +3. **Add context** about what kind of response is expected +4. 
**Validate input** - ensure no sensitive data (API keys, passwords) is included + +Store in a variable using proper quoting: + +```bash +PROMPT="Your prepared prompt here" + +# For dry-run mode, display and exit +if [ "$DRY_RUN" = "true" ]; then + echo "=== DRY RUN - Prompt ===" + printf '%s\n' "$PROMPT" + exit 0 +fi +``` + +### Step 3: Execute All in Parallel + +Create temp files and run CLIs with timeout. **Important:** Do NOT wrap commands in subshells `( )` - this breaks job control. + +```bash +# Configuration with defaults +TIMEOUT="${CONSULT_TIMEOUT:-120}" +MAX_RESPONSE_CHARS="${CONSULT_MAX_RESPONSE_CHARS:-20000}" +GEMINI_MODEL="${CONSULT_GEMINI_MODEL:-gemini-3-pro-preview}" +CODEX_MODEL="${CONSULT_CODEX_MODEL:-gpt-5.1-codex-max}" +OPENCODE_MODEL="${CONSULT_OPENCODE_MODEL:-anthropic/claude-opus-4-5}" + +# Create temp files (use mktemp for safety) +GEMINI_OUT=$(mktemp /tmp/consult_gemini.XXXXXX) +GEMINI_ERR=$(mktemp /tmp/consult_gemini_err.XXXXXX) +CODEX_OUT=$(mktemp /tmp/consult_codex.XXXXXX) +CODEX_ERR=$(mktemp /tmp/consult_codex_err.XXXXXX) +QWEN_OUT=$(mktemp /tmp/consult_qwen.XXXXXX) +QWEN_ERR=$(mktemp /tmp/consult_qwen_err.XXXXXX) +OPENCODE_OUT=$(mktemp /tmp/consult_opencode.XXXXXX) +OPENCODE_ERR=$(mktemp /tmp/consult_opencode_err.XXXXXX) + +# Cleanup trap - runs on exit, interrupt, or termination +cleanup() { + rm -f "$GEMINI_OUT" "$GEMINI_ERR" "$CODEX_OUT" "$CODEX_ERR" \ + "$QWEN_OUT" "$QWEN_ERR" "$OPENCODE_OUT" "$OPENCODE_ERR" + # Kill any remaining background jobs + jobs -p | xargs -r kill 2>/dev/null +} +trap cleanup EXIT INT TERM + +echo "Starting parallel consultation..." +echo " Timeout: ${TIMEOUT}s per CLI" +echo " CLIs:$AVAILABLE_CLIS" + +# Run in parallel using FULL PATHS and $TIMEOUT_CMD +# CRITICAL: Use full paths resolved in Step 1 to avoid PATH inheritance issues +# Use printf for safe prompt handling (handles special chars, -n, etc.) 
+ +[ -n "$GEMINI_BIN" ] && { + printf '%s' "$PROMPT" | "$TIMEOUT_CMD" "$TIMEOUT" "$GEMINI_BIN" -m "$GEMINI_MODEL" -o json > "$GEMINI_OUT" 2>"$GEMINI_ERR" & + GEMINI_PID=$! +} + +[ -n "$CODEX_BIN" ] && { + printf '%s' "$PROMPT" | "$TIMEOUT_CMD" "$TIMEOUT" "$CODEX_BIN" exec --json -m "$CODEX_MODEL" --skip-git-repo-check > "$CODEX_OUT" 2>"$CODEX_ERR" & + CODEX_PID=$! +} + +[ -n "$QWEN_BIN" ] && { + printf '%s' "$PROMPT" | "$TIMEOUT_CMD" "$TIMEOUT" "$QWEN_BIN" -p "" -o json > "$QWEN_OUT" 2>"$QWEN_ERR" & + QWEN_PID=$! +} + +[ -n "$OPENCODE_BIN" ] && { + printf '%s' "$PROMPT" | "$TIMEOUT_CMD" "$TIMEOUT" "$OPENCODE_BIN" run -m "$OPENCODE_MODEL" --format json > "$OPENCODE_OUT" 2>"$OPENCODE_ERR" & + OPENCODE_PID=$! +} + +echo "All CLIs launched. Waiting for responses..." + +# Wait for all and capture exit codes +[ -n "$GEMINI_PID" ] && { wait $GEMINI_PID 2>/dev/null; GEMINI_EXIT=$?; } || GEMINI_EXIT=127 +[ -n "$CODEX_PID" ] && { wait $CODEX_PID 2>/dev/null; CODEX_EXIT=$?; } || CODEX_EXIT=127 +[ -n "$QWEN_PID" ] && { wait $QWEN_PID 2>/dev/null; QWEN_EXIT=$?; } || QWEN_EXIT=127 +[ -n "$OPENCODE_PID" ] && { wait $OPENCODE_PID 2>/dev/null; OPENCODE_EXIT=$?; } || OPENCODE_EXIT=127 + +echo "All CLIs completed. Processing responses..." 
+echo "Exit codes: Gemini=$GEMINI_EXIT Codex=$CODEX_EXIT Qwen=$QWEN_EXIT OpenCode=$OPENCODE_EXIT" +``` + +### Step 3.5: Progress Monitoring (Optional) + +For long consultations, provide periodic status updates to show activity during the 2+ minutes of parallel execution: + +```bash +# Optional: Monitor progress every 15 seconds +monitor_progress() { + local start_time=$(date +%s) + while true; do + sleep 15 + local elapsed=$(( $(date +%s) - start_time )) + + local status="" + [ -s "$GEMINI_OUT" ] && status="${status} Gemini:$(wc -c < "$GEMINI_OUT" | tr -d ' ')b" + [ -s "$CODEX_OUT" ] && status="${status} Codex:$(wc -c < "$CODEX_OUT" | tr -d ' ')b" + [ -s "$QWEN_OUT" ] && status="${status} Qwen:$(wc -c < "$QWEN_OUT" | tr -d ' ')b" + [ -s "$OPENCODE_OUT" ] && status="${status} OpenCode:$(wc -c < "$OPENCODE_OUT" | tr -d ' ')b" + + echo "[${elapsed}s] Progress:${status:-" waiting..."}" + + # Check if all background jobs completed + if ! jobs -r 2>/dev/null | grep -q .; then + break + fi + done +} + +# Start monitor in background before waiting +monitor_progress & +MONITOR_PID=$! + +# ... (wait commands from Step 3) ... + +# Stop monitor after all CLIs complete +kill $MONITOR_PID 2>/dev/null +``` + +**Simpler alternative:** Just use the status messages already added: +- "Starting parallel consultation..." (before launch) +- "All CLIs launched. Waiting for responses..." (after launch) +- "All CLIs completed. Processing responses..." 
(after wait) + +### Step 4: Classify Errors and Retry with Backoff + +Classify error types and retry failures with exponential backoff (NOT parallel): + +| Exit Code | Meaning | Action | +|-----------|---------|--------| +| 0 | Success | Continue | +| 124 | Timeout (GNU) | Retry with +60s timeout | +| 127 | Command not found | Check PATH resolution - likely a setup issue | +| 1-2 | General error | Check stderr AND stdout, retry once | +| Other | Unknown | Check stderr AND stdout, retry once | + +Check **BOTH stderr AND stdout** for error patterns (some CLIs embed errors in JSON): + +| Location | Pattern | Error Type | Retry? | +|----------|---------|------------|--------| +| stderr | `401`, `403`, `invalid.*key`, `unauthorized` | Auth failure | No - fix credentials | +| stdout JSON | `"error":`, `"FatalToolExecutionError"` | API/tool error | No - check CLI setup | +| stdout JSON | `"type":"error"` | Structured error event | Check message, maybe retry | +| stderr | `429`, `rate.*limit`, `quota` | Rate limit | Yes - with backoff | +| stderr | `timeout`, `timed out` | Timeout | Yes - increase timeout | +| stderr | `connection`, `network`, `ENOTFOUND` | Network error | Yes - with backoff | + +```bash +# Enhanced error classification function +classify_error() { + local exit_code="$1" err_file="$2" out_file="$3" + + # Exit 127 = command not found (PATH issue from Step 1) + if [ "$exit_code" -eq 127 ]; then + echo "PATH_ERROR" + return + fi + + # Exit 124 = GNU timeout expired + if [ "$exit_code" -eq 124 ]; then + echo "TIMEOUT" + return + fi + + # Check stderr for auth errors + if grep -qiE '401|403|invalid.*key|unauthorized' "$err_file" 2>/dev/null; then + echo "AUTH_ERROR" + return + fi + + # Check stdout JSON for embedded errors (Qwen, Gemini tool errors) + if grep -qiE '"error":|FatalToolExecutionError|"type":"error"' "$out_file" 2>/dev/null; then + echo "API_ERROR" + return + fi + + # Rate limit + if grep -qiE '429|rate.*limit|quota' "$err_file" 2>/dev/null; then + 
echo "RATE_LIMIT" + return + fi + + # Network errors + if grep -qiE 'connection|network|ENOTFOUND|ETIMEDOUT' "$err_file" 2>/dev/null; then + echo "NETWORK_ERROR" + return + fi + + echo "UNKNOWN_ERROR" +} + +retry_with_backoff() { + local cli="$1" exit_code="$2" err_file="$3" out_file="$4" + local error_type=$(classify_error "$exit_code" "$err_file" "$out_file") + + echo " $cli: $error_type (exit $exit_code)" + + # Don't retry auth or API errors + case "$error_type" in + AUTH_ERROR|API_ERROR|PATH_ERROR) + echo " $cli: Not retrying $error_type" + return 1 + ;; + esac + + # Backoff delay: 2 seconds for first retry + echo " $cli: Retrying in 2s..." + sleep 2 + + # Retry the specific CLI using full paths + local extra_timeout=60 + [ "$error_type" = "TIMEOUT" ] && extra_timeout=90 + + case "$cli" in + gemini) + printf '%s' "$PROMPT" | "$TIMEOUT_CMD" "$((TIMEOUT + extra_timeout))" "$GEMINI_BIN" -m "$GEMINI_MODEL" -o json > "$out_file" 2>"$err_file" + ;; + codex) + printf '%s' "$PROMPT" | "$TIMEOUT_CMD" "$((TIMEOUT + extra_timeout))" "$CODEX_BIN" exec --json -m "$CODEX_MODEL" --skip-git-repo-check > "$out_file" 2>"$err_file" + ;; + qwen) + printf '%s' "$PROMPT" | "$TIMEOUT_CMD" "$((TIMEOUT + extra_timeout))" "$QWEN_BIN" -p "" -o json > "$out_file" 2>"$err_file" + ;; + opencode) + printf '%s' "$PROMPT" | "$TIMEOUT_CMD" "$((TIMEOUT + extra_timeout))" "$OPENCODE_BIN" run -m "$OPENCODE_MODEL" --format json > "$out_file" 2>"$err_file" + ;; + esac +} + +# Retry failed CLIs sequentially with backoff +[ $GEMINI_EXIT -ne 0 ] && [ -n "$GEMINI_BIN" ] && retry_with_backoff gemini $GEMINI_EXIT "$GEMINI_ERR" "$GEMINI_OUT" && GEMINI_EXIT=$? +[ $CODEX_EXIT -ne 0 ] && [ -n "$CODEX_BIN" ] && retry_with_backoff codex $CODEX_EXIT "$CODEX_ERR" "$CODEX_OUT" && CODEX_EXIT=$? +[ $QWEN_EXIT -ne 0 ] && [ -n "$QWEN_BIN" ] && retry_with_backoff qwen $QWEN_EXIT "$QWEN_ERR" "$QWEN_OUT" && QWEN_EXIT=$? 
+[ $OPENCODE_EXIT -ne 0 ] && [ -n "$OPENCODE_BIN" ] && retry_with_backoff opencode $OPENCODE_EXIT "$OPENCODE_ERR" "$OPENCODE_OUT" && OPENCODE_EXIT=$? +``` + +### Step 5: Parse Results with jq + +Extract text responses using proper jq filters. Stderr is separate, so JSON parsing won't break. + +**IMPORTANT:** AI responses can be very large (100K+ chars). Truncate to `MAX_RESPONSE_CHARS` to avoid token limit issues when synthesizing. + +```bash +# Response truncation function - prevents token overflow +truncate_response() { + local response="$1" + local max_chars="${2:-$MAX_RESPONSE_CHARS}" + local char_count="${#response}" + + if [ "$char_count" -gt "$max_chars" ]; then + # Keep first 70% and last 20% for context + local head_chars=$((max_chars * 70 / 100)) + local tail_chars=$((max_chars * 20 / 100)) + + local head_part="${response:0:$head_chars}" + local tail_part="${response: -$tail_chars}" + + echo "${head_part} + +... [TRUNCATED: ${char_count} chars total, showing first ${head_chars} and last ${tail_chars}] ... + +${tail_part}" + else + echo "$response" + fi +} + +# Gemini - check for errors first, then extract response +if grep -q '"error"' "$GEMINI_OUT" 2>/dev/null; then + GEMINI_RESPONSE="" +else + GEMINI_RESPONSE=$(jq -r '.response // .candidates[0].content.parts[0].text // empty' "$GEMINI_OUT" 2>/dev/null) +fi + +# Codex - NDJSON format with multiple item types +# NOTE: Codex outputs reasoning traces, NOT agent_message. Try multiple extraction patterns. +# Pattern 1: Look for message items with content array containing output_text +CODEX_RESPONSE=$(grep '"item.completed"' "$CODEX_OUT" 2>/dev/null | \ + jq -rs ' + [.[] | select(.item.type == "message" and .item.content)] | + last | + .item.content | + if type == "array" then + [.[] | select(.type == "output_text") | .text] | join("") + else + . 
+ end + ' 2>/dev/null) + +# Pattern 2: Fall back to reasoning summaries if no message found +if [ -z "$CODEX_RESPONSE" ]; then + CODEX_RESPONSE=$(grep '"item.completed"' "$CODEX_OUT" 2>/dev/null | \ + jq -rs '[.[] | select(.item.type == "reasoning") | .item.text // .item.summary // empty] | join("\n\n")' 2>/dev/null) +fi + +# Qwen - filter out error lines, then parse standard JSON +QWEN_RESPONSE=$(grep -v 'FatalToolExecutionError' "$QWEN_OUT" 2>/dev/null | \ + jq -rs 'last | .response // empty' 2>/dev/null) + +# OpenCode - NDJSON, concatenate all text parts +OPENCODE_RESPONSE=$(grep '"type":"text"' "$OPENCODE_OUT" 2>/dev/null | \ + jq -rs '[.[].part.text // .[].text // empty] | join("")' 2>/dev/null) + +# Fallback to raw output if jq parsing fails (truncated for safety) +[ -z "$GEMINI_RESPONSE" ] && [ -s "$GEMINI_OUT" ] && GEMINI_RESPONSE=$(head -c "$MAX_RESPONSE_CHARS" "$GEMINI_OUT") +[ -z "$CODEX_RESPONSE" ] && [ -s "$CODEX_OUT" ] && CODEX_RESPONSE=$(head -c "$MAX_RESPONSE_CHARS" "$CODEX_OUT") +[ -z "$QWEN_RESPONSE" ] && [ -s "$QWEN_OUT" ] && QWEN_RESPONSE=$(head -c "$MAX_RESPONSE_CHARS" "$QWEN_OUT") +[ -z "$OPENCODE_RESPONSE" ] && [ -s "$OPENCODE_OUT" ] && OPENCODE_RESPONSE=$(head -c "$MAX_RESPONSE_CHARS" "$OPENCODE_OUT") + +# Apply truncation to prevent token overflow during synthesis +GEMINI_RESPONSE=$(truncate_response "$GEMINI_RESPONSE") +CODEX_RESPONSE=$(truncate_response "$CODEX_RESPONSE") +QWEN_RESPONSE=$(truncate_response "$QWEN_RESPONSE") +OPENCODE_RESPONSE=$(truncate_response "$OPENCODE_RESPONSE") + +# CRITICAL: Write to SEPARATE files to avoid 25K token limit +# Claude can read each file individually during synthesis +echo "$GEMINI_RESPONSE" > /tmp/consult_gemini_response.txt +echo "$CODEX_RESPONSE" > /tmp/consult_codex_response.txt +echo "$QWEN_RESPONSE" > /tmp/consult_qwen_response.txt +echo "$OPENCODE_RESPONSE" > /tmp/consult_opencode_response.txt + +# Report file sizes for verification +echo "Response sizes (chars):" +echo " Gemini: 
${#GEMINI_RESPONSE}" +echo " Codex: ${#CODEX_RESPONSE}" +echo " Qwen: ${#QWEN_RESPONSE}" +echo " OpenCode: ${#OPENCODE_RESPONSE}" +echo " TOTAL: $((${#GEMINI_RESPONSE} + ${#CODEX_RESPONSE} + ${#QWEN_RESPONSE} + ${#OPENCODE_RESPONSE}))" +``` + +**IMPORTANT - Token Limit Prevention:** + +To avoid the 25K token limit when reading responses: + +1. **NEVER write a combined file** - Don't concatenate all responses into one file +2. **Read each response file separately** during synthesis: + - `Read(/tmp/consult_gemini_response.txt)` + - `Read(/tmp/consult_codex_response.txt)` + - `Read(/tmp/consult_qwen_response.txt)` + - `Read(/tmp/consult_opencode_response.txt)` +3. **Max safe combined size**: ~80K chars (4 × 20K) ≈ 20K tokens, well under 25K limit +4. **If responses are still too large**: Reduce `CONSULT_MAX_RESPONSE_CHARS` to 15000 + +### Step 6: Report Status + +Show which AIs responded: + +```bash +echo "---" +echo "Consultation Results:" +[ -n "$GEMINI_RESPONSE" ] && echo " Gemini: ✓" || echo " Gemini: ✗ ($(head -1 "$GEMINI_ERR" 2>/dev/null || echo 'no response'))" +[ -n "$CODEX_RESPONSE" ] && echo " Codex: ✓" || echo " Codex: ✗ ($(head -1 "$CODEX_ERR" 2>/dev/null || echo 'no response'))" +[ -n "$QWEN_RESPONSE" ] && echo " Qwen: ✓" || echo " Qwen: ✗ ($(head -1 "$QWEN_ERR" 2>/dev/null || echo 'no response'))" +[ -n "$OPENCODE_RESPONSE" ] && echo " OpenCode: ✓" || echo " OpenCode: ✗ ($(head -1 "$OPENCODE_ERR" 2>/dev/null || echo 'no response'))" + +# Count successful responses +SUCCESS_COUNT=0 +[ -n "$GEMINI_RESPONSE" ] && SUCCESS_COUNT=$((SUCCESS_COUNT + 1)) +[ -n "$CODEX_RESPONSE" ] && SUCCESS_COUNT=$((SUCCESS_COUNT + 1)) +[ -n "$QWEN_RESPONSE" ] && SUCCESS_COUNT=$((SUCCESS_COUNT + 1)) +[ -n "$OPENCODE_RESPONSE" ] && SUCCESS_COUNT=$((SUCCESS_COUNT + 1)) + +echo " Consensus basis: $SUCCESS_COUNT/4 AIs responded" +echo "---" +``` + +### Step 7: Synthesize Response + +If `--verbose` was specified, show individual responses first. 
+ +#### Tiered Synthesis Strategy + +Choose synthesis approach based on **total response size**: + +```bash +# Calculate total response size +TOTAL_CHARS=$((${#GEMINI_RESPONSE} + ${#CODEX_RESPONSE} + ${#QWEN_RESPONSE} + ${#OPENCODE_RESPONSE})) +echo "Total response size: $TOTAL_CHARS chars" +``` + +| Tier | Total Size | Strategy | +|------|-----------|----------| +| **Small** | < 30K chars | Read all responses directly, synthesize in one pass | +| **Medium** | 30K-80K chars | Summarize each response first, then synthesize summaries | +| **Large** | > 80K chars | Process incrementally: read → summarize → integrate one at a time | + +**Tier 1 (Small):** Direct synthesis - read all and combine. + +**Tier 2 (Medium):** First summarize each response: + +``` +For each AI response, extract: +1. Main recommendation/answer (1-2 sentences) +2. Key reasoning/evidence (2-3 bullet points) +3. Notable caveats or alternatives +4. Unique insights not mentioned by others +``` + +**Tier 3 (Large):** Incremental synthesis: +1. Read + summarize Gemini response +2. Read Codex, compare to Gemini summary, update synthesis +3. Read Qwen, integrate new points +4. 
Read OpenCode, finalize synthesis + +#### Synthesis Guidelines + +Combine all perspectives into a **single unified response**: + +- Do NOT show separate "According to Gemini/Codex/Qwen/OpenCode" sections +- Integrate the best insights from all responding AIs into one cohesive answer +- If they agree, present the consensus with high confidence +- If they disagree, synthesize a balanced view incorporating all perspectives +- Present as your own synthesized analysis +- **Deduplicate**: AI responses often overlap - don't repeat the same point multiple times + +**Confidence indicator** (optional footer): +- **High** (4/4 or 3/3 agree on core points) +- **Medium** (3/4 agree, or 2/2 with partial overlap) +- **Low** (significant disagreement or only 1-2 responses) + +### Step 8: Cleanup + +Cleanup happens automatically via the trap, but you can also explicitly call: + +```bash +cleanup +``` + +## Exit Codes + +Use distinct exit codes for scripting: + +| Code | Meaning | +|------|---------| +| 0 | Success - at least 2 AIs responded | +| 1 | Partial success - only 1 AI responded | +| 2 | All AIs failed - auth errors | +| 3 | All AIs failed - rate limit | +| 4 | All AIs failed - timeout | +| 5 | All AIs failed - other errors | +| 10 | No AI CLIs installed | +| 11 | Missing required dependency (jq) | + +## Error Handling Summary + +### CLI Installation +If a CLI is missing, continue with others. 
Provide installation guidance only when asked: +- **Gemini**: `npm install -g @google/gemini-cli` +- **Codex**: `npm install -g @openai/codex` +- **Qwen**: `npm install -g @qwen-code/qwen-code` +- **OpenCode**: See https://opencode.ai/docs/installation + +### Graceful Degradation +- **3/4 respond**: Full synthesis, note one was unavailable +- **2/4 respond**: Synthesis with reduced confidence +- **1/4 responds**: Report single response, offer Claude-only alternative +- **0/4 respond**: Report all errors, provide Claude-only response + +## Example + +User: `/consult How should I structure my React state management?` + +1. **Verify**: Check available CLIs and jq +2. **Prepare**: "What are best practices for structuring React state management? Consider scalability, maintainability, and performance." +3. **Execute**: Run available CLIs in parallel with 120s timeout +4. **Retry**: Any failures get one retry with 2s backoff +5. **Parse**: Extract responses using jq: + - Gemini: `.response` + - Codex: NDJSON `output_text` from completed `message` items (Codex does not emit `agent_message`; see Step 5) + - Qwen: `.response` + - OpenCode: NDJSON `.part.text` +6. **Report**: "Gemini ✓ | Codex ✓ | Qwen ✓ | OpenCode ✓ (4/4)" +7. **Synthesize**: "For React state management, consider a layered approach: local state for UI concerns, Context for shared app state, and a dedicated store (Redux/Zustand) for complex global state. Key principles include..." 
diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..048d870 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,45 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:wlevan3/claude-plugins:multi-ai-consult", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "20b05a091e399d4630a3fc1cadba09569b3a9703", + "treeHash": "be9d199d617753156f100dc2c6beb200aee3e0df3f2a277ecb716e0a9c2807e6", + "generatedAt": "2025-11-28T10:29:04.285270Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "multi-ai-consult", + "description": "Consult Gemini, Codex, Qwen, and OpenCode AIs in parallel and synthesize responses", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "d7baa9e23dfc7e0a6be2813e36a34d18a412d648db1b98476701c0ed4eecfb3b" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "17d7d32a9d3e76675bf89081b6ad3c3e0b570d5181eb0bac132e8c02ce749947" + }, + { + "path": "commands/consult.md", + "sha256": "915c85a560ee470232647ac4a8ca2bb737dafe3d9928e1bcf427769b07224b41" + } + ], + "dirSha256": "be9d199d617753156f100dc2c6beb200aee3e0df3f2a277ecb716e0a9c2807e6" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file