#!/usr/bin/env bash
# Purpose: State machine for template-selector agent with integrated classification
# Inputs: XML passed as the first command-line argument, containing user_task,
#         suggested_template (optional), confidence (optional)
# Outputs: Instructions with classification result (stdout); errors on stderr
# Architecture: Incorporates template-selector.sh logic directly (no external bash calls needed)
#
# NOTE(review): the copy of this file that was reviewed had its line breaks
# collapsed and every angle-bracketed span removed. The placeholder inside the
# usage messages and most of the heredoc text in generate_instructions were
# lost; only the surviving text is reproduced below. Restore the full prompt
# text from version control before relying on the generated instructions.

set -euo pipefail

# Source common functions. This script calls setup_plugin_root,
# require_xml_field, optional_xml_field and sanitize_input, none of which are
# defined in this file.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=/dev/null
. "$SCRIPT_DIR/../../scripts/common.sh"

# Setup plugin root (presumably defines CLAUDE_PLUGIN_ROOT - confirm in common.sh)
setup_plugin_root
TEMPLATE_DIR="${CLAUDE_PLUGIN_ROOT}/templates"

# ============================================================================
# Confidence Threshold Configuration
# ============================================================================
#
# These values control template classification confidence scoring:
#
# HIGH_CONFIDENCE_MIN (70%):
#   - At or above this the keyword classification is accepted directly
#     ("high" scenario).
#
# BORDERLINE_MIN (60%):
#   - Minimum confidence for accepting keyword-based classification
#   - From here up to HIGH_CONFIDENCE_MIN: "borderline" scenario
#   - Below this: the result is reset to "custom" with confidence 0 and the
#     "weak" (full evaluation) scenario is used
#   - Rationale: uncertain cases are left to the LLM
#
# STRONG_INDICATOR_BASE (75%):
#   - Base confidence when a strong indicator keyword is found
#   - Examples: "refactor", "compare", "test", "review"
#   - Rationale: Strong indicators reliably predict template choice
#
# SUPPORTING_KEYWORD_BONUS (8% per keyword):
#   - Additional confidence for each supporting keyword (max 100%)
#   - Examples: "code", "module", "coverage", etc.
#   - Rationale: Multiple related keywords increase confidence
#
# Confidence levels for supporting keywords only (no strong indicator):
#   - ONE_KEYWORD_CONFIDENCE (35%): One supporting keyword found
#   - TWO_KEYWORD_CONFIDENCE (60%): Two supporting keywords found
#   - THREE_KEYWORD_CONFIDENCE (75%): Three+ supporting keywords found
#
# Example scoring:
#   "Refactor the authentication module" = 75% (strong) + 8% ("module") = 83%
#   "Need more jest coverage" = 60% (two test keywords: jest, coverage)
# ============================================================================
HIGH_CONFIDENCE_MIN=70
BORDERLINE_MIN=60
STRONG_INDICATOR_BASE=75
SUPPORTING_KEYWORD_BONUS=8
ONE_KEYWORD_CONFIDENCE=35
TWO_KEYWORD_CONFIDENCE=60
THREE_KEYWORD_CONFIDENCE=75

# Regex patterns for strong indicators (extended regex alternations)
PATTERN_CODE="refactor|codebase|implement|fix|update|modify|create|build|reorganize|improve|enhance|transform|optimize|rework|revamp|refine|polish|modernize|clean|streamline"
PATTERN_COMPARISON="compare|classify|check|same|different|verify|determine|match|matches|equivalent|equals?|similar|duplicate|identical"
PATTERN_TEST="tests?|spec|testing|unittest"
PATTERN_REVIEW="review|feedback|critique|analyze|assess|evaluate|examine|inspect|scrutinize|audit|scan|survey|vet|investigate|appraise"
PATTERN_DOCUMENTATION="documentation|readme|docstring|docs|document|write.*(comment|docstring|documentation|guide|instruction)|author.*(documentation|instruction|guide)"
PATTERN_EXTRACTION="extract|parse|pull|retrieve|mine|harvest|collect|scrape|distill|gather|isolate|obtain|sift|fish|pluck|glean|cull|unearth|dredge|winnow"

#######################################
# Parse the XML input and publish the extracted fields.
# Globals:   USER_TASK, SUGGESTED_TEMPLATE, SUGGESTED_CONFIDENCE (written, exported)
# Arguments: $1 - XML input (may span multiple lines)
# Outputs:   error messages on stderr
# Returns:   exits the script with status 1 when the input is empty, the
#            required user_task field cannot be extracted, or confidence is
#            not an unsigned integer
#######################################
read_xml_input() {
  local xml_input="$1"

  # Validate input provided
  if [ -z "$xml_input" ]; then
    echo "Error: No input provided. Usage: $0 ''" >&2
    exit 1
  fi

  # Extract required field using common function
  local user_task
  user_task=$(require_xml_field "$xml_input" "user_task" "auto") || exit 1

  # Extract optional fields with defaults
  local suggested_template
  suggested_template=$(optional_xml_field "$xml_input" "suggested_template" "")
  local confidence
  confidence=$(optional_xml_field "$xml_input" "confidence" "")

  # Validate confidence is a number if provided
  if [ -n "$confidence" ] && ! [[ "$confidence" =~ ^[0-9]+$ ]]; then
    echo "Error: Invalid confidence value: $confidence (must be integer)" >&2
    exit 1
  fi

  # Export for use by other functions
  export USER_TASK="$user_task"
  export SUGGESTED_TEMPLATE="$suggested_template"
  export SUGGESTED_CONFIDENCE="$confidence"
}

#######################################
# Convert text to lowercase.
# Arguments: $1 - text
# Outputs:   lowercased text on stdout
#######################################
to_lowercase() {
  # printf instead of echo so inputs such as "-n" or "-e" are treated as data.
  printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]'
}

#######################################
# Count whole-word, case-insensitive occurrences of any keyword in a text.
# Arguments: $1 - text to search; $2.. - keywords (joined into one alternation)
# Outputs:   number of matches on stdout
# Returns:   0, including when nothing matches
#######################################
count_matches() {
  local text="$1"
  shift
  local IFS='|'
  local pattern="$*" # "$*" joins the keywords with the first IFS character

  # grep exits 1 when nothing matches; under pipefail that would fail the
  # whole pipeline, so the no-match status is tolerated explicitly and the
  # count is simply 0.
  { printf '%s\n' "$text" | grep -oiE "\b($pattern)\b" 2>/dev/null || true; } \
    | wc -l \
    | tr -d ' '
}

#######################################
# Pre-compute, for every category, whether the text contains a strong indicator.
# Globals:   PATTERN_* (read); HAS_STRONG_CODE, HAS_STRONG_COMPARISON,
#            HAS_STRONG_TEST, HAS_STRONG_REVIEW, HAS_STRONG_DOCUMENTATION,
#            HAS_STRONG_EXTRACTION (written, each 0 or 1)
# Arguments: $1 - text to scan
#######################################
compute_strong_indicators() {
  local text="$1"

  # Collect every strong-indicator match in one pass; no match is not an error.
  local all_matches
  all_matches=$(printf '%s\n' "$text" \
    | grep -oiE "\b($PATTERN_CODE|$PATTERN_COMPARISON|$PATTERN_TEST|$PATTERN_REVIEW|$PATTERN_DOCUMENTATION|$PATTERN_EXTRACTION)\b" \
    | tr '[:upper:]' '[:lower:]') || true

  # Check which patterns matched. A here-string is used instead of
  # `echo | grep -q` so an early grep exit cannot turn into a pipefail failure.
  local category pattern_var
  for category in CODE COMPARISON TEST REVIEW DOCUMENTATION EXTRACTION; do
    pattern_var="PATTERN_${category}"
    if grep -qiE "\b(${!pattern_var})\b" <<<"$all_matches"; then
      printf -v "HAS_STRONG_${category}" '%s' 1
    else
      printf -v "HAS_STRONG_${category}" '%s' 0
    fi
  done
}

#######################################
# Check if a category has a strong indicator.
# Globals:   HAS_STRONG_* (read; treated as 0 when not yet computed)
# Arguments: $1 - category (code|comparison|test|review|documentation|extraction)
# Returns:   0 when the category's flag is 1, otherwise 1 (also for unknown
#            categories)
#######################################
has_strong_indicator() {
  local category="$1"
  local flag

  case "$category" in
    "code") flag=${HAS_STRONG_CODE:-0} ;;
    "comparison") flag=${HAS_STRONG_COMPARISON:-0} ;;
    "test") flag=${HAS_STRONG_TEST:-0} ;;
    "review") flag=${HAS_STRONG_REVIEW:-0} ;;
    "documentation") flag=${HAS_STRONG_DOCUMENTATION:-0} ;;
    "extraction") flag=${HAS_STRONG_EXTRACTION:-0} ;;
    *) return 1 ;;
  esac

  [ "$flag" -eq 1 ]
}

#######################################
# Calculate the confidence for a category.
# Globals:   STRONG_INDICATOR_BASE, SUPPORTING_KEYWORD_BONUS,
#            ONE/TWO/THREE_KEYWORD_CONFIDENCE (read)
# Arguments: $1 - category; $2 - number of supporting keywords found
# Outputs:   confidence percentage (0-100) on stdout
#######################################
calculate_confidence() {
  local category=$1
  local supporting_count=$2
  local confidence

  if has_strong_indicator "$category"; then
    # Strong indicator: base score plus a bonus per supporting keyword, capped.
    confidence=$((STRONG_INDICATOR_BASE + supporting_count * SUPPORTING_KEYWORD_BONUS))
    if [ "$confidence" -gt 100 ]; then
      confidence=100
    fi
  elif [ "$supporting_count" -eq 0 ]; then
    confidence=0
  elif [ "$supporting_count" -eq 1 ]; then
    confidence=$ONE_KEYWORD_CONFIDENCE
  elif [ "$supporting_count" -eq 2 ]; then
    confidence=$TWO_KEYWORD_CONFIDENCE
  else
    confidence=$THREE_KEYWORD_CONFIDENCE
  fi

  echo "$confidence"
}

#######################################
# Classify a task and report the best-matching template.
# Globals:   BORDERLINE_MIN, TEMPLATE_DIR (read); HAS_STRONG_* (written via
#            compute_strong_indicators)
# Arguments: $1 - task description
# Outputs:   "<template_name> <confidence>" on stdout; "custom 0" when the best
#            score is below BORDERLINE_MIN or the template file does not exist
#######################################
classify_task() {
  local task_description="$1"
  local task_lower
  task_lower=$(to_lowercase "$task_description")

  # Pre-compute strong indicators
  compute_strong_indicators "$task_lower"

  # Define supporting keywords
  local code_keywords=("code" "file" "class" "bug" "module" "system" "endpoint")
  local comparison_keywords=("sentence" "equal" "whether" "mean" "version" "duplicate" "similarity")
  local test_keywords=("coverage" "jest" "pytest" "junit" "mocha" "case" "suite" "edge" "unit" "generate")
  local review_keywords=("readability" "maintainability" "practices" "smell")
  local documentation_keywords=("comment" "guide" "reference" "explain" "api" "write" "function" "inline" "author" "instructions" "setup" "method")
  local extraction_keywords=("data" "scrape" "retrieve" "json" "html" "csv" "email" "address" "timestamp" "logs" "file")

  # Count matches
  local code_count comparison_count testgen_count
  local review_count documentation_count extraction_count
  code_count=$(count_matches "$task_lower" "${code_keywords[@]}")
  comparison_count=$(count_matches "$task_lower" "${comparison_keywords[@]}")
  testgen_count=$(count_matches "$task_lower" "${test_keywords[@]}")
  review_count=$(count_matches "$task_lower" "${review_keywords[@]}")
  documentation_count=$(count_matches "$task_lower" "${documentation_keywords[@]}")
  extraction_count=$(count_matches "$task_lower" "${extraction_keywords[@]}")

  # Calculate confidence scores. The two arrays are parallel; their order is
  # the tie-break order, because only a strictly higher score replaces the
  # current best.
  local templates=(
    "code-refactoring"
    "code-comparison"
    "test-generation"
    "code-review"
    "documentation-generator"
    "data-extraction"
  )
  local scores=()
  scores+=("$(calculate_confidence "code" "$code_count")")
  scores+=("$(calculate_confidence "comparison" "$comparison_count")")
  scores+=("$(calculate_confidence "test" "$testgen_count")")
  scores+=("$(calculate_confidence "review" "$review_count")")
  scores+=("$(calculate_confidence "documentation" "$documentation_count")")
  scores+=("$(calculate_confidence "extraction" "$extraction_count")")

  # Find highest confidence
  local max_confidence=0
  local selected_template="custom"
  local i
  for i in "${!templates[@]}"; do
    if [ "${scores[$i]}" -gt "$max_confidence" ]; then
      max_confidence=${scores[$i]}
      selected_template=${templates[$i]}
    fi
  done

  # Set to custom if below borderline threshold
  if [ "$max_confidence" -lt "$BORDERLINE_MIN" ]; then
    selected_template="custom"
    max_confidence=0
  fi

  # Validate template file exists
  if [ "$selected_template" != "custom" ]; then
    local template_file="${TEMPLATE_DIR}/${selected_template}.md"
    if [ ! -f "$template_file" ]; then
      selected_template="custom"
      max_confidence=0
    fi
  fi

  # Output: template_name confidence
  echo "$selected_template $max_confidence"
}

#######################################
# Determine scenario based on confidence level.
# Globals:   HIGH_CONFIDENCE_MIN, BORDERLINE_MIN (read)
# Arguments: $1 - confidence percentage (integer)
# Outputs:   "high", "borderline" or "weak" on stdout
#######################################
get_scenario() {
  local confidence=$1

  if [ "$confidence" -ge "$HIGH_CONFIDENCE_MIN" ]; then
    echo "high"
  elif [ "$confidence" -ge "$BORDERLINE_MIN" ]; then
    echo "borderline"
  else
    echo "weak"
  fi
}

#######################################
# Classify USER_TASK and print the instructions for the resulting scenario.
# Globals:   USER_TASK, SUGGESTED_TEMPLATE (read)
# Outputs:   instruction text on stdout
#######################################
generate_instructions() {
  # Run classification; the result is "<template> <confidence>"
  local result template_name confidence
  result=$(classify_task "$USER_TASK")
  read -r template_name confidence <<<"$result"

  # Sanitize for output. The original `local x=$(...)` form ignored a failing
  # sanitizer, so that tolerance is kept.
  # NOTE(review): confirm whether a sanitize_input failure should abort instead.
  local sanitized_task
  sanitized_task=$(sanitize_input "$USER_TASK") || true

  # Determine scenario
  local scenario
  scenario=$(get_scenario "$confidence")

  # Generate comparison note if suggested template differs
  local comparison_note=""
  if [ -n "$SUGGESTED_TEMPLATE" ] && [ "$template_name" != "$SUGGESTED_TEMPLATE" ]; then
    comparison_note=" Note: Classification selected '$template_name' instead of suggested '$SUGGESTED_TEMPLATE'"
  elif [ -n "$SUGGESTED_TEMPLATE" ]; then
    comparison_note=" Note: Classification agrees with suggested template '$SUGGESTED_TEMPLATE'"
  fi

  # NOTE(review): sanitized_task and comparison_note are not referenced by the
  # heredoc text that survived; they were presumably interpolated in the lost
  # portion - restore from version control.
  # shellcheck disable=SC2034

  case "$scenario" in
    high)
      # High confidence (70%+): Accept classification directly
      # NOTE(review): heredoc body truncated in the reviewed source.
      cat <<EOF
$template_name
$confidence
Keyword-based classification has high confidence ($confidence%) based on pattern matching.
\`\`\`
EOF
      ;;
    borderline)
      # Borderline confidence (60-69%): Validate the classification
      # NOTE(review): heredoc body truncated in the reviewed source.
      cat <<EOF
template-name
final-confidence-percentage
1-2 sentence explanation
\`\`\`
EOF
      ;;
    weak)
      # Weak/no confidence (<60%): Full evaluation
      # NOTE(review): heredoc body truncated in the reviewed source.
      cat <<EOF
template-name
final-confidence-percentage
1-2 sentence explanation
\`\`\`
EOF
      ;;
  esac
}

#######################################
# Entry point: validate arguments, parse the XML input, print instructions.
# Arguments: $1 - XML input
# Returns:   exits 1 when no argument is given
#######################################
main() {
  # Check for command-line argument
  if [ $# -eq 0 ]; then
    echo "Error: No input provided. Usage: $0 ''" >&2
    exit 1
  fi

  # Read and parse XML input from first argument
  read_xml_input "$1"

  # Generate and output instructions
  generate_instructions
}

# Run main function
main "$@"