Initial commit

Zhongwei Li · 2025-11-29 18:20:21 +08:00 · commit bbbaf7acad
63 changed files with 38552 additions and 0 deletions

commands/debug/.scripts/analyze-logs.sh

@@ -0,0 +1,230 @@
#!/bin/bash
# Purpose: Analyze log files for patterns, errors, and anomalies
# Version: 1.0.0
# Usage: ./analyze-logs.sh --file <log-file> [options]
# Returns: 0=success, 1=error, 2=invalid params
# Dependencies: awk, grep, sed, jq (optional for JSON logs)
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Default values
LOG_FILE=""
PATTERN=""
LEVEL=""
CONTEXT_LINES=5
START_TIME=""
END_TIME=""
OUTPUT_FORMAT="text"
SINCE=""
# Help message
show_help() {
cat << EOF
Log Analysis Utility
Usage: $0 --file <log-file> [options]
Options:
--file FILE Log file to analyze (required)
--pattern REGEX Filter by regex pattern
--level LEVEL Filter by log level (ERROR|WARN|INFO|DEBUG)
--context N Show N lines before and after matches (default: 5)
--start TIME Start time (format: "YYYY-MM-DD HH:MM:SS")
--end TIME End time (format: "YYYY-MM-DD HH:MM:SS")
--since DURATION Time ago (e.g., "1 hour ago", "30 minutes ago")
--format FORMAT Output format: text|json (default: text)
-h, --help Show this help message
Examples:
# Find all errors in last hour
$0 --file app.log --level ERROR --since "1 hour ago"
# Find timeout errors with context
$0 --file app.log --pattern "timeout" --context 10
# Analyze specific timeframe
$0 --file app.log --start "2024-10-14 14:00:00" --end "2024-10-14 15:00:00"
EOF
exit 0
}
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--file)
LOG_FILE="$2"
shift 2
;;
--pattern)
PATTERN="$2"
shift 2
;;
--level)
LEVEL="$2"
shift 2
;;
--context)
CONTEXT_LINES="$2"
shift 2
;;
--start)
START_TIME="$2"
shift 2
;;
--end)
END_TIME="$2"
shift 2
;;
--since)
SINCE="$2"
shift 2
;;
--format)
OUTPUT_FORMAT="$2"
shift 2
;;
-h|--help)
show_help
;;
*)
echo -e "${RED}Error: Unknown option $1${NC}" >&2
exit 2
;;
esac
done
# Validate required parameters
if [ -z "$LOG_FILE" ]; then
echo -e "${RED}Error: --file is required${NC}" >&2
echo "Use --help for usage information"
exit 2
fi
if [ ! -f "$LOG_FILE" ]; then
echo -e "${RED}Error: Log file not found: $LOG_FILE${NC}" >&2
exit 1
fi
# Functions
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
log_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# Convert "since" to start time
if [ -n "$SINCE" ]; then
if command -v date &> /dev/null; then
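# GNU date parses "$SINCE" directly; the BSD fallback (-v -1H) cannot, so it always uses a fixed 1-hour window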
START_TIME=$(date -d "$SINCE" '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date -v -1H '+%Y-%m-%d %H:%M:%S')
fi
fi
log_info "Analyzing log file: $LOG_FILE"
# Build grep command
GREP_CMD="cat '$LOG_FILE'"
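# The pipeline is assembled as a string and run with eval below; filter values containing single quotes will break the quoting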
# Time filtering
if [ -n "$START_TIME" ]; then
log_info "Filtering from: $START_TIME"
GREP_CMD="$GREP_CMD | awk '\$0 >= \"$START_TIME\"'"
fi
if [ -n "$END_TIME" ]; then
log_info "Filtering to: $END_TIME"
GREP_CMD="$GREP_CMD | awk '\$0 <= \"$END_TIME\"'"
fi
# Level filtering
if [ -n "$LEVEL" ]; then
log_info "Filtering by level: $LEVEL"
GREP_CMD="$GREP_CMD | grep -i '$LEVEL'"
fi
# Pattern filtering
if [ -n "$PATTERN" ]; then
log_info "Filtering by pattern: $PATTERN"
GREP_CMD="$GREP_CMD | grep -E '$PATTERN' -A $CONTEXT_LINES -B $CONTEXT_LINES"
fi
# Execute filtering
# "|| true" keeps set -e from aborting the script when the pipeline finds no matches
FILTERED_OUTPUT=$(eval "$GREP_CMD" || true)
if [ -z "$FILTERED_OUTPUT" ]; then
log_warn "No matching log entries found"
exit 0
fi
# Count results
MATCH_COUNT=$(echo "$FILTERED_OUTPUT" | wc -l)
log_info "Found $MATCH_COUNT matching lines"
# Analysis
echo ""
echo "═══════════════════════════════════════════════════════════"
echo " LOG ANALYSIS RESULTS"
echo "═══════════════════════════════════════════════════════════"
echo ""
# Error statistics
echo "Error Statistics:"
echo "─────────────────────────────────────────────────────────"
# grep -c prints 0 when nothing matches; "|| true" keeps pipefail from aborting
ERROR_COUNT=$(echo "$FILTERED_OUTPUT" | grep -ci "ERROR" || true)
WARN_COUNT=$(echo "$FILTERED_OUTPUT" | grep -ci "WARN" || true)
INFO_COUNT=$(echo "$FILTERED_OUTPUT" | grep -ci "INFO" || true)
echo " ERROR: $ERROR_COUNT"
echo " WARN: $WARN_COUNT"
echo " INFO: $INFO_COUNT"
echo ""
# Top errors
echo "Top Error Messages (Top 10):"
echo "─────────────────────────────────────────────────────────"
echo "$FILTERED_OUTPUT" | grep -i "ERROR" | awk -F'ERROR' '{print $2}' | sort | uniq -c | sort -rn | head -10 || echo " No errors found"
echo ""
# Time distribution (if timestamps present)
echo "Time Distribution:"
echo "─────────────────────────────────────────────────────────"
echo "$FILTERED_OUTPUT" | awk '{print substr($0, 1, 13)}' | sort | uniq -c | tail -20 || echo " No timestamp pattern detected"
echo ""
# Output filtered results
if [ "$OUTPUT_FORMAT" = "json" ]; then
log_info "Generating JSON output..."
# Simple JSON array of log lines
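# NOTE: lines are embedded as-is; entries containing double quotes or backslashes will produce invalid JSON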
echo "{"
echo " \"file\": \"$LOG_FILE\","
echo " \"matches\": $MATCH_COUNT,"
echo " \"entries\": ["
echo "$FILTERED_OUTPUT" | awk '{printf " \"%s\",\n", $0}' | sed '$ s/,$//'
echo " ]"
echo "}"
else
echo "Matching Log Entries:"
echo "─────────────────────────────────────────────────────────"
echo "$FILTERED_OUTPUT"
fi
echo ""
log_success "Analysis complete"
exit 0

commands/debug/.scripts/memory-check.sh

@@ -0,0 +1,418 @@
#!/bin/bash
# Purpose: Monitor memory usage and detect leaks
# Version: 1.0.0
# Usage: ./memory-check.sh --app <app-name> [options]
# Returns: 0=success, 1=error, 2=invalid params
# Dependencies: ps, awk, bc
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Default values
APP_NAME=""
DURATION=300
INTERVAL=10
THRESHOLD=1024
OUTPUT_DIR="./memory-check-output"
ALERT_ON_GROWTH=true
# Help message
show_help() {
cat << EOF
Memory Monitoring Utility
Usage: $0 --app <app-name> [options]
Options:
--app NAME Application/process name to monitor (required)
--duration N Monitoring duration in seconds (default: 300)
--interval N Sampling interval in seconds (default: 10)
--threshold MB Alert if memory exceeds threshold in MB (default: 1024)
--output DIR Output directory (default: ./memory-check-output)
--no-alert Disable growth alerts
-h, --help Show this help message
Examples:
# Monitor Node.js app for 5 minutes
$0 --app node --duration 300
# Monitor with custom threshold
$0 --app node --duration 600 --threshold 2048
# Quick check (1 minute)
$0 --app node --duration 60 --interval 5
EOF
exit 0
}
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--app)
APP_NAME="$2"
shift 2
;;
--duration)
DURATION="$2"
shift 2
;;
--interval)
INTERVAL="$2"
shift 2
;;
--threshold)
THRESHOLD="$2"
shift 2
;;
--output)
OUTPUT_DIR="$2"
shift 2
;;
--no-alert)
ALERT_ON_GROWTH=false
shift
;;
-h|--help)
show_help
;;
*)
echo -e "${RED}Error: Unknown option $1${NC}" >&2
exit 2
;;
esac
done
# Validate required parameters
if [ -z "$APP_NAME" ]; then
echo -e "${RED}Error: --app is required${NC}" >&2
echo "Use --help for usage information"
exit 2
fi
# Functions
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
log_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
alert() {
echo -e "${RED}[ALERT]${NC} $1"
}
# Create output directory
mkdir -p "$OUTPUT_DIR"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
log_info "Starting memory monitoring for: $APP_NAME"
log_info "Duration: ${DURATION}s, Interval: ${INTERVAL}s, Threshold: ${THRESHOLD}MB"
log_info "Output directory: $OUTPUT_DIR"
# Find process ID
PIDS=$(pgrep -f "$APP_NAME" || echo "")
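# NOTE: pgrep -f matches the full command line, so this script's own invocation (which contains the app name) may also match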
if [ -z "$PIDS" ]; then
log_error "No process found matching: $APP_NAME"
exit 1
fi
PID=$(echo "$PIDS" | head -1)
log_info "Found process: PID $PID"
# Output files
MEMORY_LOG="$OUTPUT_DIR/memory-log-$TIMESTAMP.txt"
CHART_FILE="$OUTPUT_DIR/memory-chart-$TIMESTAMP.txt"
REPORT_FILE="$OUTPUT_DIR/memory-report-$TIMESTAMP.txt"
# Write header
echo "Timestamp,RSS_KB,VSZ_KB,Percent_MEM" > "$MEMORY_LOG"
log_info "Monitoring memory usage..."
# Track min/max
MIN_RSS=0
MAX_RSS=0
READINGS=()
# Collect memory samples
SAMPLES=$((DURATION / INTERVAL))
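# Integer division: e.g. 300s / 10s = 30 samples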
for i in $(seq 1 $SAMPLES); do
# Get memory stats
MEM_STATS=$(ps -p "$PID" -o rss=,vsz=,%mem= 2>/dev/null || echo "")
if [ -z "$MEM_STATS" ]; then
log_error "Process $PID not found. It may have terminated."
break
fi
# Parse values
RSS=$(echo "$MEM_STATS" | awk '{print $1}')
VSZ=$(echo "$MEM_STATS" | awk '{print $2}')
PMEM=$(echo "$MEM_STATS" | awk '{print $3}')
TIMESTAMP_NOW=$(date '+%Y-%m-%d %H:%M:%S')
# Update min/max
if [ "$MIN_RSS" -eq 0 ] || [ "$RSS" -lt "$MIN_RSS" ]; then
MIN_RSS=$RSS
fi
if [ "$RSS" -gt "$MAX_RSS" ]; then
MAX_RSS=$RSS
fi
# Store reading
READINGS+=($RSS)
# Log to file
echo "$TIMESTAMP_NOW,$RSS,$VSZ,$PMEM" >> "$MEMORY_LOG"
# Convert to MB for display
RSS_MB=$(echo "scale=2; $RSS/1024" | bc)
VSZ_MB=$(echo "scale=2; $VSZ/1024" | bc)
# Progress display
echo -ne "\r Sample $i/$SAMPLES: RSS=${RSS_MB}MB, VSZ=${VSZ_MB}MB, %MEM=${PMEM}% "
# Check threshold
if (( $(echo "$RSS_MB > $THRESHOLD" | bc -l) )); then
echo "" # New line before alert
alert "Memory threshold exceeded: ${RSS_MB}MB > ${THRESHOLD}MB"
fi
sleep "$INTERVAL"
done
echo "" # New line after progress
log_success "Memory monitoring complete"
# Calculate statistics
if [ ${#READINGS[@]} -eq 0 ]; then
log_error "No memory samples were collected; the process may have exited immediately"
exit 1
fi
MIN_MB=$(echo "scale=2; $MIN_RSS/1024" | bc)
MAX_MB=$(echo "scale=2; $MAX_RSS/1024" | bc)
GROWTH_MB=$(echo "scale=2; ($MAX_RSS-$MIN_RSS)/1024" | bc)
# Calculate average
TOTAL_RSS=0
for rss in "${READINGS[@]}"; do
TOTAL_RSS=$((TOTAL_RSS + rss))
done
AVG_RSS=$((TOTAL_RSS / ${#READINGS[@]}))
AVG_MB=$(echo "scale=2; $AVG_RSS/1024" | bc)
# Detect leak (memory consistently growing)
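# Heuristic: flag a leak only when total growth exceeds 50 MB and the second-half average exceeds the first-half average by more than 25 MB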
LEAK_DETECTED=false
if (( $(echo "$GROWTH_MB > 50" | bc -l) )); then
# Check if growth is consistent (not just spike)
FIRST_HALF_AVG=0
SECOND_HALF_AVG=0
MID_POINT=$((${#READINGS[@]} / 2))
for i in $(seq 0 $((MID_POINT - 1))); do
FIRST_HALF_AVG=$((FIRST_HALF_AVG + READINGS[$i]))
done
FIRST_HALF_AVG=$((FIRST_HALF_AVG / MID_POINT))
for i in $(seq $MID_POINT $((${#READINGS[@]} - 1))); do
SECOND_HALF_AVG=$((SECOND_HALF_AVG + READINGS[$i]))
done
SECOND_HALF_AVG=$((SECOND_HALF_AVG / (${#READINGS[@]} - MID_POINT)))
CONSISTENT_GROWTH=$((SECOND_HALF_AVG - FIRST_HALF_AVG))
CONSISTENT_GROWTH_MB=$(echo "scale=2; $CONSISTENT_GROWTH/1024" | bc)
if (( $(echo "$CONSISTENT_GROWTH_MB > 25" | bc -l) )); then
LEAK_DETECTED=true
fi
fi
# Generate ASCII chart
log_info "Generating memory chart..."
cat > "$CHART_FILE" << EOF
Memory Usage Over Time
═══════════════════════════════════════════════════════════
RSS (Resident Set Size) in MB
EOF
# Simple ASCII chart (CHART_HEIGHT rows, scaled to peak RSS)
CHART_HEIGHT=20
SCALE_FACTOR=$(echo "scale=2; $MAX_RSS / $CHART_HEIGHT" | bc)
# Redirect the whole chart body into the chart file (it is displayed at the end)
{
for row in $(seq $CHART_HEIGHT -1 0); do
THRESHOLD_LINE=$(echo "scale=0; $row * $SCALE_FACTOR / 1024" | bc)
printf "%4d MB |" "$THRESHOLD_LINE"
for reading in "${READINGS[@]}"; do
READING_ROW=$(echo "scale=0; $reading / $SCALE_FACTOR" | bc)
if [ "$READING_ROW" -ge "$row" ]; then
printf "█"
else
printf " "
fi
done
echo ""
done
printf " +"
for i in $(seq 1 ${#READINGS[@]}); do printf "─"; done
echo ""
printf " "
for i in $(seq 1 ${#READINGS[@]}); do
if [ $((i % 10)) -eq 0 ]; then
printf "|"
else
printf " "
fi
done
echo ""
cat >> "$CHART_FILE" << EOF
Legend: Each column = ${INTERVAL}s interval
Total duration: ${DURATION}s
EOF
cat "$CHART_FILE"
# Generate report
log_info "Generating memory report..."
cat > "$REPORT_FILE" << EOF
═══════════════════════════════════════════════════════════
MEMORY MONITORING REPORT
═══════════════════════════════════════════════════════════
Application: $APP_NAME
PID: $PID
Duration: ${DURATION}s (${SAMPLES} samples)
Interval: ${INTERVAL}s
Timestamp: $TIMESTAMP
Memory Statistics:
─────────────────────────────────────────────────────────
Minimum RSS: ${MIN_MB} MB
Maximum RSS: ${MAX_MB} MB
Average RSS: ${AVG_MB} MB
Memory Growth: ${GROWTH_MB} MB
Threshold: ${THRESHOLD} MB
EOF
# Leak analysis
if [ "$LEAK_DETECTED" = true ]; then
cat >> "$REPORT_FILE" << EOF
⚠ MEMORY LEAK DETECTED
─────────────────────────────────────────────────────────
Memory grew consistently by ${CONSISTENT_GROWTH_MB} MB
First half average: $(echo "scale=2; $FIRST_HALF_AVG/1024" | bc) MB
Second half average: $(echo "scale=2; $SECOND_HALF_AVG/1024" | bc) MB
Recommendations:
1. Take heap snapshots for detailed analysis
2. Check for:
- Event listeners not removed
- Timers not cleared (setInterval, setTimeout)
- Unbounded caches or arrays
- Circular references
- Closures holding large objects
3. Use memory profiling tools:
- Node.js: node --inspect, heap snapshots
- Python: memory_profiler, tracemalloc
4. Consider using /debug memory operation for deeper analysis
EOF
if [ "$ALERT_ON_GROWTH" = true ]; then
alert "MEMORY LEAK DETECTED! Growth: ${CONSISTENT_GROWTH_MB} MB"
fi
else
cat >> "$REPORT_FILE" << EOF
✓ NO MEMORY LEAK DETECTED
─────────────────────────────────────────────────────────
Memory usage is stable
Growth of ${GROWTH_MB} MB is within acceptable range
EOF
log_success "No memory leak detected"
fi
# Threshold warnings
if (( $(echo "$MAX_MB > $THRESHOLD" | bc -l) )); then
cat >> "$REPORT_FILE" << EOF
⚠ THRESHOLD EXCEEDED
─────────────────────────────────────────────────────────
Peak memory (${MAX_MB} MB) exceeded threshold (${THRESHOLD} MB)
Recommendations:
1. Increase memory allocation if necessary
2. Optimize memory usage:
- Use streaming for large data
- Implement pagination
- Use efficient data structures
- Clear unused objects
3. Set appropriate container/VM memory limits
EOF
fi
# Output files
cat >> "$REPORT_FILE" << EOF
Output Files:
─────────────────────────────────────────────────────────
Memory Log: $MEMORY_LOG
Memory Chart: $CHART_FILE
This Report: $REPORT_FILE
Next Steps:
─────────────────────────────────────────────────────────
EOF
if [ "$LEAK_DETECTED" = true ]; then
cat >> "$REPORT_FILE" << EOF
1. Use /debug memory for heap profiling
2. Take heap snapshots before and after operations
3. Review code for common leak patterns
4. Monitor production with these findings
EOF
else
cat >> "$REPORT_FILE" << EOF
1. Continue monitoring in production
2. Set up alerts for memory threshold
3. Schedule periodic memory checks
EOF
fi
echo "" >> "$REPORT_FILE"
echo "═══════════════════════════════════════════════════════════" >> "$REPORT_FILE"
log_success "Report saved to: $REPORT_FILE"
# Display report
cat "$REPORT_FILE"
# Exit with appropriate code
if [ "$LEAK_DETECTED" = true ]; then
exit 1
else
exit 0
fi

commands/debug/.scripts/profile.sh

@@ -0,0 +1,297 @@
#!/bin/bash
# Purpose: Profile application performance (CPU, memory, I/O)
# Version: 1.0.0
# Usage: ./profile.sh --app <app-name> [options]
# Returns: 0=success, 1=error, 2=invalid params
# Dependencies: ps, top, pidstat (optional)
set -euo pipefail
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
# Default values
APP_NAME=""
DURATION=60
INTERVAL=1
OUTPUT_DIR="./profile-output"
PROFILE_TYPE="all"
ENDPOINT=""
# Help message
show_help() {
cat << EOF
Application Profiling Utility
Usage: $0 --app <app-name> [options]
Options:
--app NAME Application/process name to profile (required)
--duration N Profile duration in seconds (default: 60)
--interval N Sampling interval in seconds (default: 1)
--type TYPE Profile type: cpu|memory|io|all (default: all)
--endpoint URL Optional: HTTP endpoint to load test during profiling
--output DIR Output directory (default: ./profile-output)
-h, --help Show this help message
Examples:
# Profile Node.js app for 2 minutes
$0 --app node --duration 120
# Profile with load test
$0 --app node --duration 60 --endpoint http://localhost:3000/api/test
# Profile only CPU
$0 --app node --duration 30 --type cpu
EOF
exit 0
}
# Parse arguments
while [[ $# -gt 0 ]]; do
case $1 in
--app)
APP_NAME="$2"
shift 2
;;
--duration)
DURATION="$2"
shift 2
;;
--interval)
INTERVAL="$2"
shift 2
;;
--type)
PROFILE_TYPE="$2"
shift 2
;;
--endpoint)
ENDPOINT="$2"
shift 2
;;
--output)
OUTPUT_DIR="$2"
shift 2
;;
-h|--help)
show_help
;;
*)
echo -e "${RED}Error: Unknown option $1${NC}" >&2
exit 2
;;
esac
done
# Validate required parameters
if [ -z "$APP_NAME" ]; then
echo -e "${RED}Error: --app is required${NC}" >&2
echo "Use --help for usage information"
exit 2
fi
# Functions
log_info() {
echo -e "${BLUE}[INFO]${NC} $1"
}
log_success() {
echo -e "${GREEN}[SUCCESS]${NC} $1"
}
log_warn() {
echo -e "${YELLOW}[WARN]${NC} $1"
}
log_error() {
echo -e "${RED}[ERROR]${NC} $1"
}
# Create output directory
mkdir -p "$OUTPUT_DIR"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
log_info "Starting profiling for: $APP_NAME"
log_info "Duration: ${DURATION}s, Interval: ${INTERVAL}s"
log_info "Output directory: $OUTPUT_DIR"
# Find process ID
PIDS=$(pgrep -f "$APP_NAME" || echo "")
if [ -z "$PIDS" ]; then
log_error "No process found matching: $APP_NAME"
exit 1
fi
PID=$(echo "$PIDS" | head -1)
log_info "Found process: PID $PID"
# Start load test if endpoint provided
LOAD_TEST_PID=""
if [ -n "$ENDPOINT" ]; then
log_info "Starting load test on: $ENDPOINT"
if command -v ab &> /dev/null; then
# Use Apache Bench
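# A deliberately large request count keeps load running for the whole profiling window; the process is killed after profiling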
ab -n 100000 -c 10 "$ENDPOINT" > "$OUTPUT_DIR/load-test-$TIMESTAMP.log" 2>&1 &
LOAD_TEST_PID=$!
log_info "Load test started (PID: $LOAD_TEST_PID)"
else
log_warn "Apache Bench (ab) not found, skipping load test"
fi
fi
# CPU Profiling
if [ "$PROFILE_TYPE" = "cpu" ] || [ "$PROFILE_TYPE" = "all" ]; then
log_info "Profiling CPU usage..."
CPU_OUTPUT="$OUTPUT_DIR/cpu-profile-$TIMESTAMP.txt"
# Collect CPU samples
for i in $(seq 1 $((DURATION / INTERVAL))); do
ps -p "$PID" -o %cpu,rss,vsz,cmd >> "$CPU_OUTPUT" 2>/dev/null || true
sleep "$INTERVAL"
done
log_success "CPU profile saved to: $CPU_OUTPUT"
# Calculate statistics
# Only process numeric rows, skipping the ps header line repeated in every sample
AVG_CPU=$(awk '$1 ~ /^[0-9.]+$/ {sum+=$1; count++} END {if (count>0) print sum/count; else print 0}' "$CPU_OUTPUT")
MAX_CPU=$(awk '$1 ~ /^[0-9.]+$/ {if ($1>max) max=$1} END {print max+0}' "$CPU_OUTPUT")
echo "CPU Statistics:" > "$OUTPUT_DIR/cpu-summary-$TIMESTAMP.txt"
echo " Average CPU: $AVG_CPU%" >> "$OUTPUT_DIR/cpu-summary-$TIMESTAMP.txt"
echo " Peak CPU: $MAX_CPU%" >> "$OUTPUT_DIR/cpu-summary-$TIMESTAMP.txt"
fi
# Memory Profiling
if [ "$PROFILE_TYPE" = "memory" ] || [ "$PROFILE_TYPE" = "all" ]; then
log_info "Profiling memory usage..."
MEM_OUTPUT="$OUTPUT_DIR/memory-profile-$TIMESTAMP.txt"
# Collect memory samples
for i in $(seq 1 $((DURATION / INTERVAL))); do
ps -p "$PID" -o rss,vsz,%mem,cmd >> "$MEM_OUTPUT" 2>/dev/null || true
sleep "$INTERVAL"
done
log_success "Memory profile saved to: $MEM_OUTPUT"
# Calculate statistics
# Only process numeric rows, skipping the ps header line repeated in every sample
AVG_RSS=$(awk '$1 ~ /^[0-9]+$/ {sum+=$1; count++} END {if (count>0) print sum/count; else print 0}' "$MEM_OUTPUT")
MAX_RSS=$(awk '$1 ~ /^[0-9]+$/ {if ($1>max) max=$1} END {print max+0}' "$MEM_OUTPUT")
MIN_RSS=$(awk '$1 ~ /^[0-9]+$/ {if (min=="") min=$1; if ($1<min) min=$1} END {print min+0}' "$MEM_OUTPUT")
echo "Memory Statistics:" > "$OUTPUT_DIR/memory-summary-$TIMESTAMP.txt"
echo " Average RSS: $(echo "scale=2; $AVG_RSS/1024" | bc) MB" >> "$OUTPUT_DIR/memory-summary-$TIMESTAMP.txt"
echo " Peak RSS: $(echo "scale=2; $MAX_RSS/1024" | bc) MB" >> "$OUTPUT_DIR/memory-summary-$TIMESTAMP.txt"
echo " Min RSS: $(echo "scale=2; $MIN_RSS/1024" | bc) MB" >> "$OUTPUT_DIR/memory-summary-$TIMESTAMP.txt"
echo " Memory Growth: $(echo "scale=2; ($MAX_RSS-$MIN_RSS)/1024" | bc) MB" >> "$OUTPUT_DIR/memory-summary-$TIMESTAMP.txt"
fi
# I/O Profiling
if [ "$PROFILE_TYPE" = "io" ] || [ "$PROFILE_TYPE" = "all" ]; then
log_info "Profiling I/O usage..."
IO_OUTPUT="$OUTPUT_DIR/io-profile-$TIMESTAMP.txt"
# Check if process has I/O stats available
if [ -f "/proc/$PID/io" ]; then
# Collect I/O samples
for i in $(seq 1 $((DURATION / INTERVAL))); do
echo "=== Sample $i ===" >> "$IO_OUTPUT"
cat "/proc/$PID/io" >> "$IO_OUTPUT" 2>/dev/null || true
sleep "$INTERVAL"
done
log_success "I/O profile saved to: $IO_OUTPUT"
else
log_warn "I/O profiling not available for this process"
fi
fi
# Stop load test if running
if [ -n "$LOAD_TEST_PID" ]; then
log_info "Stopping load test..."
kill "$LOAD_TEST_PID" 2>/dev/null || true
wait "$LOAD_TEST_PID" 2>/dev/null || true
fi
# Generate summary report
REPORT_FILE="$OUTPUT_DIR/profile-report-$TIMESTAMP.txt"
cat > "$REPORT_FILE" << EOF
═══════════════════════════════════════════════════════════
PERFORMANCE PROFILE REPORT
═══════════════════════════════════════════════════════════
Application: $APP_NAME
PID: $PID
Duration: ${DURATION}s
Interval: ${INTERVAL}s
Timestamp: $TIMESTAMP
EOF
# Add CPU summary if available
if [ -f "$OUTPUT_DIR/cpu-summary-$TIMESTAMP.txt" ]; then
cat "$OUTPUT_DIR/cpu-summary-$TIMESTAMP.txt" >> "$REPORT_FILE"
echo "" >> "$REPORT_FILE"
fi
# Add memory summary if available
if [ -f "$OUTPUT_DIR/memory-summary-$TIMESTAMP.txt" ]; then
cat "$OUTPUT_DIR/memory-summary-$TIMESTAMP.txt" >> "$REPORT_FILE"
echo "" >> "$REPORT_FILE"
fi
# Add recommendations
cat >> "$REPORT_FILE" << EOF
Recommendations:
─────────────────────────────────────────────────────────
EOF
if [ -f "$OUTPUT_DIR/cpu-summary-$TIMESTAMP.txt" ]; then
MAX_CPU=$(awk '/Peak CPU:/ {print $3}' "$OUTPUT_DIR/cpu-summary-$TIMESTAMP.txt" | sed 's/%//')
if [ -n "$MAX_CPU" ] && (( $(echo "$MAX_CPU > 80" | bc -l) )); then
echo " ⚠ High CPU usage detected (${MAX_CPU}%)" >> "$REPORT_FILE"
echo " - Consider optimizing CPU-intensive operations" >> "$REPORT_FILE"
echo " - Profile with flame graphs for detailed analysis" >> "$REPORT_FILE"
echo "" >> "$REPORT_FILE"
fi
fi
if [ -f "$OUTPUT_DIR/memory-summary-$TIMESTAMP.txt" ]; then
GROWTH=$(awk '/Memory Growth:/ {print $3}' "$OUTPUT_DIR/memory-summary-$TIMESTAMP.txt")
if [ -n "$GROWTH" ] && (( $(echo "$GROWTH > 100" | bc -l) )); then
echo " ⚠ Significant memory growth detected (${GROWTH} MB)" >> "$REPORT_FILE"
echo " - Possible memory leak" >> "$REPORT_FILE"
echo " - Use heap profiling to identify leak sources" >> "$REPORT_FILE"
echo "" >> "$REPORT_FILE"
fi
fi
cat >> "$REPORT_FILE" << EOF
Output Files:
─────────────────────────────────────────────────────────
EOF
ls -lh "$OUTPUT_DIR"/*-$TIMESTAMP.* >> "$REPORT_FILE"
echo "" >> "$REPORT_FILE"
echo "═══════════════════════════════════════════════════════════" >> "$REPORT_FILE"
log_success "Profile complete!"
log_info "Report saved to: $REPORT_FILE"
# Display summary
cat "$REPORT_FILE"
exit 0

commands/debug/README.md

@@ -0,0 +1,596 @@
# Debug Skill - Comprehensive Debugging Toolkit
A professional-grade debugging skill for diagnosing, reproducing, fixing, analyzing, and optimizing complex issues across the entire application stack.
## Overview
The debug skill provides systematic debugging operations that work seamlessly with the **10x-fullstack-engineer** agent to deliver cross-stack debugging expertise, production-grade strategies, and prevention-focused solutions.
## Available Operations
### 1. **diagnose** - Comprehensive Diagnosis and Root Cause Analysis
Performs systematic diagnosis across all layers of the application stack to identify root causes of complex issues.
**Usage:**
```bash
/10x-fullstack-engineer:debug diagnose issue:"Users getting 500 errors on file upload" environment:"production" logs:"logs/app.log"
```
**Parameters:**
- `issue:"description"` (required) - Problem description
- `environment:"prod|staging|dev"` (optional) - Target environment
- `logs:"path"` (optional) - Log file location
- `reproduction:"steps"` (optional) - Steps to reproduce
- `impact:"severity"` (optional) - Issue severity
**What it does:**
- Collects diagnostic data from frontend, backend, database, and infrastructure
- Analyzes symptoms and patterns across all stack layers
- Forms and tests hypotheses systematically
- Identifies root cause with supporting evidence
- Provides actionable recommendations
**Output:**
- Executive summary of issue and root cause
- Detailed diagnostic data from each layer
- Hypothesis analysis with evidence
- Root cause explanation
- Recommended immediate actions and permanent fix
- Prevention measures (monitoring, testing, documentation)
---
### 2. **reproduce** - Create Reliable Reproduction Strategies
Develops reliable strategies to reproduce issues consistently, creating test cases and reproduction documentation.
**Usage:**
```bash
/10x-fullstack-engineer:debug reproduce issue:"Payment webhook fails intermittently" environment:"staging" data:"sample-webhook-payload.json"
```
**Parameters:**
- `issue:"description"` (required) - Issue to reproduce
- `environment:"prod|staging|dev"` (optional) - Environment context
- `data:"path"` (optional) - Test data location
- `steps:"description"` (optional) - Known reproduction steps
- `reliability:"percentage"` (optional) - Current reproduction rate
**What it does:**
- Gathers environment, data, and user context
- Creates local reproduction strategy
- Develops automated test cases (unit, integration, E2E)
- Tests scenario variations and edge cases
- Verifies reproduction reliability
- Documents comprehensive reproduction guide
**Output:**
- Reproduction reliability metrics
- Prerequisites and setup instructions
- Detailed reproduction steps (manual and automated)
- Automated test case code
- Scenario variations tested
- Troubleshooting guide for reproduction issues
---
### 3. **fix** - Implement Targeted Fixes with Verification
Implements targeted fixes with comprehensive verification, safeguards, and prevention measures.
**Usage:**
```bash
/10x-fullstack-engineer:debug fix issue:"Race condition in order processing" root_cause:"Missing transaction lock" verification:"run-integration-tests"
```
**Parameters:**
- `issue:"description"` (required) - Issue being fixed
- `root_cause:"cause"` (required) - Identified root cause
- `verification:"strategy"` (optional) - Verification approach
- `scope:"areas"` (optional) - Affected code areas
- `rollback:"plan"` (optional) - Rollback strategy
**What it does:**
- Designs appropriate fix pattern for the issue type
- Implements fix with safety measures
- Adds safeguards (validation, rate limiting, circuit breakers)
- Performs multi-level verification (unit, integration, load, production)
- Adds prevention measures (tests, monitoring, alerts)
- Documents fix and deployment plan
**Fix patterns supported:**
- Missing error handling
- Race conditions
- Memory leaks
- Missing validation
- N+1 query problems
- Configuration issues
- Infrastructure limits
**Output:**
- Detailed fix implementation with before/after code
- Safeguards added (validation, error handling, monitoring)
- Verification results at all levels
- Prevention measures (tests, alerts, documentation)
- Deployment plan with rollback strategy
- Files modified and commits made
---
### 4. **analyze-logs** - Deep Log Analysis with Pattern Detection
Performs deep log analysis with pattern detection, timeline correlation, and anomaly identification.
**Usage:**
```bash
/10x-fullstack-engineer:debug analyze-logs path:"logs/application.log" pattern:"ERROR.*timeout" timeframe:"last-24h"
```
**Parameters:**
- `path:"log-file-path"` (required) - Log file to analyze
- `pattern:"regex"` (optional) - Filter pattern
- `timeframe:"range"` (optional) - Time range to analyze
- `level:"error|warn|info"` (optional) - Log level filter
- `context:"lines"` (optional) - Context lines around matches
**What it does:**
- Discovers and filters relevant logs across all sources
- Detects error patterns and clusters similar errors
- Performs timeline analysis and event correlation
- Traces individual requests across services
- Identifies statistical anomalies and spikes
- Analyzes performance, user impact, and security issues
**Utility script:**
```bash
./commands/debug/.scripts/analyze-logs.sh \
--file logs/application.log \
--level ERROR \
--since "1 hour ago" \
--context 5
```
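For machine-readable output, the same utility also accepts `--format json`; a minimal sketch (jq is an optional dependency) that extracts the match count:
```bash
./commands/debug/.scripts/analyze-logs.sh \
  --file logs/application.log \
  --level ERROR \
  --format json | jq '.matches'
```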
**Output:**
- Summary of findings with key statistics
- Top errors with frequency and patterns
- Timeline of critical events
- Request tracing through distributed system
- Anomaly detection (spikes, new errors)
- Performance analysis from logs
- User impact assessment
- Root cause analysis based on log patterns
- Recommendations for fixes and monitoring
---
### 5. **performance** - Performance Debugging and Optimization
Debugs performance issues through profiling, bottleneck identification, and targeted optimization.
**Usage:**
```bash
/10x-fullstack-engineer:debug performance component:"api-endpoint:/orders" metric:"response-time" threshold:"200ms"
```
**Parameters:**
- `component:"name"` (required) - Component to profile
- `metric:"type"` (optional) - Metric to measure (response-time, throughput, cpu, memory)
- `threshold:"value"` (optional) - Target performance threshold
- `duration:"period"` (optional) - Profiling duration
- `load:"users"` (optional) - Concurrent users for load testing
**What it does:**
- Establishes performance baseline
- Profiles application, database, and network
- Identifies bottlenecks (CPU, I/O, memory, network)
- Implements targeted optimizations (queries, caching, algorithms, async)
- Performs load testing to verify improvements
- Sets up performance monitoring
**Profiling utility script:**
```bash
./commands/debug/.scripts/profile.sh \
--app node_app \
--duration 60 \
--endpoint http://localhost:3000/api/slow
```
**Optimization strategies:**
- Query optimization (indexes, query rewriting)
- Caching (application-level, Redis)
- Code optimization (algorithms, lazy loading, pagination)
- Async optimization (parallel execution, batching)
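To quantify these strategies, the same load can be replayed before and after a change; a minimal sketch using Apache Bench (assumptions: `ab` is installed and `http://localhost:3000/api/orders` is the endpoint under test):
```bash
# Record the mean latency for a short fixed load so before/after runs can be compared
ab -n 1000 -c 10 http://localhost:3000/api/orders | grep "Time per request" | head -1
```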
**Output:**
- Performance baseline and after-optimization metrics
- Bottlenecks identified with evidence
- Optimizations implemented with code changes
- Load testing results
- Performance improvement percentages
- Monitoring setup (metrics, dashboards, alerts)
- Recommendations for additional optimizations
---
### 6. **memory** - Memory Leak Detection and Optimization
Detects memory leaks, analyzes memory usage patterns, and optimizes memory consumption.
**Usage:**
```bash
/10x-fullstack-engineer:debug memory component:"background-worker" symptom:"growing-heap" duration:"6h"
```
**Parameters:**
- `component:"name"` (required) - Component to analyze
- `symptom:"type"` (optional) - Memory symptom (growing-heap, high-usage, oom)
- `duration:"period"` (optional) - Observation period
- `threshold:"max-mb"` (optional) - Memory threshold in MB
- `profile:"type"` (optional) - Profile type (heap, allocation)
**What it does:**
- Identifies memory symptoms (leaks, high usage, OOM)
- Captures memory profiles (heap snapshots, allocation tracking)
- Analyzes common leak patterns
- Implements memory optimizations
- Performs leak verification under load
- Tunes garbage collection
**Memory check utility script:**
```bash
./commands/debug/.scripts/memory-check.sh \
--app node_app \
--duration 300 \
--interval 10 \
--threshold 1024
```
**Common leak patterns detected:**
- Event listeners not removed
- Timers not cleared
- Closures holding references
- Unbounded caches
- Global variable accumulation
- Detached DOM nodes
- Infinite promise chains
**Optimization techniques:**
- Stream large data instead of loading into memory
- Use efficient data structures (Map vs Array)
- Paginate database queries
- Implement LRU caches with size limits
- Use weak references where appropriate
- Object pooling for frequently created objects
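After applying any of these techniques, the memory-check utility can confirm whether growth has actually stopped; a minimal sketch (assuming the target process is named `worker`):
```bash
./commands/debug/.scripts/memory-check.sh --app worker --duration 600 --interval 15
# The generated report states the verdict explicitly
grep -E "MEMORY LEAK DETECTED|NO MEMORY LEAK DETECTED" ./memory-check-output/memory-report-*.txt
```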
**Output:**
- Memory symptoms and baseline metrics
- Heap snapshot analysis
- Memory leaks identified with evidence
- Fixes implemented with before/after code
- Memory after fixes with improvement percentages
- Memory stability test results
- Garbage collection metrics
- Monitoring setup and alerts
- Recommendations for memory limits and future monitoring
---
## Utility Scripts
The debug skill includes three utility scripts in the `.scripts/` directory:
### analyze-logs.sh
**Purpose:** Analyze log files for patterns, errors, and anomalies
**Features:**
- Pattern matching with regex
- Log level filtering
- Time-based filtering
- Context lines around matches
- Error statistics and top errors
- Time distribution analysis
- JSON output support
### profile.sh
**Purpose:** Profile application performance (CPU, memory, I/O)
**Features:**
- CPU profiling with statistics
- Memory profiling with growth detection
- I/O profiling
- Concurrent load testing
- Automated recommendations
- Comprehensive reports
### memory-check.sh
**Purpose:** Monitor memory usage and detect leaks
**Features:**
- Real-time memory monitoring
- Memory growth detection
- Leak detection with trend analysis
- ASCII memory usage charts
- Threshold alerts
- Detailed memory reports
---
## Common Debugging Workflows
### Workflow 1: Production Error Investigation
```bash
# Step 1: Diagnose the issue
/10x-fullstack-engineer:debug diagnose issue:"500 errors on checkout" environment:"production" logs:"logs/app.log"
# Step 2: Analyze logs for patterns
/10x-fullstack-engineer:debug analyze-logs path:"logs/app.log" pattern:"checkout.*ERROR" timeframe:"last-1h"
# Step 3: Reproduce locally
/10x-fullstack-engineer:debug reproduce issue:"Checkout fails with 500" environment:"staging" data:"test-checkout.json"
# Step 4: Implement fix
/10x-fullstack-engineer:debug fix issue:"Database timeout on checkout" root_cause:"Missing connection pool configuration"
```
### Workflow 2: Performance Degradation
```bash
# Step 1: Profile performance
/10x-fullstack-engineer:debug performance component:"api-endpoint:/checkout" metric:"response-time" threshold:"500ms"
# Step 2: Analyze slow queries
/10x-fullstack-engineer:debug analyze-logs path:"logs/postgresql.log" pattern:"duration:.*[0-9]{4,}"
# Step 3: Implement optimization
/10x-fullstack-engineer:debug fix issue:"Slow checkout API" root_cause:"N+1 query on order items"
```
### Workflow 3: Memory Leak Investigation
```bash
# Step 1: Diagnose memory symptoms
/10x-fullstack-engineer:debug diagnose issue:"Memory grows over time" environment:"production"
# Step 2: Profile memory usage
/10x-fullstack-engineer:debug memory component:"background-processor" symptom:"growing-heap" duration:"1h"
# Step 3: Implement fix
/10x-fullstack-engineer:debug fix issue:"Memory leak in event handlers" root_cause:"Event listeners not removed"
```
### Workflow 4: Intermittent Failure
```bash
# Step 1: Reproduce reliably
/10x-fullstack-engineer:debug reproduce issue:"Random payment failures" environment:"staging"
# Step 2: Diagnose with reproduction
/10x-fullstack-engineer:debug diagnose issue:"Payment webhook fails intermittently" reproduction:"steps-from-reproduce"
# Step 3: Analyze timing
/10x-fullstack-engineer:debug analyze-logs path:"logs/webhooks.log" pattern:"payment.*fail" context:10
# Step 4: Fix race condition
/10x-fullstack-engineer:debug fix issue:"Race condition in webhook handler" root_cause:"Concurrent webhook processing"
```
---
## Integration with 10x-fullstack-engineer Agent
All debugging operations are designed to work with the **10x-fullstack-engineer** agent, which provides:
- **Cross-stack debugging expertise** - Systematic analysis across frontend, backend, database, and infrastructure
- **Systematic root cause analysis** - Hypothesis formation, testing, and evidence-based conclusions
- **Production-grade debugging strategies** - Safe, reliable approaches suitable for production environments
- **Performance and security awareness** - Considers performance impact and security implications
- **Prevention-focused mindset** - Not just fixing issues, but preventing future occurrences
The agent brings deep expertise in:
- Full-stack architecture patterns
- Performance optimization techniques
- Memory management and leak detection
- Database query optimization
- Distributed systems debugging
- Production safety and deployment strategies
---
## Debugging Best Practices
### 1. Start with Diagnosis
Always begin with `/debug diagnose` to understand the full scope of the issue before attempting fixes.
### 2. Reproduce Reliably
Use `/debug reproduce` to create reproducible test cases. A bug that can't be reliably reproduced is hard to fix and verify.
### 3. Analyze Logs Systematically
Use `/debug analyze-logs` to find patterns and correlations. Look for:
- Error frequency and distribution
- Timeline correlation with deployments
- Anomalies and spikes
- Request tracing across services
### 4. Profile Before Optimizing
Use `/debug performance` and `/debug memory` to identify actual bottlenecks. Don't optimize based on assumptions.
### 5. Fix with Verification
Use `/debug fix` which includes:
- Proper error handling
- Comprehensive testing
- Monitoring and alerts
- Documentation
### 6. Add Prevention Measures
Every fix should include:
- Regression tests
- Monitoring metrics
- Alerts on thresholds
- Documentation updates
---
## Output Documentation
Each operation generates comprehensive reports in markdown format:
- **Executive summaries** for stakeholders
- **Detailed technical analysis** for engineers
- **Code snippets** with before/after comparisons
- **Evidence and metrics** supporting conclusions
- **Actionable recommendations** with priorities
- **Next steps** with clear instructions
Reports include:
- Issue description and symptoms
- Analysis methodology and findings
- Root cause explanation with evidence
- Fixes implemented with code
- Verification results
- Prevention measures added
- Files modified and commits
- Monitoring and alerting setup
---
## Error Handling
All operations include robust error handling:
- **Insufficient information** - Lists what's needed and how to gather it
- **Cannot reproduce** - Suggests alternative debugging approaches
- **Fix verification fails** - Provides re-diagnosis steps
- **Optimization degrades performance** - Includes rollback procedures
- **Environment differences** - Helps bridge local vs production gaps
---
## Common Debugging Scenarios
### Database Performance Issues
1. Use `/debug performance` to establish baseline
2. Use `/debug analyze-logs` on database slow query logs
3. Identify missing indexes or inefficient queries
4. Use `/debug fix` to implement optimization
5. Verify with load testing
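Step 2 assumes a slow-query log exists; if it does not, it can usually be enabled first. A PostgreSQL sketch (assumes superuser access; the threshold is in milliseconds):
```bash
# Log every statement slower than 1 second, then reload the configuration
psql -c "ALTER SYSTEM SET log_min_duration_statement = '1000';"
psql -c "SELECT pg_reload_conf();"
```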
### Memory Leaks
1. Use `/debug diagnose` to identify symptoms
2. Use `/debug memory` to capture heap profiles
3. Identify leak patterns (event listeners, timers, caches)
4. Use `/debug fix` to implement cleanup
5. Verify with sustained load testing
### Intermittent Errors
1. Use `/debug analyze-logs` to find error patterns
2. Use `/debug reproduce` to create reliable reproduction
3. Use `/debug diagnose` with reproduction steps
4. Identify timing or concurrency issues
5. Use `/debug fix` to implement proper synchronization
### Production Incidents
1. Use `/debug diagnose` for rapid root cause analysis
2. Use `/debug analyze-logs` for recent time period
3. Implement immediate mitigation (rollback, circuit breaker)
4. Use `/debug reproduce` to prevent recurrence
5. Use `/debug fix` for permanent solution
### Performance Degradation
1. Use `/debug performance` to compare against baseline
2. Identify bottlenecks (CPU, I/O, memory, network)
3. Use `/debug analyze-logs` for slow operations
4. Implement targeted optimizations
5. Verify improvements with load testing
---
## Tips and Tricks
### Effective Log Analysis
- Use pattern matching to find related errors
- Look for request IDs to trace across services
- Check timestamps for correlation with deployments
- Compare error rates before and after changes
- Use context lines to understand error conditions
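As a quick sketch of the before/after comparison, count errors in the previously rotated log versus the current one (the paths are assumptions):
```bash
echo "before deploy: $(grep -c 'ERROR' logs/app.log.1) errors"
echo "after deploy:  $(grep -c 'ERROR' logs/app.log) errors"
```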
### Performance Profiling
- Profile production-like workloads
- Use realistic data sizes
- Test under sustained load, not just peak
- Profile both CPU and memory together
- Use flame graphs for visual analysis
### Memory Debugging
- Force GC between measurements for accuracy
- Take multiple heap snapshots over time
- Look for objects that never get collected
- Check for consistent growth, not just spikes
- Verify fixes with extended monitoring
### Reproduction Strategies
- Minimize reproduction to essential steps
- Control timing with explicit delays
- Use specific test data that triggers issue
- Document environment differences
- Aim for >80% reproduction reliability
---
## File Locations
```
plugins/10x-fullstack-engineer/commands/debug/
├── skill.md # Router/orchestrator
├── diagnose.md # Diagnosis operation
├── reproduce.md # Reproduction operation
├── fix.md # Fix implementation operation
├── analyze-logs.md # Log analysis operation
├── performance.md # Performance debugging operation
├── memory.md # Memory debugging operation
├── .scripts/
│ ├── analyze-logs.sh # Log analysis utility
│ ├── profile.sh # Performance profiling utility
│ └── memory-check.sh # Memory monitoring utility
└── README.md # This file
```
---
## Requirements
- **Node.js operations**: Node.js runtime with `--inspect` or `--prof` flags for profiling
- **Log analysis**: Standard Unix tools (awk, grep, sed), optional jq for JSON logs
- **Performance profiling**: Apache Bench (ab), k6, or Artillery for load testing
- **Memory profiling**: Chrome DevTools, clinic.js, or memwatch for Node.js
- **Database profiling**: Access to database query logs and EXPLAIN ANALYZE capability
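A quick way to verify the command-line prerequisites before running the utility scripts (a sketch; trim the tool list to the operations you plan to use):
```bash
for tool in awk grep sed jq bc ab; do
  command -v "$tool" >/dev/null 2>&1 || echo "missing: $tool"
done
```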
---
## Support and Troubleshooting
If operations fail:
1. Check that required parameters are provided
2. Verify file paths and permissions
3. Ensure utility scripts are executable (`chmod +x .scripts/*.sh`)
4. Check that prerequisite tools are installed
5. Review error messages for specific issues
For complex debugging scenarios:
- Start with `/debug diagnose` for systematic analysis
- Use multiple operations in sequence for comprehensive investigation
- Leverage the 10x-fullstack-engineer agent's expertise
- Document findings and share with team
---
## Version
Debug Skill v1.0.0
---
## License
Part of the 10x-fullstack-engineer plugin for Claude Code.

commands/debug/analyze-logs.md

@@ -0,0 +1,842 @@
# Analyze Logs Operation - Deep Log Analysis
You are executing the **analyze-logs** operation to perform deep log analysis with pattern detection, timeline correlation, and anomaly identification.
## Parameters
**Received**: `$ARGUMENTS` (after removing 'analyze-logs' operation name)
Expected format: `path:"log-file-path" [pattern:"regex-pattern"] [timeframe:"time-range"] [level:"error|warn|info"] [context:"lines-before-after"]`
## Workflow
### 1. Discover and Locate Logs
Identify all relevant log sources:
**Application Logs**:
```bash
# Common log locations
ls -lh /var/log/application/
ls -lh logs/
ls -lh ~/.pm2/logs/
# Find log files
find /var/log -name "*.log" -type f
find . -name "*.log" -mtime -1 # Modified in last 24 hours
# Check log rotation
ls -lh /var/log/application/*.log*
zcat /var/log/application/app.log.*.gz # Read rotated logs
```
**System Logs**:
```bash
# Systemd service logs
journalctl -u application.service --since "1 hour ago"
journalctl -u application.service --since "2024-10-14 14:00:00"
# Syslog
tail -f /var/log/syslog
tail -f /var/log/messages
# Kernel logs
dmesg -T
```
**Container Logs**:
```bash
# Docker
docker logs container-name --since 1h
docker logs container-name --timestamps
docker logs --tail 1000 container-name > container-logs.txt
# Kubernetes
kubectl logs pod-name -c container-name
kubectl logs pod-name --previous # Previous container
kubectl logs -l app=myapp --all-containers=true
```
**Web Server Logs**:
```bash
# Nginx
tail -f /var/log/nginx/access.log
tail -f /var/log/nginx/error.log
# Apache
tail -f /var/log/apache2/access.log
tail -f /var/log/apache2/error.log
```
**Database Logs**:
```bash
# PostgreSQL
tail -f /var/log/postgresql/postgresql-*.log
# MySQL
tail -f /var/log/mysql/error.log
tail -f /var/log/mysql/slow-query.log
# MongoDB
tail -f /var/log/mongodb/mongod.log
```
### 2. Filter and Extract Relevant Logs
Use the `.scripts/analyze-logs.sh` utility to extract relevant log entries:
**Basic Extraction**:
```bash
# Extract errors from last hour
./commands/debug/.scripts/analyze-logs.sh \
--file logs/application.log \
--level ERROR \
--since "1 hour ago"
# Extract with pattern matching
./commands/debug/.scripts/analyze-logs.sh \
--file logs/application.log \
--pattern "timeout|connection.*refused" \
--context 5
# Extract specific timeframe
./commands/debug/.scripts/analyze-logs.sh \
--file logs/application.log \
--start "2024-10-14 14:00:00" \
--end "2024-10-14 15:00:00"
```
**Manual Filtering**:
```bash
# Find errors with context
grep -i "error" logs/application.log -A 5 -B 5
# Find specific error patterns
grep -E "(timeout|refused|failed)" logs/application.log
# Find errors in timeframe
awk '/2024-10-14 14:/ && /ERROR/ {print}' logs/application.log
# Count errors by type
grep "ERROR" logs/application.log | awk '{print $5}' | sort | uniq -c | sort -rn
# Extract JSON logs with jq
cat logs/application.log | jq 'select(.level == "error")'
cat logs/application.log | jq 'select(.message | contains("timeout"))'
```
### 3. Pattern Detection
Identify patterns in log data:
#### Error Patterns
**Frequency Analysis**:
```bash
# Error frequency over time
grep "ERROR" logs/application.log | \
awk '{print $1, $2}' | \
cut -d: -f1 | \
uniq -c
# Most common errors
grep "ERROR" logs/application.log | \
awk -F'ERROR' '{print $2}' | \
sort | uniq -c | sort -rn | head -20
# Error rate calculation
total_lines=$(wc -l < logs/application.log)
error_lines=$(grep -c "ERROR" logs/application.log)
echo "Error rate: $(echo "scale=4; $error_lines / $total_lines * 100" | bc)%"
```
**Error Clustering**:
```python
# Group similar errors
import re
from collections import Counter
def normalize_error(error_msg):
# Remove numbers, IDs, timestamps
error_msg = re.sub(r'\d+', 'N', error_msg)
error_msg = re.sub(r'[a-f0-9-]{36}', 'UUID', error_msg)
error_msg = re.sub(r'\d{4}-\d{2}-\d{2}', 'DATE', error_msg)
return error_msg
errors = []
with open('logs/application.log') as f:
for line in f:
if 'ERROR' in line:
normalized = normalize_error(line)
errors.append(normalized)
# Count error types
error_counts = Counter(errors)
for error, count in error_counts.most_common(10):
print(f"{count}: {error}")
```
#### Request Patterns
**Request Analysis**:
```bash
# Requests per minute (the timestamp is field $4 in the default combined log format)
awk '{print $4}' /var/log/nginx/access.log | \
cut -d: -f1-3 | \
uniq -c
# Most requested endpoints
awk '{print $7}' /var/log/nginx/access.log | \
sort | uniq -c | sort -rn | head -20
# Response code distribution
awk '{print $9}' /var/log/nginx/access.log | \
sort | uniq -c | sort -rn
# Slow requests (>1 second) — assumes the log_format appends $request_time as field $10
awk '$10 > 1.0 {print $0}' /var/log/nginx/access.log
# Top user agents
awk -F'"' '{print $6}' /var/log/nginx/access.log | \
sort | uniq -c | sort -rn | head -10
```
#### Performance Patterns
**Response Time Analysis**:
```bash
# Average response time
awk '{sum+=$10; count++} END {print "Average:", sum/count}' \
/var/log/nginx/access.log
# Response time percentiles
awk '{print $10}' /var/log/nginx/access.log | \
sort -n | \
awk '{
times[NR] = $1
}
END {
print "P50:", times[int(NR*0.5)]
print "P95:", times[int(NR*0.95)]
print "P99:", times[int(NR*0.99)]
}'
# Average response time per hour (assumes $request_time is field $10)
awk '{split($4, t, ":"); hour=t[1]":"t[2]; sum[hour]+=$10; count[hour]++}
END {for (hour in sum) print hour, sum[hour]/count[hour]}' \
/var/log/nginx/access.log | sort
```
### 4. Timeline Analysis
Create timeline of events:
**Timeline Construction**:
```bash
# Merge multiple log sources by timestamp
sort -m -k1,2 \
logs/application.log \
logs/database.log \
logs/nginx.log \
> merged-timeline.log
# Extract timeline around specific event
event_time="2024-10-14 14:30:15"
grep "$event_time" logs/application.log -B 100 -A 100
# Timeline with multiple sources
for log in logs/*.log; do
echo "=== $(basename $log) ==="
grep "$event_time" "$log" -B 10 -A 10
echo ""
done
```
**Event Correlation**:
```python
# Correlate events across log sources
import re
from datetime import datetime, timedelta
def parse_log_line(line):
# Extract timestamp and message
match = re.match(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})', line)
if match:
timestamp = datetime.strptime(match.group(1), '%Y-%m-%d %H:%M:%S')
return timestamp, line
return None, None
# Load events from multiple logs
events = []
for log_file in ['app.log', 'db.log', 'nginx.log']:
with open(f'logs/{log_file}') as f:
for line in f:
timestamp, message = parse_log_line(line)
if timestamp:
events.append((timestamp, log_file, message))
# Sort by timestamp
events.sort(key=lambda x: x[0])
# Find events within time window
def find_related_events(target_time, window_seconds=10):
window = timedelta(seconds=window_seconds)
start_time = target_time - window
end_time = target_time + window
related = [
event for event in events
if start_time <= event[0] <= end_time
]
return related
# Analyze error event
error_time = datetime(2024, 10, 14, 14, 30, 15)
related = find_related_events(error_time)
for timestamp, source, message in related:
print(f"[{source}] {timestamp}: {message.strip()}")
```
### 5. Request Tracing
Trace individual requests across services:
**Request ID Tracing**:
```bash
# Extract request ID from error
error_line=$(grep "ERROR" logs/application.log | head -1)
request_id=$(echo "$error_line" | grep -oP 'request_id=\K[a-f0-9-]+')
echo "Tracing request: $request_id"
# Find all log entries for this request
grep "$request_id" logs/application.log
# Across multiple services
for log in logs/*.log; do
echo "=== $(basename $log) ==="
grep "$request_id" "$log"
done
# With timestamps for timeline
grep "$request_id" logs/*.log | sort -k1,2
```
**Distributed Tracing Correlation**:
```bash
# Extract trace ID from logs
trace_id=$(grep "ERROR" logs/application.log | \
head -1 | \
grep -oP 'trace_id=\K[a-f0-9]+')
# Query distributed tracing system
# Jaeger
curl "http://jaeger:16686/api/traces/$trace_id"
# Zipkin
curl "http://zipkin:9411/api/v2/trace/$trace_id"
```
### 6. Anomaly Detection
Identify unusual patterns:
**Statistical Anomalies**:
```python
import statistics
from collections import defaultdict
# Analyze error rates per hour
hourly_errors = defaultdict(int)
with open('logs/application.log') as f:
for line in f:
if 'ERROR' in line:
# Extract hour
hour = line[:13] # YYYY-MM-DD HH
hourly_errors[hour] += 1
# Calculate statistics
error_counts = list(hourly_errors.values())
mean = statistics.mean(error_counts)
stdev = statistics.stdev(error_counts)
# Find anomalies (>2 standard deviations)
print("Anomalous hours (>2 std dev from mean):")
for hour, count in sorted(hourly_errors.items()):
z_score = (count - mean) / stdev
if abs(z_score) > 2:
print(f"{hour}: {count} errors (z-score: {z_score:.2f})")
```
**New Error Types**:
```bash
# Compare today's errors with baseline
grep "ERROR" logs/application.log.1 | \
awk -F'ERROR' '{print $2}' | \
sort -u > baseline_errors.txt
grep "ERROR" logs/application.log | \
awk -F'ERROR' '{print $2}' | \
sort -u > current_errors.txt
# Find new error types
comm -13 baseline_errors.txt current_errors.txt > new_errors.txt
echo "New error types detected:"
cat new_errors.txt
```
**Spike Detection**:
```python
# Detect sudden spikes in error rate
from collections import deque
def detect_spikes(values, window_size=10, threshold=3):
"""Detect values that are >threshold times the rolling average"""
window = deque(maxlen=window_size)
spikes = []
for i, value in enumerate(values):
if len(window) == window_size:
avg = sum(window) / len(window)
if value > avg * threshold:
spikes.append((i, value, avg))
window.append(value)
return spikes
# Analyze minute-by-minute error counts
minute_errors = {} # {minute: error_count}
with open('logs/application.log') as f:
for line in f:
if 'ERROR' in line:
minute = line[:16] # YYYY-MM-DD HH:MM
minute_errors[minute] = minute_errors.get(minute, 0) + 1
# Detect spikes
error_counts = [minute_errors.get(m, 0) for m in sorted(minute_errors.keys())]
spikes = detect_spikes(error_counts, window_size=10, threshold=3)
print("Error spikes detected:")
for idx, value, avg in spikes:
print(f"Minute {idx}: {value} errors (avg was {avg:.1f})")
```
### 7. Performance Analysis
Analyze performance from logs:
**Slow Query Analysis**:
```bash
# PostgreSQL slow query log (the duration field position depends on log_line_prefix; $13 is assumed here)
cat /var/log/postgresql/postgresql.log | \
grep "duration:" | \
awk '{print $13, $0}' | \
sort -rn | \
head -20
# Extract slow queries
awk '/duration:/ && $13 > 1000 {print $0}' \
/var/log/postgresql/postgresql.log
```
**Endpoint Performance**:
```bash
# Average response time per endpoint
awk '{endpoint[$7] += $10; count[$7]++}
END {
for (e in endpoint) {
printf "%s: %.2fms\n", e, endpoint[e]/count[e]
}
}' /var/log/nginx/access.log | sort -t: -k2 -rn
# Slowest endpoints
awk '{print $10, $7}' /var/log/nginx/access.log | \
sort -rn | \
head -20
```
### 8. User Impact Analysis
Assess user-facing impact:
**Affected Users**:
```bash
# Extract unique users experiencing errors
grep "ERROR" logs/application.log | \
grep -oP 'user_id=\K[a-zA-Z0-9]+' | \
sort -u | \
wc -l
# Error rate by user
grep "ERROR" logs/application.log | \
grep -oP 'user_id=\K[a-zA-Z0-9]+' | \
sort | uniq -c | sort -rn | head -20
# Error rate per user (group by user_id instead of by full log line)
awk 'match($0, /user_id=[a-zA-Z0-9]+/) {
user = substr($0, RSTART+8, RLENGTH-8)
total[user]++
if (/ERROR/) errors[user]++
}
END {
for (user in total) {
printf "%s %.1f%%\n", user, errors[user]/total[user]*100
}
}' logs/application.log | sort -k2 -rn | head -20
```
**Failed Requests**:
```bash
# 5xx errors
grep " 5[0-9][0-9] " /var/log/nginx/access.log
# Failed endpoints
awk '$9 >= 500 {print $7}' /var/log/nginx/access.log | \
sort | uniq -c | sort -rn
# Failed request details
awk '$9 >= 500 {print $4, $7, $9, $10}' \
/var/log/nginx/access.log
```
### 9. Resource Usage from Logs
Extract resource usage patterns:
**Memory Usage**:
```bash
# Extract memory logs
grep -i "memory\|heap\|oom" logs/application.log
# Parse memory usage
grep "heap_used" logs/application.log | \
awk '{print $1, $2, $NF}' | \
sed 's/MB$//'
```
**Connection Pool**:
```bash
# Database connection logs
grep "connection" logs/application.log | \
grep -oP 'pool_size=\K\d+|active=\K\d+|idle=\K\d+'
# Connection exhaustion
grep "connection.*timeout\|pool.*exhausted" logs/application.log -A 5
```
### 10. Security Analysis
Look for security-related issues:
**Authentication Failures**:
```bash
# Failed login attempts
grep -i "authentication.*failed\|login.*failed" logs/application.log
# By IP address
grep "authentication.*failed" logs/application.log | \
grep -oP 'ip=\K[\d.]+' | \
sort | uniq -c | sort -rn
# Brute force detection
grep "authentication.*failed" logs/application.log | \
grep -oP 'ip=\K[\d.]+' | \
uniq -c | \
awk '$1 > 10 {print $2, $1 " attempts"}'
```
**Suspicious Patterns**:
```bash
# SQL injection attempts
grep -iE "union.*select|drop.*table|; --" /var/log/nginx/access.log
# Path traversal attempts
grep -E "\.\./|\.\.%2F" /var/log/nginx/access.log
# XSS attempts
grep -iE "<script|javascript:|onerror=" /var/log/nginx/access.log
# Command injection attempts
grep -E ";\s*(cat|ls|wget|curl)" /var/log/nginx/access.log
```
## Output Format
```markdown
# Log Analysis Report: [Issue/Time Period]
## Summary
[High-level summary of findings]
## Analysis Period
- **Start**: [start timestamp]
- **End**: [end timestamp]
- **Duration**: [duration]
- **Log Sources**: [list of logs analyzed]
- **Total Lines**: [number of log lines]
## Key Findings
### Error Analysis
- **Total Errors**: [count]
- **Error Rate**: [percentage]%
- **Error Types**: [number of unique error types]
- **Most Common Error**: [error type] ([count] occurrences)
### Top Errors
1. **[Error Type 1]** - [count] occurrences
\`\`\`
[sample log line]
\`\`\`
- First seen: [timestamp]
- Last seen: [timestamp]
- Peak: [timestamp with highest frequency]
2. **[Error Type 2]** - [count] occurrences
\`\`\`
[sample log line]
\`\`\`
- [similar details]
### Patterns Detected
#### Pattern 1: [Pattern Name]
- **Description**: [what pattern is]
- **Frequency**: [how often it occurs]
- **Impact**: [user/system impact]
- **Example**:
\`\`\`
[log excerpt showing pattern]
\`\`\`
#### Pattern 2: [Pattern Name]
[similar structure]
## Timeline Analysis
### Critical Events Timeline
\`\`\`
14:25:30 [APP] Normal operation, avg response time 50ms
14:28:45 [APP] Response time increasing to 150ms
14:29:10 [DB] Connection pool usage at 90%
14:29:30 [APP] First timeout errors appear
14:29:45 [DB] Connection pool exhausted
14:30:00 [APP] Error rate spikes to 25%
14:30:15 [APP] Circuit breaker opens
14:30:30 [OPS] Auto-scaling triggers
14:32:00 [APP] New instances online
14:33:00 [APP] Error rate decreases to 5%
14:35:00 [APP] Full recovery, normal operation
\`\`\`
### Event Correlation
**Root Event**: Database connection pool exhaustion at 14:29:45
**Contributing Factors**:
- High traffic spike (+300% at 14:28:00)
- Long-running queries (>5s queries detected)
- Insufficient connection pool size (max: 20)
**Cascading Effects**:
- API timeouts (starting 14:29:30)
- Cache misses due to timeouts
- Increased load from retries
- Circuit breaker activation
## Request Tracing
### Example Failed Request
**Request ID**: req_abc123def456
**Timeline**:
\`\`\`
14:30:15.123 [NGINX] Request received: POST /api/orders
14:30:15.125 [APP] Request processing started
14:30:15.130 [APP] Database query started: SELECT orders...
14:30:20.131 [DB] Query timeout after 5s
14:30:20.135 [APP] Error: Database timeout
14:30:20.137 [APP] Response: 500 Internal Server Error
14:30:20.140 [NGINX] Response sent (5017ms)
\`\`\`
**User Impact**: Order creation failed for user_123
## Anomalies Detected
### Anomaly 1: Error Rate Spike
- **Time**: 14:30:00 - 14:35:00
- **Severity**: High
- **Details**: Error rate jumped from 0.1% to 25%
- **Affected Users**: ~500 users
- **Root Cause**: Database connection pool exhaustion
### Anomaly 2: New Error Type
- **Error**: "ConnectionPoolExhausted"
- **First Seen**: 14:29:45
- **Frequency**: 1,234 occurrences in 5 minutes
- **Status**: Previously unseen in baseline
## Performance Analysis
### Response Time Statistics
- **Average**: 150ms (baseline: 50ms)
- **P50**: 80ms
- **P95**: 500ms
- **P99**: 2000ms
- **Max**: 5000ms
### Slowest Endpoints
1. `/api/orders` - avg 450ms (1,200 requests)
2. `/api/users/profile` - avg 380ms (800 requests)
3. `/api/reports` - avg 320ms (200 requests)
### Database Performance
- **Slow Queries**: 45 queries >1s
- **Slowest Query**: 5.2s (SELECT with missing index)
- **Average Query Time**: 85ms (baseline: 25ms)
## User Impact
### Affected Users
- **Total Affected**: ~500 users
- **Error Rate by User Type**:
- Premium users: 5% error rate
- Free users: 30% error rate
- **Most Affected User**: user_789 (25 errors)
### Failed Operations
- **Order Creation**: 234 failures
- **Payment Processing**: 89 failures
- **Profile Updates**: 45 failures
## Resource Analysis
### Connection Pool
- **Max Size**: 20 connections
- **Peak Usage**: 20/20 (100%)
- **Average Wait Time**: 2.5s
- **Recommendation**: Increase to 50 connections
### Memory Usage
- **Average**: 450MB
- **Peak**: 890MB
- **Trend**: Stable (no leak detected)
## Security Findings
### Authentication
- **Failed Logins**: 12
- **Suspicious IPs**: 2 IPs with >5 failed attempts
- **Brute Force Attempts**: None detected
### Attack Patterns
- **SQL Injection Attempts**: 0
- **XSS Attempts**: 0
- **Path Traversal**: 0
## Root Cause Analysis
Based on log analysis:
**Primary Cause**: Database connection pool too small for traffic volume
**Contributing Factors**:
1. Traffic spike (+300%)
2. Slow queries consuming connections
3. No connection timeout configured
**Evidence**:
- Connection pool exhausted at 14:29:45
- Immediate correlation with error spike
- Recovery after auto-scaling added capacity
## Recommendations
### Immediate Actions
1. Increase database connection pool to 50
2. Add connection timeout (30s)
3. Optimize slow queries identified
### Monitoring Improvements
1. Alert on connection pool usage >80%
2. Track query duration P95
3. Monitor error rate per endpoint
### Code Changes
1. Add query timeouts to all database calls
2. Implement connection retry logic
3. Add circuit breaker for database calls
## Next Steps
1. **Fix**: Use `/debug fix` to implement connection pool increase
2. **Performance**: Use `/debug performance` to optimize slow queries
3. **Monitoring**: Add alerts for connection pool usage
## Appendices
### A. Full Error Log Excerpt
\`\`\`
[Relevant log excerpts]
\`\`\`
### B. Query Performance Data
\`\`\`sql
[Slow query details]
\`\`\`
### C. Traffic Pattern Graph
\`\`\`
[ASCII graph or description of traffic pattern]
\`\`\`
```
## Error Handling
**Logs Not Found**:
If specified log files don't exist:
1. List available log files
2. Suggest alternative log locations
3. Provide commands to locate logs (see the sketch below)
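A minimal sketch of such commands, assuming common locations like a local `logs/` directory and `/var/log` (adjust paths and service names for the actual deployment):
```bash
# List log files in the usual locations
ls -lh logs/ /var/log/*.log 2>/dev/null
# Find recently modified, non-empty log files anywhere on the filesystem
find / -name "*.log" -mmin -60 -size +0 2>/dev/null | head -20
# For systemd-managed services, logs may only exist in the journal
journalctl -u application-service.service --since "1 hour ago" | head -20
```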
**Logs Too Large**:
If logs are too large to analyze:
1. Focus on most recent data
2. Use sampling techniques
3. Analyze specific time windows (see the sketch below)
4. Suggest log aggregation tools
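A rough sketch of the first three tactics, assuming timestamps in `YYYY-MM-DD HH:MM:SS` form at the start of each line:
```bash
# 1. Focus on the most recent data
tail -n 100000 logs/application.log > /tmp/recent.log
# 2. Sample every 100th line for a representative subset
awk 'NR % 100 == 0' logs/application.log > /tmp/sampled.log
# 3. Analyze a specific time window
awk '$1" "$2 >= "2024-10-14 14:00:00" && $1" "$2 <= "2024-10-14 15:00:00"' logs/application.log
```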
**Insufficient Context**:
If logs lack necessary information:
1. Document what information is missing
2. Suggest additional logging
3. Recommend structured logging format (sketched below)
4. Propose log enrichment strategies
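To illustrate the structured-logging recommendation, a hypothetical JSON log line and a `jq` filter over it (the field names are examples, not the application's actual schema):
```bash
# Example of a structured (JSON-per-line) log entry the application could emit
echo '{"ts":"2024-10-14T14:30:15Z","level":"ERROR","request_id":"req_abc123","msg":"Database timeout","duration_ms":5008}' >> logs/structured.log
# Structured entries can then be filtered precisely instead of grepping free text
jq -c 'select(.level == "ERROR" and .duration_ms > 1000)' logs/structured.log
```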
## Integration with Other Operations
- **Before**: Use `/debug diagnose` to identify time period to analyze
- **After**: Use `/debug fix` to address issues found in logs
- **Related**: Use `/debug performance` for performance issues
- **Related**: Use `/debug reproduce` to recreate issues found in logs
## Agent Utilization
This operation leverages the **10x-fullstack-engineer** agent for:
- Pattern recognition across large log volumes
- Correlating events across multiple log sources
- Statistical analysis and anomaly detection
- Root cause inference from log patterns
- Actionable recommendations based on findings

commands/debug/diagnose.md Normal file
@@ -0,0 +1,759 @@
# Diagnose Operation - Comprehensive Diagnosis and Root Cause Analysis
You are executing the **diagnose** operation to perform comprehensive diagnosis and root cause analysis for complex issues spanning multiple layers of the application stack.
## Parameters
**Received**: `$ARGUMENTS` (after removing 'diagnose' operation name)
Expected format: `issue:"problem description" [environment:"prod|staging|dev"] [logs:"log-location"] [reproduction:"steps"] [impact:"severity"]`
## Workflow
### 1. Issue Understanding
Gather and analyze comprehensive information about the issue:
**Information to Collect**:
- **Symptom**: What is the observable problem? What exactly is failing?
- **Impact**: Who is affected? How many users? Business impact?
- **Frequency**: Consistent, intermittent, or rare? Percentage of occurrences?
- **Environment**: Production, staging, or development? Specific regions/zones?
- **Timeline**: When did it start? Any correlation with deployments?
- **Recent Changes**: Deployments, config changes, infrastructure changes?
- **Error Messages**: Complete error messages, stack traces, error codes
**Questions to Answer**:
```markdown
- What is the user experiencing?
- What should be happening instead?
- How widespread is the issue?
- Is it getting worse over time?
- Are there any patterns (time of day, user types, specific actions)?
```
### 2. Data Collection Across All Layers
Systematically collect diagnostic data from each layer of the stack:
#### Frontend Diagnostics
**Browser Console Analysis**:
```javascript
// Check for JavaScript errors: review console.error and console.warn output in DevTools
// Inspect unhandled promise rejections
window.addEventListener('unhandledrejection', event => {
console.error('Unhandled promise rejection:', event.reason);
});
// Check for resource loading failures
performance.getEntriesByType('resource').filter(r => r.transferSize === 0)
```
**Network Request Analysis**:
```javascript
// Analyze failed requests
// Open DevTools > Network tab
// Filter: Status code 4xx, 5xx
// Check: Request headers, payload, response body, timing
// Performance timing
const perfEntries = performance.getEntriesByType('navigation')[0];
console.log('DNS lookup:', perfEntries.domainLookupEnd - perfEntries.domainLookupStart);
console.log('TCP connection:', perfEntries.connectEnd - perfEntries.connectStart);
console.log('Request time:', perfEntries.responseStart - perfEntries.requestStart);
console.log('Response time:', perfEntries.responseEnd - perfEntries.responseStart);
```
**State Inspection**:
```javascript
// React DevTools: Component state at error time
// Redux DevTools: Action history, state snapshots
// Vue DevTools: Vuex state, component hierarchy
// Add error boundary to capture React errors
class ErrorBoundary extends React.Component {
componentDidCatch(error, errorInfo) {
console.error('Component error:', {
error: error.toString(),
componentStack: errorInfo.componentStack,
currentState: this.props.reduxState
});
}
}
```
#### Backend Diagnostics
**Application Logs**:
```bash
# Real-time application logs
tail -f logs/application.log
# Error logs with context
grep -i "error\|exception\|fatal" logs/*.log -A 10 -B 5
# Filter by request ID to trace single request
grep "request-id-12345" logs/*.log
# Find patterns in errors
awk '/ERROR/ {print $0}' logs/application.log | sort | uniq -c | sort -rn
# Time-based analysis
grep "2024-10-14 14:" logs/application.log | grep ERROR
```
**System Logs**:
```bash
# Service logs (systemd)
journalctl -u application-service.service -f
journalctl -u application-service.service --since "1 hour ago"
# Syslog
tail -f /var/log/syslog | grep application
# Kernel logs (for system-level issues)
dmesg -T | tail -50
```
**Application Metrics**:
```bash
# Request rate and response times
# Check APM tools: New Relic, Datadog, Elastic APM
# HTTP response codes over time
awk '{print $9}' /var/log/nginx/access.log | sort | uniq -c
# Slow requests (assumes request duration is logged as the 10th field)
awk '$10 > 1000 {print $0}' /var/log/nginx/access.log
# Error rate calculation
errors=$(grep -c "ERROR" logs/application.log)
total=$(wc -l < logs/application.log)
echo "Error rate: $(echo "scale=4; $errors / $total * 100" | bc)%"
```
#### Database Diagnostics
**Active Queries and Locks**:
```sql
-- PostgreSQL: Active queries
SELECT
pid,
now() - query_start AS duration,
state,
query
FROM pg_stat_activity
WHERE state != 'idle'
ORDER BY duration DESC;
-- Long-running queries
SELECT
pid,
now() - query_start AS duration,
query
FROM pg_stat_activity
WHERE state = 'active'
AND now() - query_start > interval '1 minute';
-- Blocking queries
SELECT
blocked_locks.pid AS blocked_pid,
blocked_activity.usename AS blocked_user,
blocking_locks.pid AS blocking_pid,
blocking_activity.usename AS blocking_user,
blocked_activity.query AS blocked_statement,
blocking_activity.query AS blocking_statement
FROM pg_catalog.pg_locks blocked_locks
JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid
JOIN pg_catalog.pg_locks blocking_locks
ON blocking_locks.locktype = blocked_locks.locktype
AND blocking_locks.database IS NOT DISTINCT FROM blocked_locks.database
AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation
AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page
AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple
AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid
AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid
AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid
AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid
AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid
AND blocking_locks.pid != blocked_locks.pid
JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid
WHERE NOT blocked_locks.granted;
-- Deadlock information (from logs)
-- Look for "deadlock detected" in PostgreSQL logs
```
**Database Performance**:
```sql
-- Table statistics
SELECT
schemaname,
tablename,
n_live_tup AS live_rows,
n_dead_tup AS dead_rows,
last_vacuum,
last_autovacuum
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC;
-- Index usage
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC;
-- Connection count
SELECT
count(*) AS connections,
state,
usename
FROM pg_stat_activity
GROUP BY state, usename;
-- Cache hit ratio
SELECT
sum(heap_blks_read) AS heap_read,
sum(heap_blks_hit) AS heap_hit,
sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) AS cache_hit_ratio
FROM pg_statio_user_tables;
```
**Slow Query Log Analysis**:
```bash
# PostgreSQL: Enable log_min_duration_statement
# Check postgresql.conf: log_min_duration_statement = 1000 (1 second)
# Analyze slow queries
grep "duration:" /var/log/postgresql/postgresql.log | awk '{print $3, $6}' | sort -rn | head -20
```
#### Infrastructure Diagnostics
**Resource Usage**:
```bash
# CPU usage
top -bn1 | head -20
mpstat 1 5 # CPU stats every 1 second, 5 times
# Memory usage
free -h
vmstat 1 5
# Disk I/O
iostat -x 1 5
iotop -o # Only show processes doing I/O
# Disk space
df -h
du -sh /* | sort -rh | head -10
# Network connections
netstat -an | grep ESTABLISHED | wc -l
ss -s # Socket statistics
# Open files
lsof | wc -l
lsof -u application-user | wc -l
```
**Container Diagnostics (Docker/Kubernetes)**:
```bash
# Docker container logs
docker logs container-name --tail 100 -f
docker stats container-name
# Docker container inspection
docker inspect container-name
docker exec container-name ps aux
docker exec container-name df -h
# Kubernetes pod logs
kubectl logs pod-name -f
kubectl logs pod-name --previous # Previous container logs
# Kubernetes pod resource usage
kubectl top pods
kubectl describe pod pod-name
# Kubernetes events
kubectl get events --sort-by='.lastTimestamp'
```
**Cloud Provider Metrics**:
```bash
# AWS CloudWatch
aws cloudwatch get-metric-statistics \
--namespace AWS/EC2 \
--metric-name CPUUtilization \
--dimensions Name=InstanceId,Value=i-1234567890abcdef0 \
--start-time 2024-10-14T00:00:00Z \
--end-time 2024-10-14T23:59:59Z \
--period 3600 \
--statistics Average
# Check application logs
aws logs tail /aws/application/logs --follow
# GCP Stackdriver
gcloud logging read "resource.type=gce_instance AND severity>=ERROR" --limit 50
# Azure Monitor
az monitor metrics list --resource <resource-id> --metric "Percentage CPU"
```
### 3. Hypothesis Formation
Based on collected data, form testable hypotheses about the root cause:
**Common Issue Patterns to Consider**:
#### Race Conditions
**Symptoms**:
- Intermittent failures
- Works sometimes, fails other times
- Timing-dependent behavior
- "Cannot read property of undefined" on objects that should exist
**What to Check**:
```javascript
// Look for async operations without proper waiting
async function problematic() {
let data;
fetchData().then(result => data = result); // ❌ Race condition
return processData(data); // Runs before data is assigned, so data is undefined
}
// Proper async/await
async function correct() {
const data = await fetchData(); // ✅ Wait for data
return processData(data);
}
// Multiple parallel operations
Promise.all([op1(), op2(), op3()]) // Check for interdependencies
```
#### Memory Leaks
**Symptoms**:
- Degrading performance over time
- Increasing memory usage
- Eventually crashes with OOM errors
- Slow garbage collection
**What to Check**:
```javascript
// Event listeners not removed
componentDidMount() {
window.addEventListener('resize', this.handleResize);
// ❌ Missing removeEventListener in componentWillUnmount
}
// Closures holding references
function createLeak() {
const largeData = new Array(1000000);
return () => console.log(largeData[0]); // Holds entire array
}
// Timers not cleared
setInterval(() => fetchData(), 1000); // ❌ Never cleared
// Cache without eviction
const cache = {};
cache[key] = value; // ❌ Grows indefinitely
```
#### Database Issues
**Symptoms**:
- Slow queries
- Timeouts
- Deadlocks
- Connection pool exhausted
**What to Check**:
```sql
-- Missing indexes
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user@example.com';
-- Look for "Seq Scan" on large tables
-- N+1 queries
-- Check if ORM is making one query per item in a loop
-- Long transactions
-- Find transactions open for extended periods
-- Lock contention
-- Check for blocking queries and deadlocks
```
#### Network Issues
**Symptoms**:
- Timeouts
- Intermittent connectivity
- DNS resolution failures
- SSL/TLS handshake errors
**What to Check**:
```bash
# DNS resolution
dig api.example.com
nslookup api.example.com
# Network latency
ping api.example.com
traceroute api.example.com
# TCP connection
telnet api.example.com 443
nc -zv api.example.com 443
# SSL/TLS verification
openssl s_client -connect api.example.com:443 -servername api.example.com
```
#### Authentication/Authorization
**Symptoms**:
- 401 Unauthorized errors
- 403 Forbidden errors
- Intermittent authentication failures
- Session expired errors
**What to Check**:
```javascript
// Token expiration
const token = jwt.decode(authToken);
console.log('Token expires:', new Date(token.exp * 1000));
// Session state
console.log('Session:', sessionStorage, localStorage);
// Cookie issues
console.log('Cookies:', document.cookie);
// CORS issues (browser console)
// Look for: "CORS policy: No 'Access-Control-Allow-Origin' header"
```
#### Configuration Issues
**Symptoms**:
- Works locally, fails in environment
- "Environment variable not set" errors
- Connection refused errors
- Permission denied errors
**What to Check**:
```bash
# Environment variables
printenv | grep APPLICATION
env | sort
# Configuration files
cat config/production.json
diff config/development.json config/production.json
# File permissions
ls -la config/
ls -la /var/application/
# Network configuration
cat /etc/hosts
cat /etc/resolv.conf
```
### 4. Hypothesis Testing
Systematically test each hypothesis:
**Testing Strategy**:
1. **Isolation**: Test each component in isolation
2. **Instrumentation**: Add detailed logging around suspected areas
3. **Reproduction**: Create minimal reproduction case
4. **Elimination**: Rule out hypotheses systematically
**Add Diagnostic Instrumentation**:
```javascript
// Detailed logging with context
console.log('[DIAG] Before operation:', {
timestamp: new Date().toISOString(),
user: currentUser,
state: JSON.stringify(currentState),
params: params
});
try {
const result = await operation(params);
console.log('[DIAG] Operation success:', {
timestamp: new Date().toISOString(),
result: result,
duration: Date.now() - startTime
});
} catch (error) {
console.error('[DIAG] Operation failed:', {
timestamp: new Date().toISOString(),
error: error.message,
stack: error.stack,
context: { user, state, params }
});
throw error;
}
// Performance timing
console.time('operation');
await operation();
console.timeEnd('operation');
// Memory usage tracking
// global.gc is only available when Node is started with --expose-gc
if (global.gc) {
global.gc();
const usage = process.memoryUsage();
console.log('[MEMORY]', {
heapUsed: Math.round(usage.heapUsed / 1024 / 1024) + 'MB',
heapTotal: Math.round(usage.heapTotal / 1024 / 1024) + 'MB',
external: Math.round(usage.external / 1024 / 1024) + 'MB'
});
}
```
**Binary Search Debugging**:
```javascript
// Comment out half the code
// Determine which half has the bug
// Repeat until isolated
// Example: Large function with error
function complexOperation() {
// Part 1: Data fetching
const data = fetchData();
// Part 2: Data processing
const processed = processData(data);
// Part 3: Data validation
const validated = validateData(processed);
// Part 4: Data saving
return saveData(validated);
}
// Test each part independently
const data = fetchData();
console.log('[TEST] Data fetched:', data); // ✅ Works
const processed = processData(testData);
console.log('[TEST] Data processed:', processed); // ❌ Fails here
// Now investigate processData() specifically
```
### 5. Root Cause Identification
Once hypotheses are tested and narrowed down:
**Confirm Root Cause**:
1. Can you consistently reproduce the issue?
2. Does fixing this cause resolve the symptom?
3. Are there other instances of the same issue?
4. Does the fix have any side effects?
**Document Evidence**:
- Specific code/config that causes the issue
- Exact conditions required for issue to manifest
- Why this causes the observed symptom
- Related code that might have same issue
### 6. Impact Assessment
Evaluate the full impact:
**User Impact**:
- Number of users affected
- Severity of impact (blocking, degraded, minor)
- User actions affected
- Business metrics impacted
**System Impact**:
- Performance degradation
- Resource consumption
- Downstream service effects
- Data integrity concerns
**Risk Assessment**:
- Can it cause data loss?
- Can it cause security issues?
- Can it cause cascading failures?
- Is it getting worse over time?
## Output Format
```markdown
# Diagnosis Report: [Issue Summary]
## Executive Summary
[One-paragraph summary of issue, root cause, and recommended action]
## Issue Description
### Symptoms
- [Observable symptom 1]
- [Observable symptom 2]
- [Observable symptom 3]
### Impact
- **Affected Users**: [number/percentage of users]
- **Severity**: [critical|high|medium|low]
- **Frequency**: [always|often|sometimes|rarely - with percentage]
- **Business Impact**: [revenue loss, user experience, etc.]
### Environment
- **Environment**: [production|staging|development]
- **Version**: [application version]
- **Infrastructure**: [relevant infrastructure details]
- **Region**: [if applicable]
### Timeline
- **First Observed**: [date/time]
- **Recent Changes**: [deployments, config changes]
- **Pattern**: [time-based, load-based, user-based]
## Diagnostic Data Collected
### Frontend Analysis
[Console errors, network requests, performance data, state inspection results]
### Backend Analysis
[Application logs, error traces, system metrics, request patterns]
### Database Analysis
[Query logs, lock information, performance metrics, connection pool status]
### Infrastructure Analysis
[Resource usage, container logs, cloud metrics, network diagnostics]
## Hypothesis Analysis
### Hypotheses Considered
1. **[Hypothesis 1]**: [Description]
- **Evidence For**: [supporting evidence]
- **Evidence Against**: [contradicting evidence]
- **Conclusion**: [Ruled out|Confirmed|Needs more investigation]
2. **[Hypothesis 2]**: [Description]
- **Evidence For**: [supporting evidence]
- **Evidence Against**: [contradicting evidence]
- **Conclusion**: [Ruled out|Confirmed|Needs more investigation]
3. **[Hypothesis 3]**: [Description]
- **Evidence For**: [supporting evidence]
- **Evidence Against**: [contradicting evidence]
- **Conclusion**: [Ruled out|Confirmed|Needs more investigation]
## Root Cause
### Root Cause Identified
[Detailed explanation of the root cause with specific code/config references]
### Why It Causes the Symptom
[Technical explanation of how the root cause leads to the observed behavior]
### Why It Wasn't Caught Earlier
[Explanation of why tests/monitoring didn't catch this]
### Related Issues
[Any similar issues that might exist or could be fixed with similar approach]
## Evidence
### Code/Configuration
\`\`\`[language]
[Specific code or configuration causing the issue]
\`\`\`
### Reproduction
[Exact steps to reproduce the issue consistently]
### Verification
[Steps taken to confirm this is the root cause]
## Recommended Actions
### Immediate Actions
1. [Immediate action 1 - e.g., rollback, circuit breaker]
2. [Immediate action 2]
### Permanent Fix
[Description of the permanent fix needed]
### Prevention
- **Monitoring**: [What monitoring to add]
- **Testing**: [What tests to add]
- **Code Review**: [What to look for in code reviews]
- **Documentation**: [What to document]
## Next Steps
1. **Fix Implementation**: [Use /debug fix operation]
2. **Verification**: [Testing strategy]
3. **Deployment**: [Rollout plan]
4. **Monitoring**: [What to watch]
## Appendices
### A. Detailed Logs
[Relevant log excerpts with context]
### B. Metrics and Graphs
[Performance metrics, error rates, resource usage]
### C. Related Tickets
[Links to related issues or tickets]
```
## Error Handling
**Insufficient Information**:
If diagnosis cannot be completed due to missing information:
1. List specific information needed
2. Explain why each piece is important
3. Provide instructions for gathering data
4. Suggest interim monitoring (see the sketch below)
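As one example of interim monitoring, a sketch that assumes errors are written to `logs/application.log` with the word ERROR:
```bash
# Print a cumulative error count once per minute so the trend stays visible while data is gathered
while true; do
  echo "$(date '+%Y-%m-%d %H:%M:%S') total_errors=$(grep -c ERROR logs/application.log)"
  sleep 60
done
```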
**Cannot Reproduce**:
If issue cannot be reproduced:
1. Document reproduction attempts
2. Request more detailed reproduction steps
3. Suggest environment comparison (see the sketch below)
4. Propose production debugging approach
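A minimal environment-comparison sketch, assuming per-environment config files and captured environment variable dumps at these example paths:
```bash
# Compare environment-specific configuration
diff config/development.json config/production.json
# Compare environment variables captured from each environment
diff <(sort dev.env) <(sort prod.env)
# Compare installed dependency versions against a snapshot taken in production
diff <(npm ls --depth=0 2>/dev/null) prod-npm-ls.txt
```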
**Multiple Root Causes**:
If multiple root causes are identified:
1. Prioritize by impact
2. Explain interdependencies
3. Provide fix sequence
4. Suggest monitoring between fixes
## Integration with Other Operations
After diagnosis is complete:
- **For fixes**: Use `/debug fix` with identified root cause
- **For reproduction**: Use `/debug reproduce` to create reliable test case
- **For log analysis**: Use `/debug analyze-logs` for deeper log investigation
- **For performance**: Use `/debug performance` if performance-related
- **For memory**: Use `/debug memory` if memory-related
## Agent Utilization
This operation leverages the **10x-fullstack-engineer** agent for:
- Systematic cross-layer analysis
- Pattern recognition across stack
- Hypothesis formation and testing
- Production debugging expertise
- Prevention-focused thinking

commands/debug/fix.md Normal file
@@ -0,0 +1,967 @@
# Fix Operation - Targeted Fix Implementation
You are executing the **fix** operation to implement targeted fixes with comprehensive verification and prevention measures.
## Parameters
**Received**: `$ARGUMENTS` (after removing 'fix' operation name)
Expected format: `issue:"problem description" root_cause:"identified-cause" [verification:"test-strategy"] [scope:"affected-areas"] [rollback:"rollback-plan"]`
## Workflow
### 1. Understand the Fix Requirements
Clarify what needs to be fixed and constraints:
**Key Information**:
- **Root Cause**: Exact cause to address (from diagnosis)
- **Scope**: What code/config/infrastructure needs changing
- **Constraints**: Performance, backwards compatibility, security
- **Verification**: How to verify the fix works
- **Rollback**: Plan if fix causes problems
**Fix Strategy Questions**:
```markdown
- Is this a code fix, configuration fix, or infrastructure fix?
- Are there multiple ways to fix this? Which is best?
- What are the side effects of the fix?
- Can we fix just the symptom or must we fix the root cause?
- Is there existing code doing this correctly we can learn from?
- What is the blast radius if the fix goes wrong?
```
### 2. Design the Fix
Plan the implementation approach:
#### Fix Pattern Selection
**Code Fix Patterns**:
**1. Add Missing Error Handling**
```javascript
// Before (causes crashes)
async function processPayment(orderId) {
const order = await db.orders.findById(orderId);
return await paymentGateway.charge(order.amount);
}
// After (handles errors properly)
async function processPayment(orderId) {
try {
const order = await db.orders.findById(orderId);
if (!order) {
throw new Error(`Order ${orderId} not found`);
}
if (order.status !== 'pending') {
throw new Error(`Order ${orderId} is not in pending status`);
}
const result = await paymentGateway.charge(order.amount);
if (!result.success) {
throw new Error(`Payment failed: ${result.error}`);
}
return result;
} catch (error) {
logger.error('Payment processing failed', {
orderId,
error: error.message,
stack: error.stack
});
throw new PaymentError(`Failed to process payment for order ${orderId}`, error);
}
}
```
**2. Fix Race Condition**
```javascript
// Before (race condition)
let cache = null;
async function getData() {
if (!cache) {
cache = await fetchFromDatabase(); // Multiple concurrent calls
}
return cache;
}
// After (properly synchronized)
let cache = null;
let cachePromise = null;
async function getData() {
if (!cache) {
if (!cachePromise) {
cachePromise = fetchFromDatabase();
}
cache = await cachePromise;
cachePromise = null;
}
return cache;
}
// Or use a proper caching library
const promiseMemoize = require('promise-memoize');
const getData = promiseMemoize(async () => {
return await fetchFromDatabase();
}, { maxAge: 60000 });
```
**3. Fix Memory Leak**
```javascript
// Before (memory leak)
class Component extends React.Component {
componentDidMount() {
window.addEventListener('resize', this.handleResize);
this.interval = setInterval(this.fetchData, 5000);
}
// componentWillUnmount missing - listeners never removed
}
// After (properly cleaned up)
class Component extends React.Component {
componentDidMount() {
window.addEventListener('resize', this.handleResize);
this.interval = setInterval(this.fetchData, 5000);
}
componentWillUnmount() {
window.removeEventListener('resize', this.handleResize);
clearInterval(this.interval);
}
}
```
**4. Add Missing Validation**
```javascript
// Before (no validation)
app.post('/api/users', async (req, res) => {
const user = await db.users.create(req.body);
res.json(user);
});
// After (proper validation)
const { body, validationResult } = require('express-validator');
app.post('/api/users',
// Validation middleware
body('email').isEmail().normalizeEmail(),
body('password').isLength({ min: 8 }).matches(/[A-Z]/).matches(/[0-9]/),
body('age').optional().isInt({ min: 0, max: 150 }),
async (req, res) => {
// Check validation results
const errors = validationResult(req);
if (!errors.isEmpty()) {
return res.status(400).json({ errors: errors.array() });
}
try {
const user = await db.users.create({
email: req.body.email,
password: await hashPassword(req.body.password),
age: req.body.age
});
res.json(user);
} catch (error) {
logger.error('User creation failed', error);
res.status(500).json({ error: 'Failed to create user' });
}
}
);
```
**5. Fix N+1 Query Problem**
```javascript
// Before (N+1 queries)
async function getUsersWithOrders() {
const users = await db.users.findAll();
for (const user of users) {
user.orders = await db.orders.findByUserId(user.id); // N queries
}
return users;
}
// After (single query with join)
async function getUsersWithOrders() {
const users = await db.users.findAll({
include: [
{ model: db.orders, as: 'orders' }
]
});
return users;
}
// Or with eager loading
async function getUsersWithOrders() {
const users = await db.users.findAll();
const userIds = users.map(u => u.id);
const orders = await db.orders.findAll({
where: { userId: userIds }
});
// Group orders by userId
const ordersByUser = orders.reduce((acc, order) => {
if (!acc[order.userId]) acc[order.userId] = [];
acc[order.userId].push(order);
return acc;
}, {});
// Attach to users
users.forEach(user => {
user.orders = ordersByUser[user.id] || [];
});
return users;
}
```
**Configuration Fix Patterns**:
**1. Fix Missing Environment Variable**
```bash
# Before (hardcoded)
DATABASE_URL=postgresql://localhost/myapp
# After (environment-specific)
# .env.production
DATABASE_URL=postgresql://prod-db.example.com:5432/myapp_prod?sslmode=require
```
Application code should validate required vars at startup:
```javascript
const requiredEnvVars = ['DATABASE_URL', 'API_KEY', 'SECRET_KEY'];
for (const envVar of requiredEnvVars) {
  if (!process.env[envVar]) {
    throw new Error(`Required environment variable ${envVar} is not set`);
  }
}
```
**2. Fix Resource Limits**
```yaml
# Before (no limits - causes OOM)
apiVersion: apps/v1
kind: Deployment
spec:
containers:
- name: app
image: myapp:latest
# After (proper resource limits)
apiVersion: apps/v1
kind: Deployment
spec:
containers:
- name: app
image: myapp:latest
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
```
**Infrastructure Fix Patterns**:
**1. Fix Nginx Upload Size Limit**
```nginx
# Before (default 1MB limit)
server {
listen 80;
server_name example.com;
location / {
proxy_pass http://localhost:3000;
}
}
# After (increased limit)
server {
listen 80;
server_name example.com;
# Increase max body size
client_max_body_size 50M;
location / {
proxy_pass http://localhost:3000;
# Increase timeouts for large uploads
proxy_read_timeout 300s;
proxy_connect_timeout 75s;
}
}
```
**2. Add Missing Database Index**
```sql
-- Before (slow query)
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user@example.com';
-- Seq Scan on users (cost=0.00..1234.56 rows=1 width=123) (actual time=45.123..45.124 rows=1 loops=1)
-- After (add index)
CREATE INDEX idx_users_email ON users(email);
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'user@example.com';
-- Index Scan using idx_users_email on users (cost=0.29..8.30 rows=1 width=123) (actual time=0.012..0.013 rows=1 loops=1)
```
### 3. Implement the Fix
Execute the implementation with safety measures:
#### Implementation Checklist
**Pre-Implementation**:
- [ ] Create feature branch from main
- [ ] Review related code for similar issues
- [ ] Identify all affected areas
- [ ] Plan rollback strategy
- [ ] Prepare monitoring queries
**During Implementation**:
```bash
# Create feature branch
git checkout -b fix/issue-description
# Make changes incrementally
# Test after each change
# Commit with clear messages
git add file1.js
git commit -m "fix: add error handling to payment processing"
git add file2.js
git commit -m "fix: add validation for order status"
```
**Code Changes with Safety**:
```javascript
// Add defensive checks
function processOrder(order) {
// Validate inputs
if (!order) {
throw new Error('Order is required');
}
if (!order.id) {
throw new Error('Order must have an id');
}
// Log for debugging
logger.debug('Processing order', { orderId: order.id });
try {
// Main logic
const result = doProcessing(order);
// Validate output
if (!result || !result.success) {
throw new Error('Processing did not return success');
}
return result;
} catch (error) {
// Enhanced error context
logger.error('Order processing failed', {
orderId: order.id,
error: error.message,
stack: error.stack
});
// Re-throw with context
throw new ProcessingError(`Failed to process order ${order.id}`, error);
}
}
```
**Configuration Changes with Rollback**:
```bash
# Backup current config
cp /etc/nginx/nginx.conf /etc/nginx/nginx.conf.backup.$(date +%Y%m%d)
# Make changes
sudo vim /etc/nginx/nginx.conf
# Test configuration before applying
sudo nginx -t
# If test passes, reload
sudo nginx -s reload
# If issues occur, rollback
# sudo cp /etc/nginx/nginx.conf.backup.YYYYMMDD /etc/nginx/nginx.conf
# sudo nginx -s reload
```
**Database Changes with Safety**:
```sql
-- Note: CREATE INDEX CONCURRENTLY cannot run inside a transaction block
-- Create index concurrently (doesn't lock the table against writes)
CREATE INDEX CONCURRENTLY idx_users_email ON users(email);
-- Verify index was created
\d users
-- Test query with new index
EXPLAIN ANALYZE SELECT * FROM users WHERE email = 'test@example.com';
-- If issues, drop the index
-- DROP INDEX CONCURRENTLY idx_users_email;
```
### 4. Add Safeguards
Implement safeguards to prevent recurrence:
**Safeguard Types**:
**1. Input Validation**
```javascript
// Add schema validation
const Joi = require('joi');
const orderSchema = Joi.object({
id: Joi.string().uuid().required(),
userId: Joi.string().uuid().required(),
amount: Joi.number().positive().required(),
currency: Joi.string().length(3).required(),
status: Joi.string().valid('pending', 'processing', 'completed', 'failed').required()
});
function validateOrder(order) {
const { error, value } = orderSchema.validate(order);
if (error) {
throw new ValidationError(`Invalid order: ${error.message}`);
}
return value;
}
```
**2. Rate Limiting**
```javascript
const rateLimit = require('express-rate-limit');
// Prevent abuse
const limiter = rateLimit({
windowMs: 15 * 60 * 1000, // 15 minutes
max: 100, // limit each IP to 100 requests per windowMs
message: 'Too many requests from this IP'
});
app.use('/api/', limiter);
```
**3. Circuit Breaker**
```javascript
const CircuitBreaker = require('opossum');
// Protect against cascading failures
const breaker = new CircuitBreaker(externalApiCall, {
timeout: 3000, // 3 seconds
errorThresholdPercentage: 50,
resetTimeout: 30000 // 30 seconds
});
breaker.fallback(() => {
return { cached: true, data: getCachedData() };
});
async function callExternalApi(params) {
return await breaker.fire(params);
}
```
**4. Retry Logic**
```javascript
const retry = require('async-retry');
async function robustApiCall(params) {
return await retry(
async (bail) => {
try {
return await apiCall(params);
} catch (error) {
// Don't retry client errors
if (error.statusCode >= 400 && error.statusCode < 500) {
bail(error);
return;
}
// Retry server errors
throw error;
}
},
{
retries: 3,
minTimeout: 1000,
maxTimeout: 5000,
factor: 2
}
);
}
```
**5. Graceful Degradation**
```javascript
async function getRecommendations(userId) {
try {
// Try ML-based recommendations
return await mlRecommendationService.getRecommendations(userId);
} catch (error) {
logger.warn('ML recommendations failed, falling back to rule-based', error);
try {
// Fallback to rule-based
return await ruleBasedRecommendations(userId);
} catch (error2) {
logger.error('All recommendation methods failed', error2);
// Final fallback to popular items
return await getPopularItems();
}
}
}
```
### 5. Verification
Thoroughly verify the fix works:
**Verification Levels**:
**Level 1: Unit Tests**
```javascript
describe('processPayment', () => {
it('should handle missing order gracefully', async () => {
await expect(processPayment('nonexistent-id'))
.rejects
.toThrow('Order nonexistent-id not found');
});
it('should reject orders not in pending status', async () => {
const completedOrder = await createTestOrder({ status: 'completed' });
await expect(processPayment(completedOrder.id))
.rejects
.toThrow('is not in pending status');
});
it('should process valid pending orders', async () => {
const order = await createTestOrder({ status: 'pending', amount: 100 });
const result = await processPayment(order.id);
expect(result.success).toBe(true);
expect(result.transactionId).toBeDefined();
});
});
```
**Level 2: Integration Tests**
```javascript
describe('Payment Integration', () => {
it('should handle full payment flow', async () => {
// Create order
const order = await createOrder({ amount: 100 });
expect(order.status).toBe('pending');
// Process payment
const result = await processPayment(order.id);
expect(result.success).toBe(true);
// Verify order updated
const updatedOrder = await getOrder(order.id);
expect(updatedOrder.status).toBe('completed');
// Verify transaction recorded
const transaction = await getTransaction(result.transactionId);
expect(transaction.orderId).toBe(order.id);
});
});
```
**Level 3: Manual Testing**
```bash
# Test the fix manually
npm start
# In another terminal, reproduce the original issue
curl -X POST http://localhost:3000/api/orders/12345/payment
# Verify fix
# - Check response is successful
# - Check logs for proper error handling
# - Check database state is consistent
```
**Level 4: Load Testing**
```javascript
// Use k6 for load testing
import http from 'k6/http';
import { check, sleep } from 'k6';
export let options = {
stages: [
{ duration: '2m', target: 100 }, // Ramp up to 100 users
{ duration: '5m', target: 100 }, // Stay at 100 users
{ duration: '2m', target: 0 }, // Ramp down
],
};
export default function () {
let response = http.post('http://localhost:3000/api/orders/payment', {
orderId: '12345'
});
check(response, {
'status is 200': (r) => r.status === 200,
'no errors': (r) => !r.json('error')
});
sleep(1);
}
```
**Level 5: Production Smoke Test**
```bash
# After deployment, test in production
# Use feature flag if possible
# Test with low traffic
curl https://api.production.com/health
curl https://api.production.com/api/test-endpoint
# Monitor metrics
# - Error rate
# - Response time
# - Resource usage
# If issues detected, rollback immediately
```
### 6. Prevention Measures
Add measures to prevent similar issues:
**Prevention Strategies**:
**1. Add Regression Tests**
```javascript
// This test would have caught the bug
describe('Regression: Order Processing Bug #1234', () => {
it('should not crash when order is missing', async () => {
// This used to cause a crash
await expect(processPayment('missing-order'))
.rejects
.toThrow('Order missing-order not found');
// No crash, proper error thrown
});
});
```
**2. Add Monitoring**
```javascript
// Add custom metrics
const { Counter, Histogram } = require('prom-client');
const paymentErrors = new Counter({
name: 'payment_processing_errors_total',
help: 'Total payment processing errors',
labelNames: ['error_type']
});
const paymentDuration = new Histogram({
name: 'payment_processing_duration_seconds',
help: 'Payment processing duration',
labelNames: ['status']
});
async function processPayment(orderId) {
const end = paymentDuration.startTimer();
try {
const result = await _processPayment(orderId);
end({ status: 'success' });
return result;
} catch (error) {
paymentErrors.inc({ error_type: error.constructor.name });
end({ status: 'error' });
throw error;
}
}
```
**3. Add Alerting**
```yaml
# Prometheus alert rules
groups:
- name: payment_processing
rules:
- alert: HighPaymentErrorRate
expr: rate(payment_processing_errors_total[5m]) > 0.1
for: 5m
labels:
severity: critical
annotations:
summary: "High payment error rate detected"
description: "Payment error rate is {{ $value }} errors/sec"
```
**4. Improve Logging**
```javascript
// Add structured logging
logger.info('Processing payment', {
orderId: order.id,
amount: order.amount,
userId: order.userId,
timestamp: new Date().toISOString()
});
// Log key decision points
logger.debug('Order validation passed', { orderId });
logger.debug('Calling payment gateway', { orderId, amount });
logger.debug('Payment gateway responded', { orderId, success: result.success });
```
**5. Update Documentation**
```markdown
# Common Issues and Solutions
## Issue: Payment Processing Fails Silently
**Symptoms**: Orders stuck in pending status
**Root Cause**: Missing error handling in payment processor
**Solution**: Added comprehensive error handling and logging
**Prevention**:
- All payment operations now have try-catch blocks
- Errors are logged with full context
- Alerts trigger on error rate > 10%
**Related Code**: src/services/payment-processor.js
**Tests**: tests/integration/payment-processing.test.js
**Monitoring**: Grafana dashboard "Payment Processing"
```
## Output Format
```markdown
# Fix Report: [Issue Summary]
## Summary
[Brief description of the fix implemented]
## Root Cause Addressed
[Detailed explanation of what root cause this fix addresses]
## Changes Made
### Code Changes
#### File: [path/to/file1]
**Purpose**: [Why this file was changed]
\`\`\`[language]
// Before
[original code]
// After
[fixed code]
// Why this works
[explanation]
\`\`\`
#### File: [path/to/file2]
**Purpose**: [Why this file was changed]
\`\`\`[language]
[changes with before/after]
\`\`\`
### Configuration Changes
#### File: [config/file]
\`\`\`
[configuration changes]
\`\`\`
**Impact**: [What this configuration change affects]
### Infrastructure Changes
#### Component: [infrastructure component]
\`\`\`
[infrastructure changes]
\`\`\`
**Impact**: [What this infrastructure change affects]
## Safeguards Added
### Input Validation
[Validation added to prevent bad inputs]
### Error Handling
[Error handling added for failure scenarios]
### Rate Limiting
[Rate limiting or throttling added]
### Monitoring
[Monitoring/metrics added]
### Alerting
[Alerts configured]
## Verification Results
### Unit Tests
\`\`\`
[test results]
\`\`\`
**Status**: ✅ All tests passing
### Integration Tests
\`\`\`
[test results]
\`\`\`
**Status**: ✅ All tests passing
### Manual Testing
[Description of manual testing performed]
**Status**: ✅ Issue no longer reproduces
### Load Testing
[Results of load testing]
**Status**: ✅ Performs well under load
## Prevention Measures
### Tests Added
- [Test 1]: Prevents regression
- [Test 2]: Covers edge case
### Monitoring Added
- [Metric 1]: Tracks error rate
- [Metric 2]: Tracks performance
### Alerts Configured
- [Alert 1]: Fires when error rate exceeds threshold
- [Alert 2]: Fires when performance degrades
### Documentation Updated
- [Doc 1]: Troubleshooting guide
- [Doc 2]: Runbook for oncall
## Deployment Plan
### Pre-Deployment
1. [Step 1]
2. [Step 2]
### Deployment
1. [Step 1]
2. [Step 2]
### Post-Deployment
1. [Step 1 - monitoring]
2. [Step 2 - verification]
### Rollback Plan
\`\`\`bash
[commands to rollback if needed]
\`\`\`
## Verification Steps
### How to Verify the Fix
1. [Verification step 1]
2. [Verification step 2]
### Expected Behavior After Fix
[Description of expected behavior]
### Monitoring Queries
\`\`\`
[queries to monitor fix effectiveness]
\`\`\`
## Related Issues
### Similar Issues Fixed
- [Related issue 1]
- [Related issue 2]
### Potential Similar Issues
- [Potential issue 1 to check]
- [Potential issue 2 to check]
## Lessons Learned
[Key insights from implementing this fix]
## Files Modified
- [file1]
- [file2]
- [file3]
## Commits
\`\`\`
[git log output showing fix commits]
\`\`\`
```
## Error Handling
**Fix Fails Verification**:
If fix doesn't resolve the issue:
1. Re-examine root cause analysis
2. Check if multiple issues present
3. Verify fix was implemented correctly
4. Add more diagnostic logging
**Fix Causes New Issues**:
If fix introduces side effects:
1. Rollback immediately (see the sketch below)
2. Analyze side effect cause
3. Redesign fix to avoid side effect
4. Add tests for side effect scenario
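A sketch of an immediate rollback, assuming the fix landed as a single commit and the service is deployed via Kubernetes (substitute the real commit hash and deployment name):
```bash
# Revert the fix commit and push
git revert --no-edit <fix-commit-sha>
git push origin main
# Or roll the running deployment back directly
kubectl rollout undo deployment/app
kubectl rollout status deployment/app
```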
**Cannot Deploy Fix**:
If deployment is blocked:
1. Implement workaround if possible
2. Document deployment blockers
3. Create deployment plan to address blockers
4. Consider feature flag for gradual rollout
## Integration with Other Operations
- **Before**: Use `/debug diagnose` to identify root cause
- **Before**: Use `/debug reproduce` to create test case
- **After**: Use `/debug performance` if fix affects performance
- **After**: Use `/debug memory` if fix affects memory usage
## Agent Utilization
This operation leverages the **10x-fullstack-engineer** agent for:
- Designing robust fixes that address root causes
- Implementing comprehensive safeguards
- Creating thorough verification strategies
- Considering performance and security implications
- Planning prevention measures

commands/debug/memory.md Normal file
File diff suppressed because it is too large
@@ -0,0 +1,965 @@
# Performance Operation - Performance Debugging and Profiling
You are executing the **performance** operation to debug performance issues, profile application behavior, and optimize system performance.
## Parameters
**Received**: `$ARGUMENTS` (after removing 'performance' operation name)
Expected format: `component:"component-name" [metric:"response-time|throughput|cpu|memory"] [threshold:"target-value"] [duration:"profile-duration"] [load:"concurrent-users"]`
## Workflow
### 1. Establish Performance Baseline
Measure current performance before optimization:
**Baseline Metrics to Capture**:
```bash
# Response time baseline
curl -w "@curl-format.txt" -o /dev/null -s http://localhost:3000/api/endpoint
# Create curl-format.txt
cat > curl-format.txt <<'EOF'
time_namelookup: %{time_namelookup}\n
time_connect: %{time_connect}\n
time_appconnect: %{time_appconnect}\n
time_pretransfer: %{time_pretransfer}\n
time_redirect: %{time_redirect}\n
time_starttransfer: %{time_starttransfer}\n
----------\n
time_total: %{time_total}\n
EOF
# Throughput baseline
ab -n 1000 -c 10 http://localhost:3000/api/endpoint
# Resource usage baseline
# CPU
mpstat 1 60 > baseline_cpu.txt
# Memory
free -m && ps aux --sort=-%mem | head -20
# Disk I/O
iostat -x 1 60 > baseline_io.txt
```
**Application Metrics**:
```javascript
// Add timing middleware
app.use((req, res, next) => {
const start = Date.now();
res.on('finish', () => {
const duration = Date.now() - start;
console.log({
method: req.method,
path: req.path,
status: res.statusCode,
duration: duration,
timestamp: new Date().toISOString()
});
});
next();
});
// Track key operations
const startTime = Date.now();
await operation();
const duration = Date.now() - startTime;
metrics.histogram('operation_duration', duration);
```
### 2. Identify Performance Bottlenecks
Use profiling to find slow components:
#### Application Profiling
**Node.js Profiling**:
```bash
# CPU profiling
node --prof app.js
# Run load test
ab -n 10000 -c 100 http://localhost:3000/
# Stop app, process profile
node --prof-process isolate-*-v8.log > processed.txt
# Chrome DevTools profiling
node --inspect app.js
# Open chrome://inspect
# Click "Open dedicated DevTools for Node"
# Go to Profiler tab, start profiling
# Clinic.js for comprehensive profiling
npm install -g clinic
clinic doctor -- node app.js
# Run load test
clinic doctor --visualize-only PID.clinic-doctor
```
**Python Profiling**:
```python
import cProfile
import pstats
# Profile a function
cProfile.run('my_function()', 'profile_stats')
# Analyze results
p = pstats.Stats('profile_stats')
p.sort_stats('cumulative')
p.print_stats(20)
# Line profiler for detailed profiling
from line_profiler import LineProfiler
profiler = LineProfiler()
profiler.add_function(my_function)
profiler.run('my_function()')
profiler.print_stats()
# Memory profiling
from memory_profiler import profile
@profile
def my_function():
large_list = [i for i in range(1000000)]
return sum(large_list)
```
**Use profiling utility script**:
```bash
# Run comprehensive profiling
./commands/debug/.scripts/profile.sh \
--app node_app \
--duration 60 \
--endpoint http://localhost:3000/api/slow
# Output: CPU profile, memory profile, flamegraph
```
#### Database Profiling
**Query Performance**:
```sql
-- PostgreSQL: Enable query timing
\timing on
-- Analyze query plan
EXPLAIN ANALYZE
SELECT u.*, o.*
FROM users u
LEFT JOIN orders o ON u.id = o.user_id
WHERE u.created_at > '2024-01-01';
-- Look for:
-- - Seq Scan (sequential scan - bad for large tables)
-- - High cost estimates
-- - Large number of rows processed
-- - Missing indexes
-- Check slow queries
SELECT
query,
calls,
total_time,
mean_time,
max_time
FROM pg_stat_statements
ORDER BY mean_time DESC
LIMIT 20;
-- Find missing indexes
SELECT
schemaname,
tablename,
seq_scan,
seq_tup_read,
idx_scan,
seq_tup_read / seq_scan AS avg_seq_read
FROM pg_stat_user_tables
WHERE seq_scan > 0
ORDER BY seq_tup_read DESC
LIMIT 20;
```
**Connection Pool Analysis**:
```javascript
// Monitor connection pool
pool.on('acquire', (client) => {
console.log('Client acquired:', {
poolSize: pool.totalCount,
idleCount: pool.idleCount,
waitingCount: pool.waitingCount
});
});
pool.on('remove', (client) => {
console.log('Client removed from pool');
});
// Check pool stats periodically
setInterval(() => {
console.log('Pool stats:', {
total: pool.totalCount,
idle: pool.idleCount,
waiting: pool.waitingCount
});
}, 10000);
```
#### Network Profiling
**API Call Analysis**:
```bash
# Trace network calls
strace -c -p PID # System call tracing
# Detailed network timing
tcpdump -i any -w capture.pcap port 3000
# Analyze with Wireshark
# HTTP request tracing
curl -w "@curl-format.txt" -v http://localhost:3000/api/endpoint
# Check DNS resolution
time nslookup api.example.com
# Check network latency
ping -c 10 api.example.com
```
**Browser Performance**:
```javascript
// Use Performance API
performance.mark('start-operation');
await operation();
performance.mark('end-operation');
performance.measure('operation', 'start-operation', 'end-operation');
const measure = performance.getEntriesByName('operation')[0];
console.log('Operation took:', measure.duration, 'ms');
// Navigation timing
const perfData = performance.getEntriesByType('navigation')[0];
console.log({
dns: perfData.domainLookupEnd - perfData.domainLookupStart,
tcp: perfData.connectEnd - perfData.connectStart,
ttfb: perfData.responseStart - perfData.requestStart,
download: perfData.responseEnd - perfData.responseStart,
domReady: perfData.domContentLoadedEventEnd - perfData.domContentLoadedEventStart,
load: perfData.loadEventEnd - perfData.loadEventStart
});
// Resource timing
performance.getEntriesByType('resource').forEach(resource => {
console.log(resource.name, resource.duration);
});
```
### 3. Analyze Bottlenecks
Understand why components are slow:
#### CPU Bottlenecks
**Identify CPU-intensive operations**:
```javascript
// Find CPU-heavy code
const { performance } = require('perf_hooks');
function analyzePerformance() {
const start = performance.now();
// Suspect operation
const result = expensiveOperation();
const duration = performance.now() - start;
if (duration > 100) { // More than 100ms
console.warn('CPU-intensive operation detected:', {
operation: 'expensiveOperation',
duration: duration
});
}
return result;
}
```
**Common CPU bottlenecks**:
- Complex regex operations
- Large array/object operations
- JSON parsing/stringifying large objects
- Synchronous file operations
- Cryptographic operations
- Image processing
**Solutions**:
```javascript
// Before: Synchronous blocking
const data = JSON.parse(largeJsonString);
// After: Async in worker thread
const { Worker } = require('worker_threads');
function parseJsonAsync(jsonString) {
return new Promise((resolve, reject) => {
const worker = new Worker(`
const { parentPort } = require('worker_threads');
parentPort.on('message', (data) => {
const parsed = JSON.parse(data);
parentPort.postMessage(parsed);
});
`, { eval: true });
worker.on('message', resolve);
worker.on('error', reject);
worker.postMessage(jsonString);
});
}
```
#### I/O Bottlenecks
**Identify I/O-bound operations**:
```javascript
// Monitor I/O operations
const fs = require('fs').promises;
async function monitoredFileRead(path) {
const start = Date.now();
try {
const data = await fs.readFile(path);
const duration = Date.now() - start;
console.log('File read:', { path, duration, size: data.length });
if (duration > 50) {
console.warn('Slow file read detected:', path);
}
return data;
} catch (error) {
console.error('File read failed:', { path, error });
throw error;
}
}
```
**Common I/O bottlenecks**:
- Multiple database queries in sequence (N+1 problem)
- Synchronous file operations
- External API calls in sequence
- Large file uploads/downloads
**Solutions**:
```javascript
// Before: Sequential queries (N+1)
const users = await User.findAll();
for (const user of users) {
user.posts = await Post.findByUserId(user.id); // N queries
}
// After: Single query with join
const users = await User.findAll({
include: [{ model: Post }]
});
// Before: Sequential API calls
const user = await fetchUser(userId);
const orders = await fetchOrders(userId);
const profile = await fetchProfile(userId);
// After: Parallel execution
const [user, orders, profile] = await Promise.all([
fetchUser(userId),
fetchOrders(userId),
fetchProfile(userId)
]);
```
#### Memory Bottlenecks
**Identify memory issues**:
```javascript
// Monitor memory usage
function logMemoryUsage(label) {
const usage = process.memoryUsage();
console.log(`[${label}] Memory:`, {
rss: Math.round(usage.rss / 1024 / 1024) + 'MB',
heapTotal: Math.round(usage.heapTotal / 1024 / 1024) + 'MB',
heapUsed: Math.round(usage.heapUsed / 1024 / 1024) + 'MB',
external: Math.round(usage.external / 1024 / 1024) + 'MB'
});
}
logMemoryUsage('before-operation');
await operation();
logMemoryUsage('after-operation');
```
**Common memory bottlenecks**:
- Loading large datasets into memory
- Caching without size limits
- Memory leaks (event listeners, closures)
- Large object allocations
**Solutions**:
```javascript
// Before: Load entire file into memory
const data = await fs.readFile('large-file.csv', 'utf8');
const lines = data.split('\n');
// After: Stream processing
const readline = require('readline');
const stream = fs.createReadStream('large-file.csv');
const rl = readline.createInterface({ input: stream });
for await (const line of rl) {
processLine(line); // Process one line at a time
}
// Before: Unbounded cache
const cache = {};
cache[key] = value; // Grows forever
// After: LRU cache with size limit
const LRU = require('lru-cache');
const cache = new LRU({
max: 1000, // Max items
maxSize: 50 * 1024 * 1024, // 50MB
sizeCalculation: (value) => JSON.stringify(value).length
});
```
### 4. Implement Optimizations
Apply targeted optimizations:
#### Query Optimization
**Add Indexes**:
```sql
-- Before: Slow query
EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 123;
-- Seq Scan on orders (cost=0.00..1234.56 rows=10 width=100) (actual time=45.123..45.456 rows=10 loops=1)
-- After: Add index
CREATE INDEX idx_orders_user_id ON orders(user_id);
EXPLAIN ANALYZE SELECT * FROM orders WHERE user_id = 123;
-- Index Scan using idx_orders_user_id on orders (cost=0.29..8.30 rows=10 width=100) (actual time=0.012..0.015 rows=10 loops=1)
```
**Optimize Queries**:
```sql
-- Before: Inefficient
SELECT * FROM orders o
LEFT JOIN users u ON o.user_id = u.id
WHERE o.created_at > NOW() - INTERVAL '7 days';
-- After: Select only needed columns, add index
CREATE INDEX idx_orders_created_at ON orders(created_at);
SELECT o.id, o.amount, u.name
FROM orders o
INNER JOIN users u ON o.user_id = u.id
WHERE o.created_at > NOW() - INTERVAL '7 days';
```
#### Caching
**Application-level caching**:
```javascript
const cache = new Map();
async function getCachedData(key) {
// Check cache first
if (cache.has(key)) {
console.log('Cache hit:', key);
return cache.get(key);
}
// Cache miss - fetch from database
console.log('Cache miss:', key);
const data = await fetchFromDatabase(key);
// Store in cache
cache.set(key, data);
// Expire after 5 minutes
setTimeout(() => cache.delete(key), 5 * 60 * 1000);
return data;
}
// Redis caching
const redis = require('redis');
const client = redis.createClient();
// With node-redis v4+, call await client.connect() before issuing commands
async function getCachedDataRedis(key) {
// Try cache
const cached = await client.get(key);
if (cached) {
return JSON.parse(cached);
}
// Fetch and cache
const data = await fetchFromDatabase(key);
await client.setEx(key, 300, JSON.stringify(data)); // 5 min TTL
return data;
}
```
#### Code Optimization
**Optimize algorithms**:
```javascript
// Before: O(n²) - slow for large arrays
function findDuplicates(arr) {
const duplicates = [];
for (let i = 0; i < arr.length; i++) {
for (let j = i + 1; j < arr.length; j++) {
if (arr[i] === arr[j]) {
duplicates.push(arr[i]);
}
}
}
return duplicates;
}
// After: O(n) - much faster
function findDuplicates(arr) {
const seen = new Set();
const duplicates = new Set();
for (const item of arr) {
if (seen.has(item)) {
duplicates.add(item);
} else {
seen.add(item);
}
}
return Array.from(duplicates);
}
```
**Lazy loading**:
```javascript
// Before: Load all data upfront
const allUsers = await User.findAll();
const allPosts = await Post.findAll();
// After: Load on demand
async function getUserWithPosts(userId, { includePosts = false } = {}) {
  const user = await User.findById(userId);
  // Only load posts when the caller asks for them
  if (includePosts) {
    user.posts = await Post.findByUserId(userId);
  }
  return user;
}
```
**Pagination**:
```javascript
// Before: Load all results
const results = await db.query('SELECT * FROM large_table');
// After: Paginate
const page = 1;
const pageSize = 100;
const results = await db.query(
'SELECT * FROM large_table LIMIT $1 OFFSET $2',
[pageSize, (page - 1) * pageSize]
);
```
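OFFSET-based pagination still scans and discards every skipped row, so deep pages get progressively slower. Keyset (cursor) pagination filters on the last id seen instead — a sketch assuming an indexed `id` column and the same hypothetical `db.query` helper:
```javascript
// Keyset pagination: the caller passes the last id from the previous page
async function getPage(lastSeenId = 0, pageSize = 100) {
  return db.query(
    'SELECT * FROM large_table WHERE id > $1 ORDER BY id LIMIT $2',
    [lastSeenId, pageSize]
  );
}
```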
#### Async Optimization
**Parallel execution**:
```javascript
// Before: Sequential (slow)
const user = await fetchUser();
const orders = await fetchOrders();
const payments = await fetchPayments();
// Total time: time(user) + time(orders) + time(payments)
// After: Parallel (fast)
const [user, orders, payments] = await Promise.all([
fetchUser(),
fetchOrders(),
fetchPayments()
]);
// Total time: max(time(user), time(orders), time(payments))
```
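Note that `Promise.all` rejects as soon as any call fails, discarding the other results. When partial results are acceptable, `Promise.allSettled` lets each outcome be handled independently — a brief sketch using the same hypothetical fetchers:
```javascript
// Each entry is { status: 'fulfilled', value } or { status: 'rejected', reason }
const [userResult, ordersResult, paymentsResult] = await Promise.allSettled([
  fetchUser(),
  fetchOrders(),
  fetchPayments()
]);

// Fall back to an empty list if the orders call failed
const orders = ordersResult.status === 'fulfilled' ? ordersResult.value : [];
```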
**Batch processing**:
```javascript
// Before: Process one at a time
for (const item of items) {
await processItem(item); // Slow for many items
}
// After: Process in batches
const batchSize = 10;
for (let i = 0; i < items.length; i += batchSize) {
const batch = items.slice(i, i + batchSize);
await Promise.all(batch.map(item => processItem(item)));
}
```
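Fixed-size batches wait for the slowest item in each batch before starting the next one. A small hand-rolled worker pool keeps a steady number of items in flight instead — a sketch only; the concurrency of 10 is an arbitrary starting point to tune:
```javascript
async function processWithConcurrency(items, concurrency = 10) {
  const queue = [...items];
  // Start `concurrency` workers that pull from the shared queue until it is empty
  const workers = Array.from({ length: concurrency }, async () => {
    while (queue.length > 0) {
      const item = queue.shift();
      await processItem(item);
    }
  });
  await Promise.all(workers);
}
```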
### 5. Load Testing
Verify optimizations under load:
**Load Testing Tools**:
**Apache Bench**:
```bash
# Simple load test
ab -n 10000 -c 100 http://localhost:3000/api/endpoint
# With keep-alive
ab -n 10000 -c 100 -k http://localhost:3000/api/endpoint
# POST with data
ab -n 1000 -c 10 -p data.json -T application/json http://localhost:3000/api/endpoint
```
**k6 (recommended)**:
```javascript
// load-test.js
import http from 'k6/http';
import { check, sleep } from 'k6';
export let options = {
stages: [
{ duration: '2m', target: 100 }, // Ramp up to 100 users
{ duration: '5m', target: 100 }, // Stay at 100 users
{ duration: '2m', target: 200 }, // Ramp up to 200 users
{ duration: '5m', target: 200 }, // Stay at 200 users
{ duration: '2m', target: 0 }, // Ramp down to 0
],
thresholds: {
http_req_duration: ['p(95)<500'], // 95% of requests < 500ms
http_req_failed: ['rate<0.01'], // Error rate < 1%
},
};
export default function () {
const response = http.get('http://localhost:3000/api/endpoint');
check(response, {
'status is 200': (r) => r.status === 200,
'response time < 500ms': (r) => r.timings.duration < 500,
});
sleep(1);
}
```
```bash
# Run load test
k6 run load-test.js
# With real-time monitoring
k6 run --out influxdb=http://localhost:8086/k6 load-test.js
```
**Artillery**:
```yaml
# load-test.yml
config:
target: 'http://localhost:3000'
phases:
- duration: 120
arrivalRate: 10
name: "Warm up"
- duration: 300
arrivalRate: 50
name: "Sustained load"
- duration: 120
arrivalRate: 100
name: "Peak load"
scenarios:
- name: "API endpoints"
flow:
- get:
url: "/api/users"
- get:
url: "/api/orders"
- post:
url: "/api/orders"
json:
userId: 123
amount: 100
```
```bash
# Run test
artillery run load-test.yml
# With report
artillery run --output report.json load-test.yml
artillery report report.json
```
### 6. Monitor Performance Improvements
Compare before and after:
**Metrics to Compare**:
```markdown
## Before Optimization
- Response time P50: 200ms
- Response time P95: 800ms
- Response time P99: 2000ms
- Throughput: 100 req/s
- Error rate: 2%
- CPU usage: 80%
- Memory usage: 1.5GB
## After Optimization
- Response time P50: 50ms ✅ 75% improvement
- Response time P95: 200ms ✅ 75% improvement
- Response time P99: 500ms ✅ 75% improvement
- Throughput: 400 req/s ✅ 4x improvement
- Error rate: 0.1% ✅ 20x improvement
- CPU usage: 40% ✅ 50% reduction
- Memory usage: 800MB ✅ 47% reduction
```
**Monitoring Dashboard**:
```javascript
// Expose metrics for Prometheus
const promClient = require('prom-client');
// Response time histogram
const httpDuration = new promClient.Histogram({
name: 'http_request_duration_seconds',
help: 'HTTP request duration',
labelNames: ['method', 'route', 'status_code'],
buckets: [0.01, 0.05, 0.1, 0.5, 1, 2, 5]
});
// Throughput counter
const httpRequests = new promClient.Counter({
name: 'http_requests_total',
help: 'Total HTTP requests',
labelNames: ['method', 'route', 'status_code']
});
// Middleware to track metrics
app.use((req, res, next) => {
const start = Date.now();
res.on('finish', () => {
const duration = (Date.now() - start) / 1000;
httpDuration.observe(
{ method: req.method, route: req.route?.path || req.path, status_code: res.statusCode },
duration
);
httpRequests.inc({
method: req.method,
route: req.route?.path || req.path,
status_code: res.statusCode
});
});
next();
});
// Metrics endpoint
app.get('/metrics', async (req, res) => {
res.set('Content-Type', promClient.register.contentType);
res.end(await promClient.register.metrics());
});
```
## Output Format
```markdown
# Performance Optimization Report: [Component Name]
## Summary
[Brief summary of optimization results]
## Performance Baseline
### Before Optimization
- **Response Time P50**: [value]ms
- **Response Time P95**: [value]ms
- **Response Time P99**: [value]ms
- **Throughput**: [value] req/s
- **Error Rate**: [value]%
- **CPU Usage**: [value]%
- **Memory Usage**: [value]MB
## Bottlenecks Identified
### Bottleneck 1: [Name]
- **Type**: [CPU|I/O|Memory|Network]
- **Location**: [file:line or component]
- **Impact**: [% of total time or resource usage]
- **Evidence**:
\`\`\`
[profiling data or logs showing bottleneck]
\`\`\`
### Bottleneck 2: [Name]
[similar structure]
## Optimizations Implemented
### Optimization 1: [Name]
**Problem**: [what was slow]
**Solution**: [what was done]
**Code Changes**:
\`\`\`[language]
// Before
[original slow code]
// After
[optimized code]
\`\`\`
**Impact**:
- Response time: [before]ms → [after]ms ([%] improvement)
- Resource usage: [before] → [after] ([%] improvement)
### Optimization 2: [Name]
[similar structure]
## Performance After Optimization
### After Optimization
- **Response Time P50**: [value]ms ✅ [%] improvement
- **Response Time P95**: [value]ms ✅ [%] improvement
- **Response Time P99**: [value]ms ✅ [%] improvement
- **Throughput**: [value] req/s ✅ [x]x improvement
- **Error Rate**: [value]% ✅ [%] improvement
- **CPU Usage**: [value]% ✅ [%] reduction
- **Memory Usage**: [value]MB ✅ [%] reduction
## Load Testing Results
### Test Configuration
- **Tool**: [k6|artillery|ab]
- **Duration**: [duration]
- **Peak Load**: [number] concurrent users
- **Total Requests**: [number]
### Results
\`\`\`
[load test output]
\`\`\`
### Performance Under Load
[Description of how system performed under sustained load]
## Profiling Data
### CPU Profile
[Flame graph or top CPU-consuming functions]
### Memory Profile
[Heap snapshots or memory allocation patterns]
### Query Performance
[Database query analysis results]
## Monitoring Setup
### Metrics Added
- [Metric 1]: Tracks [what]
- [Metric 2]: Tracks [what]
### Dashboards Created
- [Dashboard 1]: [URL and description]
- [Dashboard 2]: [URL and description]
### Alerts Configured
- [Alert 1]: Triggers when [condition]
- [Alert 2]: Triggers when [condition]
## Recommendations
### Additional Optimizations
1. [Optimization 1]: [Expected impact]
2. [Optimization 2]: [Expected impact]
### Monitoring
1. [What to monitor]
2. [What thresholds to set]
### Future Improvements
1. [Long-term improvement 1]
2. [Long-term improvement 2]
## Files Modified
- [file1]: [what was changed]
- [file2]: [what was changed]
## Verification Steps
### How to Verify
1. [Step 1]
2. [Step 2]
### Expected Behavior
[What should be observed]
## Next Steps
1. [Next step 1]
2. [Next step 2]
```
## Error Handling
**Optimization Degrades Performance**:
If an optimization makes things slower:
1. Rollback immediately
2. Re-profile to understand why
3. Check for introduced overhead
4. Verify test methodology
**Cannot Reproduce Performance Issue**:
If the issue only occurs in production:
1. Compare production vs test environment
2. Check production load patterns
3. Analyze production metrics
4. Consider production data characteristics
**Optimization Introduces Bugs**:
If an optimization causes errors:
1. Rollback optimization
2. Add comprehensive tests
3. Implement optimization incrementally
4. Verify correctness at each step
## Integration with Other Operations
- **Before**: Use `/debug diagnose` to identify performance issues
- **Before**: Use `/debug analyze-logs` to understand performance patterns
- **After**: Use `/debug fix` to implement optimizations
- **Related**: Use `/debug memory` for memory-specific optimization
## Agent Utilization
This operation leverages the **10x-fullstack-engineer** agent for:
- Identifying performance bottlenecks across the stack
- Suggesting appropriate optimization strategies
- Implementing code optimizations
- Designing comprehensive load tests
- Interpreting profiling data

commands/debug/reproduce.md Normal file

@@ -0,0 +1,695 @@
# Reproduce Operation - Issue Reproduction Strategies
You are executing the **reproduce** operation to create reliable reproduction strategies and test cases for debugging issues.
## Parameters
**Received**: `$ARGUMENTS` (after removing 'reproduce' operation name)
Expected format: `issue:"problem description" [environment:"prod|staging|dev"] [data:"test-data-location"] [steps:"reproduction-steps"] [reliability:"percentage"]`
## Workflow
### 1. Understand Reproduction Requirements
Gather information about the issue's behavior:
**Key Questions**:
- How often does the issue occur? (100%, 50%, 5%, etc.)
- Under what conditions? (specific data, timing, load, etc.)
- In which environments? (prod only, all environments)
- What is the expected vs actual behavior?
- Are there known workarounds?
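To answer the frequency question with data rather than anecdote, a rough count of matching log lines over time is often enough — a minimal Node sketch, assuming ISO-timestamped log lines and a hypothetical `logs/application.log` path and error signature:
```javascript
const fs = require('fs');
const readline = require('readline');

// Count lines containing the error signature, grouped by day
async function countOccurrences(logPath, signature) {
  const rl = readline.createInterface({ input: fs.createReadStream(logPath) });
  const perDay = new Map();
  for await (const line of rl) {
    if (!line.includes(signature)) continue;
    const day = line.slice(0, 10); // "YYYY-MM-DD" prefix of an ISO timestamp
    perDay.set(day, (perDay.get(day) || 0) + 1);
  }
  return perDay;
}

countOccurrences('logs/application.log', 'Request Entity Too Large')
  .then((perDay) => console.table(Object.fromEntries(perDay)));
```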
**Reproduction Challenges to Identify**:
- **Timing-dependent** (race conditions, timeouts)
- **Data-dependent** (specific user data, edge cases)
- **Environment-dependent** (prod-only config, specific infrastructure)
- **Load-dependent** (only under high load or concurrency)
- **State-dependent** (requires specific sequence of actions)
### 2. Gather Reproduction Context
Collect all information needed to reproduce:
#### Environment Context
**Application State**:
```bash
# Get application version
git log -1 --oneline
npm list # Node dependencies
pip freeze # Python dependencies
# Get configuration
cat .env.production
echo $ENVIRONMENT_VARS
# Get deployed version in production
kubectl get deployment app-name -o jsonpath='{.spec.template.spec.containers[0].image}'
```
**Infrastructure State**:
```bash
# System resources
free -m
df -h
ulimit -a
# Network configuration
ip addr show
cat /etc/resolv.conf
# Service status
systemctl status application-service
docker ps
kubectl get pods
```
#### Data Context
**Database State**:
```sql
-- Get relevant data schema
\d+ table_name
-- Get sample data that triggers issue
SELECT * FROM users WHERE id = 'problematic-user-id';
-- Get data statistics
SELECT count(*), min(created_at), max(created_at) FROM table_name;
-- Export test data
COPY (SELECT * FROM users WHERE id IN ('user1', 'user2')) TO '/tmp/test_data.csv' CSV HEADER;
```
**Request/Response Data**:
```bash
# Capture failing request
# Use browser DevTools > Network > Copy as cURL
curl 'https://api.example.com/endpoint' \
-H 'Authorization: Bearer TOKEN' \
-H 'Content-Type: application/json' \
--data-raw '{"key":"value"}' \
-v # Verbose output
# Capture webhook payload
# Check logs for incoming webhook data
grep "webhook_payload" logs/application.log | jq .
```
#### User Context
**User Session**:
```javascript
// Browser state
console.log('LocalStorage:', localStorage);
console.log('SessionStorage:', sessionStorage);
console.log('Cookies:', document.cookie);
console.log('User Agent:', navigator.userAgent);
// Authentication state
console.log('Auth Token:', authToken);
console.log('Token Payload:', jwt.decode(authToken));
console.log('Session ID:', sessionId);
```
**User Actions**:
```markdown
1. User logs in as user@example.com
2. Navigates to /dashboard
3. Clicks "Upload File" button
4. Selects file > 10MB
5. Clicks "Submit"
6. Error occurs: "Request Entity Too Large"
```
### 3. Create Local Reproduction
Develop a strategy to reproduce the issue locally:
#### Strategy 1: Direct Reproduction
**For Simple Issues**:
```javascript
// Create minimal test case
const assert = require('assert');

async function reproduceBug() {
  // Setup
  const testData = {
    userId: 'test-user',
    file: createLargeFile(15 * 1024 * 1024) // 15MB
  };
  // Execute problematic operation
  const result = await uploadFile(testData);
  // Verify issue occurs
  assert(result.status === 413, 'Expected 413 error');
}
```
#### Strategy 2: Environment Simulation
**For Environment-Specific Issues**:
```bash
# Replicate production configuration locally
cp .env.production .env.local
sed -i 's/prod-database/localhost:5432/g' .env.local
# Use production data dump
psql local_db < production_data_dump.sql
# Run with production-like settings
NODE_ENV=production npm start
```
#### Strategy 3: Data-Driven Reproduction
**For Data-Specific Issues**:
```javascript
// Load production data that triggers issue
const testData = require('./test-data/problematic-user-data.json');
// Seed database with specific data
await db.users.insert(testData.user);
await db.orders.insertMany(testData.orders);
// Execute operation
const result = await processOrder(testData.orders[0].id);
```
#### Strategy 4: Timing-Based Reproduction
**For Race Conditions**:
```javascript
// Add delays to expose race condition
async function reproduceRaceCondition() {
// Start two operations simultaneously
const [result1, result2] = await Promise.all([
operation1(),
operation2()
]);
// Or use setTimeout to control timing
setTimeout(() => operation1(), 0);
setTimeout(() => operation2(), 1); // 1ms delay
}
// Add intentional delays to expose timing issues
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

async function operation() {
  await fetchData();
  await sleep(100); // Artificial delay
  await processData(); // May fail if timing-dependent
}
```
#### Strategy 5: Load-Based Reproduction
**For Performance/Concurrency Issues**:
```javascript
// Simulate concurrent requests
async function reproduceUnderLoad() {
const concurrentRequests = 100;
const requests = Array(concurrentRequests)
.fill(null)
.map(() => makeRequest());
const results = await Promise.allSettled(requests);
const failures = results.filter(r => r.status === 'rejected');
console.log(`Failure rate: ${failures.length}/${concurrentRequests}`);
}
```
```bash
# Use load testing tools
ab -n 1000 -c 100 http://localhost:3000/api/endpoint
# Use k6 for more complex scenarios
k6 run load-test.js
# Monitor during load test
watch -n 1 'ps aux | grep node'
```
### 4. Verify Reproduction Reliability
Test that reproduction is reliable:
**Reliability Testing**:
```javascript
async function testReproductionReliability() {
  const iterations = 50;
  let reproducedCount = 0;
  for (let i = 0; i < iterations; i++) {
    try {
      await reproduceIssue(); // Resolves when the issue reproduces
      reproducedCount++;
    } catch (error) {
      // Issue did not reproduce on this attempt
    }
  }
  const reliability = (reproducedCount / iterations) * 100;
  console.log(`Reproduction reliability: ${reliability}%`);
  if (reliability < 80) {
    console.warn('Reproduction is not reliable enough. Need to refine.');
  }
}
```
**Improve Reliability**:
```javascript
// If reliability is low, add more constraints
async function improvedReproduction() {
// 1. Reset state between attempts
await resetDatabase();
await clearCache();
// 2. Add specific data constraints
const testUser = await createUserWithSpecificProfile({
accountAge: 30, // days
orderCount: 5,
subscriptionTier: 'premium'
});
// 3. Control timing precisely
await sleep(100); // Ensure service is ready
// 4. Set specific environment conditions
process.env.FEATURE_FLAG_X = 'true';
// Execute
await reproduceIssue();
}
```
### 5. Create Automated Test Case
Convert reproduction into automated test:
**Unit Test Example**:
```javascript
describe('File Upload Bug', () => {
beforeEach(async () => {
// Setup test environment
await resetTestDatabase();
await clearUploadDirectory();
});
it('should handle files larger than 10MB', async () => {
// Arrange
const largeFile = createTestFile(15 * 1024 * 1024);
const user = await createTestUser();
// Act
const response = await uploadFile(user.id, largeFile);
// Assert
expect(response.status).toBe(413);
expect(response.body.error).toContain('File too large');
});
it('should succeed with files under 10MB', async () => {
// Verify issue is specifically about size
const smallFile = createTestFile(5 * 1024 * 1024);
const user = await createTestUser();
const response = await uploadFile(user.id, smallFile);
expect(response.status).toBe(200);
});
});
```
**Integration Test Example**:
```javascript
describe('Order Processing Race Condition', () => {
it('should handle concurrent order updates safely', async () => {
// Setup
const order = await createTestOrder({ status: 'pending' });
// Simulate race condition
const updatePromises = [
updateOrderStatus(order.id, 'processing'),
updateOrderStatus(order.id, 'confirmed')
];
// Both should complete without error
await Promise.all(updatePromises);
// Verify final state is consistent
const finalOrder = await getOrder(order.id);
expect(['processing', 'confirmed']).toContain(finalOrder.status);
// Verify no data corruption
const auditLogs = await getOrderAuditLogs(order.id);
expect(auditLogs).toHaveLength(2);
});
});
```
**E2E Test Example**:
```javascript
describe('Dashboard Load Performance', () => {
it('should load dashboard under 2 seconds', async () => {
// Setup user with large dataset
const user = await createUserWithLargeDataset({
orders: 1000,
documents: 500
});
// Login
await page.goto('/login');
await page.fill('#email', user.email);
await page.fill('#password', 'testpass123');
await page.click('#login-button');
// Navigate to dashboard and measure time
const startTime = Date.now();
await page.goto('/dashboard');
await page.waitForSelector('.dashboard-loaded');
const loadTime = Date.now() - startTime;
// Assert performance
expect(loadTime).toBeLessThan(2000);
});
});
```
### 6. Document Reproduction Steps
Create comprehensive reproduction documentation:
**Reproduction Guide Template**:
```markdown
# Reproduction Guide: [Issue Name]
## Prerequisites
- Node.js v18.x
- PostgreSQL 14+
- Docker (optional)
- Test account credentials
## Environment Setup
### 1. Clone and Install
\`\`\`bash
git clone https://github.com/org/repo.git
cd repo
npm install
\`\`\`
### 2. Database Setup
\`\`\`bash
# Create test database
createdb test_app
# Load test data
psql test_app < test-data/problematic_data.sql
\`\`\`
### 3. Configuration
\`\`\`bash
# Copy test environment file
cp .env.test .env
# Update with test database URL
echo "DATABASE_URL=postgresql://localhost/test_app" >> .env
\`\`\`
## Reproduction Steps
### Manual Reproduction
1. Start the application:
\`\`\`bash
npm start
\`\`\`
2. Login with test user:
- Email: test@example.com
- Password: testpass123
3. Navigate to Dashboard: http://localhost:3000/dashboard
4. Click "Upload File" button
5. Select file larger than 10MB from test-data/
6. Click "Submit"
7. **Expected**: File uploads successfully
**Actual**: 413 Request Entity Too Large error
### Automated Reproduction
\`\`\`bash
# Run reproduction test
npm test -- tests/reproduction/file-upload-bug.test.js
# Expected output:
# ✓ reproduces 413 error with files > 10MB
# ✓ succeeds with files < 10MB
\`\`\`
## Reproduction Reliability
- **Success Rate**: 100% (fails every time)
- **Environment**: All environments
- **Conditions**: File size > 10MB
## Key Observations
- Issue occurs consistently with files > 10MB
- Works fine with files ≤ 10MB
- Error comes from Nginx, not application
- Content-Length header shows correct size
## Debugging Hints
- Check Nginx configuration: `/etc/nginx/nginx.conf`
- Look for `client_max_body_size` directive
- Application code may be fine, infrastructure issue
## Related Files
- test-data/large-file.bin (15MB test file)
- test-data/problematic_data.sql (test database dump)
- tests/reproduction/file-upload-bug.test.js (automated test)
```
### 7. Validate Different Scenarios
Test edge cases and variations:
**Scenario Matrix**:
```javascript
const testScenarios = [
// Vary file sizes
{ fileSize: '1MB', expected: 'success' },
{ fileSize: '10MB', expected: 'success' },
{ fileSize: '11MB', expected: 'failure' },
{ fileSize: '50MB', expected: 'failure' },
// Vary file types
{ fileType: 'image/jpeg', expected: 'success' },
{ fileType: 'application/pdf', expected: 'success' },
{ fileType: 'video/mp4', expected: 'failure' },
// Vary user types
{ userType: 'free', expected: 'failure' },
{ userType: 'premium', expected: 'success' },
// Vary environments
{ environment: 'local', expected: 'success' },
{ environment: 'staging', expected: 'failure' },
{ environment: 'production', expected: 'failure' }
];
for (const scenario of testScenarios) {
const result = await testScenario(scenario);
console.log(`Scenario ${JSON.stringify(scenario)}: ${result}`);
}
```
## Output Format
```markdown
# Reproduction Report: [Issue Name]
## Summary
[Brief description of reproduction strategy and success]
## Reproduction Reliability
- **Success Rate**: [percentage]%
- **Environment**: [local|staging|production|all]
- **Conditions**: [specific conditions needed]
- **Timing**: [immediate|delayed|intermittent]
## Prerequisites
### Environment Requirements
- [Software requirement 1]
- [Software requirement 2]
- [Configuration requirement 1]
### Data Requirements
- [Test data 1]
- [Test data 2]
- [Database state]
### Access Requirements
- [Credentials needed]
- [Permissions needed]
- [Resources needed]
## Reproduction Steps
### Quick Reproduction
\`\`\`bash
# Fastest way to reproduce
[commands to quickly reproduce the issue]
\`\`\`
### Detailed Reproduction
#### Step 1: [Setup]
\`\`\`bash
[detailed commands]
\`\`\`
[Expected result]
#### Step 2: [Preparation]
\`\`\`bash
[detailed commands]
\`\`\`
[Expected result]
#### Step 3: [Trigger Issue]
\`\`\`bash
[detailed commands]
\`\`\`
**Expected**: [expected behavior]
**Actual**: [actual behavior with issue]
## Automated Test Case
### Test Code
\`\`\`[language]
[Complete automated test that reproduces the issue]
\`\`\`
### Running the Test
\`\`\`bash
[command to run the test]
\`\`\`
### Expected Output
\`\`\`
[what the test output should show]
\`\`\`
## Scenario Variations
### Variation 1: [Description]
- **Conditions**: [conditions]
- **Result**: [occurs|does not occur]
- **Notes**: [observations]
### Variation 2: [Description]
- **Conditions**: [conditions]
- **Result**: [occurs|does not occur]
- **Notes**: [observations]
## Key Observations
### What Triggers the Issue
- [Trigger 1]
- [Trigger 2]
- [Trigger 3]
### What Prevents the Issue
- [Prevention 1]
- [Prevention 2]
### Minimal Reproduction
[Simplest possible way to reproduce]
## Test Data Files
### File 1: [filename]
**Location**: [path]
**Purpose**: [what this file is for]
**Contents**: [brief description]
### File 2: [filename]
**Location**: [path]
**Purpose**: [what this file is for]
**Contents**: [brief description]
## Troubleshooting Reproduction
### If Reproduction Fails
1. [Check 1]
2. [Check 2]
3. [Check 3]
### Common Issues
- **Issue**: [problem with reproduction]
**Solution**: [how to fix]
- **Issue**: [problem with reproduction]
**Solution**: [how to fix]
## Next Steps
1. **Diagnosis**: Use `/debug diagnose` with reproduction steps
2. **Fix**: Use `/debug fix` once root cause is identified
3. **Verification**: Re-run reproduction after fix to verify resolution
## Appendices
### A. Test Data
[Links to or contents of test data files]
### B. Environment Configuration
[Complete environment configuration needed]
### C. Video/Screenshots
[If applicable, links to recordings showing the issue]
```
## Error Handling
**Cannot Reproduce Locally**:
If the issue cannot be reproduced in the local environment:
1. Document what was tried
2. List environment differences
3. Suggest production debugging approach
4. Create monitoring to capture more data
**Unreliable Reproduction**:
If reproduction is intermittent:
1. Identify factors affecting reliability
2. Add more constraints to increase reliability
3. Document reliability percentage
4. Suggest statistical testing approach
**Missing Prerequisites**:
If prerequisites are unavailable:
1. List what's missing
2. Suggest alternatives
3. Propose workaround strategies
4. Document assumptions
## Integration with Other Operations
- **Before**: Use `/debug diagnose` to understand the issue first
- **After**: Use `/debug fix` to implement the fix
- **Related**: Use `/debug analyze-logs` to gather more reproduction context
## Agent Utilization
This operation leverages the **10x-fullstack-engineer** agent for:
- Creating reliable reproduction strategies
- Designing comprehensive test cases
- Identifying edge cases and variations
- Documenting reproduction steps clearly

commands/debug/skill.md Normal file

@@ -0,0 +1,83 @@
---
description: Comprehensive debugging toolkit for complex issues - diagnosis, reproduction, log analysis, performance, and memory debugging
argument-hint: <operation> [parameters...]
model: inherit
---
# Debug Skill - Advanced Debugging Operations
You are routing requests to specialized debugging operations. Parse the `$ARGUMENTS` to determine which debugging operation to execute.
## Available Operations
- **diagnose** - Comprehensive diagnosis and root cause analysis across all stack layers
- **reproduce** - Create reliable reproduction strategies and test cases for issues
- **fix** - Implement targeted fixes with verification and prevention measures
- **analyze-logs** - Deep log analysis with pattern detection and timeline correlation
- **performance** - Performance debugging, profiling, and optimization
- **memory** - Memory leak detection, analysis, and optimization
## Routing Logic
Extract the first word from `$ARGUMENTS` as the operation name, and pass the remainder as operation parameters.
**Arguments received**: `$ARGUMENTS`
**Routing Instructions**:
1. **Parse the operation**: Extract the first word from `$ARGUMENTS`
2. **Load operation instructions**: Read the corresponding operation file from `.claude/commands/debug/`
3. **Execute with context**: Follow the operation's instructions with the remaining parameters
4. **Leverage agent**: All operations can leverage the 10x-fullstack-engineer agent for deep expertise
## Operation Routing
```
diagnose → Read and follow: .claude/commands/debug/diagnose.md
reproduce → Read and follow: .claude/commands/debug/reproduce.md
fix → Read and follow: .claude/commands/debug/fix.md
analyze-logs → Read and follow: .claude/commands/debug/analyze-logs.md
performance → Read and follow: .claude/commands/debug/performance.md
memory → Read and follow: .claude/commands/debug/memory.md
```
## Base Directory
All operation files are located at: `.claude/commands/debug/`
## Error Handling
If no operation is specified or the operation is not recognized:
**Available debugging operations**:
- `/debug diagnose issue:"..." [environment:"..."] [logs:"..."]` - Comprehensive diagnosis
- `/debug reproduce issue:"..." [environment:"..."] [data:"..."]` - Create reproduction strategy
- `/debug fix issue:"..." root_cause:"..." [verification:"..."]` - Implement targeted fix
- `/debug analyze-logs path:"..." [pattern:"..."] [timeframe:"..."]` - Deep log analysis
- `/debug performance component:"..." [metric:"..."] [threshold:"..."]` - Performance debugging
- `/debug memory component:"..." [symptom:"..."] [duration:"..."]` - Memory debugging
**Example usage**:
```
/debug diagnose issue:"Users getting 500 errors on file upload" environment:"production" logs:"logs/app.log"
/debug reproduce issue:"Payment webhook fails intermittently" environment:"staging" data:"sample-webhook-payload.json"
/debug fix issue:"Race condition in order processing" root_cause:"Missing transaction lock" verification:"run-integration-tests"
/debug analyze-logs path:"logs/application.log" pattern:"ERROR.*timeout" timeframe:"last-24h"
/debug performance component:"api-endpoint:/orders" metric:"response-time" threshold:"200ms"
/debug memory component:"background-worker" symptom:"growing-heap" duration:"6h"
```
Please specify an operation and provide the necessary parameters.
## Integration with 10x-fullstack-engineer Agent
All debugging operations are designed to work seamlessly with the 10x-fullstack-engineer agent, which provides:
- Cross-stack debugging expertise
- Systematic root cause analysis
- Production-grade debugging strategies
- Performance and security awareness
- Prevention-focused mindset
## Execution
Based on the parsed operation from `$ARGUMENTS`, read the appropriate operation file and follow its instructions with the remaining parameters.