Initial commit
commands/optimize/.scripts/query-profiler.sh (executable file, 226 lines)
@@ -0,0 +1,226 @@
#!/bin/bash
# Purpose: Profile database queries and identify slow operations
# Version: 1.0.0
# Usage: ./query-profiler.sh <database-url> [threshold-ms] [output-dir]
# Returns: 0=success, 1=profiling failed, 2=invalid arguments
# Dependencies: psql (PostgreSQL) or mysql (MySQL); bc (MySQL threshold conversion)

set -euo pipefail

# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Arguments
DATABASE_URL="${1:-}"
THRESHOLD_MS="${2:-500}"
OUTPUT_DIR="${3:-./query-profiles}"

# Validate arguments
if [ -z "$DATABASE_URL" ]; then
    echo -e "${RED}Error: Database URL is required${NC}"
    echo "Usage: $0 <database-url> [threshold-ms] [output-dir]"
    echo "Example: $0 postgresql://user:pass@localhost:5432/dbname 500 ./reports"
    exit 2
fi
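
# THRESHOLD_MS is interpolated directly into SQL and shell arithmetic below,
# so require a plain integer rather than passing arbitrary input through.
if ! [[ "$THRESHOLD_MS" =~ ^[0-9]+$ ]]; then
    echo -e "${RED}Error: threshold-ms must be a positive integer${NC}"
    exit 2
fi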

# Create output directory
mkdir -p "$OUTPUT_DIR"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)

echo -e "${GREEN}Starting database query profiling${NC}"
echo "Threshold: ${THRESHOLD_MS}ms"
echo "Output directory: $OUTPUT_DIR"

# Detect database type from the URL scheme
if [[ "$DATABASE_URL" == postgresql://* ]] || [[ "$DATABASE_URL" == postgres://* ]]; then
    DB_TYPE="postgresql"
elif [[ "$DATABASE_URL" == mysql://* ]]; then
    DB_TYPE="mysql"
else
    echo -e "${YELLOW}Warning: Could not detect database type from URL${NC}"
    DB_TYPE="unknown"
fi

echo "Database type: $DB_TYPE"

# PostgreSQL profiling
if [ "$DB_TYPE" = "postgresql" ]; then
    echo -e "\n${YELLOW}Running PostgreSQL query analysis...${NC}"

    # Enable pg_stat_statements if not already enabled
    psql "$DATABASE_URL" -c "CREATE EXTENSION IF NOT EXISTS pg_stat_statements;" 2>/dev/null || true
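
    # Sanity check: CREATE EXTENSION alone is not sufficient; pg_stat_statements
    # must also appear in shared_preload_libraries, which requires a server
    # restart. Warn instead of failing when the view is unavailable. (Column
    # names such as mean_exec_time assume PostgreSQL 13+.)
    psql "$DATABASE_URL" -t -c "SELECT count(*) FROM pg_stat_statements;" >/dev/null 2>&1 || \
        echo -e "${YELLOW}Warning: pg_stat_statements is not available; query statistics may be empty${NC}"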

    # Get slow queries
    echo "Finding slow queries (>${THRESHOLD_MS}ms)..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" <<EOF
SELECT
    substring(query, 1, 100) AS short_query,
    calls,
    round(mean_exec_time::numeric, 2) AS avg_time_ms,
    round(max_exec_time::numeric, 2) AS max_time_ms,
    round(total_exec_time::numeric, 2) AS total_time_ms,
    round((100 * total_exec_time / sum(total_exec_time) OVER ())::numeric, 2) AS pct_total_time
FROM pg_stat_statements
WHERE mean_exec_time > ${THRESHOLD_MS}
  AND query NOT LIKE '%pg_stat_statements%'
ORDER BY mean_exec_time DESC
LIMIT 50;
EOF

    # Get most frequently called queries
    echo "Finding most frequently called queries..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/frequent-queries-${TIMESTAMP}.csv" <<EOF
SELECT
    substring(query, 1, 100) AS short_query,
    calls,
    round(mean_exec_time::numeric, 2) AS avg_time_ms,
    round(total_exec_time::numeric, 2) AS total_time_ms
FROM pg_stat_statements
WHERE query NOT LIKE '%pg_stat_statements%'
ORDER BY calls DESC
LIMIT 50;
EOF

    # Get index usage statistics (least-scanned indexes first);
    # pg_stat_user_indexes exposes relname/indexrelname, aliased here
    # to keep the CSV headers readable
    echo "Analyzing index usage..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/index-usage-${TIMESTAMP}.csv" <<EOF
SELECT
    schemaname,
    relname AS tablename,
    indexrelname AS indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC
LIMIT 50;
EOF
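
    # idx_scan counts index scans since the last statistics reset, so a
    # recently created index can legitimately show 0 here.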

    # Find missing indexes (tables dominated by sequential scans);
    # relid avoids the quoting problems of schemaname||'.'||relname
    # with mixed-case names
    echo "Finding potential missing indexes..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv" <<EOF
SELECT
    schemaname,
    relname AS tablename,
    seq_scan,
    seq_tup_read,
    idx_scan,
    CASE WHEN seq_scan > 0 THEN seq_tup_read / seq_scan ELSE 0 END AS avg_seq_read,
    pg_size_pretty(pg_total_relation_size(relid)) AS table_size
FROM pg_stat_user_tables
WHERE seq_scan > 1000
  AND (idx_scan = 0 OR seq_scan > idx_scan)
ORDER BY seq_tup_read DESC
LIMIT 30;
EOF
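
    # Heuristic only: the planner legitimately prefers sequential scans on
    # small tables, so treat these rows as candidates to investigate,
    # not a to-do list.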

    # Table statistics; the dead-tuple ratio is cast to numeric because
    # round(double precision, int) does not exist in PostgreSQL
    echo "Gathering table statistics..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/table-stats-${TIMESTAMP}.csv" <<EOF
SELECT
    schemaname,
    relname AS tablename,
    pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
    n_live_tup,
    n_dead_tup,
    CASE WHEN n_live_tup > 0 THEN round((n_dead_tup::numeric / n_live_tup::numeric) * 100, 2) ELSE 0 END AS dead_pct,
    last_vacuum,
    last_autovacuum
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC
LIMIT 30;
EOF

    # Generate text report
    echo -e "\n${GREEN}=== Slow Queries Summary ===${NC}"
    echo "Queries slower than ${THRESHOLD_MS}ms:"
    head -10 "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" | column -t -s','

    echo -e "\n${GREEN}=== Most Frequent Queries ===${NC}"
    head -10 "${OUTPUT_DIR}/frequent-queries-${TIMESTAMP}.csv" | column -t -s','

    echo -e "\n${GREEN}=== Potential Missing Indexes ===${NC}"
    head -10 "${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv" | column -t -s','

    echo -e "\n${YELLOW}=== Recommendations ===${NC}"

    # Check for unused indexes (idx_scan is column 4 of the CSV)
    UNUSED_INDEXES=$(awk -F',' '$4 == 0' "${OUTPUT_DIR}/index-usage-${TIMESTAMP}.csv" | wc -l)
    if [ "$UNUSED_INDEXES" -gt 0 ]; then
        echo -e "${YELLOW}⚠ Found $UNUSED_INDEXES unused indexes (0 scans)${NC}"
        echo "  Consider removing them to save space and improve write performance"
    fi

    # Check for missing indexes (psql -t -A output has no header row,
    # so each CSV line is one table)
    MISSING_INDEXES=$(wc -l < "${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv")
    if [ "$MISSING_INDEXES" -gt 0 ]; then
        echo -e "${YELLOW}⚠ Found $MISSING_INDEXES tables with high sequential scans${NC}"
        echo "  Consider adding indexes on frequently queried columns"
    fi

    # Check for bloated tables (dead_pct is column 6 of the CSV)
    BLOATED_TABLES=$(awk -F',' '$6 > 20' "${OUTPUT_DIR}/table-stats-${TIMESTAMP}.csv" | wc -l)
    if [ "$BLOATED_TABLES" -gt 0 ]; then
        echo -e "${YELLOW}⚠ Found $BLOATED_TABLES tables with >20% dead tuples${NC}"
        echo "  Run VACUUM ANALYZE on these tables"
    fi
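
    # For example (hypothetical table name, substitute one reported above):
    #   psql "$DATABASE_URL" -c 'VACUUM ANALYZE public.orders;'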

# MySQL profiling
elif [ "$DB_TYPE" = "mysql" ]; then
    echo -e "\n${YELLOW}Running MySQL query analysis...${NC}"
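
    # Assumption: the mysql client invoked here accepts a mysql:// URI (as
    # MySQL Shell does); the classic mysql client instead takes parsed flags
    # such as --host/--user/--password plus a database name.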

    # Enable the slow query log temporarily
    mysql "$DATABASE_URL" -e "SET GLOBAL slow_query_log = 'ON';" 2>/dev/null || true
    mysql "$DATABASE_URL" -e "SET GLOBAL long_query_time = $(echo "scale=3; $THRESHOLD_MS/1000" | bc);" 2>/dev/null || true
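
    # SET GLOBAL needs the SUPER or SYSTEM_VARIABLES_ADMIN privilege; the
    # '2>/dev/null || true' above lets profiling continue without it.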

    echo "Analyzing query performance..."
    mysql "$DATABASE_URL" -e "
    SELECT
        DIGEST_TEXT AS query,
        COUNT_STAR AS exec_count,
        ROUND(AVG_TIMER_WAIT/1000000000, 2) AS avg_time_ms,
        ROUND(MAX_TIMER_WAIT/1000000000, 2) AS max_time_ms,
        ROUND(SUM_TIMER_WAIT/1000000000, 2) AS total_time_ms
    FROM performance_schema.events_statements_summary_by_digest
    WHERE AVG_TIMER_WAIT/1000000000 > ${THRESHOLD_MS}
    ORDER BY AVG_TIMER_WAIT DESC
    LIMIT 50;
    " > "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.txt"

    echo -e "${GREEN}Query analysis complete${NC}"
    cat "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.txt"

else
    echo -e "${RED}Error: Unsupported database type${NC}"
    exit 1
fi

# Generate JSON summary (psql -t -A output has no header, so wc -l is the row count)
SLOW_QUERY_COUNT=$([ -f "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" ] && wc -l < "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" | tr -d ' ' || echo "0")

cat > "${OUTPUT_DIR}/summary-${TIMESTAMP}.json" <<EOF
{
  "timestamp": "${TIMESTAMP}",
  "databaseType": "${DB_TYPE}",
  "thresholdMs": ${THRESHOLD_MS},
  "slowQueryCount": ${SLOW_QUERY_COUNT},
  "unusedIndexes": ${UNUSED_INDEXES:-0},
  "potentialMissingIndexes": ${MISSING_INDEXES:-0},
  "bloatedTables": ${BLOATED_TABLES:-0}
}
EOF

echo -e "\n${GREEN}✓ Query profiling complete${NC}"
echo "Results saved to:"
echo "  - ${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv"
echo "  - ${OUTPUT_DIR}/frequent-queries-${TIMESTAMP}.csv"
echo "  - ${OUTPUT_DIR}/index-usage-${TIMESTAMP}.csv"
echo "  - ${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv"
echo "  - ${OUTPUT_DIR}/table-stats-${TIMESTAMP}.csv"
echo "  - ${OUTPUT_DIR}/summary-${TIMESTAMP}.json"

exit 0