#!/bin/bash
# Purpose: Profile database queries and identify slow operations
# Version: 1.0.0
# Usage: ./query-profiler.sh <database-url> [threshold-ms] [output-dir]
# Returns: 0=success, 1=profiling failed, 2=invalid arguments
# Dependencies: psql (PostgreSQL) or mysql (MySQL); also awk, bc, and column
set -euo pipefail
# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Arguments
DATABASE_URL="${1:-}"
THRESHOLD_MS="${2:-500}"
OUTPUT_DIR="${3:-./query-profiles}"
# Validate arguments
if [ -z "$DATABASE_URL" ]; then
    echo -e "${RED}Error: Database URL is required${NC}"
    echo "Usage: $0 <database-url> [threshold-ms] [output-dir]"
    echo "Example: $0 postgresql://user:pass@localhost:5432/dbname 500 ./reports"
    exit 2
fi
# Create output directory
mkdir -p "$OUTPUT_DIR"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
echo -e "${GREEN}Starting database query profiling${NC}"
echo "Threshold: ${THRESHOLD_MS}ms"
echo "Output directory: $OUTPUT_DIR"
# Detect database type
if [[ "$DATABASE_URL" == postgresql://* ]] || [[ "$DATABASE_URL" == postgres://* ]]; then
DB_TYPE="postgresql"
elif [[ "$DATABASE_URL" == mysql://* ]]; then
DB_TYPE="mysql"
else
echo -e "${YELLOW}Warning: Could not detect database type from URL${NC}"
DB_TYPE="unknown"
fi
echo "Database type: $DB_TYPE"
# PostgreSQL profiling
if [ "$DB_TYPE" = "postgresql" ]; then
echo -e "\n${YELLOW}Running PostgreSQL query analysis...${NC}"
# Enable pg_stat_statements if not already enabled
psql "$DATABASE_URL" -c "CREATE EXTENSION IF NOT EXISTS pg_stat_statements;" 2>/dev/null || true

    # Get slow queries
    echo "Finding slow queries (>${THRESHOLD_MS}ms)..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" <<EOF
SELECT
    substring(query, 1, 100) AS short_query,
    calls,
    round(mean_exec_time::numeric, 2) AS avg_time_ms,
    round(max_exec_time::numeric, 2) AS max_time_ms,
    round(total_exec_time::numeric, 2) AS total_time_ms,
    round((100 * total_exec_time / sum(total_exec_time) OVER ())::numeric, 2) AS pct_total_time
FROM pg_stat_statements
WHERE mean_exec_time > ${THRESHOLD_MS}
  AND query NOT LIKE '%pg_stat_statements%'
ORDER BY mean_exec_time DESC
LIMIT 50;
EOF
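    # Note: mean_exec_time/total_exec_time assume PostgreSQL 13+ (pg_stat_statements 1.8);
    # PostgreSQL 12 and earlier expose these columns as mean_time/total_time.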

    # Get most called queries
    echo "Finding most frequently called queries..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/frequent-queries-${TIMESTAMP}.csv" <<EOF
SELECT
    substring(query, 1, 100) AS short_query,
    calls,
    round(mean_exec_time::numeric, 2) AS avg_time_ms,
    round(total_exec_time::numeric, 2) AS total_time_ms
FROM pg_stat_statements
WHERE query NOT LIKE '%pg_stat_statements%'
ORDER BY calls DESC
LIMIT 50;
EOF

    # Get index usage statistics
    echo "Analyzing index usage..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/index-usage-${TIMESTAMP}.csv" <<EOF
SELECT
    schemaname,
    tablename,
    indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC
LIMIT 50;
EOF
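    # Sorted by idx_scan ascending, so never-used indexes (idx_scan = 0) appear first.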

    # Find potential missing indexes (tables dominated by sequential scans)
    echo "Finding potential missing indexes..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv" <<EOF
SELECT
    schemaname,
    tablename,
    seq_scan,
    seq_tup_read,
    idx_scan,
    CASE WHEN seq_scan > 0 THEN seq_tup_read / seq_scan ELSE 0 END AS avg_seq_read,
    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS table_size
FROM pg_stat_user_tables
WHERE seq_scan > 1000
  AND (idx_scan = 0 OR seq_scan > idx_scan)
ORDER BY seq_tup_read DESC
LIMIT 30;
EOF
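    # Heuristic only: frequent sequential scans can be legitimate for small tables.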

    # Table statistics
    echo "Gathering table statistics..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/table-stats-${TIMESTAMP}.csv" <<EOF
SELECT
    schemaname,
    tablename,
    pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS total_size,
    n_live_tup,
    n_dead_tup,
    CASE WHEN n_live_tup > 0 THEN round((n_dead_tup::numeric / n_live_tup::numeric) * 100, 2) ELSE 0 END AS dead_pct,
    last_vacuum,
    last_autovacuum
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
LIMIT 30;
EOF

    # Generate text report
    echo -e "\n${GREEN}=== Slow Queries Summary ===${NC}"
    echo "Queries slower than ${THRESHOLD_MS}ms:"
    head -10 "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" | column -t -s','

    echo -e "\n${GREEN}=== Most Frequent Queries ===${NC}"
    head -10 "${OUTPUT_DIR}/frequent-queries-${TIMESTAMP}.csv" | column -t -s','

    echo -e "\n${GREEN}=== Potential Missing Indexes ===${NC}"
    head -10 "${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv" | column -t -s','

    echo -e "\n${YELLOW}=== Recommendations ===${NC}"
    # Check for unused indexes
    UNUSED_INDEXES=$(awk -F',' '$4 == 0' "${OUTPUT_DIR}/index-usage-${TIMESTAMP}.csv" | wc -l)
    if [ "$UNUSED_INDEXES" -gt 0 ]; then
        echo -e "${YELLOW}⚠ Found $UNUSED_INDEXES unused indexes (0 scans)${NC}"
        echo " Consider removing to save space and improve write performance"
    fi
    # Check for missing indexes (no header row, so every line is one table)
    MISSING_INDEXES=$(wc -l < "${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv")
    if [ "$MISSING_INDEXES" -gt 0 ]; then
        echo -e "${YELLOW}⚠ Found $MISSING_INDEXES tables with high sequential scans${NC}"
        echo " Consider adding indexes on frequently queried columns"
    fi
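    # dead_pct is field 6 of the table-stats CSV.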
    # Check for bloated tables
    BLOATED_TABLES=$(awk -F',' '$6 > 20' "${OUTPUT_DIR}/table-stats-${TIMESTAMP}.csv" | wc -l)
    if [ "$BLOATED_TABLES" -gt 0 ]; then
        echo -e "${YELLOW}⚠ Found $BLOATED_TABLES tables with >20% dead tuples${NC}"
        echo " Run VACUUM ANALYZE on these tables"
    fi

# MySQL profiling
elif [ "$DB_TYPE" = "mysql" ]; then
    echo -e "\n${YELLOW}Running MySQL query analysis...${NC}"
    # Enable slow query log temporarily
    mysql "$DATABASE_URL" -e "SET GLOBAL slow_query_log = 'ON';" 2>/dev/null || true
    mysql "$DATABASE_URL" -e "SET GLOBAL long_query_time = $(echo "scale=3; $THRESHOLD_MS/1000" | bc);" 2>/dev/null || true
echo "Analyzing query performance..."
mysql "$DATABASE_URL" -e "
SELECT
DIGEST_TEXT AS query,
COUNT_STAR AS exec_count,
ROUND(AVG_TIMER_WAIT/1000000000, 2) AS avg_time_ms,
ROUND(MAX_TIMER_WAIT/1000000000, 2) AS max_time_ms,
ROUND(SUM_TIMER_WAIT/1000000000, 2) AS total_time_ms
FROM performance_schema.events_statements_summary_by_digest
WHERE AVG_TIMER_WAIT/1000000000 > ${THRESHOLD_MS}
ORDER BY AVG_TIMER_WAIT DESC
LIMIT 50;
" > "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.txt"
echo -e "${GREEN}Query analysis complete${NC}"
cat "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.txt"
else
    echo -e "${RED}Error: Unsupported database type${NC}"
    exit 1
fi
# Generate JSON summary
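# On the MySQL path the CSV reports do not exist, so the PostgreSQL-only counters default to 0.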
SLOW_QUERY_COUNT=$([ -f "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" ] && wc -l < "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" || echo "0")
cat > "${OUTPUT_DIR}/summary-${TIMESTAMP}.json" <<EOF
{
"timestamp": "${TIMESTAMP}",
"databaseType": "${DB_TYPE}",
"thresholdMs": ${THRESHOLD_MS},
"slowQueryCount": ${SLOW_QUERY_COUNT},
"unusedIndexes": ${UNUSED_INDEXES:-0},
"potentialMissingIndexes": $((${MISSING_INDEXES:-1} - 1)),
"bloatedTables": ${BLOATED_TABLES:-0}
}
EOF
echo -e "\n${GREEN}✓ Query profiling complete${NC}"
echo "Results saved to:"
echo " - ${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv"
echo " - ${OUTPUT_DIR}/frequent-queries-${TIMESTAMP}.csv"
echo " - ${OUTPUT_DIR}/index-usage-${TIMESTAMP}.csv"
echo " - ${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv"
echo " - ${OUTPUT_DIR}/table-stats-${TIMESTAMP}.csv"
echo " - ${OUTPUT_DIR}/summary-${TIMESTAMP}.json"
exit 0