#!/bin/bash
# Purpose: Profile database queries and identify slow operations
# Version: 1.0.0
# Usage: ./query-profiler.sh <database-url> [threshold-ms] [output-dir]
# Returns: 0=success, 1=profiling failed, 2=invalid arguments
# Dependencies: psql (PostgreSQL) or mysql (MySQL)

set -euo pipefail

# Color output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Arguments
DATABASE_URL="${1:-}"
THRESHOLD_MS="${2:-500}"
OUTPUT_DIR="${3:-./query-profiles}"

# Validate arguments
if [ -z "$DATABASE_URL" ]; then
    echo -e "${RED}Error: Database URL is required${NC}"
    echo "Usage: $0 <database-url> [threshold-ms] [output-dir]"
    echo "Example: $0 postgresql://user:pass@localhost:5432/dbname 500 ./reports"
    exit 2
fi

# Create output directory
mkdir -p "$OUTPUT_DIR"
TIMESTAMP=$(date +%Y%m%d-%H%M%S)

echo -e "${GREEN}Starting database query profiling${NC}"
echo "Threshold: ${THRESHOLD_MS}ms"
echo "Output directory: $OUTPUT_DIR"

# Detect database type from the URL scheme
if [[ "$DATABASE_URL" == postgresql://* ]] || [[ "$DATABASE_URL" == postgres://* ]]; then
    DB_TYPE="postgresql"
elif [[ "$DATABASE_URL" == mysql://* ]]; then
    DB_TYPE="mysql"
else
    echo -e "${YELLOW}Warning: Could not detect database type from URL${NC}"
    DB_TYPE="unknown"
fi

echo "Database type: $DB_TYPE"

# PostgreSQL profiling
if [ "$DB_TYPE" = "postgresql" ]; then
    echo -e "\n${YELLOW}Running PostgreSQL query analysis...${NC}"

    # Enable pg_stat_statements if not already enabled (the module must be in
    # shared_preload_libraries; ignore the error if we lack privileges)
    psql "$DATABASE_URL" -c "CREATE EXTENSION IF NOT EXISTS pg_stat_statements;" 2>/dev/null || true

    # Get slow queries. Column names assume PostgreSQL 13+; older versions
    # use mean_time/total_time instead of mean_exec_time/total_exec_time.
    echo "Finding slow queries (>${THRESHOLD_MS}ms)..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" <<EOF
SELECT
    round(mean_exec_time::numeric, 2) AS avg_time_ms,
    calls,
    round(total_exec_time::numeric, 2) AS total_time_ms,
    left(query, 200) AS query
FROM pg_stat_statements
WHERE mean_exec_time > ${THRESHOLD_MS}
  AND query NOT LIKE '%pg_stat_statements%'
ORDER BY mean_exec_time DESC
LIMIT 50;
EOF

    # Get most called queries
    echo "Finding most frequently called queries..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/frequent-queries-${TIMESTAMP}.csv" <<EOF
SELECT
    calls,
    round(mean_exec_time::numeric, 2) AS avg_time_ms,
    round(total_exec_time::numeric, 2) AS total_time_ms,
    left(query, 200) AS query
FROM pg_stat_statements
WHERE query NOT LIKE '%pg_stat_statements%'
ORDER BY calls DESC
LIMIT 50;
EOF

    # Index usage (idx_scan must stay in column 4: the unused-index
    # recommendation below filters on it)
    echo "Analyzing index usage..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/index-usage-${TIMESTAMP}.csv" <<EOF
SELECT
    schemaname,
    relname,
    indexrelname,
    idx_scan,
    pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC
LIMIT 50;
EOF

    # Tables dominated by sequential scans: candidates for missing indexes
    echo "Looking for tables that may need indexes..."
    psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv" <<EOF
SELECT
    schemaname,
    relname,
    seq_scan,
    seq_tup_read,
    idx_scan,
    CASE WHEN seq_scan > 0 THEN seq_tup_read / seq_scan ELSE 0 END AS avg_seq_read,
    pg_size_pretty(pg_total_relation_size(schemaname||'.'||relname)) AS table_size
FROM pg_stat_user_tables
WHERE seq_scan > 1000
  AND (idx_scan = 0 OR seq_scan > idx_scan)
ORDER BY seq_tup_read DESC
LIMIT 30;
EOF

    # Table statistics
    echo "Gathering table statistics..."
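    # Dead tuples accumulate as UPDATE/DELETE leave old row versions behind
    # until (auto)vacuum reclaims them. The dead_pct computed below is
    # n_dead_tup relative to n_live_tup; the recommendations section flags
    # tables where it exceeds 20%.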
psql "$DATABASE_URL" -t -A -F"," > "${OUTPUT_DIR}/table-stats-${TIMESTAMP}.csv" < 0 THEN round((n_dead_tup::float / n_live_tup::float) * 100, 2) ELSE 0 END AS dead_pct, last_vacuum, last_autovacuum FROM pg_stat_user_tables ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC LIMIT 30; EOF # Generate text report echo -e "\n${GREEN}=== Slow Queries Summary ===${NC}" echo "Queries slower than ${THRESHOLD_MS}ms:" head -10 "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" | column -t -s',' echo -e "\n${GREEN}=== Most Frequent Queries ===${NC}" head -10 "${OUTPUT_DIR}/frequent-queries-${TIMESTAMP}.csv" | column -t -s',' echo -e "\n${GREEN}=== Potential Missing Indexes ===${NC}" head -10 "${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv" | column -t -s',' echo -e "\n${YELLOW}=== Recommendations ===${NC}" # Check for unused indexes UNUSED_INDEXES=$(awk -F',' '$4 == 0' "${OUTPUT_DIR}/index-usage-${TIMESTAMP}.csv" | wc -l) if [ "$UNUSED_INDEXES" -gt 0 ]; then echo -e "${YELLOW}⚠ Found $UNUSED_INDEXES unused indexes (0 scans)${NC}" echo " Consider removing to save space and improve write performance" fi # Check for missing indexes MISSING_INDEXES=$(wc -l < "${OUTPUT_DIR}/missing-indexes-${TIMESTAMP}.csv") if [ "$MISSING_INDEXES" -gt 1 ]; then echo -e "${YELLOW}⚠ Found $((MISSING_INDEXES - 1)) tables with high sequential scans${NC}" echo " Consider adding indexes on frequently queried columns" fi # Check for bloated tables BLOATED_TABLES=$(awk -F',' '$6 > 20' "${OUTPUT_DIR}/table-stats-${TIMESTAMP}.csv" | wc -l) if [ "$BLOATED_TABLES" -gt 0 ]; then echo -e "${YELLOW}⚠ Found $BLOATED_TABLES tables with >20% dead tuples${NC}" echo " Run VACUUM ANALYZE on these tables" fi # MySQL profiling elif [ "$DB_TYPE" = "mysql" ]; then echo -e "\n${YELLOW}Running MySQL query analysis...${NC}" # Enable slow query log temporarily mysql "$DATABASE_URL" -e "SET GLOBAL slow_query_log = 'ON';" 2>/dev/null || true mysql "$DATABASE_URL" -e "SET GLOBAL long_query_time = $(echo "scale=3; $THRESHOLD_MS/1000" | bc);" 2>/dev/null || true echo "Analyzing query performance..." mysql "$DATABASE_URL" -e " SELECT DIGEST_TEXT AS query, COUNT_STAR AS exec_count, ROUND(AVG_TIMER_WAIT/1000000000, 2) AS avg_time_ms, ROUND(MAX_TIMER_WAIT/1000000000, 2) AS max_time_ms, ROUND(SUM_TIMER_WAIT/1000000000, 2) AS total_time_ms FROM performance_schema.events_statements_summary_by_digest WHERE AVG_TIMER_WAIT/1000000000 > ${THRESHOLD_MS} ORDER BY AVG_TIMER_WAIT DESC LIMIT 50; " > "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.txt" echo -e "${GREEN}Query analysis complete${NC}" cat "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.txt" else echo -e "${RED}Error: Unsupported database type${NC}" exit 1 fi # Generate JSON summary SLOW_QUERY_COUNT=$([ -f "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" ] && tail -n +1 "${OUTPUT_DIR}/slow-queries-${TIMESTAMP}.csv" | wc -l || echo "0") cat > "${OUTPUT_DIR}/summary-${TIMESTAMP}.json" <