Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:48:55 +08:00
commit f28999f19c
127 changed files with 62038 additions and 0 deletions

View File

@@ -0,0 +1,805 @@
---
name: query-expert
description: Master SQL and database queries across multiple systems. Generate optimized queries, analyze performance, design indexes, and troubleshoot slow queries for PostgreSQL, MySQL, MongoDB, and more.
---
# Query Expert
Master database queries across SQL and NoSQL systems. Generate optimized queries, analyze performance with EXPLAIN plans, design effective indexes, and troubleshoot slow queries.
## What This Skill Does
Helps you write efficient, performant database queries:
- **Generate Queries** - SQL, MongoDB, GraphQL queries
- **Optimize Queries** - Performance tuning and refactoring
- **Design Indexes** - Index strategies for faster queries
- **Analyze Performance** - EXPLAIN plans and query analysis
- **Troubleshoot** - Debug slow queries and bottlenecks
- **Best Practices** - Query patterns and anti-patterns
## Supported Databases
### SQL Databases
- **PostgreSQL** - Advanced features, CTEs, window functions
- **MySQL/MariaDB** - InnoDB optimization, replication
- **SQLite** - Embedded database optimization
- **SQL Server** - T-SQL, execution plans, DMVs
- **Oracle** - PL/SQL, partitioning, hints
### NoSQL Databases
- **MongoDB** - Aggregation pipelines, indexes
- **Redis** - Key-value queries, Lua scripts
- **Elasticsearch** - Full-text search queries
- **Cassandra** - CQL, partition keys
### Query Languages
- **SQL** - Standard and vendor-specific
- **MongoDB Query Language** - Find, aggregation
- **GraphQL** - Efficient data fetching
- **Cypher** - Neo4j graph queries
## SQL Query Patterns
### SELECT Queries
#### Basic SELECT
```sql
-- ✅ Select only needed columns
SELECT
user_id,
email,
created_at
FROM users
WHERE status = 'active'
AND created_at > NOW() - INTERVAL '30 days'
ORDER BY created_at DESC
LIMIT 100;
-- ❌ Avoid SELECT *
SELECT * FROM users; -- Wastes resources
```
#### JOINs
```sql
-- INNER JOIN (most common)
SELECT
o.order_id,
o.total,
c.name AS customer_name,
c.email
FROM orders o
INNER JOIN customers c ON o.customer_id = c.customer_id
WHERE o.created_at >= '2024-01-01';
-- LEFT JOIN (include all left rows)
SELECT
c.customer_id,
c.name,
COUNT(o.order_id) AS order_count,
COALESCE(SUM(o.total), 0) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name;
-- Multiple JOINs
SELECT
o.order_id,
c.name AS customer_name,
p.product_name,
oi.quantity,
oi.price
FROM orders o
INNER JOIN customers c ON o.customer_id = c.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
INNER JOIN products p ON oi.product_id = p.product_id
WHERE o.status = 'completed';
```
#### Subqueries
```sql
-- Subquery in WHERE
SELECT name, email
FROM customers
WHERE customer_id IN (
SELECT DISTINCT customer_id
FROM orders
WHERE total > 1000
);
-- Correlated subquery
SELECT
c.name,
(SELECT COUNT(*)
FROM orders o
WHERE o.customer_id = c.customer_id) AS order_count
FROM customers c;
-- ✅ Better: Use JOIN instead
SELECT
c.name,
COUNT(o.order_id) AS order_count
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name;
```
### Aggregation
```sql
-- GROUP BY with aggregates
SELECT
category,
COUNT(*) AS product_count,
AVG(price) AS avg_price,
MIN(price) AS min_price,
MAX(price) AS max_price,
SUM(stock_quantity) AS total_stock
FROM products
GROUP BY category
HAVING COUNT(*) > 5
ORDER BY avg_price DESC;
-- Multiple GROUP BY columns
SELECT
DATE_TRUNC('month', created_at) AS month,
category,
SUM(total) AS monthly_sales
FROM orders
GROUP BY DATE_TRUNC('month', created_at), category
ORDER BY month DESC, monthly_sales DESC;
-- ROLLUP for subtotals
SELECT
COALESCE(category, 'TOTAL') AS category,
COALESCE(brand, 'All Brands') AS brand,
SUM(sales) AS total_sales
FROM products
GROUP BY ROLLUP(category, brand);
```
### Window Functions (PostgreSQL, SQL Server, MySQL 8+)
```sql
-- ROW_NUMBER
SELECT
customer_id,
order_date,
total,
ROW_NUMBER() OVER (
PARTITION BY customer_id
ORDER BY order_date DESC
) AS order_rank
FROM orders;
-- Running totals
SELECT
order_date,
total,
SUM(total) OVER (
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total
FROM orders;
-- RANK vs DENSE_RANK
SELECT
product_name,
sales,
RANK() OVER (ORDER BY sales DESC) AS rank,
DENSE_RANK() OVER (ORDER BY sales DESC) AS dense_rank,
NTILE(4) OVER (ORDER BY sales DESC) AS quartile
FROM products;
-- LAG and LEAD
SELECT
order_date,
total,
LAG(total, 1) OVER (ORDER BY order_date) AS prev_total,
LEAD(total, 1) OVER (ORDER BY order_date) AS next_total,
total - LAG(total, 1) OVER (ORDER BY order_date) AS change
FROM orders;
```
### CTEs (Common Table Expressions)
```sql
-- Simple CTE
WITH active_customers AS (
SELECT customer_id, name, email
FROM customers
WHERE status = 'active'
)
SELECT
ac.name,
COUNT(o.order_id) AS order_count
FROM active_customers ac
LEFT JOIN orders o ON ac.customer_id = o.customer_id
GROUP BY ac.customer_id, ac.name;
-- Multiple CTEs
WITH
monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(total) AS sales
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
),
avg_monthly AS (
SELECT AVG(sales) AS avg_sales
FROM monthly_sales
)
SELECT
ms.month,
ms.sales,
am.avg_sales,
ms.sales - am.avg_sales AS variance
FROM monthly_sales ms
CROSS JOIN avg_monthly am
ORDER BY ms.month;
-- Recursive CTE (hierarchies)
WITH RECURSIVE org_tree AS (
-- Base case
SELECT
employee_id,
name,
manager_id,
1 AS level,
ARRAY[employee_id] AS path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case
SELECT
e.employee_id,
e.name,
e.manager_id,
ot.level + 1,
ot.path || e.employee_id
FROM employees e
INNER JOIN org_tree ot ON e.manager_id = ot.employee_id
)
SELECT * FROM org_tree ORDER BY path;
```
## Query Optimization
### 1. Use Indexes Effectively
```sql
-- Create index on frequently queried columns
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_customer_date ON orders(customer_id, order_date);
-- Composite index (order matters!)
CREATE INDEX idx_orders_composite
ON orders(status, customer_id, order_date);
-- ✅ This query uses the index
SELECT * FROM orders
WHERE status = 'pending'
AND customer_id = 123
AND order_date > '2024-01-01';
-- ❌ This cannot use idx_orders_composite efficiently (its leading column, status, is not filtered)
SELECT * FROM orders
WHERE customer_id = 123;
-- Partial/Filtered index (smaller, faster)
CREATE INDEX idx_active_users
ON users(email)
WHERE status = 'active';
-- Covering index (includes all needed columns)
CREATE INDEX idx_users_covering
ON users(email)
INCLUDE (name, created_at);
```
### 2. Avoid SELECT *
```sql
-- ❌ Bad: Retrieves all columns
SELECT * FROM users;
-- ✅ Good: Select only needed columns
SELECT user_id, email, name FROM users;
-- ✅ Good: More efficient for joins
SELECT
u.user_id,
u.email,
o.order_id,
o.total
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id;
```
### 3. Optimize JOINs
```sql
-- ❌ Bad: WHERE on the right-hand table discards the NULL-extended rows, so this LEFT JOIN silently behaves like an INNER JOIN
SELECT u.name, o.total
FROM users u
LEFT JOIN orders o ON u.user_id = o.user_id
WHERE o.status = 'completed';
-- ✅ Good: Filter before JOIN
SELECT u.name, o.total
FROM users u
INNER JOIN (
SELECT user_id, total
FROM orders
WHERE status = 'completed'
) o ON u.user_id = o.user_id;
-- ✅ Even better: Use WHERE with INNER JOIN
SELECT u.name, o.total
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.status = 'completed';
```
### 4. Use EXISTS Instead of IN
```sql
-- ⚠️ Can be slower on some engines/versions: IN with subquery (compare plans with EXPLAIN)
SELECT name FROM customers
WHERE customer_id IN (
SELECT customer_id FROM orders WHERE total > 1000
);
-- ✅ Often faster, and NULL-safe when negated (NOT EXISTS vs NOT IN): EXISTS
SELECT name FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
AND o.total > 1000
);
```
### 5. Avoid Functions on Indexed Columns
```sql
-- ❌ Bad: Function prevents index usage
SELECT * FROM users
WHERE LOWER(email) = 'john@example.com';
-- ✅ Good: Use functional index
CREATE INDEX idx_users_email_lower ON users(LOWER(email));
-- Or use a case-insensitive collation (MySQL syntax shown; the collation must be valid for the column's character set)
SELECT * FROM users
WHERE email = 'john@example.com' COLLATE utf8_general_ci;
```
### 6. Limit Result Sets
```sql
-- ✅ Use LIMIT/TOP for pagination (for deep pages prefer keyset pagination: a large OFFSET still reads the skipped rows)
SELECT * FROM orders
ORDER BY created_at DESC
LIMIT 20 OFFSET 0;
-- ✅ Use WHERE to reduce rows early
SELECT * FROM orders
WHERE created_at > NOW() - INTERVAL '7 days'
ORDER BY created_at DESC;
```
### 7. Batch Operations
```sql
-- ❌ Bad: Multiple single inserts
INSERT INTO users (name, email) VALUES ('User1', 'user1@example.com');
INSERT INTO users (name, email) VALUES ('User2', 'user2@example.com');
-- ✅ Good: Batch insert
INSERT INTO users (name, email) VALUES
('User1', 'user1@example.com'),
('User2', 'user2@example.com'),
('User3', 'user3@example.com');
-- ✅ Good: Batch update
UPDATE products
SET price = price * 1.1
WHERE category IN ('Electronics', 'Computers');
```
## EXPLAIN Plans
### PostgreSQL
```sql
-- Simple EXPLAIN
EXPLAIN
SELECT * FROM orders WHERE customer_id = 123;
-- EXPLAIN ANALYZE (actually runs the query; wrap INSERT/UPDATE/DELETE in a transaction and roll back)
EXPLAIN ANALYZE
SELECT
c.name,
COUNT(o.order_id) AS order_count
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name;
-- Look for:
-- - Seq Scan (fine on small tables; on a large table with a selective filter it usually means a missing index)
-- - Index Scan (good)
-- - Bitmap Heap Scan (good for multiple rows)
-- - Hash Join vs Nested Loop
-- - High cost numbers
```
### MySQL
```sql
-- EXPLAIN
EXPLAIN
SELECT * FROM orders WHERE customer_id = 123;
-- EXPLAIN ANALYZE (MySQL 8.0.18+)
EXPLAIN ANALYZE
SELECT * FROM orders WHERE customer_id = 123;
-- Look for:
-- - type: ALL (table scan, bad)
-- - type: index (full index scan: better than ALL, but still reads every index entry)
-- - type: ref (index lookup, great)
-- - Extra: Using filesort (may need index)
-- - Extra: Using temporary (may need optimization)
```
## Indexing Strategies
### When to Index
**✅ Index these columns:**
- Primary keys (automatic)
- Foreign keys
- Columns in WHERE clauses
- Columns in JOIN conditions
- Columns in ORDER BY
- Columns in GROUP BY
**❌ Don't index:**
- Small tables (< 1000 rows)
- Columns with low cardinality (few distinct values)
- Frequently updated columns
- Large text/blob columns
### Index Types
```sql
-- B-Tree (default, most common)
CREATE INDEX idx_users_email ON users(email);
-- Hash index (equality only, PostgreSQL)
CREATE INDEX idx_users_email_hash ON users USING HASH(email);
-- GIN (full-text search, arrays, JSONB)
CREATE INDEX idx_posts_content_gin
ON posts USING GIN(to_tsvector('english', content));
-- GiST (geometric, full-text)
CREATE INDEX idx_locations_gist
ON locations USING GIST(coordinates);
-- Partial index (filtered)
CREATE INDEX idx_orders_pending
ON orders(customer_id)
WHERE status = 'pending';
-- Expression index
CREATE INDEX idx_users_email_domain
ON users((email ~~ '%@gmail.com%'));
```
### Composite Index Order
```sql
-- Index column order matters!
CREATE INDEX idx_orders_search
ON orders(status, customer_id, created_at);
-- ✅ Uses index (left-most column)
WHERE status = 'completed'
-- ✅ Uses index (left-most columns)
WHERE status = 'completed' AND customer_id = 123
-- ✅ Uses full index
WHERE status = 'completed'
AND customer_id = 123
AND created_at > '2024-01-01'
-- ❌ Doesn't use index (skips first column)
WHERE customer_id = 123
-- ❌ Doesn't use index (skips first column)
WHERE created_at > '2024-01-01'
```
## MongoDB Queries
### Find Queries
```javascript
// Basic find
db.users.find({ status: 'active' })
// Find with projection
db.users.find(
{ status: 'active' },
{ name: 1, email: 1, _id: 0 }
)
// Find with operators
db.orders.find({
total: { $gt: 100, $lt: 1000 },
status: { $in: ['pending', 'processing'] },
'customer.city': 'New York'
})
// Find with sort and limit
db.products.find({ category: 'Electronics' })
.sort({ price: -1 })
.limit(10)
// Count
db.users.countDocuments({ status: 'active' })
```
### Aggregation Pipeline
```javascript
// Group and count
db.orders.aggregate([
{ $match: { status: 'completed' } },
{ $group: {
_id: '$customer_id',
total_orders: { $sum: 1 },
total_spent: { $sum: '$total' },
avg_order: { $avg: '$total' }
}},
{ $sort: { total_spent: -1 } },
{ $limit: 10 }
])
// Lookup (JOIN)
db.orders.aggregate([
{ $lookup: {
from: 'customers',
localField: 'customer_id',
foreignField: '_id',
as: 'customer'
}},
{ $unwind: '$customer' },
{ $project: {
order_id: 1,
total: 1,
'customer.name': 1,
'customer.email': 1
}}
])
// Complex aggregation
db.sales.aggregate([
// Filter
{ $match: {
date: { $gte: ISODate('2024-01-01') }
}},
// Add computed fields
{ $addFields: {
month: { $month: '$date' },
year: { $year: '$date' }
}},
// Group by month
{ $group: {
_id: { year: '$year', month: '$month' },
total_sales: { $sum: '$amount' },
order_count: { $sum: 1 },
avg_sale: { $avg: '$amount' }
}},
// Sort
{ $sort: { '_id.year': 1, '_id.month': 1 } },
// Reshape
{ $project: {
_id: 0,
date: {
$concat: [
{ $toString: '$_id.year' },
'-',
{ $toString: '$_id.month' }
]
},
total_sales: 1,
order_count: 1,
avg_sale: { $round: ['$avg_sale', 2] }
}}
])
```
### MongoDB Indexes
```javascript
// Single field index
db.users.createIndex({ email: 1 })
// Compound index
db.orders.createIndex({ customer_id: 1, created_at: -1 })
// Unique index
db.users.createIndex({ email: 1 }, { unique: true })
// Partial index
db.orders.createIndex(
{ customer_id: 1 },
{ partialFilterExpression: { status: 'active' } }
)
// Text index
db.products.createIndex({ name: 'text', description: 'text' })
// TTL index (auto-delete after time)
db.sessions.createIndex(
{ created_at: 1 },
{ expireAfterSeconds: 3600 }
)
// List indexes
db.users.getIndexes()
// Analyze query performance
db.orders.find({ customer_id: 123 }).explain('executionStats')
```
## GraphQL Queries
```graphql
# Basic query
query {
users {
id
name
email
}
}
# Query with arguments
query {
user(id: "123") {
name
email
orders {
id
total
status
}
}
}
# Query with variables
query GetUser($userId: ID!) {
user(id: $userId) {
name
email
orders(limit: 10, status: COMPLETED) {
id
total
createdAt
}
}
}
# Fragments (reusable fields)
fragment UserFields on User {
id
name
email
createdAt
}
query {
user(id: "123") {
...UserFields
orders {
id
total
}
}
}
# Avoid N+1 queries with DataLoader
query {
orders {
id
total
customer { # Batched by DataLoader
name
email
}
}
}
```
## Common Anti-Patterns
### ❌ N+1 Query Problem
```sql
-- Bad: N+1 queries
SELECT * FROM customers; -- 1 query
-- Then for each customer:
SELECT * FROM orders WHERE customer_id = ?; -- N queries
-- Good: Single JOIN query
SELECT
c.customer_id,
c.name,
o.order_id,
o.total
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id;
```
### ❌ Using OR on Different Columns
```sql
-- Bad: Can't use indexes effectively
SELECT * FROM products
WHERE name = 'iPhone' OR category = 'Electronics';
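-- Good: Use UNION so each branch can use its own index (UNION, not UNION ALL: rows matching both conditions must appear once, as with OR)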
SELECT * FROM products WHERE name = 'iPhone'
UNION
SELECT * FROM products WHERE category = 'Electronics';
```
### ❌ Implicit Type Conversion
```sql
-- Bad: '123' is string, user_id is integer
SELECT * FROM users WHERE user_id = '123';
-- Good: Use correct type
SELECT * FROM users WHERE user_id = 123;
```
## Query Performance Checklist
- [ ] Select only needed columns (no SELECT *)
- [ ] Add indexes to WHERE/JOIN/ORDER BY columns
- [ ] Use EXPLAIN to analyze query plan
- [ ] Avoid functions on indexed columns
- [ ] Use EXISTS instead of IN for subqueries
- [ ] Batch INSERT/UPDATE operations
- [ ] Use appropriate JOIN types
- [ ] Filter early (WHERE before JOIN)
- [ ] Use LIMIT for large result sets
- [ ] Monitor slow query logs
- [ ] Update statistics regularly
- [ ] Avoid SELECT DISTINCT when possible
- [ ] Use covering indexes when appropriate
## Resources
- **PostgreSQL**: https://www.postgresql.org/docs/current/performance-tips.html
- **MySQL**: https://dev.mysql.com/doc/refman/8.0/en/optimization.html
- **MongoDB**: https://docs.mongodb.com/manual/core/query-optimization/
- **Use The Index, Luke**: https://use-the-index-luke.com/
---
**"Premature optimization is the root of all evil, but slow queries are the root of all frustration."**

View File

@@ -0,0 +1,7 @@
# Indexing Strategies - See SKILL.md for complete indexing guide including:
- When to create indexes
- Composite index column order
- Covering indexes
- Partial/filtered indexes
- Index types (B-Tree, Hash, GIN, GiST)
- Index maintenance

View File

@@ -0,0 +1,6 @@
# NoSQL Queries - See SKILL.md for complete NoSQL guide including:
- MongoDB find() queries
- MongoDB aggregation pipelines
- MongoDB indexes
- GraphQL queries
- Query optimization for NoSQL

View File

@@ -0,0 +1,9 @@
# Query Optimization - See SKILL.md for complete optimization guide including:
- Avoid SELECT *
- Use indexes effectively
- Optimize JOINs
- EXISTS vs IN
- Function usage on indexed columns
- LIMIT and pagination
- Batch operations
- Common anti-patterns

View File

@@ -0,0 +1,7 @@
# SQL Query Patterns - See SKILL.md for complete SQL guide including:
- SELECT queries with JOINs
- Aggregation with GROUP BY
- Window functions
- CTEs (Common Table Expressions)
- Subqueries
- Best practices and anti-patterns

View File

@@ -0,0 +1,297 @@
#!/bin/bash
# Query Expert - Performance Analyzer
# Analyze EXPLAIN output and provide optimization recommendations
set -e
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Print an informational message wrapped in the BLUE / NC color codes.
#   $1 - message text
print_info() {
    local message=$1
    echo -e "${BLUE} ${message}${NC}"
}
# Print a warning message wrapped in the YELLOW / NC color codes.
#   $1 - message text
print_warning() {
    local message=$1
    echo -e "${YELLOW}${message}${NC}"
}
# Print a success message wrapped in the GREEN / NC color codes.
#   $1 - message text
print_success() {
    local message=$1
    echo -e "${GREEN}${message}${NC}"
}
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ ║"
echo "║ Query Expert - Performance Analyzer ║"
echo "║ ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
print_info "Performance Analysis Guide"
echo ""
cat << 'EOF'
## PostgreSQL EXPLAIN Analysis
### Run EXPLAIN
```sql
EXPLAIN ANALYZE
SELECT * FROM orders WHERE customer_id = 123;
```
### What to Look For:
**Seq Scan (Table Scan) ❌**
→ Reading entire table
→ Fix: Add index on filtered column
→ CREATE INDEX idx_orders_customer ON orders(customer_id);
**Index Scan ✅**
→ Using index efficiently
→ Good performance for specific rows
**Bitmap Heap Scan ✅**
→ Good for returning multiple rows
→ Efficient index usage
**Nested Loop ⚠️**
→ Can be slow with large datasets
→ Consider: Hash Join or Merge Join
**Hash Join ✅**
→ Good for large joins
→ Requires memory
**Cost Numbers**
→ Higher = slower
→ Compare before/after optimization
→ cost=0.00..35.50 rows=10
**Actual Time**
→ Real execution time
→ actual time=0.023..0.156 rows=10 loops=1
---
## MySQL EXPLAIN Analysis
### Run EXPLAIN
```sql
EXPLAIN SELECT * FROM orders WHERE customer_id = 123;
```
### Type Column:
**ALL ❌**
→ Full table scan
→ Fix: Add index
**index ⚠️**
→ Full index scan
→ Better than ALL, but could improve
**range ✅**
→ Index range scan
→ Good for WHERE with >, <, BETWEEN
**ref ✅✅**
→ Index lookup
→ Excellent performance
**eq_ref ✅✅✅**
→ Unique index lookup
→ Best performance
**const ✅✅✅**
→ Constant lookup (primary key)
→ Fastest possible
### Extra Column:
**Using filesort ⚠️**
→ Sorting in memory/disk
→ Fix: Add index on ORDER BY columns
**Using temporary ⚠️**
→ Creating temporary table
→ Fix: Optimize GROUP BY or DISTINCT
**Using index ✅**
→ Covering index (index-only scan)
→ Excellent performance
**Using where ✅**
→ Filtering after read
→ Normal for WHERE clauses
---
## MongoDB Explain Analysis
### Run Explain
```javascript
db.orders.find({ customer_id: 123 }).explain("executionStats")
```
### What to Look For:
**COLLSCAN ❌**
→ Full collection scan
→ Fix: Create index
→ db.orders.createIndex({ customer_id: 1 })
**IXSCAN ✅**
→ Index scan
→ Good performance
**executionTimeMillis**
→ Total execution time
→ < 100ms good, > 1000ms needs optimization
**nReturned vs totalDocsExamined**
→ Efficiency ratio
→ Ideally close to 1:1
→ If totalDocsExamined >> nReturned, add index
**Index Usage**
→ indexName: "customer_id_1" ✅
→ indexName: null ❌ (no index used)
---
## Index Recommendations
### When to Create Index:
1. **WHERE Clause**
CREATE INDEX idx_table_column ON table(column);
2. **JOIN Columns**
CREATE INDEX idx_table_join_col ON table(join_column);
3. **ORDER BY**
CREATE INDEX idx_table_sort ON table(sort_column);
4. **Composite Index (order matters!)**
CREATE INDEX idx_multi ON table(col1, col2, col3);
→ Works for: col1 | col1,col2 | col1,col2,col3
→ NOT for: col2 | col3 | col2,col3
5. **Covering Index**
CREATE INDEX idx_covering ON table(filter_col) INCLUDE (select_cols);
→ Index contains all needed columns
→ Fastest possible (index-only scan)
### When NOT to Index:
- Small tables (< 1000 rows)
- Columns with low cardinality (few distinct values)
- Frequently updated columns
- Large text/blob columns
---
## Query Optimization Checklist
Performance Issues:
[ ] Check EXPLAIN plan
[ ] Look for table scans (Seq Scan, ALL, COLLSCAN)
[ ] Identify missing indexes
[ ] Check JOIN types (Nested Loop on large tables)
[ ] Look for filesort or temporary tables
[ ] Verify index usage (Using index)
Optimizations:
[ ] Create indexes on WHERE columns
[ ] Create indexes on JOIN columns
[ ] Use composite indexes (correct order)
[ ] Add covering indexes for frequent queries
[ ] Use LIMIT to reduce result set
[ ] Avoid SELECT * (select only needed columns)
[ ] Avoid functions on indexed columns
[ ] Use EXISTS instead of IN (subqueries)
[ ] Filter early (before JOIN)
[ ] Use appropriate JOIN type
Monitoring:
[ ] Run EXPLAIN ANALYZE before optimization
[ ] Create indexes
[ ] Run EXPLAIN ANALYZE after optimization
[ ] Compare execution time and cost
[ ] Test with production-like data volume
[ ] Monitor slow query log
---
## Example Optimization
### Before (Slow)
```sql
-- EXPLAIN shows: Seq Scan, cost=1000.00
SELECT * FROM orders WHERE customer_id = 123;
```
### Optimization Steps
```sql
-- 1. Create index
CREATE INDEX idx_orders_customer ON orders(customer_id);
-- 2. Optimize query (avoid SELECT *)
SELECT order_id, total, created_at
FROM orders
WHERE customer_id = 123
ORDER BY created_at DESC
LIMIT 100;
-- 3. Check improvement
EXPLAIN ANALYZE
SELECT order_id, total, created_at
FROM orders
WHERE customer_id = 123
ORDER BY created_at DESC
LIMIT 100;
```
### After (Fast)
```
→ Index Scan using idx_orders_customer
→ cost=0.29..15.50 (~98% lower estimated cost than 1000.00)
→ actual time=0.015..0.023
```
---
## Tools
**PostgreSQL:**
- EXPLAIN ANALYZE
- pg_stat_statements extension
- pgBadger (log analyzer)
**MySQL:**
- EXPLAIN
- SHOW PROFILE
- MySQL Workbench Performance Dashboard
**MongoDB:**
- explain("executionStats")
- MongoDB Compass (GUI)
- Database Profiler
---
EOF
# Closing output: confirm the guide was printed, then list the follow-up steps.
print_success "Performance analysis guide displayed"
echo
print_info "Next Steps:"
for next_step in \
    "1. Run EXPLAIN on your slow query" \
    "2. Identify the bottleneck (table scan, no index, etc.)" \
    "3. Apply recommended optimization" \
    "4. Re-run EXPLAIN to verify improvement" \
    "5. Test with production data volume"; do
    echo " ${next_step}"
done
echo

View File

@@ -0,0 +1,436 @@
#!/bin/bash
# Query Expert - Query Generator
# Generate optimized database queries with best practices
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Helper functions
# Emit a success line: the message in GREEN, followed by a color reset.
#   $1 - message text
print_success() {
    local text=$1
    echo -e "${GREEN}${text}${NC}"
}
# Emit an informational line: the message in BLUE, followed by a color reset.
#   $1 - message text
print_info() {
    local text=$1
    echo -e "${BLUE} ${text}${NC}"
}
# Emit a warning line: the message in YELLOW, followed by a color reset.
#   $1 - message text
print_warning() {
    local text=$1
    echo -e "${YELLOW}${text}${NC}"
}
# Prompt for one line of input and store it in a caller-named variable.
#
# Arguments:
#   $1 - prompt text shown to the user
#   $2 - name of the (global) variable that receives the answer
#   $3 - "true" to re-prompt until a non-empty answer is given (default: false)
#
# Returns 1 if input ends (EOF) before a required answer was provided.
#
# The answer is stored verbatim with `printf -v`. It is never passed through
# `eval`, so quotes, `$(...)` and backticks typed by the user stay literal text
# (the suggested condition  status = 'active'  keeps its quotes).
prompt_input() {
    local prompt="$1"
    local var_name="$2"
    local required="${3:-false}"
    local reply_text=""

    while true; do
        echo -e "${BLUE}${prompt}${NC}"
        if ! read -r reply_text; then
            # EOF: keep a final unterminated line if there is one, but never
            # spin forever waiting for a required value that cannot arrive.
            if [ -z "$reply_text" ] && [ "$required" = true ]; then
                echo -e "${RED}This field is required.${NC}"
                return 1
            fi
        elif [ -z "$reply_text" ] && [ "$required" = true ]; then
            echo -e "${RED}This field is required.${NC}"
            continue
        fi
        printf -v "$var_name" '%s' "$reply_text"
        break
    done
}
# Show a numbered menu and store the chosen option in a caller-named variable.
#
# Arguments:
#   $1   - prompt text shown above the menu
#   $2   - name of the (global) variable that receives the chosen option text
#   $3.. - the menu options
#
# Re-asks until a valid number is entered. The option text is assigned with
# `printf -v` rather than `eval`, so options containing quotes or shell
# metacharacters are stored literally.
prompt_select() {
    local prompt="$1"
    local var_name="$2"
    shift 2
    local options=("$@")
    local choice

    echo -e "${BLUE}${prompt}${NC}"
    PS3="Select (1-${#options[@]}): "
    select choice in "${options[@]}"; do
        if [ -n "$choice" ]; then
            printf -v "$var_name" '%s' "$choice"
            break
        else
            echo -e "${RED}Invalid selection.${NC}"
        fi
    done
}
# Banner
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ ║"
echo "║ Query Expert - Query Generator ║"
echo "║ ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
# Step 1: Database Type
print_info "Step 1/5: Database Type"
prompt_select "Which database?" DB_TYPE "PostgreSQL" "MySQL" "SQLite" "SQL Server" "MongoDB" "GraphQL"

# Step 2: Query Type (the menu offered depends on the engine picked in step 1)
print_info "Step 2/5: Query Type"
if [ "$DB_TYPE" = "MongoDB" ]; then
    prompt_select "What type of query?" QUERY_TYPE "Find" "Aggregation" "Update" "Insert" "Delete"
elif [ "$DB_TYPE" = "GraphQL" ]; then
    prompt_select "What type of query?" QUERY_TYPE "Query" "Mutation" "Subscription"
else
    prompt_select "What type of query?" QUERY_TYPE "SELECT" "INSERT" "UPDATE" "DELETE" "JOIN" "Aggregate (GROUP BY)" "Window Function" "CTE (WITH)"
fi

# Step 3: Table/Collection (required; only the wording differs for MongoDB)
print_info "Step 3/5: Target Table/Collection"
case $DB_TYPE in
    "MongoDB")
        prompt_input "Collection name (e.g., users, orders):" TABLE_NAME true
        ;;
    *)
        prompt_input "Table name (e.g., users, orders):" TABLE_NAME true
        ;;
esac

# Step 4: Columns/Fields (an empty answer means "all columns")
print_info "Step 4/5: Columns/Fields"
prompt_input "Columns to select (comma-separated, or * for all):" COLUMNS
[ -n "$COLUMNS" ] || COLUMNS="*"

# Step 5: Conditions (may be left empty)
print_info "Step 5/5: Conditions (optional)"
prompt_input "WHERE conditions (e.g., status = 'active'):" CONDITIONS
# Generate query based on selections
# Print a SELECT statement for $TABLE_NAME followed by optimization tips.
#
# Reads the globals filled in by the prompts:
#   DB_TYPE    - engine name, echoed in the header comment
#   COLUMNS    - comma-separated column list (or *)
#   CONDITIONS - optional WHERE expression, printed verbatim
generate_sql_select() {
    # "," + newline + indent puts every selected column on its own line.
    # It is built with ANSI-C quoting because the previous `${'\n'}` spelling
    # is not valid bash: it aborted the function with "bad substitution".
    local sep=$',\n    '
    # Leading identifier of the condition, used for the index suggestion.
    local ident_re='^[[:space:]]*([A-Za-z_][A-Za-z0-9_.]*)'
    local where_col=""

    cat << EOF
-- Generated SELECT Query
-- Database: $DB_TYPE
-- Optimized for performance
SELECT
    ${COLUMNS//,/$sep}
FROM $TABLE_NAME
EOF
    if [ -n "$CONDITIONS" ]; then
        echo "WHERE $CONDITIONS"
    fi
    cat << 'EOF'
-- Optional: Add ORDER BY
-- ORDER BY created_at DESC
-- Optional: Add LIMIT
-- LIMIT 100;
EOF

    if [[ $CONDITIONS =~ $ident_re ]]; then
        # Drop a table/alias qualifier such as "o." so the index name stays valid.
        where_col="${BASH_REMATCH[1]##*.}"
    fi

    echo ""
    print_info "Optimization Tips:"
    echo " • Select only needed columns (avoid SELECT *)"
    # Only suggest an index when a condition names a column; otherwise the
    # suggestion would read "idx_<table>_ ON <table>()".
    if [ -n "$where_col" ]; then
        echo " • Add index on WHERE columns: CREATE INDEX idx_${TABLE_NAME}_${where_col} ON $TABLE_NAME(${where_col})"
    fi
    echo " • Use LIMIT for large result sets"
    echo " • Add ORDER BY for consistent results"
}
# Ask for a second table and a join column, then print an INNER JOIN query
# between $TABLE_NAME and that table, followed by JOIN tips.
#
# Reads globals DB_TYPE, TABLE_NAME, COLUMNS, CONDITIONS; sets TABLE2, JOIN_COL.
# Every selected column is qualified with the alias of $TABLE_NAME.
generate_sql_join() {
    # Both answers are required: an empty table or column yields invalid SQL.
    prompt_input "Second table name:" TABLE2 true
    prompt_input "JOIN column (e.g., customer_id):" JOIN_COL true

    # Aliases are the first letter of each table. When both tables start with
    # the same letter (orders / order_items) number them so they stay distinct.
    local left_alias="${TABLE_NAME:0:1}"
    local right_alias="${TABLE2:0:1}"
    if [ "$left_alias" = "$right_alias" ]; then
        left_alias="${left_alias}1"
        right_alias="${right_alias}2"
    fi

    # Build "alias.col" entries, one per line. ANSI-C quoting provides the
    # newline; the previous `${'\n'}` spelling was a bash "bad substitution".
    local sep=$',\n    '
    local -a cols
    local col select_list=""
    IFS=',' read -r -a cols <<< "$COLUMNS"
    for col in "${cols[@]}"; do
        col="${col#"${col%%[![:space:]]*}"}"   # trim leading whitespace
        col="${col%"${col##*[![:space:]]}"}"   # trim trailing whitespace
        [ -n "$col" ] || continue
        select_list+="${select_list:+$sep}${left_alias}.${col}"
    done
    [ -n "$select_list" ] || select_list="${left_alias}.*"

    cat << EOF
-- Generated JOIN Query
-- Database: $DB_TYPE
SELECT
    $select_list
FROM $TABLE_NAME $left_alias
INNER JOIN $TABLE2 $right_alias
    ON ${left_alias}.$JOIN_COL = ${right_alias}.$JOIN_COL
EOF
    if [ -n "$CONDITIONS" ]; then
        echo "WHERE $CONDITIONS"
    fi
    echo ";"
    echo ""
    print_info "JOIN Types:"
    echo " • INNER JOIN - Only matching rows"
    echo " • LEFT JOIN - All left rows + matching right"
    echo " • RIGHT JOIN - All right rows + matching left"
    echo " • FULL OUTER JOIN - All rows from both"
    echo ""
    print_info "Optimization:"
    echo " • Add indexes on JOIN columns"
    echo " • Filter early with WHERE"
    echo " • Use INNER JOIN when possible"
}
# Ask for grouping columns and an aggregate expression, then print a
# GROUP BY query over $TABLE_NAME followed by a list of aggregate functions.
#
# Reads globals DB_TYPE, TABLE_NAME, CONDITIONS; sets GROUP_COLS, AGG_FUNC.
generate_sql_aggregate() {
    # Both answers are required: without them the statement would contain an
    # empty select list and an empty GROUP BY clause.
    prompt_input "GROUP BY columns (comma-separated):" GROUP_COLS true
    prompt_input "Aggregate function (e.g., COUNT(*), SUM(amount)):" AGG_FUNC true

    # One grouping column per line in SELECT, a comma-separated list in
    # GROUP BY. The newline comes from ANSI-C quoting; the previous `${'\n'}`
    # spelling was a bash "bad substitution" and aborted the function.
    local sep=$',\n    '
    local -a cols
    local col select_list="" group_list=""
    IFS=',' read -r -a cols <<< "$GROUP_COLS"
    for col in "${cols[@]}"; do
        col="${col#"${col%%[![:space:]]*}"}"   # trim leading whitespace
        col="${col%"${col##*[![:space:]]}"}"   # trim trailing whitespace
        [ -n "$col" ] || continue
        select_list+="${select_list:+$sep}${col}"
        group_list+="${group_list:+, }${col}"
    done

    cat << EOF
-- Generated Aggregate Query
-- Database: $DB_TYPE
SELECT
    ${select_list},
    $AGG_FUNC AS total
FROM $TABLE_NAME
EOF
    if [ -n "$CONDITIONS" ]; then
        echo "WHERE $CONDITIONS"
    fi
    cat << EOF
GROUP BY $group_list
-- Optional: Add HAVING for aggregate filters
-- HAVING COUNT(*) > 10
ORDER BY total DESC;
EOF
    echo ""
    print_info "Aggregate Functions:"
    echo " • COUNT(*) - Count rows"
    echo " • SUM(column) - Sum values"
    echo " • AVG(column) - Average"
    echo " • MIN/MAX(column) - Min/Max values"
}
# Print a query that wraps a filtered SELECT on $TABLE_NAME in a CTE named
# "<table>_filtered" and then selects from that CTE, followed by CTE notes.
#
# Reads globals DB_TYPE, TABLE_NAME, COLUMNS, CONDITIONS.
generate_sql_cte() {
    # "," + newline + indent: one column per line inside the CTE body. Built
    # with ANSI-C quoting; the previous `${'\n'}` spelling was a bash
    # "bad substitution" and aborted the function.
    local sep=$',\n        '

    cat << EOF
-- Generated CTE (Common Table Expression)
-- Database: $DB_TYPE
WITH ${TABLE_NAME}_filtered AS (
    SELECT
        ${COLUMNS//,/$sep}
    FROM $TABLE_NAME
EOF
    if [ -n "$CONDITIONS" ]; then
        echo "    WHERE $CONDITIONS"
    fi
    # The outer query must reference the CTE defined above; it used to read
    # from a hard-coded "table_filtered" that the statement never declares.
    cat << EOF
)
SELECT *
FROM ${TABLE_NAME}_filtered
-- Add JOINs or additional filtering here
;
EOF
    echo ""
    print_info "CTE Benefits:"
    echo " • Improves readability"
    echo " • Reusable within same query"
    echo " • Supports recursion"
    echo " • Better than subqueries in many cases"
}
# Print a MongoDB find() call for collection $TABLE_NAME with an optional
# filter and projection, sorted by created_at and limited to 100 documents,
# followed by optimization notes.
#
# Reads globals TABLE_NAME, COLUMNS (comma-separated field names or *),
# CONDITIONS (filter body, printed verbatim between braces).
generate_mongodb_find() {
    # Snapshot the field list before any external command runs.
    # NOTE(review): COLUMNS is also a variable bash maintains itself; with
    # `checkwinsize` on, bash may overwrite it with the terminal width after
    # an external command such as `cat`, which would corrupt the projection
    # built further down. Confirm on the target bash version.
    local fields="$COLUMNS"
    local -a names
    local name projection=""

    cat << EOF
// Generated MongoDB Find Query
// Collection: $TABLE_NAME
db.$TABLE_NAME.find(
EOF
    if [ -n "$CONDITIONS" ]; then
        echo "    { $CONDITIONS },"
    else
        echo "    {},"
    fi

    # Turn "a, b" into "a: 1, b: 1, " (whitespace around names is dropped).
    if [ "$fields" != "*" ]; then
        IFS=',' read -r -a names <<< "$fields"
        for name in "${names[@]}"; do
            name="${name#"${name%%[![:space:]]*}"}"   # trim leading whitespace
            name="${name%"${name##*[![:space:]]}"}"   # trim trailing whitespace
            [ -n "$name" ] || continue
            projection+="${name}: 1, "
        done
    fi
    if [ -n "$projection" ]; then
        echo "    { ${projection}_id: 0 }"
    else
        echo "    {}"
    fi

    cat << 'EOF'
)
.sort({ created_at: -1 })
.limit(100);
EOF
    echo ""
    print_info "MongoDB Optimization:"
    echo " • Create index: db.$TABLE_NAME.createIndex({ field: 1 })"
    echo " • Use projection to limit fields"
    echo " • Add sort and limit for performance"
    echo " • Use explain(): .explain('executionStats')"
}
# Print a four-stage aggregation pipeline template (match, group, sort, limit)
# for collection $TABLE_NAME, then a short description of common stages.
# $CONDITIONS, when set, is printed verbatim as the body of the $match stage.
generate_mongodb_aggregation() {
    local stage_help

    # Header and the opening of the $match stage.
    printf '%s\n' \
        "// Generated MongoDB Aggregation Pipeline" \
        "// Collection: $TABLE_NAME" \
        "db.$TABLE_NAME.aggregate([" \
        "// Stage 1: Match (filter)" \
        '{ $match: {'

    [ -z "$CONDITIONS" ] || echo " $CONDITIONS"

    # Fixed remainder of the pipeline; quoted delimiter keeps the $ signs literal.
    cat << 'EOF'
}},
// Stage 2: Group (aggregate)
{ $group: {
_id: '$field',
count: { $sum: 1 },
total: { $sum: '$amount' },
average: { $avg: '$amount' }
}},
// Stage 3: Sort
{ $sort: { total: -1 } },
// Stage 4: Limit
{ $limit: 10 }
]);
EOF
    echo
    print_info "Aggregation Stages:"
    for stage_help in \
        '$match - Filter documents' \
        '$group - Group and aggregate' \
        '$project - Reshape documents' \
        '$lookup - JOIN collections' \
        '$sort - Sort results' \
        '$limit - Limit results'; do
        echo " • ${stage_help}"
    done
}
# Print two GraphQL query templates for the type named by $TABLE_NAME: a plain
# query and a variant taking an $id variable, followed by best-practice notes.
#
# Reads globals TABLE_NAME and COLUMNS.
#   ${TABLE_NAME^}  upper-cases the first character for the operation name
#                   (case-modifying expansion; needs bash 4.0 or newer).
#   ${COLUMNS//,/…} replaces every comma with a line break so each requested
#                   field is printed on its own line. The replacement text
#                   spans two source lines on purpose.
# \$id is escaped so the heredoc prints a literal "$id" instead of expanding a
# shell variable.
generate_graphql_query() {
cat << EOF
# Generated GraphQL Query
query Get${TABLE_NAME^} {
$TABLE_NAME {
${COLUMNS//,/
}
}
}
# With variables:
query Get${TABLE_NAME^}(\$id: ID!) {
${TABLE_NAME}(id: \$id) {
${COLUMNS//,/
}
}
}
EOF
echo ""
# Static advice printed after the templates.
print_info "GraphQL Best Practices:"
echo " • Request only needed fields"
echo " • Use fragments for reusable fields"
echo " • Implement DataLoader to avoid N+1"
echo " • Add pagination (first, after)"
}
# Generate based on query type.
# Only some menu entries have a generator. For every other choice, say so and
# stop with a non-zero status instead of falling through to the "Query
# generated" summary with nothing printed.
case $DB_TYPE in
    "MongoDB")
        case $QUERY_TYPE in
            "Find")
                generate_mongodb_find
                ;;
            "Aggregation")
                generate_mongodb_aggregation
                ;;
            *)
                print_warning "No generator is available yet for MongoDB '$QUERY_TYPE' queries."
                exit 1
                ;;
        esac
        ;;
    "GraphQL")
        # The same query template is printed for every GraphQL operation type.
        generate_graphql_query
        ;;
    *)
        case $QUERY_TYPE in
            "SELECT")
                generate_sql_select
                ;;
            "JOIN")
                generate_sql_join
                ;;
            "Aggregate (GROUP BY)")
                generate_sql_aggregate
                ;;
            "CTE (WITH)")
                generate_sql_cte
                ;;
            *)
                print_warning "No generator is available yet for $DB_TYPE '$QUERY_TYPE' queries."
                exit 1
                ;;
        esac
        ;;
esac
# Summary
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ Query Generated ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
# Closing report: what was generated, generic next steps, then the profiling
# tools that apply to the selected engine (none are listed for other engines).
print_success "Query generated for $DB_TYPE"
print_success "Type: $QUERY_TYPE"
echo
print_info "Next steps:"
for next_step in \
    "1. Review and test the query" \
    "2. Add appropriate indexes" \
    "3. Use EXPLAIN to analyze performance" \
    "4. Add error handling in production" \
    "5. Monitor query performance"; do
    echo " ${next_step}"
done
echo
print_info "Performance Tools:"
if [ "$DB_TYPE" = "PostgreSQL" ]; then
    echo " • EXPLAIN ANALYZE query"
    echo " • pg_stat_statements extension"
elif [ "$DB_TYPE" = "MySQL" ]; then
    echo " • EXPLAIN query"
    echo " • SHOW PROFILE"
elif [ "$DB_TYPE" = "MongoDB" ]; then
    echo " • query.explain('executionStats')"
    echo " • db.collection.getIndexes()"
fi
echo

View File

@@ -0,0 +1,196 @@
#!/bin/bash
# Query Expert - Query Optimizer
# Analyze and optimize SQL queries
set -e
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
NC='\033[0m'
PASS_COUNT=0
WARN_COUNT=0
FAIL_COUNT=0
# Report a good practice and count it in PASS_COUNT.
#   $1 - message text
print_success() {
    echo -e "${GREEN}✓ GOOD${NC} $1"
    # Plain arithmetic assignment instead of ((PASS_COUNT++)): the
    # post-increment form evaluates to the old value, so the first call (0)
    # returns status 1 and aborts the whole script under `set -e`.
    PASS_COUNT=$((PASS_COUNT + 1))
}
# Report a possible improvement and count it in WARN_COUNT.
#   $1 - message text
print_warning() {
    echo -e "${YELLOW}⚠ IMPROVE${NC} $1"
    # Plain arithmetic assignment instead of ((WARN_COUNT++)): the
    # post-increment form evaluates to the old value, so the first call (0)
    # returns status 1 and aborts the whole script under `set -e`.
    WARN_COUNT=$((WARN_COUNT + 1))
}
# Report a problem found in the query and count it in FAIL_COUNT.
#   $1 - message text
print_error() {
    echo -e "${RED}✗ ISSUE${NC} $1"
    # Plain arithmetic assignment instead of ((FAIL_COUNT++)): the
    # post-increment form evaluates to the old value, so the first call (0)
    # returns status 1 and aborts the whole script under `set -e`.
    FAIL_COUNT=$((FAIL_COUNT + 1))
}
# Print an informational line: a BLUE " INFO" tag followed by the message.
# Unlike the other print helpers it does not touch any counter.
#   $1 - message text
print_info() {
    local message=$1
    echo -e "${BLUE} INFO${NC} ${message}"
}
# Print a section heading: a blank line, then the title framed above and below
# by a horizontal rule, all in MAGENTA.
#   $1 - section title
print_section() {
    local title=$1
    local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"

    echo
    echo -e "${MAGENTA}${rule}${NC}"
    echo -e "${MAGENTA}${title}${NC}"
    echo -e "${MAGENTA}${rule}${NC}"
}
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ ║"
echo "║ Query Expert - Query Optimizer ║"
echo "║ ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
# A query file argument is mandatory: print usage and stop when it is missing.
[ -n "$1" ] || {
    print_info "Usage: $0 <query-file.sql>"
    print_info "Example: $0 slow_query.sql"
    exit 1
}

# The argument must name an existing regular file.
QUERY_FILE="$1"
[ -f "$QUERY_FILE" ] || {
    echo -e "${RED}File not found: $QUERY_FILE${NC}"
    exit 1
}

# Load the whole file into memory for the pattern checks that follow.
QUERY=$(< "$QUERY_FILE")
# All checks below are textual heuristics over the query text. They run on a
# single-line copy of the query so that multi-token patterns such as
# "IN (SELECT" still match when the statement is wrapped across lines.
QUERY_FLAT=$(printf '%s' "$QUERY" | tr '\r\n\t' '   ')

# Section 1: SELECT * Detection
print_section "1. COLUMN SELECTION"
if grep -Eqi 'SELECT[[:space:]]+\*' <<< "$QUERY_FLAT"; then
    print_error "Using SELECT * (selects all columns)"
    echo " Fix: SELECT only needed columns"
    echo " SELECT user_id, name, email FROM users;"
else
    print_success "Selecting specific columns"
fi

# Section 2: Index Usage
print_section "2. INDEX OPPORTUNITIES"
if grep -qiw 'WHERE' <<< "$QUERY_FLAT"; then
    # Identifiers directly followed by "=" after WHERE. The "=" (and any
    # spaces before it) is stripped so "status='x'" yields "status" rather
    # than "status="; duplicates are removed.
    WHERE_COLS=$(grep -oiE 'WHERE [^;]*' <<< "$QUERY_FLAT" \
        | grep -oE '[a-zA-Z_][a-zA-Z0-9_]*[[:space:]]*=' \
        | sed -E 's/[[:space:]]*=$//' \
        | sort -u)
    if [ -n "$WHERE_COLS" ]; then
        print_info "Columns in WHERE clause should have indexes:"
        for col in $WHERE_COLS; do
            echo " CREATE INDEX idx_table_$col ON table($col);"
        done
    fi
fi
if grep -Eqi 'JOIN.*[[:space:]]ON[[:space:]]' <<< "$QUERY_FLAT"; then
    print_info "JOIN columns should have indexes:"
    echo " CREATE INDEX idx_table_join_col ON table(join_col);"
fi
if grep -Eqi 'ORDER[[:space:]]+BY' <<< "$QUERY_FLAT"; then
    print_info "ORDER BY columns benefit from indexes:"
    echo " Consider composite index with WHERE + ORDER BY columns"
fi

# Section 3: JOIN Analysis
print_section "3. JOIN OPTIMIZATION"
# This only detects that both constructs are present; it cannot tell which
# table the WHERE filters, so the message asks the reader to check.
if grep -Eqi 'LEFT[[:space:]]+(OUTER[[:space:]]+)?JOIN' <<< "$QUERY_FLAT" && grep -qiw 'WHERE' <<< "$QUERY_FLAT"; then
    print_warning "LEFT JOIN combined with WHERE"
    echo " If the WHERE filters a right-table column, the join acts as an INNER JOIN"
    echo " Consider using INNER JOIN instead, or move the filter into the ON clause"
fi
# Require whitespace before IN so "JOIN (SELECT" is not mistaken for "IN (SELECT".
if grep -Eqi 'WHERE.*[[:space:]]IN[[:space:]]*\([[:space:]]*SELECT' <<< "$QUERY_FLAT"; then
    print_error "Using IN with subquery"
    echo " Fix: Use EXISTS or JOIN instead"
    echo " WHERE EXISTS (SELECT 1 FROM ...)"
fi

# Section 4: Function Usage
print_section "4. FUNCTION ON COLUMNS"
# The function name must start a word, so UPDATE( or created_date( do not
# count as DATE(.
if grep -Eqi 'WHERE.*(^|[^A-Za-z0-9_])(LOWER|UPPER|SUBSTRING|DATE|YEAR|MONTH)[[:space:]]*\(' <<< "$QUERY_FLAT"; then
    print_error "Function on indexed column in WHERE"
    echo " Fix: Use functional index or avoid function"
    echo " CREATE INDEX idx_table_lower_col ON table(LOWER(col));"
fi

# Section 5: DISTINCT Usage
print_section "5. DISTINCT USAGE"
if grep -Eqi 'SELECT[[:space:]]+DISTINCT' <<< "$QUERY_FLAT"; then
    print_warning "Using DISTINCT (potentially expensive)"
    echo " Consider: Is DISTINCT necessary?"
    echo " Alternative: Use GROUP BY if aggregating"
fi

# Section 6: Subqueries
print_section "6. SUBQUERY OPTIMIZATION"
SUBQUERY_COUNT=$(grep -oiw 'SELECT' <<< "$QUERY_FLAT" | wc -l)
if [ "$SUBQUERY_COUNT" -gt 1 ]; then
    if grep -qi 'FROM.*SELECT' <<< "$QUERY_FLAT"; then
        print_info "Contains subqueries - consider CTEs for readability"
        echo " WITH cte AS (SELECT ...) SELECT ... FROM cte"
    fi
fi

# Section 7: LIMIT Usage
print_section "7. RESULT SET SIZE"
# Whole-word match: "stop" or "credit_limit" must not count as TOP / LIMIT.
if ! grep -Eqiw 'LIMIT|TOP|FETCH FIRST' <<< "$QUERY_FLAT"; then
    print_warning "No LIMIT clause found"
    echo " Add LIMIT to prevent large result sets"
    echo " SELECT ... LIMIT 100;"
fi

# Section 8: Sorting
print_section "8. SORTING"
if grep -Eqi 'ORDER[[:space:]]+BY' <<< "$QUERY_FLAT"; then
    if ! grep -qiw 'LIMIT' <<< "$QUERY_FLAT"; then
        print_warning "ORDER BY without LIMIT"
        echo " Consider adding LIMIT to reduce sort cost"
    fi
fi
# Summary
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ Optimization Summary ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
# Totals collected by the print_* helpers during the checks.
echo -e "${GREEN}✓ Good practices: $PASS_COUNT${NC}"
echo -e "${YELLOW}⚠ Improvements: $WARN_COUNT${NC}"
echo -e "${RED}✗ Issues found: $FAIL_COUNT${NC}"
echo

# Score: a pass is worth 2 points, a warning 1, an issue 0, expressed as a
# percentage of the maximum (2 points per finding). Skipped with no findings.
TOTAL=$((PASS_COUNT + FAIL_COUNT + WARN_COUNT))
if [ "$TOTAL" -gt 0 ]; then
    SCORE=$(( (2 * PASS_COUNT + WARN_COUNT) * 100 / (2 * TOTAL) ))
    echo "Query Score: $SCORE%"
    echo
fi

print_info "Recommended Next Steps:"
for next_step in \
    "1. Run EXPLAIN ANALYZE on this query" \
    "2. Create recommended indexes" \
    "3. Test query performance before/after" \
    "4. Monitor query in production"; do
    echo " ${next_step}"
done
echo
print_info "EXPLAIN Commands:"
echo " PostgreSQL: EXPLAIN ANALYZE <query>"
echo " MySQL: EXPLAIN <query>"
echo " MongoDB: db.collection.find().explain('executionStats')"
echo

# Exit status reflects whether any issue was reported.
if [ "$FAIL_COUNT" -gt 0 ]; then
    exit 1
else
    exit 0
fi