Initial commit

Zhongwei Li
2025-11-29 18:49:43 +08:00
commit 55ae231efe
11 changed files with 1850 additions and 0 deletions

.claude-plugin/plugin.json Normal file

@@ -0,0 +1,12 @@
{
"name": "query-expert",
"description": "Master SQL and database queries across multiple systems. Generate optimized queries, analyze performance, design indexes, and troubleshoot slow queries for PostgreSQL, MySQL, MongoDB, and more.",
"version": "0.0.0-2025.11.28",
"author": {
"name": "James Rochabrun",
"email": "jamesrochabrun@gmail.com"
},
"skills": [
"./skills/query-expert"
]
}

README.md Normal file

@@ -0,0 +1,3 @@
# query-expert
Master SQL and database queries across multiple systems. Generate optimized queries, analyze performance, design indexes, and troubleshoot slow queries for PostgreSQL, MySQL, MongoDB, and more.

plugin.lock.json Normal file

@@ -0,0 +1,72 @@
{
"$schema": "internal://schemas/plugin.lock.v1.json",
"pluginId": "gh:jamesrochabrun/skills:query-expert",
"normalized": {
"repo": null,
"ref": "refs/tags/v20251128.0",
"commit": "0495aa194524c0c13794bdb4b73f8eeb58d62849",
"treeHash": "92b6306bff831c94ab6c34534c48dd407ef68c06a44f745796e77e2060eeeae1",
"generatedAt": "2025-11-28T10:17:54.604144Z",
"toolVersion": "publish_plugins.py@0.2.0"
},
"origin": {
"remote": "git@github.com:zhongweili/42plugin-data.git",
"branch": "master",
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
},
"manifest": {
"name": "query-expert",
"description": "Master SQL and database queries across multiple systems. Generate optimized queries, analyze performance, design indexes, and troubleshoot slow queries for PostgreSQL, MySQL, MongoDB, and more."
},
"content": {
"files": [
{
"path": "README.md",
"sha256": "bfd411b887ab6bd4ca78801016079e0dddc4eddc9e18e83dbab08801e8d454cf"
},
{
"path": ".claude-plugin/plugin.json",
"sha256": "af2016bf964622dd672b9b34c60c05fb4a5ae11003753a080bc79d4149241781"
},
{
"path": "skills/query-expert/SKILL.md",
"sha256": "04ea2fe3a6d221358ae177de8e28a3fe3ea3676b6ef18fc17f48a46fa58237eb"
},
{
"path": "skills/query-expert/references/sql_patterns.md",
"sha256": "97d9027a39d3e23bc555e3a8041d5b2de0b50074d7b31a702928aff97cbed999"
},
{
"path": "skills/query-expert/references/indexing.md",
"sha256": "b4cc64e2e2a5569bcf0ca67ee8646813bbc89654719e835c567437f35b8fa08b"
},
{
"path": "skills/query-expert/references/nosql_queries.md",
"sha256": "a190b0d5e6ef88bab7dfb938c0da2262fffc126a092b7c0ab9e9ef1ce8093436"
},
{
"path": "skills/query-expert/references/optimization.md",
"sha256": "e7d5d7425d97ce0eec5f18bd8e4a335f936b763f57ce437c8ca01cd84257c22e"
},
{
"path": "skills/query-expert/scripts/optimize_query.sh",
"sha256": "5a6b8232245d512041e49051c580269fc473cc59471a22d6aa2bc61fa9268548"
},
{
"path": "skills/query-expert/scripts/analyze_performance.sh",
"sha256": "e40acef43ed7d63389beeeec9c27a3295355920e661769ef520157bd932c5a77"
},
{
"path": "skills/query-expert/scripts/generate_query.sh",
"sha256": "3fcc909157aec909c9b839dc6c5889dbe5d7e15c9c5ca2aa5e4d13ed45b17991"
}
],
"dirSha256": "92b6306bff831c94ab6c34534c48dd407ef68c06a44f745796e77e2060eeeae1"
},
"security": {
"scannedAt": null,
"scannerVersion": null,
"flags": []
}
}

skills/query-expert/SKILL.md Normal file

@@ -0,0 +1,805 @@
---
name: query-expert
description: Master SQL and database queries across multiple systems. Generate optimized queries, analyze performance, design indexes, and troubleshoot slow queries for PostgreSQL, MySQL, MongoDB, and more.
---
# Query Expert
Master database queries across SQL and NoSQL systems. Generate optimized queries, analyze performance with EXPLAIN plans, design effective indexes, and troubleshoot slow queries.
## What This Skill Does
Helps you write efficient, performant database queries:
- **Generate Queries** - SQL, MongoDB, GraphQL queries
- **Optimize Queries** - Performance tuning and refactoring
- **Design Indexes** - Index strategies for faster queries
- **Analyze Performance** - EXPLAIN plans and query analysis
- **Troubleshoot** - Debug slow queries and bottlenecks
- **Best Practices** - Query patterns and anti-patterns
## Supported Databases
### SQL Databases
- **PostgreSQL** - Advanced features, CTEs, window functions
- **MySQL/MariaDB** - InnoDB optimization, replication
- **SQLite** - Embedded database optimization
- **SQL Server** - T-SQL, execution plans, DMVs
- **Oracle** - PL/SQL, partitioning, hints
### NoSQL Databases
- **MongoDB** - Aggregation pipelines, indexes
- **Redis** - Key-value queries, Lua scripts
- **Elasticsearch** - Full-text search queries
- **Cassandra** - CQL, partition keys
### Query Languages
- **SQL** - Standard and vendor-specific
- **MongoDB Query Language** - Find, aggregation
- **GraphQL** - Efficient data fetching
- **Cypher** - Neo4j graph queries
## SQL Query Patterns
### SELECT Queries
#### Basic SELECT
```sql
-- ✅ Select only needed columns
SELECT
user_id,
email,
created_at
FROM users
WHERE status = 'active'
AND created_at > NOW() - INTERVAL '30 days'
ORDER BY created_at DESC
LIMIT 100;
-- ❌ Avoid SELECT *
SELECT * FROM users; -- Wastes resources
```
#### JOINs
```sql
-- INNER JOIN (most common)
SELECT
o.order_id,
o.total,
c.name AS customer_name,
c.email
FROM orders o
INNER JOIN customers c ON o.customer_id = c.customer_id
WHERE o.created_at >= '2024-01-01';
-- LEFT JOIN (include all left rows)
SELECT
c.customer_id,
c.name,
COUNT(o.order_id) AS order_count,
COALESCE(SUM(o.total), 0) AS total_spent
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name;
-- Multiple JOINs
SELECT
o.order_id,
c.name AS customer_name,
p.product_name,
oi.quantity,
oi.price
FROM orders o
INNER JOIN customers c ON o.customer_id = c.customer_id
INNER JOIN order_items oi ON o.order_id = oi.order_id
INNER JOIN products p ON oi.product_id = p.product_id
WHERE o.status = 'completed';
```
#### Subqueries
```sql
-- Subquery in WHERE
SELECT name, email
FROM customers
WHERE customer_id IN (
SELECT DISTINCT customer_id
FROM orders
WHERE total > 1000
);
-- Correlated subquery
SELECT
c.name,
(SELECT COUNT(*)
FROM orders o
WHERE o.customer_id = c.customer_id) AS order_count
FROM customers c;
-- ✅ Better: Use JOIN instead
SELECT
c.name,
COUNT(o.order_id) AS order_count
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name;
```
### Aggregation
```sql
-- GROUP BY with aggregates
SELECT
category,
COUNT(*) AS product_count,
AVG(price) AS avg_price,
MIN(price) AS min_price,
MAX(price) AS max_price,
SUM(stock_quantity) AS total_stock
FROM products
GROUP BY category
HAVING COUNT(*) > 5
ORDER BY avg_price DESC;
-- Multiple GROUP BY columns
SELECT
DATE_TRUNC('month', created_at) AS month,
category,
SUM(total) AS monthly_sales
FROM orders
GROUP BY DATE_TRUNC('month', created_at), category
ORDER BY month DESC, monthly_sales DESC;
-- ROLLUP for subtotals
SELECT
COALESCE(category, 'TOTAL') AS category,
COALESCE(brand, 'All Brands') AS brand,
SUM(sales) AS total_sales
FROM products
GROUP BY ROLLUP(category, brand);
```
### Window Functions (PostgreSQL, SQL Server, MySQL 8+)
```sql
-- ROW_NUMBER
SELECT
customer_id,
order_date,
total,
ROW_NUMBER() OVER (
PARTITION BY customer_id
ORDER BY order_date DESC
) AS order_rank
FROM orders;
-- Running totals
SELECT
order_date,
total,
SUM(total) OVER (
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_total
FROM orders;
-- RANK vs DENSE_RANK
SELECT
product_name,
sales,
RANK() OVER (ORDER BY sales DESC) AS rank,
DENSE_RANK() OVER (ORDER BY sales DESC) AS dense_rank,
NTILE(4) OVER (ORDER BY sales DESC) AS quartile
FROM products;
-- LAG and LEAD
SELECT
order_date,
total,
LAG(total, 1) OVER (ORDER BY order_date) AS prev_total,
LEAD(total, 1) OVER (ORDER BY order_date) AS next_total,
total - LAG(total, 1) OVER (ORDER BY order_date) AS change
FROM orders;
```
### CTEs (Common Table Expressions)
```sql
-- Simple CTE
WITH active_customers AS (
SELECT customer_id, name, email
FROM customers
WHERE status = 'active'
)
SELECT
ac.name,
COUNT(o.order_id) AS order_count
FROM active_customers ac
LEFT JOIN orders o ON ac.customer_id = o.customer_id
GROUP BY ac.customer_id, ac.name;
-- Multiple CTEs
WITH
monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) AS month,
SUM(total) AS sales
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
),
avg_monthly AS (
SELECT AVG(sales) AS avg_sales
FROM monthly_sales
)
SELECT
ms.month,
ms.sales,
am.avg_sales,
ms.sales - am.avg_sales AS variance
FROM monthly_sales ms
CROSS JOIN avg_monthly am
ORDER BY ms.month;
-- Recursive CTE (hierarchies)
WITH RECURSIVE org_tree AS (
-- Base case
SELECT
employee_id,
name,
manager_id,
1 AS level,
ARRAY[employee_id] AS path
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- Recursive case
SELECT
e.employee_id,
e.name,
e.manager_id,
ot.level + 1,
ot.path || e.employee_id
FROM employees e
INNER JOIN org_tree ot ON e.manager_id = ot.employee_id
)
SELECT * FROM org_tree ORDER BY path;
```
## Query Optimization
### 1. Use Indexes Effectively
```sql
-- Create index on frequently queried columns
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_orders_customer_date ON orders(customer_id, order_date);
-- Composite index (order matters!)
CREATE INDEX idx_orders_composite
ON orders(status, customer_id, order_date);
-- ✅ This query uses the index
SELECT * FROM orders
WHERE status = 'pending'
AND customer_id = 123
AND order_date > '2024-01-01';
-- ❌ This doesn't use the index (skips first column)
SELECT * FROM orders
WHERE customer_id = 123;
-- Partial/Filtered index (smaller, faster)
CREATE INDEX idx_active_users
ON users(email)
WHERE status = 'active';
-- Covering index (includes all queried columns; INCLUDE is PostgreSQL 11+/SQL Server syntax)
CREATE INDEX idx_users_covering
ON users(email)
INCLUDE (name, created_at);
```
### 2. Avoid SELECT *
```sql
-- ❌ Bad: Retrieves all columns
SELECT * FROM users;
-- ✅ Good: Select only needed columns
SELECT user_id, email, name FROM users;
-- ✅ Good: More efficient for joins
SELECT
u.user_id,
u.email,
o.order_id,
o.total
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id;
```
### 3. Optimize JOINs
```sql
-- ❌ Bad: WHERE on the right table silently turns this LEFT JOIN into an INNER JOIN
SELECT u.name, o.total
FROM users u
LEFT JOIN orders o ON u.user_id = o.user_id
WHERE o.status = 'completed';
-- ✅ Good: Filter before JOIN
SELECT u.name, o.total
FROM users u
INNER JOIN (
SELECT user_id, total
FROM orders
WHERE status = 'completed'
) o ON u.user_id = o.user_id;
-- ✅ Simplest: INNER JOIN with WHERE (modern planners produce the same plan as above)
SELECT u.name, o.total
FROM users u
INNER JOIN orders o ON u.user_id = o.user_id
WHERE o.status = 'completed';
```
### 4. Use EXISTS Instead of IN
```sql
-- ❌ Often slower: IN with subquery (optimizer-dependent)
SELECT name FROM customers
WHERE customer_id IN (
SELECT customer_id FROM orders WHERE total > 1000
);
-- ✅ Usually faster: EXISTS can stop at the first match
SELECT name FROM customers c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = c.customer_id
AND o.total > 1000
);
```
### 5. Avoid Functions on Indexed Columns
```sql
-- ❌ Bad: Function prevents index usage
SELECT * FROM users
WHERE LOWER(email) = 'john@example.com';
-- ✅ Good: Use functional index
CREATE INDEX idx_users_email_lower ON users(LOWER(email));
-- Or use a case-insensitive collation (MySQL)
SELECT * FROM users
WHERE email = 'john@example.com' COLLATE utf8_general_ci;
```
### 6. Limit Result Sets
```sql
-- ✅ Use LIMIT/TOP for pagination
SELECT * FROM orders
ORDER BY created_at DESC
LIMIT 20 OFFSET 0;
-- ✅ Use WHERE to reduce rows early
SELECT * FROM orders
WHERE created_at > NOW() - INTERVAL '7 days'
ORDER BY created_at DESC;
```
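Deep OFFSET pagination still scans and discards every skipped row. A keyset (seek) pagination sketch, assuming an index on `(created_at, order_id)`; the `:last_created_at`/`:last_order_id` placeholders (hypothetical) carry the last row of the previous page, and the row-value comparison is PostgreSQL/MySQL 8+ syntax:
```sql
-- ✅ Keyset pagination: seek past the last row seen instead of OFFSET
SELECT order_id, total, created_at
FROM orders
WHERE (created_at, order_id) < (:last_created_at, :last_order_id)
ORDER BY created_at DESC, order_id DESC
LIMIT 20;
```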
### 7. Batch Operations
```sql
-- ❌ Bad: Multiple single inserts
INSERT INTO users (name, email) VALUES ('User1', 'user1@example.com');
INSERT INTO users (name, email) VALUES ('User2', 'user2@example.com');
-- ✅ Good: Batch insert
INSERT INTO users (name, email) VALUES
('User1', 'user1@example.com'),
('User2', 'user2@example.com'),
('User3', 'user3@example.com');
-- ✅ Good: Batch update
UPDATE products
SET price = price * 1.1
WHERE category IN ('Electronics', 'Computers');
```
## EXPLAIN Plans
### PostgreSQL
```sql
-- Simple EXPLAIN
EXPLAIN
SELECT * FROM orders WHERE customer_id = 123;
-- EXPLAIN ANALYZE (actually runs query)
EXPLAIN ANALYZE
SELECT
c.name,
COUNT(o.order_id) AS order_count
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id
GROUP BY c.customer_id, c.name;
-- Look for:
-- - Seq Scan (bad, needs index)
-- - Index Scan (good)
-- - Bitmap Heap Scan (good for multiple rows)
-- - Hash Join vs Nested Loop
-- - High cost numbers
```
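For orientation, the plans for the `customer_id` lookup above might take these shapes before and after indexing (costs and row counts invented for illustration):
```sql
-- Before: Seq Scan on orders  (cost=0.00..2291.00 rows=12 width=52)
--           Filter: (customer_id = 123)
-- After CREATE INDEX idx_orders_customer ON orders(customer_id):
--         Index Scan using idx_orders_customer on orders
--           (cost=0.42..8.56 rows=12 width=52)
--           Index Cond: (customer_id = 123)
```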
### MySQL
```sql
-- EXPLAIN
EXPLAIN
SELECT * FROM orders WHERE customer_id = 123;
-- EXPLAIN ANALYZE (MySQL 8.0.18+)
EXPLAIN ANALYZE
SELECT * FROM orders WHERE customer_id = 123;
-- Look for:
-- - type: ALL (table scan, bad)
-- - type: index (index scan, good)
-- - type: ref (index lookup, great)
-- - Extra: Using filesort (may need index)
-- - Extra: Using temporary (may need optimization)
```
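A sketch of the key EXPLAIN columns for the same lookup, before and after adding an index (values invented for illustration):
```sql
-- Before: type=ALL, key=NULL, rows=120000, Extra=Using where
-- After CREATE INDEX idx_orders_customer ON orders(customer_id):
--         type=ref, key=idx_orders_customer, rows=12
```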
## Indexing Strategies
### When to Index
**✅ Index these columns:**
- Primary keys (automatic)
- Foreign keys
- Columns in WHERE clauses
- Columns in JOIN conditions
- Columns in ORDER BY
- Columns in GROUP BY
**❌ Don't index** (a short sketch applying both lists follows):
- Small tables (< 1000 rows)
- Columns with low cardinality (few distinct values)
- Frequently updated columns
- Large text/blob columns
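A minimal sketch applying both lists to a hypothetical schema (table and column names assumed for illustration):
```sql
-- ✅ Foreign key used in JOINs and WHERE clauses
CREATE INDEX idx_orders_customer_id ON orders(customer_id);
-- ✅ Column used in ORDER BY for recent-first listings
CREATE INDEX idx_orders_created_at ON orders(created_at);
-- ❌ Low cardinality: a boolean flag rarely helps the planner
-- CREATE INDEX idx_users_is_active ON users(is_active);
-- ❌ Large text column: prefer a full-text (GIN) index instead
-- CREATE INDEX idx_posts_body ON posts(body);
```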
### Index Types
```sql
-- B-Tree (default, most common)
CREATE INDEX idx_users_email ON users(email);
-- Hash index (equality only, PostgreSQL)
CREATE INDEX idx_users_email_hash ON users USING HASH(email);
-- GIN (full-text search, arrays, JSONB)
CREATE INDEX idx_posts_content_gin
ON posts USING GIN(to_tsvector('english', content));
-- GiST (geometric, full-text)
CREATE INDEX idx_locations_gist
ON locations USING GIST(coordinates);
-- Partial index (filtered)
CREATE INDEX idx_orders_pending
ON orders(customer_id)
WHERE status = 'pending';
-- Expression index (indexes a computed value, e.g. the email domain)
CREATE INDEX idx_users_email_domain
ON users((split_part(email, '@', 2)));
```
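Indexes also need upkeep: unused ones still slow every write. A maintenance sketch using PostgreSQL's standard statistics view (the index name in the REINDEX line is hypothetical):
```sql
-- List indexes that have never been scanned
SELECT relname AS table_name, indexrelname AS index_name, idx_scan
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY relname;
-- Rebuild a bloated index without blocking writes (PostgreSQL 12+)
-- REINDEX INDEX CONCURRENTLY idx_users_email;
```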
### Composite Index Order
```sql
-- Index column order matters!
CREATE INDEX idx_orders_search
ON orders(status, customer_id, created_at);
-- ✅ Uses index (left-most column)
WHERE status = 'completed'
-- ✅ Uses index (left-most columns)
WHERE status = 'completed' AND customer_id = 123
-- ✅ Uses full index
WHERE status = 'completed'
AND customer_id = 123
AND created_at > '2024-01-01'
-- ❌ Doesn't use index (skips first column)
WHERE customer_id = 123
-- ❌ Doesn't use index (skips first column)
WHERE created_at > '2024-01-01'
```
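If `customer_id`-led lookups like the last two are common, one option (a sketch; the index name is hypothetical) is a second, narrower index alongside the composite one:
```sql
-- Complements idx_orders_search for queries that skip `status`
CREATE INDEX idx_orders_customer_created ON orders(customer_id, created_at);
```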
## MongoDB Queries
### Find Queries
```javascript
// Basic find
db.users.find({ status: 'active' })
// Find with projection
db.users.find(
{ status: 'active' },
{ name: 1, email: 1, _id: 0 }
)
// Find with operators
db.orders.find({
total: { $gt: 100, $lt: 1000 },
status: { $in: ['pending', 'processing'] },
'customer.city': 'New York'
})
// Find with sort and limit
db.products.find({ category: 'Electronics' })
.sort({ price: -1 })
.limit(10)
// Count
db.users.countDocuments({ status: 'active' })
```
### Aggregation Pipeline
```javascript
// Group and count
db.orders.aggregate([
{ $match: { status: 'completed' } },
{ $group: {
_id: '$customer_id',
total_orders: { $sum: 1 },
total_spent: { $sum: '$total' },
avg_order: { $avg: '$total' }
}},
{ $sort: { total_spent: -1 } },
{ $limit: 10 }
])
// Lookup (JOIN)
db.orders.aggregate([
{ $lookup: {
from: 'customers',
localField: 'customer_id',
foreignField: '_id',
as: 'customer'
}},
{ $unwind: '$customer' },
{ $project: {
order_id: 1,
total: 1,
'customer.name': 1,
'customer.email': 1
}}
])
// Complex aggregation
db.sales.aggregate([
// Filter
{ $match: {
date: { $gte: ISODate('2024-01-01') }
}},
// Add computed fields
{ $addFields: {
month: { $month: '$date' },
year: { $year: '$date' }
}},
// Group by month
{ $group: {
_id: { year: '$year', month: '$month' },
total_sales: { $sum: '$amount' },
order_count: { $sum: 1 },
avg_sale: { $avg: '$amount' }
}},
// Sort
{ $sort: { '_id.year': 1, '_id.month': 1 } },
// Reshape
{ $project: {
_id: 0,
date: {
$concat: [
{ $toString: '$_id.year' },
'-',
{ $toString: '$_id.month' }
]
},
total_sales: 1,
order_count: 1,
avg_sale: { $round: ['$avg_sale', 2] }
}}
])
```
### MongoDB Indexes
```javascript
// Single field index
db.users.createIndex({ email: 1 })
// Compound index
db.orders.createIndex({ customer_id: 1, created_at: -1 })
// Unique index
db.users.createIndex({ email: 1 }, { unique: true })
// Partial index
db.orders.createIndex(
{ customer_id: 1 },
{ partialFilterExpression: { status: 'active' } }
)
// Text index
db.products.createIndex({ name: 'text', description: 'text' })
// TTL index (auto-delete after time)
db.sessions.createIndex(
{ created_at: 1 },
{ expireAfterSeconds: 3600 }
)
// List indexes
db.users.getIndexes()
// Analyze query performance
db.orders.find({ customer_id: 123 }).explain('executionStats')
```
## GraphQL Queries
```graphql
# Basic query
query {
users {
id
name
email
}
}
# Query with arguments
query {
user(id: "123") {
name
email
orders {
id
total
status
}
}
}
# Query with variables
query GetUser($userId: ID!) {
user(id: $userId) {
name
email
orders(limit: 10, status: COMPLETED) {
id
total
createdAt
}
}
}
# Fragments (reusable fields)
fragment UserFields on User {
id
name
email
createdAt
}
query {
user(id: "123") {
...UserFields
orders {
id
total
}
}
}
# Avoid N+1 queries with DataLoader
query {
orders {
id
total
customer { # Batched by DataLoader
name
email
}
}
}
```
## Common Anti-Patterns
### ❌ N+1 Query Problem
```sql
-- Bad: N+1 queries
SELECT * FROM customers; -- 1 query
-- Then for each customer:
SELECT * FROM orders WHERE customer_id = ?; -- N queries
-- Good: Single JOIN query
SELECT
c.customer_id,
c.name,
o.order_id,
o.total
FROM customers c
LEFT JOIN orders o ON c.customer_id = o.customer_id;
```
### ❌ Using OR on Different Columns
```sql
-- Bad: Can't use indexes effectively
SELECT * FROM products
WHERE name = 'iPhone' OR category = 'Electronics';
-- Good: Use UNION
SELECT * FROM products WHERE name = 'iPhone'
UNION
SELECT * FROM products WHERE category = 'Electronics';
```
### ❌ Implicit Type Conversion
```sql
-- Bad: '123' is string, user_id is integer
SELECT * FROM users WHERE user_id = '123';
-- Good: Use correct type
SELECT * FROM users WHERE user_id = 123;
```
## Query Performance Checklist
- [ ] Select only needed columns (no SELECT *)
- [ ] Add indexes to WHERE/JOIN/ORDER BY columns
- [ ] Use EXPLAIN to analyze query plan
- [ ] Avoid functions on indexed columns
- [ ] Use EXISTS instead of IN for subqueries
- [ ] Batch INSERT/UPDATE operations
- [ ] Use appropriate JOIN types
- [ ] Filter early (WHERE before JOIN)
- [ ] Use LIMIT for large result sets
- [ ] Monitor slow query logs
- [ ] Update statistics regularly
- [ ] Avoid SELECT DISTINCT when possible
- [ ] Use covering indexes when appropriate
## Resources
- **PostgreSQL**: https://www.postgresql.org/docs/current/performance-tips.html
- **MySQL**: https://dev.mysql.com/doc/refman/8.0/en/optimization.html
- **MongoDB**: https://docs.mongodb.com/manual/core/query-optimization/
- **Use The Index, Luke**: https://use-the-index-luke.com/
---
**"Premature optimization is the root of all evil, but slow queries are the root of all frustration."**

skills/query-expert/references/indexing.md Normal file

@@ -0,0 +1,7 @@
# Indexing Strategies - See SKILL.md for the complete indexing guide, including:
- When to create indexes
- Composite index column order
- Covering indexes
- Partial/filtered indexes
- Index types (B-Tree, Hash, GIN, GiST)
- Index maintenance

skills/query-expert/references/nosql_queries.md Normal file

@@ -0,0 +1,6 @@
# NoSQL Queries - See SKILL.md for the complete NoSQL guide, including:
- MongoDB find() queries
- MongoDB aggregation pipelines
- MongoDB indexes
- GraphQL queries
- Query optimization for NoSQL

skills/query-expert/references/optimization.md Normal file

@@ -0,0 +1,9 @@
# Query Optimization - See SKILL.md for the complete optimization guide, including:
- Avoid SELECT *
- Use indexes effectively
- Optimize JOINs
- EXISTS vs IN
- Function usage on indexed columns
- LIMIT and pagination
- Batch operations
- Common anti-patterns

skills/query-expert/references/sql_patterns.md Normal file

@@ -0,0 +1,7 @@
# SQL Query Patterns - See SKILL.md for the complete SQL guide, including:
- SELECT queries with JOINs
- Aggregation with GROUP BY
- Window functions
- CTEs (Common Table Expressions)
- Subqueries
- Best practices and anti-patterns

skills/query-expert/scripts/analyze_performance.sh Normal file

@@ -0,0 +1,297 @@
#!/bin/bash
# Query Expert - Performance Analyzer
# Analyze EXPLAIN output and provide optimization recommendations
set -e
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
print_info() {
echo -e "${BLUE} $1${NC}"
}
print_warning() {
echo -e "${YELLOW}$1${NC}"
}
print_success() {
echo -e "${GREEN}$1${NC}"
}
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ ║"
echo "║ Query Expert - Performance Analyzer ║"
echo "║ ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
print_info "Performance Analysis Guide"
echo ""
cat << 'EOF'
## PostgreSQL EXPLAIN Analysis
### Run EXPLAIN
```sql
EXPLAIN ANALYZE
SELECT * FROM orders WHERE customer_id = 123;
```
### What to Look For:
**Seq Scan (Table Scan) ❌**
→ Reading entire table
→ Fix: Add index on filtered column
→ CREATE INDEX idx_orders_customer ON orders(customer_id);
**Index Scan ✅**
→ Using index efficiently
→ Good performance for specific rows
**Bitmap Heap Scan ✅**
→ Good for returning multiple rows
→ Efficient index usage
**Nested Loop ⚠️**
→ Can be slow with large datasets
→ Consider: Hash Join or Merge Join
**Hash Join ✅**
→ Good for large joins
→ Requires memory
**Cost Numbers**
→ Higher = slower
→ Compare before/after optimization
→ cost=0.00..35.50 rows=10
**Actual Time**
→ Real execution time
→ actual time=0.023..0.156 rows=10 loops=1
---
## MySQL EXPLAIN Analysis
### Run EXPLAIN
```sql
EXPLAIN SELECT * FROM orders WHERE customer_id = 123;
```
### Type Column:
**ALL ❌**
→ Full table scan
→ Fix: Add index
**index ⚠️**
→ Full index scan
→ Better than ALL, but could improve
**range ✅**
→ Index range scan
→ Good for WHERE with >, <, BETWEEN
**ref ✅✅**
→ Index lookup
→ Excellent performance
**eq_ref ✅✅✅**
→ Unique index lookup
→ Best performance
**const ✅✅✅**
→ Constant lookup (primary key)
→ Fastest possible
### Extra Column:
**Using filesort ⚠️**
→ Sorting in memory/disk
→ Fix: Add index on ORDER BY columns
**Using temporary ⚠️**
→ Creating temporary table
→ Fix: Optimize GROUP BY or DISTINCT
**Using index ✅**
→ Covering index (index-only scan)
→ Excellent performance
**Using where ✅**
→ Filtering after read
→ Normal for WHERE clauses
---
## MongoDB Explain Analysis
### Run Explain
```javascript
db.orders.find({ customer_id: 123 }).explain("executionStats")
```
### What to Look For:
**COLLSCAN ❌**
→ Full collection scan
→ Fix: Create index
→ db.orders.createIndex({ customer_id: 1 })
**IXSCAN ✅**
→ Index scan
→ Good performance
**executionTimeMillis**
→ Total execution time
→ < 100ms good, > 1000ms needs optimization
**nReturned vs totalDocsExamined**
→ Efficiency ratio
→ Ideally close to 1:1
→ If totalDocsExamined >> nReturned, add index
**Index Usage**
→ indexName: "customer_id_1" ✅
→ indexName: null ❌ (no index used)
---
## Index Recommendations
### When to Create Index:
1. **WHERE Clause**
CREATE INDEX idx_table_column ON table(column);
2. **JOIN Columns**
CREATE INDEX idx_table_join_col ON table(join_column);
3. **ORDER BY**
CREATE INDEX idx_table_sort ON table(sort_column);
4. **Composite Index (order matters!)**
CREATE INDEX idx_multi ON table(col1, col2, col3);
→ Works for: col1 | col1,col2 | col1,col2,col3
→ NOT for: col2 | col3 | col2,col3
5. **Covering Index**
CREATE INDEX idx_covering ON table(filter_col) INCLUDE (select_cols);
→ Index contains all needed columns
→ Fastest possible (index-only scan)
### When NOT to Index:
- Small tables (< 1000 rows)
- Columns with low cardinality (few distinct values)
- Frequently updated columns
- Large text/blob columns
---
## Query Optimization Checklist
Performance Issues:
[ ] Check EXPLAIN plan
[ ] Look for table scans (Seq Scan, ALL, COLLSCAN)
[ ] Identify missing indexes
[ ] Check JOIN types (Nested Loop on large tables)
[ ] Look for filesort or temporary tables
[ ] Verify index usage (Using index)
Optimizations:
[ ] Create indexes on WHERE columns
[ ] Create indexes on JOIN columns
[ ] Use composite indexes (correct order)
[ ] Add covering indexes for frequent queries
[ ] Use LIMIT to reduce result set
[ ] Avoid SELECT * (select only needed columns)
[ ] Avoid functions on indexed columns
[ ] Use EXISTS instead of IN (subqueries)
[ ] Filter early (before JOIN)
[ ] Use appropriate JOIN type
Monitoring:
[ ] Run EXPLAIN ANALYZE before optimization
[ ] Create indexes
[ ] Run EXPLAIN ANALYZE after optimization
[ ] Compare execution time and cost
[ ] Test with production-like data volume
[ ] Monitor slow query log
---
## Example Optimization
### Before (Slow)
```sql
-- EXPLAIN shows: Seq Scan, cost=1000.00
SELECT * FROM orders WHERE customer_id = 123;
```
### Optimization Steps
```sql
-- 1. Create index
CREATE INDEX idx_orders_customer ON orders(customer_id);
-- 2. Optimize query (avoid SELECT *)
SELECT order_id, total, created_at
FROM orders
WHERE customer_id = 123
ORDER BY created_at DESC
LIMIT 100;
-- 3. Check improvement
EXPLAIN ANALYZE
SELECT order_id, total, created_at
FROM orders
WHERE customer_id = 123
ORDER BY created_at DESC
LIMIT 100;
```
### After (Fast)
```
→ Index Scan using idx_orders_customer
→ cost=0.29..15.50 (95% improvement!)
→ actual time=0.015..0.023
```
---
## Tools
**PostgreSQL:**
- EXPLAIN ANALYZE
- pg_stat_statements extension
- pgBadger (log analyzer)
**MySQL:**
- EXPLAIN
- SHOW PROFILE
- MySQL Workbench Performance Dashboard
**MongoDB:**
- explain("executionStats")
- MongoDB Compass (GUI)
- Database Profiler
---
EOF
print_success "Performance analysis guide displayed"
echo ""
print_info "Next Steps:"
echo " 1. Run EXPLAIN on your slow query"
echo " 2. Identify the bottleneck (table scan, no index, etc.)"
echo " 3. Apply recommended optimization"
echo " 4. Re-run EXPLAIN to verify improvement"
echo " 5. Test with production data volume"
echo ""

skills/query-expert/scripts/generate_query.sh Normal file

@@ -0,0 +1,436 @@
#!/bin/bash
# Query Expert - Query Generator
# Generate optimized database queries with best practices
set -e
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
NL=$'\n'  # literal newline, used in ${var//,/...} substitutions inside heredocs
# Helper functions
print_success() {
echo -e "${GREEN}$1${NC}"
}
print_info() {
echo -e "${BLUE} $1${NC}"
}
print_warning() {
echo -e "${YELLOW}$1${NC}"
}
prompt_input() {
local prompt="$1"
local var_name="$2"
local required="${3:-false}"
while true; do
echo -e "${BLUE}${prompt}${NC}"
read -r input
if [ -z "$input" ] && [ "$required" = true ]; then
echo -e "${RED}This field is required.${NC}"
continue
fi
eval "$var_name='$input'"
break
done
}
prompt_select() {
local prompt="$1"
local var_name="$2"
shift 2
local options=("$@")
echo -e "${BLUE}${prompt}${NC}"
PS3="Select (1-${#options[@]}): "
select opt in "${options[@]}"; do
if [ -n "$opt" ]; then
eval "$var_name='$opt'"
break
else
echo -e "${RED}Invalid selection.${NC}"
fi
done
}
# Banner
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ ║"
echo "║ Query Expert - Query Generator ║"
echo "║ ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
# Step 1: Database Type
print_info "Step 1/5: Database Type"
prompt_select "Which database?" DB_TYPE \
"PostgreSQL" \
"MySQL" \
"SQLite" \
"SQL Server" \
"MongoDB" \
"GraphQL"
# Step 2: Query Type
print_info "Step 2/5: Query Type"
case $DB_TYPE in
"MongoDB")
prompt_select "What type of query?" QUERY_TYPE \
"Find" \
"Aggregation" \
"Update" \
"Insert" \
"Delete"
;;
"GraphQL")
prompt_select "What type of query?" QUERY_TYPE \
"Query" \
"Mutation" \
"Subscription"
;;
*)
prompt_select "What type of query?" QUERY_TYPE \
"SELECT" \
"INSERT" \
"UPDATE" \
"DELETE" \
"JOIN" \
"Aggregate (GROUP BY)" \
"Window Function" \
"CTE (WITH)"
;;
esac
# Step 3: Table/Collection
print_info "Step 3/5: Target Table/Collection"
if [ "$DB_TYPE" = "MongoDB" ]; then
prompt_input "Collection name (e.g., users, orders):" TABLE_NAME true
else
prompt_input "Table name (e.g., users, orders):" TABLE_NAME true
fi
# Step 4: Columns/Fields
print_info "Step 4/5: Columns/Fields"
prompt_input "Columns to select (comma-separated, or * for all):" COLUMNS
COLUMNS=${COLUMNS:-"*"}
# Step 5: Conditions
print_info "Step 5/5: Conditions (optional)"
prompt_input "WHERE conditions (e.g., status = 'active'):" CONDITIONS
# Generate query based on selections
generate_sql_select() {
cat << EOF
-- Generated SELECT Query
-- Database: $DB_TYPE
-- Optimized for performance
SELECT
    ${COLUMNS//,/,${NL}    }
FROM $TABLE_NAME
EOF
if [ -n "$CONDITIONS" ]; then
echo "WHERE $CONDITIONS"
fi
cat << 'EOF'
-- Optional: Add ORDER BY
-- ORDER BY created_at DESC
-- Optional: Add LIMIT
-- LIMIT 100;
EOF
echo ""
print_info "Optimization Tips:"
echo " • Select only needed columns (avoid SELECT *)"
echo " • Add index on WHERE columns: CREATE INDEX idx_${TABLE_NAME}_${CONDITIONS%% *} ON $TABLE_NAME(${CONDITIONS%% *})"
echo " • Use LIMIT for large result sets"
echo " • Add ORDER BY for consistent results"
}
generate_sql_join() {
prompt_input "Second table name:" TABLE2
prompt_input "JOIN column (e.g., customer_id):" JOIN_COL
cat << EOF
-- Generated JOIN Query
-- Database: $DB_TYPE
SELECT
    ${TABLE_NAME:0:1}.${COLUMNS//,/,${NL}    ${TABLE_NAME:0:1}.}
FROM $TABLE_NAME ${TABLE_NAME:0:1}
INNER JOIN $TABLE2 ${TABLE2:0:1}
ON ${TABLE_NAME:0:1}.$JOIN_COL = ${TABLE2:0:1}.$JOIN_COL
EOF
if [ -n "$CONDITIONS" ]; then
echo "WHERE $CONDITIONS"
fi
echo ";"
echo ""
print_info "JOIN Types:"
echo " • INNER JOIN - Only matching rows"
echo " • LEFT JOIN - All left rows + matching right"
echo " • RIGHT JOIN - All right rows + matching left"
echo " • FULL OUTER JOIN - All rows from both"
echo ""
print_info "Optimization:"
echo " • Add indexes on JOIN columns"
echo " • Filter early with WHERE"
echo " • Use INNER JOIN when possible"
}
generate_sql_aggregate() {
prompt_input "GROUP BY columns (comma-separated):" GROUP_COLS
prompt_input "Aggregate function (e.g., COUNT(*), SUM(amount)):" AGG_FUNC
cat << EOF
-- Generated Aggregate Query
-- Database: $DB_TYPE
SELECT
    ${GROUP_COLS//,/,${NL}    },
$AGG_FUNC AS total
FROM $TABLE_NAME
EOF
if [ -n "$CONDITIONS" ]; then
echo "WHERE $CONDITIONS"
fi
cat << EOF
GROUP BY ${GROUP_COLS//,/,${NL}    }
-- Optional: Add HAVING for aggregate filters
-- HAVING COUNT(*) > 10
ORDER BY total DESC;
EOF
echo ""
print_info "Aggregate Functions:"
echo " • COUNT(*) - Count rows"
echo " • SUM(column) - Sum values"
echo " • AVG(column) - Average"
echo " • MIN/MAX(column) - Min/Max values"
}
generate_sql_cte() {
cat << EOF
-- Generated CTE (Common Table Expression)
-- Database: $DB_TYPE
WITH ${TABLE_NAME}_filtered AS (
SELECT
    ${COLUMNS//,/,${NL}    }
FROM $TABLE_NAME
EOF
if [ -n "$CONDITIONS" ]; then
echo " WHERE $CONDITIONS"
fi
cat << EOF
)
SELECT *
FROM ${TABLE_NAME}_filtered
-- Add JOINs or additional filtering here
;
EOF
echo ""
print_info "CTE Benefits:"
echo " • Improves readability"
echo " • Reusable within same query"
echo " • Supports recursion"
echo " • Better than subqueries in many cases"
}
generate_mongodb_find() {
cat << EOF
// Generated MongoDB Find Query
// Collection: $TABLE_NAME
db.$TABLE_NAME.find(
EOF
if [ -n "$CONDITIONS" ]; then
echo " { $CONDITIONS },"
else
echo " {},"
fi
if [ "$COLUMNS" != "*" ]; then
echo " { ${COLUMNS//,/: 1, }: 1, _id: 0 }"
else
echo " {}"
fi
cat << 'EOF'
)
.sort({ created_at: -1 })
.limit(100);
EOF
echo ""
print_info "MongoDB Optimization:"
echo " • Create index: db.$TABLE_NAME.createIndex({ field: 1 })"
echo " • Use projection to limit fields"
echo " • Add sort and limit for performance"
echo " • Use explain(): .explain('executionStats')"
}
generate_mongodb_aggregation() {
cat << EOF
// Generated MongoDB Aggregation Pipeline
// Collection: $TABLE_NAME
db.$TABLE_NAME.aggregate([
// Stage 1: Match (filter)
{ \$match: {
EOF
if [ -n "$CONDITIONS" ]; then
echo " $CONDITIONS"
fi
cat << 'EOF'
}},
// Stage 2: Group (aggregate)
{ $group: {
_id: '$field',
count: { $sum: 1 },
total: { $sum: '$amount' },
average: { $avg: '$amount' }
}},
// Stage 3: Sort
{ $sort: { total: -1 } },
// Stage 4: Limit
{ $limit: 10 }
]);
EOF
echo ""
print_info "Aggregation Stages:"
echo " • \$match - Filter documents"
echo " • \$group - Group and aggregate"
echo " • \$project - Reshape documents"
echo " • \$lookup - JOIN collections"
echo " • \$sort - Sort results"
echo " • \$limit - Limit results"
}
generate_graphql_query() {
# ${TABLE_NAME^} requires bash 4+; capitalize portably (macOS ships bash 3.2)
local cap_name
cap_name="$(printf '%s' "${TABLE_NAME:0:1}" | tr '[:lower:]' '[:upper:]')${TABLE_NAME:1}"
cat << EOF
# Generated GraphQL Query
query Get${cap_name} {
$TABLE_NAME {
${COLUMNS//,/
}
}
}
# With variables:
query Get${cap_name}(\$id: ID!) {
${TABLE_NAME}(id: \$id) {
${COLUMNS//,/
}
}
}
EOF
echo ""
print_info "GraphQL Best Practices:"
echo " • Request only needed fields"
echo " • Use fragments for reusable fields"
echo " • Implement DataLoader to avoid N+1"
echo " • Add pagination (first, after)"
}
# Generate based on query type
case $DB_TYPE in
"MongoDB")
case $QUERY_TYPE in
"Find")
generate_mongodb_find
;;
"Aggregation")
generate_mongodb_aggregation
;;
*)
print_warning "No generator implemented for $QUERY_TYPE yet"
;;
esac
;;
"GraphQL")
generate_graphql_query
;;
*)
case $QUERY_TYPE in
"SELECT")
generate_sql_select
;;
"JOIN")
generate_sql_join
;;
"Aggregate (GROUP BY)")
generate_sql_aggregate
;;
"CTE (WITH)")
generate_sql_cte
;;
*)
print_warning "No generator implemented for $QUERY_TYPE yet"
;;
esac
;;
esac
# Summary
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ Query Generated ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
print_success "Query generated for $DB_TYPE"
print_success "Type: $QUERY_TYPE"
echo ""
print_info "Next steps:"
echo " 1. Review and test the query"
echo " 2. Add appropriate indexes"
echo " 3. Use EXPLAIN to analyze performance"
echo " 4. Add error handling in production"
echo " 5. Monitor query performance"
echo ""
print_info "Performance Tools:"
case $DB_TYPE in
"PostgreSQL")
echo " • EXPLAIN ANALYZE query"
echo " • pg_stat_statements extension"
;;
"MySQL")
echo " • EXPLAIN query"
echo " • SHOW PROFILE"
;;
"MongoDB")
echo " • query.explain('executionStats')"
echo " • db.collection.getIndexes()"
;;
esac
echo ""

skills/query-expert/scripts/optimize_query.sh Normal file

@@ -0,0 +1,196 @@
#!/bin/bash
# Query Expert - Query Optimizer
# Analyze and optimize SQL queries
set -e
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
MAGENTA='\033[0;35m'
NC='\033[0m'
PASS_COUNT=0
WARN_COUNT=0
FAIL_COUNT=0
print_success() {
echo -e "${GREEN}✓ GOOD${NC} $1"
PASS_COUNT=$((PASS_COUNT + 1))  # avoid ((var++)): it returns status 1 when var is 0, aborting under set -e
}
print_warning() {
echo -e "${YELLOW}⚠ IMPROVE${NC} $1"
WARN_COUNT=$((WARN_COUNT + 1))
}
print_error() {
echo -e "${RED}✗ ISSUE${NC} $1"
FAIL_COUNT=$((FAIL_COUNT + 1))
}
print_info() {
echo -e "${BLUE} INFO${NC} $1"
}
print_section() {
echo ""
echo -e "${MAGENTA}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${MAGENTA}$1${NC}"
echo -e "${MAGENTA}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
}
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ ║"
echo "║ Query Expert - Query Optimizer ║"
echo "║ ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
if [ -z "$1" ]; then
print_info "Usage: $0 <query-file.sql>"
print_info "Example: $0 slow_query.sql"
exit 1
fi
QUERY_FILE="$1"
if [ ! -f "$QUERY_FILE" ]; then
echo -e "${RED}File not found: $QUERY_FILE${NC}"
exit 1
fi
QUERY=$(cat "$QUERY_FILE")
# Section 1: SELECT * Detection
print_section "1. COLUMN SELECTION"
if echo "$QUERY" | grep -qi "SELECT \*"; then
print_error "Using SELECT * (selects all columns)"
echo " Fix: SELECT only needed columns"
echo " SELECT user_id, name, email FROM users;"
else
print_success "Selecting specific columns"
fi
# Section 2: Index Usage
print_section "2. INDEX OPPORTUNITIES"
if echo "$QUERY" | grep -qi "WHERE"; then
WHERE_COLS=$(echo "$QUERY" | grep -oi "WHERE [^;]*" | grep -o "[a-zA-Z_][a-zA-Z0-9_]*[[:space:]]*=" | sed 's/[[:space:]]*=$//')
if [ -n "$WHERE_COLS" ]; then
print_info "Columns in WHERE clause should have indexes:"
for col in $WHERE_COLS; do
echo " CREATE INDEX idx_table_$col ON table($col);"
done
fi
fi
if echo "$QUERY" | grep -qi "JOIN.*ON"; then
print_info "JOIN columns should have indexes:"
echo " CREATE INDEX idx_table_join_col ON table(join_col);"
fi
if echo "$QUERY" | grep -qi "ORDER BY"; then
print_info "ORDER BY columns benefit from indexes:"
echo " Consider composite index with WHERE + ORDER BY columns"
fi
# Section 3: JOIN Analysis
print_section "3. JOIN OPTIMIZATION"
if echo "$QUERY" | grep -qi "LEFT JOIN" && echo "$QUERY" | grep -qi "WHERE"; then
print_warning "LEFT JOIN with WHERE on right table"
echo " Consider using INNER JOIN instead"
fi
if echo "$QUERY" | grep -qi "WHERE.*IN\s*(SELECT"; then
print_error "Using IN with subquery"
echo " Fix: Use EXISTS or JOIN instead"
echo " WHERE EXISTS (SELECT 1 FROM ...)"
fi
# Section 4: Function Usage
print_section "4. FUNCTION ON COLUMNS"
if echo "$QUERY" | grep -Eqi "WHERE.*(LOWER|UPPER|SUBSTRING|DATE|YEAR|MONTH)\s*\("; then
print_error "Function on indexed column in WHERE"
echo " Fix: Use functional index or avoid function"
echo " CREATE INDEX idx_table_lower_col ON table(LOWER(col));"
fi
# Section 5: DISTINCT Usage
print_section "5. DISTINCT USAGE"
if echo "$QUERY" | grep -qi "SELECT DISTINCT"; then
print_warning "Using DISTINCT (potentially expensive)"
echo " Consider: Is DISTINCT necessary?"
echo " Alternative: Use GROUP BY if aggregating"
fi
# Section 6: Subqueries
print_section "6. SUBQUERY OPTIMIZATION"
SUBQUERY_COUNT=$(echo "$QUERY" | grep -oi "SELECT" | wc -l)
if [ "$SUBQUERY_COUNT" -gt 1 ]; then
if echo "$QUERY" | grep -qi "FROM.*SELECT"; then
print_info "Contains subqueries - consider CTEs for readability"
echo " WITH cte AS (SELECT ...) SELECT ... FROM cte"
fi
fi
# Section 7: LIMIT Usage
print_section "7. RESULT SET SIZE"
if ! echo "$QUERY" | grep -qi "LIMIT\|TOP\|FETCH FIRST"; then
print_warning "No LIMIT clause found"
echo " Add LIMIT to prevent large result sets"
echo " SELECT ... LIMIT 100;"
fi
# Section 8: Sorting
print_section "8. SORTING"
if echo "$QUERY" | grep -qi "ORDER BY"; then
if ! echo "$QUERY" | grep -qi "LIMIT"; then
print_warning "ORDER BY without LIMIT"
echo " Consider adding LIMIT to reduce sort cost"
fi
fi
# Summary
echo ""
echo "╔════════════════════════════════════════════════════════════╗"
echo "║ Optimization Summary ║"
echo "╚════════════════════════════════════════════════════════════╝"
echo ""
echo -e "${GREEN}✓ Good practices: $PASS_COUNT${NC}"
echo -e "${YELLOW}⚠ Improvements: $WARN_COUNT${NC}"
echo -e "${RED}✗ Issues found: $FAIL_COUNT${NC}"
echo ""
TOTAL=$((PASS_COUNT + FAIL_COUNT + WARN_COUNT))
if [ $TOTAL -gt 0 ]; then
SCORE=$(( ((PASS_COUNT * 2 + WARN_COUNT) * 100) / (TOTAL * 2) ))
echo "Query Score: $SCORE%"
echo ""
fi
print_info "Recommended Next Steps:"
echo " 1. Run EXPLAIN ANALYZE on this query"
echo " 2. Create recommended indexes"
echo " 3. Test query performance before/after"
echo " 4. Monitor query in production"
echo ""
print_info "EXPLAIN Commands:"
echo " PostgreSQL: EXPLAIN ANALYZE <query>"
echo " MySQL: EXPLAIN <query>"
echo " MongoDB: db.collection.find().explain('executionStats')"
echo ""
[ $FAIL_COUNT -gt 0 ] && exit 1 || exit 0