From 55ae231efe01284b795df552ea4b967ccb577454 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sat, 29 Nov 2025 18:49:43 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 12 + README.md | 3 + plugin.lock.json | 72 ++ skills/query-expert/SKILL.md | 805 ++++++++++++++++++ skills/query-expert/references/indexing.md | 7 + .../query-expert/references/nosql_queries.md | 6 + .../query-expert/references/optimization.md | 9 + .../query-expert/references/sql_patterns.md | 7 + .../scripts/analyze_performance.sh | 297 +++++++ skills/query-expert/scripts/generate_query.sh | 436 ++++++++++ skills/query-expert/scripts/optimize_query.sh | 196 +++++ 11 files changed, 1850 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 plugin.lock.json create mode 100644 skills/query-expert/SKILL.md create mode 100644 skills/query-expert/references/indexing.md create mode 100644 skills/query-expert/references/nosql_queries.md create mode 100644 skills/query-expert/references/optimization.md create mode 100644 skills/query-expert/references/sql_patterns.md create mode 100755 skills/query-expert/scripts/analyze_performance.sh create mode 100755 skills/query-expert/scripts/generate_query.sh create mode 100755 skills/query-expert/scripts/optimize_query.sh diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..1305c4c --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "query-expert", + "description": "Master SQL and database queries across multiple systems. 
Generate optimized queries, analyze performance, design indexes, and troubleshoot slow queries for PostgreSQL, MySQL, MongoDB, and more.", + "version": "0.0.0-2025.11.28", + "author": { + "name": "James Rochabrun", + "email": "jamesrochabrun@gmail.com" + }, + "skills": [ + "./skills/query-expert" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..31746a2 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# query-expert + +Master SQL and database queries across multiple systems. Generate optimized queries, analyze performance, design indexes, and troubleshoot slow queries for PostgreSQL, MySQL, MongoDB, and more. diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..6076d4f --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,72 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:jamesrochabrun/skills:query-expert", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "0495aa194524c0c13794bdb4b73f8eeb58d62849", + "treeHash": "92b6306bff831c94ab6c34534c48dd407ef68c06a44f745796e77e2060eeeae1", + "generatedAt": "2025-11-28T10:17:54.604144Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "query-expert", + "description": "Master SQL and database queries across multiple systems. Generate optimized queries, analyze performance, design indexes, and troubleshoot slow queries for PostgreSQL, MySQL, MongoDB, and more." 
+ }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "bfd411b887ab6bd4ca78801016079e0dddc4eddc9e18e83dbab08801e8d454cf" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "af2016bf964622dd672b9b34c60c05fb4a5ae11003753a080bc79d4149241781" + }, + { + "path": "skills/query-expert/SKILL.md", + "sha256": "04ea2fe3a6d221358ae177de8e28a3fe3ea3676b6ef18fc17f48a46fa58237eb" + }, + { + "path": "skills/query-expert/references/sql_patterns.md", + "sha256": "97d9027a39d3e23bc555e3a8041d5b2de0b50074d7b31a702928aff97cbed999" + }, + { + "path": "skills/query-expert/references/indexing.md", + "sha256": "b4cc64e2e2a5569bcf0ca67ee8646813bbc89654719e835c567437f35b8fa08b" + }, + { + "path": "skills/query-expert/references/nosql_queries.md", + "sha256": "a190b0d5e6ef88bab7dfb938c0da2262fffc126a092b7c0ab9e9ef1ce8093436" + }, + { + "path": "skills/query-expert/references/optimization.md", + "sha256": "e7d5d7425d97ce0eec5f18bd8e4a335f936b763f57ce437c8ca01cd84257c22e" + }, + { + "path": "skills/query-expert/scripts/optimize_query.sh", + "sha256": "5a6b8232245d512041e49051c580269fc473cc59471a22d6aa2bc61fa9268548" + }, + { + "path": "skills/query-expert/scripts/analyze_performance.sh", + "sha256": "e40acef43ed7d63389beeeec9c27a3295355920e661769ef520157bd932c5a77" + }, + { + "path": "skills/query-expert/scripts/generate_query.sh", + "sha256": "3fcc909157aec909c9b839dc6c5889dbe5d7e15c9c5ca2aa5e4d13ed45b17991" + } + ], + "dirSha256": "92b6306bff831c94ab6c34534c48dd407ef68c06a44f745796e77e2060eeeae1" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/query-expert/SKILL.md b/skills/query-expert/SKILL.md new file mode 100644 index 0000000..06bac09 --- /dev/null +++ b/skills/query-expert/SKILL.md @@ -0,0 +1,805 @@ +--- +name: query-expert +description: Master SQL and database queries across multiple systems. 
Generate optimized queries, analyze performance, design indexes, and troubleshoot slow queries for PostgreSQL, MySQL, MongoDB, and more. +--- + +# Query Expert + +Master database queries across SQL and NoSQL systems. Generate optimized queries, analyze performance with EXPLAIN plans, design effective indexes, and troubleshoot slow queries. + +## What This Skill Does + +Helps you write efficient, performant database queries: +- **Generate Queries** - SQL, MongoDB, GraphQL queries +- **Optimize Queries** - Performance tuning and refactoring +- **Design Indexes** - Index strategies for faster queries +- **Analyze Performance** - EXPLAIN plans and query analysis +- **Troubleshoot** - Debug slow queries and bottlenecks +- **Best Practices** - Query patterns and anti-patterns + +## Supported Databases + +### SQL Databases +- **PostgreSQL** - Advanced features, CTEs, window functions +- **MySQL/MariaDB** - InnoDB optimization, replication +- **SQLite** - Embedded database optimization +- **SQL Server** - T-SQL, execution plans, DMVs +- **Oracle** - PL/SQL, partitioning, hints + +### NoSQL Databases +- **MongoDB** - Aggregation pipelines, indexes +- **Redis** - Key-value queries, Lua scripts +- **Elasticsearch** - Full-text search queries +- **Cassandra** - CQL, partition keys + +### Query Languages +- **SQL** - Standard and vendor-specific +- **MongoDB Query Language** - Find, aggregation +- **GraphQL** - Efficient data fetching +- **Cypher** - Neo4j graph queries + +## SQL Query Patterns + +### SELECT Queries + +#### Basic SELECT + +```sql +-- ✅ Select only needed columns +SELECT + user_id, + email, + created_at +FROM users +WHERE status = 'active' + AND created_at > NOW() - INTERVAL '30 days' +ORDER BY created_at DESC +LIMIT 100; + +-- ❌ Avoid SELECT * +SELECT * FROM users; -- Wastes resources +``` + +#### JOINs + +```sql +-- INNER JOIN (most common) +SELECT + o.order_id, + o.total, + c.name AS customer_name, + c.email +FROM orders o +INNER JOIN customers c ON 
o.customer_id = c.customer_id +WHERE o.created_at >= '2024-01-01'; + +-- LEFT JOIN (include all left rows) +SELECT + c.customer_id, + c.name, + COUNT(o.order_id) AS order_count, + COALESCE(SUM(o.total), 0) AS total_spent +FROM customers c +LEFT JOIN orders o ON c.customer_id = o.customer_id +GROUP BY c.customer_id, c.name; + +-- Multiple JOINs +SELECT + o.order_id, + c.name AS customer_name, + p.product_name, + oi.quantity, + oi.price +FROM orders o +INNER JOIN customers c ON o.customer_id = c.customer_id +INNER JOIN order_items oi ON o.order_id = oi.order_id +INNER JOIN products p ON oi.product_id = p.product_id +WHERE o.status = 'completed'; +``` + +#### Subqueries + +```sql +-- Subquery in WHERE +SELECT name, email +FROM customers +WHERE customer_id IN ( + SELECT DISTINCT customer_id + FROM orders + WHERE total > 1000 +); + +-- Correlated subquery +SELECT + c.name, + (SELECT COUNT(*) + FROM orders o + WHERE o.customer_id = c.customer_id) AS order_count +FROM customers c; + +-- ✅ Better: Use JOIN instead +SELECT + c.name, + COUNT(o.order_id) AS order_count +FROM customers c +LEFT JOIN orders o ON c.customer_id = o.customer_id +GROUP BY c.customer_id, c.name; +``` + +### Aggregation + +```sql +-- GROUP BY with aggregates +SELECT + category, + COUNT(*) AS product_count, + AVG(price) AS avg_price, + MIN(price) AS min_price, + MAX(price) AS max_price, + SUM(stock_quantity) AS total_stock +FROM products +GROUP BY category +HAVING COUNT(*) > 5 +ORDER BY avg_price DESC; + +-- Multiple GROUP BY columns +SELECT + DATE_TRUNC('month', created_at) AS month, + category, + SUM(total) AS monthly_sales +FROM orders +GROUP BY DATE_TRUNC('month', created_at), category +ORDER BY month DESC, monthly_sales DESC; + +-- ROLLUP for subtotals +SELECT + COALESCE(category, 'TOTAL') AS category, + COALESCE(brand, 'All Brands') AS brand, + SUM(sales) AS total_sales +FROM products +GROUP BY ROLLUP(category, brand); +``` + +### Window Functions (PostgreSQL, SQL Server, MySQL 8+) + +```sql +-- 
ROW_NUMBER +SELECT + customer_id, + order_date, + total, + ROW_NUMBER() OVER ( + PARTITION BY customer_id + ORDER BY order_date DESC + ) AS order_rank +FROM orders; + +-- Running totals +SELECT + order_date, + total, + SUM(total) OVER ( + ORDER BY order_date + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS running_total +FROM orders; + +-- RANK vs DENSE_RANK +SELECT + product_name, + sales, + RANK() OVER (ORDER BY sales DESC) AS rank, + DENSE_RANK() OVER (ORDER BY sales DESC) AS dense_rank, + NTILE(4) OVER (ORDER BY sales DESC) AS quartile +FROM products; + +-- LAG and LEAD +SELECT + order_date, + total, + LAG(total, 1) OVER (ORDER BY order_date) AS prev_total, + LEAD(total, 1) OVER (ORDER BY order_date) AS next_total, + total - LAG(total, 1) OVER (ORDER BY order_date) AS change +FROM orders; +``` + +### CTEs (Common Table Expressions) + +```sql +-- Simple CTE +WITH active_customers AS ( + SELECT customer_id, name, email + FROM customers + WHERE status = 'active' +) +SELECT + ac.name, + COUNT(o.order_id) AS order_count +FROM active_customers ac +LEFT JOIN orders o ON ac.customer_id = o.customer_id +GROUP BY ac.customer_id, ac.name; + +-- Multiple CTEs +WITH +monthly_sales AS ( + SELECT + DATE_TRUNC('month', order_date) AS month, + SUM(total) AS sales + FROM orders + GROUP BY DATE_TRUNC('month', order_date) +), +avg_monthly AS ( + SELECT AVG(sales) AS avg_sales + FROM monthly_sales +) +SELECT + ms.month, + ms.sales, + am.avg_sales, + ms.sales - am.avg_sales AS variance +FROM monthly_sales ms +CROSS JOIN avg_monthly am +ORDER BY ms.month; + +-- Recursive CTE (hierarchies) +WITH RECURSIVE org_tree AS ( + -- Base case + SELECT + employee_id, + name, + manager_id, + 1 AS level, + ARRAY[employee_id] AS path + FROM employees + WHERE manager_id IS NULL + + UNION ALL + + -- Recursive case + SELECT + e.employee_id, + e.name, + e.manager_id, + ot.level + 1, + ot.path || e.employee_id + FROM employees e + INNER JOIN org_tree ot ON e.manager_id = ot.employee_id +) 
+SELECT * FROM org_tree ORDER BY path; +``` + +## Query Optimization + +### 1. Use Indexes Effectively + +```sql +-- Create index on frequently queried columns +CREATE INDEX idx_users_email ON users(email); +CREATE INDEX idx_orders_customer_date ON orders(customer_id, order_date); + +-- Composite index (order matters!) +CREATE INDEX idx_orders_composite +ON orders(status, customer_id, order_date); + +-- ✅ This query uses the index +SELECT * FROM orders +WHERE status = 'pending' + AND customer_id = 123 + AND order_date > '2024-01-01'; + +-- ❌ This doesn't use the index (skips first column) +SELECT * FROM orders +WHERE customer_id = 123; + +-- Partial/Filtered index (smaller, faster) +CREATE INDEX idx_active_users +ON users(email) +WHERE status = 'active'; + +-- Covering index (includes all needed columns) +CREATE INDEX idx_users_covering +ON users(email) +INCLUDE (name, created_at); +``` + +### 2. Avoid SELECT * + +```sql +-- ❌ Bad: Retrieves all columns +SELECT * FROM users; + +-- ✅ Good: Select only needed columns +SELECT user_id, email, name FROM users; + +-- ✅ Good: More efficient for joins +SELECT + u.user_id, + u.email, + o.order_id, + o.total +FROM users u +INNER JOIN orders o ON u.user_id = o.user_id; +``` + +### 3. Optimize JOINs + +```sql +-- ❌ Bad: Filtering after JOIN +SELECT u.name, o.total +FROM users u +LEFT JOIN orders o ON u.user_id = o.user_id +WHERE o.status = 'completed'; + +-- ✅ Good: Filter before JOIN +SELECT u.name, o.total +FROM users u +INNER JOIN ( + SELECT user_id, total + FROM orders + WHERE status = 'completed' +) o ON u.user_id = o.user_id; + +-- ✅ Even better: Use WHERE with INNER JOIN +SELECT u.name, o.total +FROM users u +INNER JOIN orders o ON u.user_id = o.user_id +WHERE o.status = 'completed'; +``` + +### 4. 
Use EXISTS Instead of IN + +```sql +-- ❌ Slower: IN with subquery +SELECT name FROM customers +WHERE customer_id IN ( + SELECT customer_id FROM orders WHERE total > 1000 +); + +-- ✅ Faster: EXISTS +SELECT name FROM customers c +WHERE EXISTS ( + SELECT 1 FROM orders o + WHERE o.customer_id = c.customer_id + AND o.total > 1000 +); +``` + +### 5. Avoid Functions on Indexed Columns + +```sql +-- ❌ Bad: Function prevents index usage +SELECT * FROM users +WHERE LOWER(email) = 'john@example.com'; + +-- ✅ Good: Use functional index +CREATE INDEX idx_users_email_lower ON users(LOWER(email)); + +-- Or use case-insensitive collation +SELECT * FROM users +WHERE email = 'john@example.com' COLLATE utf8_general_ci; +``` + +### 6. Limit Result Sets + +```sql +-- ✅ Use LIMIT/TOP for pagination +SELECT * FROM orders +ORDER BY created_at DESC +LIMIT 20 OFFSET 0; + +-- ✅ Use WHERE to reduce rows early +SELECT * FROM orders +WHERE created_at > NOW() - INTERVAL '7 days' +ORDER BY created_at DESC; +``` + +### 7. 
Batch Operations + +```sql +-- ❌ Bad: Multiple single inserts +INSERT INTO users (name, email) VALUES ('User1', 'user1@example.com'); +INSERT INTO users (name, email) VALUES ('User2', 'user2@example.com'); + +-- ✅ Good: Batch insert +INSERT INTO users (name, email) VALUES + ('User1', 'user1@example.com'), + ('User2', 'user2@example.com'), + ('User3', 'user3@example.com'); + +-- ✅ Good: Batch update +UPDATE products +SET price = price * 1.1 +WHERE category IN ('Electronics', 'Computers'); +``` + +## EXPLAIN Plans + +### PostgreSQL + +```sql +-- Simple EXPLAIN +EXPLAIN +SELECT * FROM orders WHERE customer_id = 123; + +-- EXPLAIN ANALYZE (actually runs query) +EXPLAIN ANALYZE +SELECT + c.name, + COUNT(o.order_id) AS order_count +FROM customers c +LEFT JOIN orders o ON c.customer_id = o.customer_id +GROUP BY c.customer_id, c.name; + +-- Look for: +-- - Seq Scan (bad, needs index) +-- - Index Scan (good) +-- - Bitmap Heap Scan (good for multiple rows) +-- - Hash Join vs Nested Loop +-- - High cost numbers +``` + +### MySQL + +```sql +-- EXPLAIN +EXPLAIN +SELECT * FROM orders WHERE customer_id = 123; + +-- EXPLAIN ANALYZE (MySQL 8.0.18+) +EXPLAIN ANALYZE +SELECT * FROM orders WHERE customer_id = 123; + +-- Look for: +-- - type: ALL (table scan, bad) +-- - type: index (index scan, good) +-- - type: ref (index lookup, great) +-- - Extra: Using filesort (may need index) +-- - Extra: Using temporary (may need optimization) +``` + +## Indexing Strategies + +### When to Index + +**✅ Index these columns:** +- Primary keys (automatic) +- Foreign keys +- Columns in WHERE clauses +- Columns in JOIN conditions +- Columns in ORDER BY +- Columns in GROUP BY + +**❌ Don't index:** +- Small tables (< 1000 rows) +- Columns with low cardinality (few distinct values) +- Frequently updated columns +- Large text/blob columns + +### Index Types + +```sql +-- B-Tree (default, most common) +CREATE INDEX idx_users_email ON users(email); + +-- Hash index (equality only, PostgreSQL) +CREATE INDEX 
idx_users_email_hash ON users USING HASH(email); + +-- GIN (full-text search, arrays, JSONB) +CREATE INDEX idx_posts_content_gin +ON posts USING GIN(to_tsvector('english', content)); + +-- GiST (geometric, full-text) +CREATE INDEX idx_locations_gist +ON locations USING GIST(coordinates); + +-- Partial index (filtered) +CREATE INDEX idx_orders_pending +ON orders(customer_id) +WHERE status = 'pending'; + +-- Expression index +CREATE INDEX idx_users_email_domain +ON users((email ~~ '%@gmail.com%')); +``` + +### Composite Index Order + +```sql +-- Index column order matters! +CREATE INDEX idx_orders_search +ON orders(status, customer_id, created_at); + +-- ✅ Uses index (left-most column) +WHERE status = 'completed' + +-- ✅ Uses index (left-most columns) +WHERE status = 'completed' AND customer_id = 123 + +-- ✅ Uses full index +WHERE status = 'completed' + AND customer_id = 123 + AND created_at > '2024-01-01' + +-- ❌ Doesn't use index (skips first column) +WHERE customer_id = 123 + +-- ❌ Doesn't use index (skips first column) +WHERE created_at > '2024-01-01' +``` + +## MongoDB Queries + +### Find Queries + +```javascript +// Basic find +db.users.find({ status: 'active' }) + +// Find with projection +db.users.find( + { status: 'active' }, + { name: 1, email: 1, _id: 0 } +) + +// Find with operators +db.orders.find({ + total: { $gt: 100, $lt: 1000 }, + status: { $in: ['pending', 'processing'] }, + 'customer.city': 'New York' +}) + +// Find with sort and limit +db.products.find({ category: 'Electronics' }) + .sort({ price: -1 }) + .limit(10) + +// Count +db.users.countDocuments({ status: 'active' }) +``` + +### Aggregation Pipeline + +```javascript +// Group and count +db.orders.aggregate([ + { $match: { status: 'completed' } }, + { $group: { + _id: '$customer_id', + total_orders: { $sum: 1 }, + total_spent: { $sum: '$total' }, + avg_order: { $avg: '$total' } + }}, + { $sort: { total_spent: -1 } }, + { $limit: 10 } +]) + +// Lookup (JOIN) +db.orders.aggregate([ + { 
$lookup: { + from: 'customers', + localField: 'customer_id', + foreignField: '_id', + as: 'customer' + }}, + { $unwind: '$customer' }, + { $project: { + order_id: 1, + total: 1, + 'customer.name': 1, + 'customer.email': 1 + }} +]) + +// Complex aggregation +db.sales.aggregate([ + // Filter + { $match: { + date: { $gte: ISODate('2024-01-01') } + }}, + + // Add computed fields + { $addFields: { + month: { $month: '$date' }, + year: { $year: '$date' } + }}, + + // Group by month + { $group: { + _id: { year: '$year', month: '$month' }, + total_sales: { $sum: '$amount' }, + order_count: { $sum: 1 }, + avg_sale: { $avg: '$amount' } + }}, + + // Sort + { $sort: { '_id.year': 1, '_id.month': 1 } }, + + // Reshape + { $project: { + _id: 0, + date: { + $concat: [ + { $toString: '$_id.year' }, + '-', + { $toString: '$_id.month' } + ] + }, + total_sales: 1, + order_count: 1, + avg_sale: { $round: ['$avg_sale', 2] } + }} +]) +``` + +### MongoDB Indexes + +```javascript +// Single field index +db.users.createIndex({ email: 1 }) + +// Compound index +db.orders.createIndex({ customer_id: 1, created_at: -1 }) + +// Unique index +db.users.createIndex({ email: 1 }, { unique: true }) + +// Partial index +db.orders.createIndex( + { customer_id: 1 }, + { partialFilterExpression: { status: 'active' } } +) + +// Text index +db.products.createIndex({ name: 'text', description: 'text' }) + +// TTL index (auto-delete after time) +db.sessions.createIndex( + { created_at: 1 }, + { expireAfterSeconds: 3600 } +) + +// List indexes +db.users.getIndexes() + +// Analyze query performance +db.orders.find({ customer_id: 123 }).explain('executionStats') +``` + +## GraphQL Queries + +```graphql +# Basic query +query { + users { + id + name + email + } +} + +# Query with arguments +query { + user(id: "123") { + name + email + orders { + id + total + status + } + } +} + +# Query with variables +query GetUser($userId: ID!) 
{ + user(id: $userId) { + name + email + orders(limit: 10, status: COMPLETED) { + id + total + createdAt + } + } +} + +# Fragments (reusable fields) +fragment UserFields on User { + id + name + email + createdAt +} + +query { + user(id: "123") { + ...UserFields + orders { + id + total + } + } +} + +# Avoid N+1 queries with DataLoader +query { + orders { + id + total + customer { # Batched by DataLoader + name + email + } + } +} +``` + +## Common Anti-Patterns + +### ❌ N+1 Query Problem + +```sql +-- Bad: N+1 queries +SELECT * FROM customers; -- 1 query +-- Then for each customer: +SELECT * FROM orders WHERE customer_id = ?; -- N queries + +-- Good: Single JOIN query +SELECT + c.customer_id, + c.name, + o.order_id, + o.total +FROM customers c +LEFT JOIN orders o ON c.customer_id = o.customer_id; +``` + +### ❌ Using OR on Different Columns + +```sql +-- Bad: Can't use indexes effectively +SELECT * FROM products +WHERE name = 'iPhone' OR category = 'Electronics'; + +-- Good: Use UNION +SELECT * FROM products WHERE name = 'iPhone' +UNION +SELECT * FROM products WHERE category = 'Electronics'; +``` + +### ❌ Implicit Type Conversion + +```sql +-- Bad: '123' is string, user_id is integer +SELECT * FROM users WHERE user_id = '123'; + +-- Good: Use correct type +SELECT * FROM users WHERE user_id = 123; +``` + +## Query Performance Checklist + +- [ ] Select only needed columns (no SELECT *) +- [ ] Add indexes to WHERE/JOIN/ORDER BY columns +- [ ] Use EXPLAIN to analyze query plan +- [ ] Avoid functions on indexed columns +- [ ] Use EXISTS instead of IN for subqueries +- [ ] Batch INSERT/UPDATE operations +- [ ] Use appropriate JOIN types +- [ ] Filter early (WHERE before JOIN) +- [ ] Use LIMIT for large result sets +- [ ] Monitor slow query logs +- [ ] Update statistics regularly +- [ ] Avoid SELECT DISTINCT when possible +- [ ] Use covering indexes when appropriate + +## Resources + +- **PostgreSQL**: https://www.postgresql.org/docs/current/performance-tips.html +- 
**MySQL**: https://dev.mysql.com/doc/refman/8.0/en/optimization.html +- **MongoDB**: https://docs.mongodb.com/manual/core/query-optimization/ +- **Use The Index, Luke**: https://use-the-index-luke.com/ + +--- + +**"Premature optimization is the root of all evil, but slow queries are the root of all frustration."** diff --git a/skills/query-expert/references/indexing.md b/skills/query-expert/references/indexing.md new file mode 100644 index 0000000..7883def --- /dev/null +++ b/skills/query-expert/references/indexing.md @@ -0,0 +1,7 @@ +# Indexing Strategies - See SKILL.md for complete indexing guide including: +- When to create indexes +- Composite index column order +- Covering indexes +- Partial/filtered indexes +- Index types (B-Tree, Hash, GIN, GiST) +- Index maintenance diff --git a/skills/query-expert/references/nosql_queries.md b/skills/query-expert/references/nosql_queries.md new file mode 100644 index 0000000..f08e176 --- /dev/null +++ b/skills/query-expert/references/nosql_queries.md @@ -0,0 +1,6 @@ +# NoSQL Queries - See SKILL.md for complete NoSQL guide including: +- MongoDB find() queries +- MongoDB aggregation pipelines +- MongoDB indexes +- GraphQL queries +- Query optimization for NoSQL diff --git a/skills/query-expert/references/optimization.md b/skills/query-expert/references/optimization.md new file mode 100644 index 0000000..38028bf --- /dev/null +++ b/skills/query-expert/references/optimization.md @@ -0,0 +1,9 @@ +# Query Optimization - See SKILL.md for complete optimization guide including: +- Avoid SELECT * +- Use indexes effectively +- Optimize JOINs +- EXISTS vs IN +- Function usage on indexed columns +- LIMIT and pagination +- Batch operations +- Common anti-patterns diff --git a/skills/query-expert/references/sql_patterns.md b/skills/query-expert/references/sql_patterns.md new file mode 100644 index 0000000..f028112 --- /dev/null +++ b/skills/query-expert/references/sql_patterns.md @@ -0,0 +1,7 @@ +# SQL Query Patterns - See SKILL.md for 
complete SQL guide including: +- SELECT queries with JOINs +- Aggregation with GROUP BY +- Window functions +- CTEs (Common Table Expressions) +- Subqueries +- Best practices and anti-patterns diff --git a/skills/query-expert/scripts/analyze_performance.sh b/skills/query-expert/scripts/analyze_performance.sh new file mode 100755 index 0000000..4555661 --- /dev/null +++ b/skills/query-expert/scripts/analyze_performance.sh @@ -0,0 +1,297 @@ +#!/bin/bash + +# Query Expert - Performance Analyzer +# Analyze EXPLAIN output and provide optimization recommendations + +set -e + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +print_info() { + echo -e "${BLUE}ℹ $1${NC}" +} + +print_warning() { + echo -e "${YELLOW}⚠ $1${NC}" +} + +print_success() { + echo -e "${GREEN}✓ $1${NC}" +} + +echo "" +echo "╔════════════════════════════════════════════════════════════╗" +echo "║ ║" +echo "║ Query Expert - Performance Analyzer ║" +echo "║ ║" +echo "╚════════════════════════════════════════════════════════════╝" +echo "" + +print_info "Performance Analysis Guide" +echo "" + +cat << 'EOF' +## PostgreSQL EXPLAIN Analysis + +### Run EXPLAIN +```sql +EXPLAIN ANALYZE +SELECT * FROM orders WHERE customer_id = 123; +``` + +### What to Look For: + +**Seq Scan (Table Scan) ❌** +→ Reading entire table +→ Fix: Add index on filtered column +→ CREATE INDEX idx_orders_customer ON orders(customer_id); + +**Index Scan ✅** +→ Using index efficiently +→ Good performance for specific rows + +**Bitmap Heap Scan ✅** +→ Good for returning multiple rows +→ Efficient index usage + +**Nested Loop ⚠️** +→ Can be slow with large datasets +→ Consider: Hash Join or Merge Join + +**Hash Join ✅** +→ Good for large joins +→ Requires memory + +**Cost Numbers** +→ Higher = slower +→ Compare before/after optimization +→ cost=0.00..35.50 rows=10 + +**Actual Time** +→ Real execution time +→ actual time=0.023..0.156 rows=10 loops=1 + +--- + +## MySQL EXPLAIN Analysis + +### Run 
EXPLAIN +```sql +EXPLAIN SELECT * FROM orders WHERE customer_id = 123; +``` + +### Type Column: + +**ALL ❌** +→ Full table scan +→ Fix: Add index + +**index ⚠️** +→ Full index scan +→ Better than ALL, but could improve + +**range ✅** +→ Index range scan +→ Good for WHERE with >, <, BETWEEN + +**ref ✅✅** +→ Index lookup +→ Excellent performance + +**eq_ref ✅✅✅** +→ Unique index lookup +→ Best performance + +**const ✅✅✅** +→ Constant lookup (primary key) +→ Fastest possible + +### Extra Column: + +**Using filesort ⚠️** +→ Sorting in memory/disk +→ Fix: Add index on ORDER BY columns + +**Using temporary ⚠️** +→ Creating temporary table +→ Fix: Optimize GROUP BY or DISTINCT + +**Using index ✅** +→ Covering index (index-only scan) +→ Excellent performance + +**Using where ✅** +→ Filtering after read +→ Normal for WHERE clauses + +--- + +## MongoDB Explain Analysis + +### Run Explain +```javascript +db.orders.find({ customer_id: 123 }).explain("executionStats") +``` + +### What to Look For: + +**COLLSCAN ❌** +→ Full collection scan +→ Fix: Create index +→ db.orders.createIndex({ customer_id: 1 }) + +**IXSCAN ✅** +→ Index scan +→ Good performance + +**executionTimeMillis** +→ Total execution time +→ < 100ms good, > 1000ms needs optimization + +**nReturned vs totalDocsExamined** +→ Efficiency ratio +→ Ideally close to 1:1 +→ If totalDocsExamined >> nReturned, add index + +**Index Usage** +→ indexName: "customer_id_1" ✅ +→ indexName: null ❌ (no index used) + +--- + +## Index Recommendations + +### When to Create Index: + +1. **WHERE Clause** + CREATE INDEX idx_table_column ON table(column); + +2. **JOIN Columns** + CREATE INDEX idx_table_join_col ON table(join_column); + +3. **ORDER BY** + CREATE INDEX idx_table_sort ON table(sort_column); + +4. **Composite Index (order matters!)** + CREATE INDEX idx_multi ON table(col1, col2, col3); + → Works for: col1 | col1,col2 | col1,col2,col3 + → NOT for: col2 | col3 | col2,col3 + +5. 
**Covering Index** + CREATE INDEX idx_covering ON table(filter_col) INCLUDE (select_cols); + → Index contains all needed columns + → Fastest possible (index-only scan) + +### When NOT to Index: + +- Small tables (< 1000 rows) +- Columns with low cardinality (few distinct values) +- Frequently updated columns +- Large text/blob columns + +--- + +## Query Optimization Checklist + +Performance Issues: +[ ] Check EXPLAIN plan +[ ] Look for table scans (Seq Scan, ALL, COLLSCAN) +[ ] Identify missing indexes +[ ] Check JOIN types (Nested Loop on large tables) +[ ] Look for filesort or temporary tables +[ ] Verify index usage (Using index) + +Optimizations: +[ ] Create indexes on WHERE columns +[ ] Create indexes on JOIN columns +[ ] Use composite indexes (correct order) +[ ] Add covering indexes for frequent queries +[ ] Use LIMIT to reduce result set +[ ] Avoid SELECT * (select only needed columns) +[ ] Avoid functions on indexed columns +[ ] Use EXISTS instead of IN (subqueries) +[ ] Filter early (before JOIN) +[ ] Use appropriate JOIN type + +Monitoring: +[ ] Run EXPLAIN ANALYZE before optimization +[ ] Create indexes +[ ] Run EXPLAIN ANALYZE after optimization +[ ] Compare execution time and cost +[ ] Test with production-like data volume +[ ] Monitor slow query log + +--- + +## Example Optimization + +### Before (Slow) +```sql +-- EXPLAIN shows: Seq Scan, cost=1000.00 +SELECT * FROM orders WHERE customer_id = 123; +``` + +### Optimization Steps +```sql +-- 1. Create index +CREATE INDEX idx_orders_customer ON orders(customer_id); + +-- 2. Optimize query (avoid SELECT *) +SELECT order_id, total, created_at +FROM orders +WHERE customer_id = 123 +ORDER BY created_at DESC +LIMIT 100; + +-- 3. Check improvement +EXPLAIN ANALYZE +SELECT order_id, total, created_at +FROM orders +WHERE customer_id = 123 +ORDER BY created_at DESC +LIMIT 100; +``` + +### After (Fast) +``` +→ Index Scan using idx_orders_customer +→ cost=0.29..15.50 (95% improvement!) 
+→ actual time=0.015..0.023 +``` + +--- + +## Tools + +**PostgreSQL:** +- EXPLAIN ANALYZE +- pg_stat_statements extension +- pgBadger (log analyzer) + +**MySQL:** +- EXPLAIN +- SHOW PROFILE +- MySQL Workbench Performance Dashboard + +**MongoDB:** +- explain("executionStats") +- MongoDB Compass (GUI) +- Database Profiler + +--- + +EOF + +print_success "Performance analysis guide displayed" +echo "" +print_info "Next Steps:" +echo " 1. Run EXPLAIN on your slow query" +echo " 2. Identify the bottleneck (table scan, no index, etc.)" +echo " 3. Apply recommended optimization" +echo " 4. Re-run EXPLAIN to verify improvement" +echo " 5. Test with production data volume" +echo "" diff --git a/skills/query-expert/scripts/generate_query.sh b/skills/query-expert/scripts/generate_query.sh new file mode 100755 index 0000000..13c7352 --- /dev/null +++ b/skills/query-expert/scripts/generate_query.sh @@ -0,0 +1,436 @@ +#!/bin/bash + +# Query Expert - Query Generator +# Generate optimized database queries with best practices + +set -e + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' + +# Helper functions +print_success() { + echo -e "${GREEN}✓ $1${NC}" +} + +print_info() { + echo -e "${BLUE}ℹ $1${NC}" +} + +print_warning() { + echo -e "${YELLOW}⚠ $1${NC}" +} + +prompt_input() { + local prompt="$1" + local var_name="$2" + local required="${3:-false}" + + while true; do + echo -e "${BLUE}${prompt}${NC}" + read -r input + + if [ -z "$input" ] && [ "$required" = true ]; then + echo -e "${RED}This field is required.${NC}" + continue + fi + + eval "$var_name='$input'" + break + done +} + +prompt_select() { + local prompt="$1" + local var_name="$2" + shift 2 + local options=("$@") + + echo -e "${BLUE}${prompt}${NC}" + PS3="Select (1-${#options[@]}): " + select opt in "${options[@]}"; do + if [ -n "$opt" ]; then + eval "$var_name='$opt'" + break + else + echo -e "${RED}Invalid selection.${NC}" + fi + done +} + +# Banner 
+echo "" +echo "╔════════════════════════════════════════════════════════════╗" +echo "║ ║" +echo "║ Query Expert - Query Generator ║" +echo "║ ║" +echo "╚════════════════════════════════════════════════════════════╝" +echo "" + +# Step 1: Database Type +print_info "Step 1/5: Database Type" +prompt_select "Which database?" DB_TYPE \ + "PostgreSQL" \ + "MySQL" \ + "SQLite" \ + "SQL Server" \ + "MongoDB" \ + "GraphQL" + +# Step 2: Query Type +print_info "Step 2/5: Query Type" + +case $DB_TYPE in + "MongoDB") + prompt_select "What type of query?" QUERY_TYPE \ + "Find" \ + "Aggregation" \ + "Update" \ + "Insert" \ + "Delete" + ;; + "GraphQL") + prompt_select "What type of query?" QUERY_TYPE \ + "Query" \ + "Mutation" \ + "Subscription" + ;; + *) + prompt_select "What type of query?" QUERY_TYPE \ + "SELECT" \ + "INSERT" \ + "UPDATE" \ + "DELETE" \ + "JOIN" \ + "Aggregate (GROUP BY)" \ + "Window Function" \ + "CTE (WITH)" + ;; +esac + +# Step 3: Table/Collection +print_info "Step 3/5: Target Table/Collection" +if [ "$DB_TYPE" = "MongoDB" ]; then + prompt_input "Collection name (e.g., users, orders):" TABLE_NAME true +else + prompt_input "Table name (e.g., users, orders):" TABLE_NAME true +fi + +# Step 4: Columns/Fields +print_info "Step 4/5: Columns/Fields" +prompt_input "Columns to select (comma-separated, or * for all):" COLUMNS +COLUMNS=${COLUMNS:-"*"} + +# Step 5: Conditions +print_info "Step 5/5: Conditions (optional)" +prompt_input "WHERE conditions (e.g., status = 'active'):" CONDITIONS + +# Generate query based on selections +generate_sql_select() { + cat << EOF + +-- Generated SELECT Query +-- Database: $DB_TYPE +-- Optimized for performance + +SELECT + ${COLUMNS//,/,${'\n'} } +FROM $TABLE_NAME +EOF + + if [ -n "$CONDITIONS" ]; then + echo "WHERE $CONDITIONS" + fi + + cat << 'EOF' +-- Optional: Add ORDER BY +-- ORDER BY created_at DESC +-- Optional: Add LIMIT +-- LIMIT 100; +EOF + + echo "" + print_info "Optimization Tips:" + echo " • Select only needed columns 
(avoid SELECT *)" + echo " • Add index on WHERE columns: CREATE INDEX idx_${TABLE_NAME}_${CONDITIONS%% *} ON $TABLE_NAME(${CONDITIONS%% *})" + echo " • Use LIMIT for large result sets" + echo " • Add ORDER BY for consistent results" +} + +generate_sql_join() { + prompt_input "Second table name:" TABLE2 + prompt_input "JOIN column (e.g., customer_id):" JOIN_COL + + cat << EOF + +-- Generated JOIN Query +-- Database: $DB_TYPE + +SELECT + ${TABLE_NAME:0:1}.${COLUMNS//,/,${'\n'} ${TABLE_NAME:0:1}.} +FROM $TABLE_NAME ${TABLE_NAME:0:1} +INNER JOIN $TABLE2 ${TABLE2:0:1} + ON ${TABLE_NAME:0:1}.$JOIN_COL = ${TABLE2:0:1}.$JOIN_COL +EOF + + if [ -n "$CONDITIONS" ]; then + echo "WHERE $CONDITIONS" + fi + + echo ";" + echo "" + print_info "JOIN Types:" + echo " • INNER JOIN - Only matching rows" + echo " • LEFT JOIN - All left rows + matching right" + echo " • RIGHT JOIN - All right rows + matching left" + echo " • FULL OUTER JOIN - All rows from both" + echo "" + print_info "Optimization:" + echo " • Add indexes on JOIN columns" + echo " • Filter early with WHERE" + echo " • Use INNER JOIN when possible" +} + +generate_sql_aggregate() { + prompt_input "GROUP BY columns (comma-separated):" GROUP_COLS + prompt_input "Aggregate function (e.g., COUNT(*), SUM(amount)):" AGG_FUNC + + cat << EOF + +-- Generated Aggregate Query +-- Database: $DB_TYPE + +SELECT + ${GROUP_COLS//,/,${'\n'} }, + $AGG_FUNC AS total +FROM $TABLE_NAME +EOF + + if [ -n "$CONDITIONS" ]; then + echo "WHERE $CONDITIONS" + fi + + cat << EOF +GROUP BY ${GROUP_COLS//,/,${'\n'} } +-- Optional: Add HAVING for aggregate filters +-- HAVING COUNT(*) > 10 +ORDER BY total DESC; +EOF + + echo "" + print_info "Aggregate Functions:" + echo " • COUNT(*) - Count rows" + echo " • SUM(column) - Sum values" + echo " • AVG(column) - Average" + echo " • MIN/MAX(column) - Min/Max values" +} + +generate_sql_cte() { + cat << EOF + +-- Generated CTE (Common Table Expression) +-- Database: $DB_TYPE + +WITH ${TABLE_NAME}_filtered AS ( + 
SELECT
+        ${COLUMNS//,/, }
+    FROM $TABLE_NAME
+EOF
+
+    if [ -n "$CONDITIONS" ]; then
+        echo "    WHERE $CONDITIONS"
+    fi
+
+    cat << EOF
+)
+SELECT *
+FROM ${TABLE_NAME}_filtered
+-- Add JOINs or additional filtering here
+;
+EOF
+
+    echo ""
+    print_info "CTE Benefits:"
+    echo "  • Improves readability"
+    echo "  • Reusable within same query"
+    echo "  • Supports recursion"
+    echo "  • Better than subqueries in many cases"
+}
+
+generate_mongodb_find() {
+    cat << EOF
+
+// Generated MongoDB Find Query
+// Collection: $TABLE_NAME
+
+db.$TABLE_NAME.find(
+EOF
+
+    if [ -n "$CONDITIONS" ]; then
+        echo "    { $CONDITIONS },"
+    else
+        echo "    {},"
+    fi
+
+    if [ "$COLUMNS" != "*" ]; then
+        echo "    { ${COLUMNS//,/: 1, }: 1, _id: 0 }"
+    else
+        echo "    {}"
+    fi
+
+    cat << 'EOF'
+)
+.sort({ created_at: -1 })
+.limit(100);
+EOF
+
+    echo ""
+    print_info "MongoDB Optimization:"
+    echo "  • Create index: db.$TABLE_NAME.createIndex({ field: 1 })"
+    echo "  • Use projection to limit fields"
+    echo "  • Add sort and limit for performance"
+    echo "  • Use explain(): .explain('executionStats')"
+}
+
+generate_mongodb_aggregation() {
+    cat << EOF
+
+// Generated MongoDB Aggregation Pipeline
+// Collection: $TABLE_NAME
+
+db.$TABLE_NAME.aggregate([
+    // Stage 1: Match (filter)
+    { \$match: {
+EOF
+
+    if [ -n "$CONDITIONS" ]; then
+        echo "        $CONDITIONS"
+    fi
+
+    cat << 'EOF'
+    }},
+
+    // Stage 2: Group (aggregate)
+    { $group: {
+        _id: '$field',
+        count: { $sum: 1 },
+        total: { $sum: '$amount' },
+        average: { $avg: '$amount' }
+    }},
+
+    // Stage 3: Sort
+    { $sort: { total: -1 } },
+
+    // Stage 4: Limit
+    { $limit: 10 }
+]);
+EOF
+
+    echo ""
+    print_info "Aggregation Stages:"
+    echo "  • \$match - Filter documents"
+    echo "  • \$group - Group and aggregate"
+    echo "  • \$project - Reshape documents"
+    echo "  • \$lookup - JOIN collections"
+    echo "  • \$sort - Sort results"
+    echo "  • \$limit - Limit results"
+}
+
+generate_graphql_query() {
+    cat << EOF
+
+# Generated GraphQL Query
+
+query Get${TABLE_NAME^} {
+    
$TABLE_NAME { + ${COLUMNS//,/ + } + } +} + +# With variables: +query Get${TABLE_NAME^}(\$id: ID!) { + ${TABLE_NAME}(id: \$id) { + ${COLUMNS//,/ + } + } +} +EOF + + echo "" + print_info "GraphQL Best Practices:" + echo " • Request only needed fields" + echo " • Use fragments for reusable fields" + echo " • Implement DataLoader to avoid N+1" + echo " • Add pagination (first, after)" +} + +# Generate based on query type +case $DB_TYPE in + "MongoDB") + case $QUERY_TYPE in + "Find") + generate_mongodb_find + ;; + "Aggregation") + generate_mongodb_aggregation + ;; + esac + ;; + "GraphQL") + generate_graphql_query + ;; + *) + case $QUERY_TYPE in + "SELECT") + generate_sql_select + ;; + "JOIN") + generate_sql_join + ;; + "Aggregate (GROUP BY)") + generate_sql_aggregate + ;; + "CTE (WITH)") + generate_sql_cte + ;; + esac + ;; +esac + +# Summary +echo "" +echo "╔════════════════════════════════════════════════════════════╗" +echo "║ Query Generated ║" +echo "╚════════════════════════════════════════════════════════════╝" +echo "" +print_success "Query generated for $DB_TYPE" +print_success "Type: $QUERY_TYPE" +echo "" +print_info "Next steps:" +echo " 1. Review and test the query" +echo " 2. Add appropriate indexes" +echo " 3. Use EXPLAIN to analyze performance" +echo " 4. Add error handling in production" +echo " 5. 
Monitor query performance"
+echo ""
+print_info "Performance Tools:"
+case $DB_TYPE in
+    "PostgreSQL")
+        echo "  • EXPLAIN ANALYZE query"
+        echo "  • pg_stat_statements extension"
+        ;;
+    "MySQL")
+        echo "  • EXPLAIN query"
+        echo "  • SHOW PROFILE"
+        ;;
+    "MongoDB")
+        echo "  • query.explain('executionStats')"
+        echo "  • db.collection.getIndexes()"
+        ;;
+esac
+echo ""
diff --git a/skills/query-expert/scripts/optimize_query.sh b/skills/query-expert/scripts/optimize_query.sh
new file mode 100755
index 0000000..e6dfcfe
--- /dev/null
+++ b/skills/query-expert/scripts/optimize_query.sh
@@ -0,0 +1,196 @@
+#!/bin/bash
+
+# Query Expert - Query Optimizer
+# Analyze and optimize SQL queries
+
+set -e
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+MAGENTA='\033[0;35m'
+NC='\033[0m'
+
+PASS_COUNT=0
+WARN_COUNT=0
+FAIL_COUNT=0
+
+print_success() {
+    echo -e "${GREEN}✓ GOOD${NC} $1"
+    PASS_COUNT=$((PASS_COUNT + 1))
+}
+
+print_warning() {
+    echo -e "${YELLOW}⚠ IMPROVE${NC} $1"
+    WARN_COUNT=$((WARN_COUNT + 1))
+}
+
+print_error() {
+    echo -e "${RED}✗ ISSUE${NC} $1"
+    FAIL_COUNT=$((FAIL_COUNT + 1))
+}
+
+print_info() {
+    echo -e "${BLUE}ℹ INFO${NC} $1"
+}
+
+print_section() {
+    echo ""
+    echo -e "${MAGENTA}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+    echo -e "${MAGENTA}$1${NC}"
+    echo -e "${MAGENTA}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+}
+
+echo ""
+echo "╔════════════════════════════════════════════════════════════╗"
+echo "║                                                            ║"
+echo "║               Query Expert - Query Optimizer               ║"
+echo "║                                                            ║"
+echo "╚════════════════════════════════════════════════════════════╝"
+echo ""
+
+if [ -z "$1" ]; then
+    print_info "Usage: $0 <query_file>"
+    print_info "Example: $0 slow_query.sql"
+    exit 1
+fi
+
+QUERY_FILE="$1"
+
+if [ ! -f "$QUERY_FILE" ]; then
+    echo -e "${RED}File not found: $QUERY_FILE${NC}"
+    exit 1
+fi
+
+QUERY=$(cat "$QUERY_FILE")
+
+# Section 1: SELECT * Detection
+print_section "1. 
COLUMN SELECTION"
+
+if echo "$QUERY" | grep -qi "SELECT \*"; then
+    print_error "Using SELECT * (selects all columns)"
+    echo "  Fix: SELECT only needed columns"
+    echo "  SELECT user_id, name, email FROM users;"
+else
+    print_success "Selecting specific columns"
+fi
+
+# Section 2: Index Usage
+print_section "2. INDEX OPPORTUNITIES"
+
+if echo "$QUERY" | grep -qi "WHERE"; then
+    WHERE_COLS=$(echo "$QUERY" | grep -oi "WHERE [^;]*" | grep -o "[a-zA-Z_][a-zA-Z0-9_]*\s*=" | sed 's/[[:space:]]*=$//')
+
+    if [ -n "$WHERE_COLS" ]; then
+        print_info "Columns in WHERE clause should have indexes:"
+        for col in $WHERE_COLS; do
+            echo "  CREATE INDEX idx_table_$col ON table($col);"
+        done
+    fi
+fi
+
+if echo "$QUERY" | grep -qi "JOIN.*ON"; then
+    print_info "JOIN columns should have indexes:"
+    echo "  CREATE INDEX idx_table_join_col ON table(join_col);"
+fi
+
+if echo "$QUERY" | grep -qi "ORDER BY"; then
+    print_info "ORDER BY columns benefit from indexes:"
+    echo "  Consider composite index with WHERE + ORDER BY columns"
+fi
+
+# Section 3: JOIN Analysis
+print_section "3. JOIN OPTIMIZATION"
+
+if echo "$QUERY" | grep -qi "LEFT JOIN" && echo "$QUERY" | grep -qi "WHERE"; then
+    print_warning "LEFT JOIN with WHERE on right table"
+    echo "  Consider using INNER JOIN instead"
+fi
+
+if echo "$QUERY" | grep -qi "WHERE.*IN\s*(SELECT"; then
+    print_error "Using IN with subquery"
+    echo "  Fix: Use EXISTS or JOIN instead"
+    echo "  WHERE EXISTS (SELECT 1 FROM ...)"
+fi
+
+# Section 4: Function Usage
+print_section "4. FUNCTION ON COLUMNS"
+
+if echo "$QUERY" | grep -Eqi "WHERE.*(LOWER|UPPER|SUBSTRING|DATE|YEAR|MONTH)\s*\("; then
+    print_error "Function on indexed column in WHERE"
+    echo "  Fix: Use functional index or avoid function"
+    echo "  CREATE INDEX idx_table_lower_col ON table(LOWER(col));"
+fi
+
+# Section 5: DISTINCT Usage
+print_section "5. 
DISTINCT USAGE" + +if echo "$QUERY" | grep -qi "SELECT DISTINCT"; then + print_warning "Using DISTINCT (potentially expensive)" + echo " Consider: Is DISTINCT necessary?" + echo " Alternative: Use GROUP BY if aggregating" +fi + +# Section 6: Subqueries +print_section "6. SUBQUERY OPTIMIZATION" + +SUBQUERY_COUNT=$(echo "$QUERY" | grep -oi "SELECT" | wc -l) +if [ "$SUBQUERY_COUNT" -gt 1 ]; then + if echo "$QUERY" | grep -qi "FROM.*SELECT"; then + print_info "Contains subqueries - consider CTEs for readability" + echo " WITH cte AS (SELECT ...) SELECT ... FROM cte" + fi +fi + +# Section 7: LIMIT Usage +print_section "7. RESULT SET SIZE" + +if ! echo "$QUERY" | grep -qi "LIMIT\|TOP\|FETCH FIRST"; then + print_warning "No LIMIT clause found" + echo " Add LIMIT to prevent large result sets" + echo " SELECT ... LIMIT 100;" +fi + +# Section 8: Sorting +print_section "8. SORTING" + +if echo "$QUERY" | grep -qi "ORDER BY"; then + if ! echo "$QUERY" | grep -qi "LIMIT"; then + print_warning "ORDER BY without LIMIT" + echo " Consider adding LIMIT to reduce sort cost" + fi +fi + +# Summary +echo "" +echo "╔════════════════════════════════════════════════════════════╗" +echo "║ Optimization Summary ║" +echo "╚════════════════════════════════════════════════════════════╝" +echo "" +echo -e "${GREEN}✓ Good practices: $PASS_COUNT${NC}" +echo -e "${YELLOW}⚠ Improvements: $WARN_COUNT${NC}" +echo -e "${RED}✗ Issues found: $FAIL_COUNT${NC}" +echo "" + +TOTAL=$((PASS_COUNT + FAIL_COUNT + WARN_COUNT)) +if [ $TOTAL -gt 0 ]; then + SCORE=$(( ((PASS_COUNT * 2 + WARN_COUNT) * 100) / (TOTAL * 2) )) + echo "Query Score: $SCORE%" + echo "" +fi + +print_info "Recommended Next Steps:" +echo " 1. Run EXPLAIN ANALYZE on this query" +echo " 2. Create recommended indexes" +echo " 3. Test query performance before/after" +echo " 4. 
Monitor query in production" +echo "" + +print_info "EXPLAIN Commands:" +echo " PostgreSQL: EXPLAIN ANALYZE " +echo " MySQL: EXPLAIN " +echo " MongoDB: db.collection.find().explain('executionStats')" +echo "" + +[ $FAIL_COUNT -gt 0 ] && exit 1 || exit 0