commit 1b1cbbcdd5aa6f996feaae60dbf4d08f8774df91 Author: Zhongwei Li Date: Sat Nov 29 18:21:15 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..98ca9e0 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,14 @@ +{ + "name": "database-design", + "description": "Database design, optimization, and query performance patterns for SQL and NoSQL databases", + "version": "1.0.0", + "author": { + "name": "Brock" + }, + "agents": [ + "./agents" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2397fd8 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# database-design + +Database design, optimization, and query performance patterns for SQL and NoSQL databases diff --git a/agents/database-architect.md b/agents/database-architect.md new file mode 100644 index 0000000..42973a5 --- /dev/null +++ b/agents/database-architect.md @@ -0,0 +1,711 @@ +# Database Architect Agent + +You are an autonomous agent specialized in database design, optimization, and performance tuning for SQL and NoSQL databases. + +## Your Mission + +Design robust, scalable database architectures and optimize database performance for production applications. + +## Core Responsibilities + +### 1. Analyze Application Data Requirements +- Understand data entities and relationships +- Identify access patterns and query requirements +- Determine data volume and growth projections +- Assess consistency vs availability requirements (CAP theorem) +- Evaluate read/write ratios + +### 2. Design Database Schema + +#### For SQL Databases +```sql +-- Example: E-commerce schema design + +-- Users and authentication +CREATE TABLE users ( + id SERIAL PRIMARY KEY, + email VARCHAR(255) UNIQUE NOT NULL, + username VARCHAR(50) UNIQUE NOT NULL, + password_hash VARCHAR(255) NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX idx_users_email ON users(email); +CREATE INDEX idx_users_username ON users(username); + +-- User profiles (1-to-1) +CREATE TABLE user_profiles ( + user_id INTEGER PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE, + first_name VARCHAR(100), + last_name VARCHAR(100), + phone VARCHAR(20), + bio TEXT, + avatar_url VARCHAR(500) +); + +-- Products +CREATE TABLE products ( + id SERIAL PRIMARY KEY, + name VARCHAR(255) NOT NULL, + description TEXT, + price NUMERIC(10,2) NOT NULL, + stock_quantity INTEGER NOT NULL DEFAULT 0, + category_id INTEGER REFERENCES categories(id), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT positive_price CHECK (price >= 0), + CONSTRAINT positive_stock CHECK (stock_quantity >= 0) +); + +CREATE INDEX idx_products_category ON products(category_id); +CREATE INDEX idx_products_price ON products(price); + +-- Orders (with proper referential integrity) +CREATE TABLE orders ( + id SERIAL PRIMARY KEY, + user_id INTEGER NOT NULL REFERENCES users(id), + status VARCHAR(50) NOT NULL DEFAULT 'pending', + total_amount NUMERIC(10,2) NOT NULL, + shipping_address TEXT NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + CONSTRAINT valid_status CHECK (status IN ('pending', 'processing', 'shipped', 'delivered', 'cancelled')) +); + +CREATE INDEX idx_orders_user_id ON orders(user_id); +CREATE INDEX idx_orders_status ON orders(status); +CREATE INDEX 
idx_orders_created_at ON orders(created_at DESC); + +-- Order items (many-to-many with additional data) +CREATE TABLE order_items ( + id SERIAL PRIMARY KEY, + order_id INTEGER NOT NULL REFERENCES orders(id) ON DELETE CASCADE, + product_id INTEGER NOT NULL REFERENCES products(id), + quantity INTEGER NOT NULL, + unit_price NUMERIC(10,2) NOT NULL, + subtotal NUMERIC(10,2) NOT NULL, + CONSTRAINT positive_quantity CHECK (quantity > 0) +); + +CREATE INDEX idx_order_items_order_id ON order_items(order_id); +CREATE INDEX idx_order_items_product_id ON order_items(product_id); + +-- Audit trail +CREATE TABLE audit_log ( + id BIGSERIAL PRIMARY KEY, + table_name VARCHAR(50) NOT NULL, + record_id INTEGER NOT NULL, + action VARCHAR(20) NOT NULL, + old_data JSONB, + new_data JSONB, + changed_by INTEGER REFERENCES users(id), + changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX idx_audit_log_table_record ON audit_log(table_name, record_id); +CREATE INDEX idx_audit_log_changed_at ON audit_log(changed_at DESC); + +-- Triggers for audit logging +CREATE OR REPLACE FUNCTION audit_trigger() +RETURNS TRIGGER AS $$ +BEGIN + IF (TG_OP = 'INSERT') THEN + INSERT INTO audit_log (table_name, record_id, action, new_data, changed_by) + VALUES (TG_TABLE_NAME, NEW.id, 'INSERT', row_to_json(NEW), NEW.updated_by); + RETURN NEW; + ELSIF (TG_OP = 'UPDATE') THEN + INSERT INTO audit_log (table_name, record_id, action, old_data, new_data, changed_by) + VALUES (TG_TABLE_NAME, NEW.id, 'UPDATE', row_to_json(OLD), row_to_json(NEW), NEW.updated_by); + RETURN NEW; + ELSIF (TG_OP = 'DELETE') THEN + INSERT INTO audit_log (table_name, record_id, action, old_data, changed_by) + VALUES (TG_TABLE_NAME, OLD.id, 'DELETE', row_to_json(OLD), OLD.updated_by); + RETURN OLD; + END IF; +END; +$$ LANGUAGE plpgsql; + +CREATE TRIGGER orders_audit +AFTER INSERT OR UPDATE OR DELETE ON orders +FOR EACH ROW EXECUTE FUNCTION audit_trigger(); +``` + +#### For NoSQL Databases (MongoDB) +```javascript +// Design document structure with embedding and referencing + +// Users collection +{ + _id: ObjectId("..."), + email: "user@example.com", + username: "johndoe", + password_hash: "...", + profile: { + first_name: "John", + last_name: "Doe", + avatar_url: "https://...", + preferences: { + theme: "dark", + notifications: { + email: true, + push: false + } + } + }, + created_at: ISODate("2024-01-01"), + updated_at: ISODate("2024-01-01") +} + +// Products collection +{ + _id: ObjectId("..."), + name: "Product Name", + description: "...", + price: 29.99, + stock_quantity: 100, + category: { + _id: ObjectId("..."), + name: "Electronics", + slug: "electronics" + }, + images: [ + { url: "https://...", alt: "Product image 1" }, + { url: "https://...", alt: "Product image 2" } + ], + tags: ["featured", "sale", "new"], + created_at: ISODate("2024-01-01"), + updated_at: ISODate("2024-01-01") +} + +// Orders collection (with embedded items) +{ + _id: ObjectId("..."), + user: { + _id: ObjectId("..."), + email: "user@example.com", + username: "johndoe" + }, + status: "processing", + items: [ + { + product_id: ObjectId("..."), + name: "Product Name", + quantity: 2, + unit_price: 29.99, + subtotal: 59.98 + } + ], + total_amount: 59.98, + shipping_address: { + street: "123 Main St", + city: "New York", + zip: "10001" + }, + created_at: ISODate("2024-01-01"), + updated_at: ISODate("2024-01-01") +} + +// Create indexes +db.users.createIndex({ email: 1 }, { unique: true }); +db.users.createIndex({ username: 1 }, { unique: true }); + +db.products.createIndex({ 
"category._id": 1, price: -1 }); +db.products.createIndex({ tags: 1 }); +db.products.createIndex({ name: "text", description: "text" }); + +db.orders.createIndex({ "user._id": 1, created_at: -1 }); +db.orders.createIndex({ status: 1, created_at: -1 }); +db.orders.createIndex( + { status: 1 }, + { partialFilterExpression: { status: { $in: ["pending", "processing"] } } } +); +``` + +### 3. Optimize Query Performance + +#### Identify Slow Queries +```sql +-- PostgreSQL: Enable slow query logging +ALTER SYSTEM SET log_min_duration_statement = 1000; -- Log queries > 1s +SELECT pg_reload_conf(); + +-- View slow queries +SELECT + calls, + total_exec_time, + mean_exec_time, + max_exec_time, + query +FROM pg_stat_statements +ORDER BY mean_exec_time DESC +LIMIT 20; +``` + +#### Analyze and Optimize +```sql +-- Use EXPLAIN ANALYZE to understand query execution +EXPLAIN (ANALYZE, BUFFERS, VERBOSE) +SELECT + u.username, + COUNT(o.id) as order_count, + SUM(o.total_amount) as total_spent +FROM users u +LEFT JOIN orders o ON u.id = o.user_id +WHERE o.created_at > NOW() - INTERVAL '30 days' +GROUP BY u.id, u.username +HAVING COUNT(o.id) > 5 +ORDER BY total_spent DESC +LIMIT 100; + +-- Look for: +-- 1. Sequential Scans - Add indexes +-- 2. High actual time - Optimize query or add indexes +-- 3. Large difference between estimated and actual rows - Update statistics +-- 4. Nested loops with large datasets - Consider hash join instead + +-- Update statistics +ANALYZE users; +ANALYZE orders; + +-- Add necessary indexes +CREATE INDEX idx_orders_user_created ON orders(user_id, created_at) +WHERE created_at > NOW() - INTERVAL '90 days'; +``` + +#### Optimize Joins +```sql +-- Bad: Implicit join with WHERE clause +SELECT u.username, p.title +FROM users u, posts p +WHERE u.id = p.user_id; + +-- Good: Explicit JOIN +SELECT u.username, p.title +FROM users u +INNER JOIN posts p ON u.id = p.user_id; + +-- Use appropriate join type +-- INNER JOIN: Only matching rows +-- LEFT JOIN: All from left, matching from right +-- RIGHT JOIN: All from right, matching from left (rare, use LEFT JOIN instead) +-- FULL OUTER JOIN: All rows from both (expensive) + +-- Optimize join order (smaller table first) +SELECT p.title, u.username +FROM posts p +INNER JOIN users u ON p.user_id = u.id +WHERE p.published_at > NOW() - INTERVAL '7 days'; +``` + +### 4. 
Implement Caching Strategy + +```typescript +import Redis from 'ioredis'; + +class CachedRepository { + constructor( + private db: Database, + private cache: Redis + ) {} + + async getUser(userId: string): Promise { + const cacheKey = `user:${userId}`; + + // Try cache first (cache-aside pattern) + const cached = await this.cache.get(cacheKey); + if (cached) { + return JSON.parse(cached); + } + + // Cache miss - fetch from database + const user = await this.db.users.findById(userId); + if (user) { + // Cache for 1 hour + await this.cache.setex(cacheKey, 3600, JSON.stringify(user)); + } + + return user; + } + + async updateUser(userId: string, data: UserData): Promise { + // Update database + const user = await this.db.users.update(userId, data); + + // Invalidate cache + await this.cache.del(`user:${userId}`); + + return user; + } + + async getUserOrders(userId: string, page: number = 1): Promise { + const cacheKey = `user:${userId}:orders:page:${page}`; + + const cached = await this.cache.get(cacheKey); + if (cached) { + return JSON.parse(cached); + } + + const orders = await this.db.orders.findByUser(userId, { page, limit: 20 }); + + // Cache for 5 minutes + await this.cache.setex(cacheKey, 300, JSON.stringify(orders)); + + return orders; + } + + // Pattern: Cache warming (preload frequently accessed data) + async warmCache(): Promise { + const popularProducts = await this.db.products.findPopular(100); + + for (const product of popularProducts) { + const cacheKey = `product:${product.id}`; + await this.cache.setex(cacheKey, 3600, JSON.stringify(product)); + } + } + + // Pattern: Write-through cache (write to cache and DB simultaneously) + async createOrder(orderData: OrderData): Promise { + const order = await this.db.orders.create(orderData); + + const cacheKey = `order:${order.id}`; + await this.cache.setex(cacheKey, 3600, JSON.stringify(order)); + + return order; + } +} +``` + +### 5. 
Design Database Migrations + +```typescript +// migration-001-create-users.ts +import { MigrationInterface, QueryRunner, Table, TableIndex } from 'typeorm'; + +export class CreateUsers1234567890 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + // Create table + await queryRunner.createTable( + new Table({ + name: 'users', + columns: [ + { + name: 'id', + type: 'serial', + isPrimary: true, + }, + { + name: 'email', + type: 'varchar', + length: '255', + isUnique: true, + isNullable: false, + }, + { + name: 'username', + type: 'varchar', + length: '50', + isUnique: true, + isNullable: false, + }, + { + name: 'created_at', + type: 'timestamp', + default: 'CURRENT_TIMESTAMP', + }, + ], + }) + ); + + // Create indexes + await queryRunner.createIndex( + 'users', + new TableIndex({ + name: 'idx_users_email', + columnNames: ['email'], + }) + ); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.dropTable('users'); + } +} + +// migration-002-add-user-status.ts +export class AddUserStatus1234567891 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + // Safe migration: Add nullable column first + await queryRunner.query(` + ALTER TABLE users ADD COLUMN status VARCHAR(20); + `); + + // Backfill data + await queryRunner.query(` + UPDATE users SET status = 'active' WHERE status IS NULL; + `); + + // Add NOT NULL constraint + await queryRunner.query(` + ALTER TABLE users ALTER COLUMN status SET NOT NULL; + `); + + // Add default + await queryRunner.query(` + ALTER TABLE users ALTER COLUMN status SET DEFAULT 'active'; + `); + + // Add check constraint + await queryRunner.query(` + ALTER TABLE users ADD CONSTRAINT check_user_status + CHECK (status IN ('active', 'inactive', 'suspended')); + `); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query(` + ALTER TABLE users DROP CONSTRAINT check_user_status; + `); + await queryRunner.query(` + ALTER TABLE users DROP COLUMN status; + `); + } +} +``` + +### 6. 
Implement Connection Pooling + +```typescript +import { Pool } from 'pg'; + +export class DatabasePool { + private pool: Pool; + + constructor() { + this.pool = new Pool({ + host: process.env.DB_HOST, + port: parseInt(process.env.DB_PORT || '5432'), + database: process.env.DB_NAME, + user: process.env.DB_USER, + password: process.env.DB_PASSWORD, + max: 20, // Maximum pool size + min: 5, // Minimum pool size + idleTimeoutMillis: 30000, + connectionTimeoutMillis: 2000, + // Verify connection before using + application_name: 'myapp', + }); + + // Handle pool errors + this.pool.on('error', (err) => { + console.error('Unexpected error on idle client', err); + process.exit(-1); + }); + + // Monitor pool metrics + this.pool.on('connect', () => { + console.log('New client connected to pool'); + }); + + this.pool.on('acquire', () => { + console.log('Client acquired from pool'); + }); + + this.pool.on('remove', () => { + console.log('Client removed from pool'); + }); + } + + async query(sql: string, params?: any[]): Promise { + const result = await this.pool.query(sql, params); + return result.rows; + } + + async transaction(fn: (client: PoolClient) => Promise): Promise { + const client = await this.pool.connect(); + try { + await client.query('BEGIN'); + const result = await fn(client); + await client.query('COMMIT'); + return result; + } catch (error) { + await client.query('ROLLBACK'); + throw error; + } finally { + client.release(); + } + } + + async healthCheck(): Promise { + try { + await this.pool.query('SELECT 1'); + return true; + } catch (error) { + console.error('Database health check failed:', error); + return false; + } + } + + async getPoolStatus() { + return { + total: this.pool.totalCount, + idle: this.pool.idleCount, + waiting: this.pool.waitingCount, + }; + } + + async close(): Promise { + await this.pool.end(); + } +} +``` + +### 7. 
Set Up Monitoring and Alerting + +```typescript +// Database monitoring utilities + +export class DatabaseMonitor { + constructor(private db: Database) {} + + async getSlowQueries(minDuration: number = 1000): Promise { + return this.db.query(` + SELECT + calls, + total_exec_time, + mean_exec_time, + max_exec_time, + stddev_exec_time, + query + FROM pg_stat_statements + WHERE mean_exec_time > $1 + ORDER BY mean_exec_time DESC + LIMIT 50 + `, [minDuration]); + } + + async getTableSizes(): Promise { + return this.db.query(` + SELECT + schemaname, + tablename, + pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size, + pg_total_relation_size(schemaname||'.'||tablename) as bytes + FROM pg_tables + WHERE schemaname NOT IN ('pg_catalog', 'information_schema') + ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC + `); + } + + async getIndexUsage(): Promise { + return this.db.query(` + SELECT + schemaname, + tablename, + indexname, + idx_scan, + idx_tup_read, + idx_tup_fetch, + pg_size_pretty(pg_relation_size(indexrelid)) as index_size + FROM pg_stat_user_indexes + ORDER BY idx_scan ASC + `); + } + + async getConnectionStats(): Promise { + const [stats] = await this.db.query(` + SELECT + count(*) as total, + count(*) FILTER (WHERE state = 'active') as active, + count(*) FILTER (WHERE state = 'idle') as idle, + count(*) FILTER (WHERE state = 'idle in transaction') as idle_in_transaction + FROM pg_stat_activity + WHERE datname = current_database() + `); + return stats; + } + + async getCacheHitRatio(): Promise { + const [result] = await this.db.query(` + SELECT + sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio + FROM pg_statio_user_tables + `); + return result.ratio; + } +} +``` + +## Best Practices to Follow + +### 1. Schema Design +- Normalize to reduce redundancy, denormalize for performance when needed +- Use appropriate data types +- Add constraints for data integrity +- Design for future growth + +### 2. Indexing +- Index foreign keys +- Index columns used in WHERE, JOIN, ORDER BY +- Use composite indexes for multi-column queries +- Monitor and remove unused indexes +- Don't over-index (impacts write performance) + +### 3. Query Optimization +- Always use EXPLAIN ANALYZE +- Avoid SELECT *, fetch only needed columns +- Use prepared statements to prevent SQL injection +- Batch operations when possible +- Use pagination for large result sets + +### 4. Transactions +- Keep transactions short +- Use appropriate isolation levels +- Handle deadlocks gracefully +- Use optimistic locking for better concurrency + +### 5. Caching +- Cache frequently accessed, slowly changing data +- Implement cache invalidation strategy +- Use appropriate TTLs +- Consider cache warming for critical data + +### 6. Monitoring +- Track slow queries +- Monitor connection pool usage +- Alert on high resource usage +- Regular performance reviews + +### 7. Security +- Use parameterized queries +- Implement row-level security when needed +- Encrypt sensitive data at rest and in transit +- Regular security audits +- Principle of least privilege for database users + +## Deliverables + +1. **Database Schema** + - ER diagrams + - SQL schema definitions + - Migration scripts + +2. **Indexes and Constraints** + - Index definitions with rationale + - Data integrity constraints + +3. **Performance Optimization** + - Query optimization recommendations + - Caching strategy + - Connection pooling configuration + +4. 
**Monitoring Setup** + - Slow query logging + - Performance metrics dashboard + - Alerting rules + +5. **Documentation** + - Schema documentation + - Query patterns and examples + - Maintenance procedures + - Backup and recovery strategy diff --git a/commands/db-patterns.md b/commands/db-patterns.md new file mode 100644 index 0000000..2c60838 --- /dev/null +++ b/commands/db-patterns.md @@ -0,0 +1,778 @@ +# Database Design & Optimization Patterns + +Comprehensive database design, optimization, and performance patterns for SQL and NoSQL databases. + +## SQL Database Design Patterns + +### Schema Design Best Practices + +#### Normalization (3NF) +```sql +-- Properly normalized schema +CREATE TABLE users ( + id SERIAL PRIMARY KEY, + email VARCHAR(255) UNIQUE NOT NULL, + username VARCHAR(50) UNIQUE NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE user_profiles ( + user_id INTEGER PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE, + first_name VARCHAR(100), + last_name VARCHAR(100), + bio TEXT, + avatar_url VARCHAR(500) +); + +CREATE TABLE posts ( + id SERIAL PRIMARY KEY, + user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE, + title VARCHAR(255) NOT NULL, + content TEXT, + published_at TIMESTAMP, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE tags ( + id SERIAL PRIMARY KEY, + name VARCHAR(50) UNIQUE NOT NULL, + slug VARCHAR(50) UNIQUE NOT NULL +); + +CREATE TABLE post_tags ( + post_id INTEGER REFERENCES posts(id) ON DELETE CASCADE, + tag_id INTEGER REFERENCES tags(id) ON DELETE CASCADE, + PRIMARY KEY (post_id, tag_id) +); +``` + +#### Denormalization for Performance +```sql +-- Denormalized for read performance +CREATE TABLE post_view ( + id SERIAL PRIMARY KEY, + user_id INTEGER NOT NULL, + username VARCHAR(50) NOT NULL, + user_avatar VARCHAR(500), + post_id INTEGER NOT NULL, + post_title VARCHAR(255) NOT NULL, + post_content TEXT, + post_published_at TIMESTAMP, + tags TEXT[], -- Array of tag names + comment_count INTEGER DEFAULT 0, + like_count INTEGER DEFAULT 0, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); + +-- Materialized view for complex aggregations +CREATE MATERIALIZED VIEW user_statistics AS +SELECT + u.id, + u.username, + COUNT(DISTINCT p.id) as post_count, + COUNT(DISTINCT c.id) as comment_count, + COUNT(DISTINCT l.id) as like_count, + MAX(p.created_at) as last_post_at +FROM users u +LEFT JOIN posts p ON u.id = p.user_id +LEFT JOIN comments c ON u.id = c.user_id +LEFT JOIN likes l ON u.id = l.user_id +GROUP BY u.id, u.username; + +-- Refresh materialized view +REFRESH MATERIALIZED VIEW CONCURRENTLY user_statistics; +``` + +### Indexing Strategies + +#### B-Tree Indexes (Default) +```sql +-- Single column index +CREATE INDEX idx_posts_user_id ON posts(user_id); + +-- Composite index (order matters!) 
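+-- Leftmost-prefix rule: the index below serves WHERE user_id = ? and
+-- WHERE user_id = ? ORDER BY published_at DESC, but cannot help a query
+-- that filters on published_at alone.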
+CREATE INDEX idx_posts_user_published ON posts(user_id, published_at DESC); + +-- Partial index (for specific conditions) +CREATE INDEX idx_posts_published ON posts(published_at) +WHERE published_at IS NOT NULL; + +-- Unique index +CREATE UNIQUE INDEX idx_users_email_lower ON users(LOWER(email)); +``` + +#### Specialized Indexes +```sql +-- GIN index for full-text search +CREATE INDEX idx_posts_content_fts ON posts +USING GIN(to_tsvector('english', content)); + +-- Search using full-text index +SELECT * FROM posts +WHERE to_tsvector('english', content) @@ to_tsquery('english', 'database & optimization'); + +-- JSONB GIN index +CREATE TABLE settings ( + user_id INTEGER PRIMARY KEY, + preferences JSONB NOT NULL DEFAULT '{}' +); + +CREATE INDEX idx_settings_preferences ON settings USING GIN(preferences); + +-- Query JSONB efficiently +SELECT * FROM settings +WHERE preferences @> '{"theme": "dark"}'; + +-- GiST index for geometric and range types +CREATE INDEX idx_events_date_range ON events USING GIST(date_range); + +-- Hash index (PostgreSQL 10+, for equality only) +CREATE INDEX idx_users_uuid ON users USING HASH(uuid); +``` + +#### Index Maintenance +```sql +-- Analyze index usage +SELECT + schemaname, + tablename, + indexname, + idx_scan, + idx_tup_read, + idx_tup_fetch +FROM pg_stat_user_indexes +ORDER BY idx_scan ASC; + +-- Find unused indexes +SELECT + schemaname, + tablename, + indexname +FROM pg_stat_user_indexes +WHERE idx_scan = 0 +AND indexname NOT LIKE 'pg_toast%'; + +-- Reindex to rebuild fragmented indexes +REINDEX INDEX CONCURRENTLY idx_posts_user_id; +REINDEX TABLE CONCURRENTLY posts; +``` + +### Query Optimization + +#### EXPLAIN ANALYZE +```sql +-- Analyze query execution plan +EXPLAIN ANALYZE +SELECT + u.username, + p.title, + COUNT(c.id) as comment_count +FROM users u +JOIN posts p ON u.id = p.user_id +LEFT JOIN comments c ON p.id = c.post_id +WHERE p.published_at > NOW() - INTERVAL '30 days' +GROUP BY u.username, p.title +ORDER BY comment_count DESC +LIMIT 10; + +-- Key metrics to look for: +-- - Seq Scan vs Index Scan +-- - Nested Loop vs Hash Join vs Merge Join +-- - Actual time vs Estimated rows +-- - Buffers (shared hit ratio) +``` + +#### Avoiding N+1 Queries +```sql +-- Bad: N+1 query problem +-- Query 1: Get all posts +SELECT * FROM posts LIMIT 10; + +-- Query 2-11: Get author for each post (N queries) +SELECT * FROM users WHERE id = ?; + +-- Good: Use JOIN to fetch in one query +SELECT + p.*, + u.username, + u.email +FROM posts p +JOIN users u ON p.user_id = u.id +LIMIT 10; + +-- Good: Use subquery or CTE +WITH post_authors AS ( + SELECT DISTINCT user_id FROM posts LIMIT 10 +) +SELECT u.* FROM users u +WHERE u.id IN (SELECT user_id FROM post_authors); +``` + +#### Query Optimization Techniques +```sql +-- Use EXISTS instead of COUNT when checking existence +-- Bad +SELECT * FROM users u +WHERE (SELECT COUNT(*) FROM posts WHERE user_id = u.id) > 0; + +-- Good +SELECT * FROM users u +WHERE EXISTS (SELECT 1 FROM posts WHERE user_id = u.id); + +-- Use DISTINCT ON for getting first row per group (PostgreSQL) +SELECT DISTINCT ON (user_id) + user_id, + created_at, + content +FROM posts +ORDER BY user_id, created_at DESC; + +-- Use window functions instead of subqueries +-- Get each user's latest post +SELECT + user_id, + title, + created_at, + ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) as rn +FROM posts +WHERE rn = 1; + +-- Batch updates instead of row-by-row +-- Bad +UPDATE posts SET view_count = view_count + 1 WHERE id = ?; -- Called N times + 
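+-- Each per-row UPDATE pays a network round trip plus its own parse, plan,
+-- and commit overhead; a single set-based statement amortizes all of that.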
+-- Good +UPDATE posts +SET view_count = view_count + v.increment +FROM (VALUES (1, 5), (2, 3), (3, 10)) AS v(id, increment) +WHERE posts.id = v.id; +``` + +### Connection Pooling + +#### Node.js (pg pool) +```typescript +import { Pool } from 'pg'; + +const pool = new Pool({ + host: 'localhost', + port: 5432, + database: 'myapp', + user: 'postgres', + password: 'password', + max: 20, // Maximum pool size + idleTimeoutMillis: 30000, + connectionTimeoutMillis: 2000, +}); + +// Use pool for queries +async function getUserById(id: number) { + const client = await pool.connect(); + try { + const result = await client.query('SELECT * FROM users WHERE id = $1', [id]); + return result.rows[0]; + } finally { + client.release(); // Always release back to pool + } +} + +// Or use pool.query directly (handles acquire/release) +async function getUsers() { + const result = await pool.query('SELECT * FROM users LIMIT 100'); + return result.rows; +} + +// Transaction with pool +async function transferFunds(fromId: number, toId: number, amount: number) { + const client = await pool.connect(); + try { + await client.query('BEGIN'); + + await client.query( + 'UPDATE accounts SET balance = balance - $1 WHERE user_id = $2', + [amount, fromId] + ); + + await client.query( + 'UPDATE accounts SET balance = balance + $1 WHERE user_id = $2', + [amount, toId] + ); + + await client.query('COMMIT'); + } catch (error) { + await client.query('ROLLBACK'); + throw error; + } finally { + client.release(); + } +} +``` + +#### Python (SQLAlchemy) +```python +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import QueuePool + +# Create engine with connection pool +engine = create_engine( + 'postgresql://user:password@localhost/dbname', + poolclass=QueuePool, + pool_size=10, + max_overflow=20, + pool_pre_ping=True, # Verify connections before using + pool_recycle=3600, # Recycle connections after 1 hour +) + +Session = sessionmaker(bind=engine) + +# Use session +def get_user(user_id: int): + session = Session() + try: + user = session.query(User).filter(User.id == user_id).first() + return user + finally: + session.close() + +# Context manager for automatic cleanup +from contextlib import contextmanager + +@contextmanager +def get_db_session(): + session = Session() + try: + yield session + session.commit() + except Exception: + session.rollback() + raise + finally: + session.close() + +# Usage +with get_db_session() as session: + user = session.query(User).filter(User.id == 1).first() + user.name = "Updated Name" +``` + +### Database Migration Patterns + +#### Migrations with TypeORM (Node.js) +```typescript +// migrations/1234567890-CreateUsers.ts +import { MigrationInterface, QueryRunner, Table } from 'typeorm'; + +export class CreateUsers1234567890 implements MigrationInterface { + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.createTable( + new Table({ + name: 'users', + columns: [ + { + name: 'id', + type: 'int', + isPrimary: true, + isGenerated: true, + generationStrategy: 'increment', + }, + { + name: 'email', + type: 'varchar', + length: '255', + isUnique: true, + }, + { + name: 'created_at', + type: 'timestamp', + default: 'CURRENT_TIMESTAMP', + }, + ], + }) + ); + + await queryRunner.createIndex( + 'users', + new TableIndex({ + name: 'idx_users_email', + columnNames: ['email'], + }) + ); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.dropTable('users'); + } +} +``` + +#### Alembic Migrations (Python) 
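+A minimal upgrade/downgrade sketch; a real Alembic revision file also carries the
+`revision` and `down_revision` identifiers that `alembic revision` generates, omitted here.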
+```python +# alembic/versions/001_create_users.py +from alembic import op +import sqlalchemy as sa + +def upgrade(): + op.create_table( + 'users', + sa.Column('id', sa.Integer(), primary_key=True), + sa.Column('email', sa.String(255), unique=True, nullable=False), + sa.Column('username', sa.String(50), unique=True, nullable=False), + sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()), + ) + + op.create_index('idx_users_email', 'users', ['email']) + +def downgrade(): + op.drop_index('idx_users_email') + op.drop_table('users') +``` + +#### Zero-Downtime Migration Strategies +```sql +-- Adding a NOT NULL column safely + +-- Step 1: Add column as nullable +ALTER TABLE users ADD COLUMN phone VARCHAR(20); + +-- Step 2: Backfill data in batches +UPDATE users SET phone = '000-000-0000' WHERE phone IS NULL; + +-- Step 3: Add NOT NULL constraint +ALTER TABLE users ALTER COLUMN phone SET NOT NULL; + +-- Renaming a column safely + +-- Step 1: Add new column +ALTER TABLE users ADD COLUMN full_name VARCHAR(200); + +-- Step 2: Dual-write to both columns in application code +-- Step 3: Backfill data +UPDATE users SET full_name = name WHERE full_name IS NULL; + +-- Step 4: Switch reads to new column in application +-- Step 5: Drop old column +ALTER TABLE users DROP COLUMN name; +``` + +## NoSQL Database Patterns + +### MongoDB Schema Design + +#### Embedding vs Referencing +```javascript +// Embedding (One-to-Few) +{ + _id: ObjectId("..."), + username: "johndoe", + email: "john@example.com", + addresses: [ + { + type: "home", + street: "123 Main St", + city: "New York", + zip: "10001" + }, + { + type: "work", + street: "456 Office Blvd", + city: "New York", + zip: "10002" + } + ] +} + +// Referencing (One-to-Many or Many-to-Many) +// Users collection +{ + _id: ObjectId("user1"), + username: "johndoe", + email: "john@example.com" +} + +// Posts collection +{ + _id: ObjectId("post1"), + user_id: ObjectId("user1"), + title: "My Post", + content: "...", + created_at: ISODate("2024-01-01") +} + +// Extended Reference Pattern (Denormalization) +{ + _id: ObjectId("post1"), + user: { + _id: ObjectId("user1"), + username: "johndoe", + avatar: "https://..." + }, + title: "My Post", + content: "..." 
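+  // Trade-off: the embedded username/avatar are a denormalized snapshot and
+  // must be refreshed whenever the referenced user document changes.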
+} +``` + +#### Compound Indexes +```javascript +// Create compound index +db.posts.createIndex({ user_id: 1, created_at: -1 }); + +// Index with unique constraint +db.users.createIndex({ email: 1 }, { unique: true }); + +// Partial index +db.orders.createIndex( + { status: 1, created_at: -1 }, + { partialFilterExpression: { status: { $in: ["pending", "processing"] } } } +); + +// Text index for full-text search +db.articles.createIndex({ title: "text", content: "text" }); + +// Geospatial index +db.locations.createIndex({ coordinates: "2dsphere" }); +``` + +#### Aggregation Pipeline +```javascript +// Complex aggregation example +db.orders.aggregate([ + // Stage 1: Match recent orders + { + $match: { + created_at: { $gte: new Date("2024-01-01") }, + status: "completed" + } + }, + + // Stage 2: Lookup user data + { + $lookup: { + from: "users", + localField: "user_id", + foreignField: "_id", + as: "user" + } + }, + + // Stage 3: Unwind user array + { $unwind: "$user" }, + + // Stage 4: Group by user and calculate totals + { + $group: { + _id: "$user._id", + username: { $first: "$user.username" }, + total_orders: { $sum: 1 }, + total_revenue: { $sum: "$total_amount" }, + avg_order_value: { $avg: "$total_amount" } + } + }, + + // Stage 5: Sort by revenue + { $sort: { total_revenue: -1 } }, + + // Stage 6: Limit results + { $limit: 10 } +]); + +// Use $facet for multiple aggregations in one query +db.products.aggregate([ + { + $facet: { + categoryCounts: [ + { $group: { _id: "$category", count: { $sum: 1 } } } + ], + priceRanges: [ + { + $bucket: { + groupBy: "$price", + boundaries: [0, 25, 50, 100, 500], + default: "500+", + output: { count: { $sum: 1 } } + } + } + ], + topRated: [ + { $sort: { rating: -1 } }, + { $limit: 5 } + ] + } + } +]); +``` + +### Redis Patterns + +#### Caching Strategy +```typescript +import Redis from 'ioredis'; + +const redis = new Redis({ + host: 'localhost', + port: 6379, + retryStrategy: (times) => Math.min(times * 50, 2000), +}); + +// Cache-aside pattern +async function getUser(userId: string) { + const cacheKey = `user:${userId}`; + + // Try cache first + const cached = await redis.get(cacheKey); + if (cached) { + return JSON.parse(cached); + } + + // Cache miss - fetch from database + const user = await db.users.findById(userId); + + // Store in cache with TTL + await redis.setex(cacheKey, 3600, JSON.stringify(user)); + + return user; +} + +// Invalidate cache on update +async function updateUser(userId: string, data: UserData) { + const user = await db.users.update(userId, data); + + // Invalidate cache + await redis.del(`user:${userId}`); + + return user; +} + +// Rate limiting with Redis +async function checkRateLimit(userId: string, limit: number, window: number) { + const key = `ratelimit:${userId}`; + const current = await redis.incr(key); + + if (current === 1) { + await redis.expire(key, window); + } + + return current <= limit; +} + +// Usage +const allowed = await checkRateLimit('user123', 100, 60); // 100 requests per minute +if (!allowed) { + throw new Error('Rate limit exceeded'); +} + +// Distributed locking +async function withLock( + lockKey: string, + ttl: number, + fn: () => Promise +): Promise { + const lockValue = crypto.randomUUID(); + const acquired = await redis.set(lockKey, lockValue, 'EX', ttl, 'NX'); + + if (!acquired) { + throw new Error('Could not acquire lock'); + } + + try { + return await fn(); + } finally { + // Release lock only if we still own it + const script = ` + if redis.call("get", KEYS[1]) == ARGV[1] then + return 
redis.call("del", KEYS[1]) + else + return 0 + end + `; + await redis.eval(script, 1, lockKey, lockValue); + } +} + +// Pub/Sub pattern +const publisher = new Redis(); +const subscriber = new Redis(); + +subscriber.subscribe('notifications', (err, count) => { + console.log(`Subscribed to ${count} channels`); +}); + +subscriber.on('message', (channel, message) => { + console.log(`Received ${message} from ${channel}`); +}); + +publisher.publish('notifications', JSON.stringify({ + type: 'new_message', + userId: '123', + content: 'Hello!' +})); +``` + +## Database Performance Best Practices + +### 1. Use Connection Pooling +Always use connection pools to avoid connection overhead + +### 2. Index Strategically +- Index foreign keys and columns used in WHERE, JOIN, ORDER BY +- Avoid over-indexing (impacts write performance) +- Use composite indexes for multi-column queries +- Monitor index usage and remove unused ones + +### 3. Optimize Queries +- Use EXPLAIN to analyze query plans +- Avoid SELECT * - fetch only needed columns +- Use pagination for large result sets +- Batch operations when possible + +### 4. Cache Frequently Accessed Data +- Use Redis or Memcached for hot data +- Implement cache invalidation strategy +- Consider read replicas for read-heavy workloads + +### 5. Partition Large Tables +```sql +-- Range partitioning by date +CREATE TABLE events ( + id SERIAL, + event_type VARCHAR(50), + data JSONB, + created_at TIMESTAMP +) PARTITION BY RANGE (created_at); + +CREATE TABLE events_2024_q1 PARTITION OF events +FOR VALUES FROM ('2024-01-01') TO ('2024-04-01'); + +CREATE TABLE events_2024_q2 PARTITION OF events +FOR VALUES FROM ('2024-04-01') TO ('2024-07-01'); +``` + +### 6. Monitor and Analyze +- Track slow queries +- Monitor connection pool usage +- Analyze query performance trends +- Set up alerts for anomalies + +### 7. Use Appropriate Data Types +```sql +-- Good +CREATE TABLE products ( + id SERIAL PRIMARY KEY, + price NUMERIC(10,2), -- Exact decimal for money + created_at TIMESTAMP WITH TIME ZONE +); + +-- Bad +CREATE TABLE products ( + id VARCHAR(255) PRIMARY KEY, -- Wasteful for numeric IDs + price FLOAT, -- Floating point for money (precision issues) + created_at VARCHAR(50) -- String for dates +); +``` + +### 8. 
Implement Proper Backup Strategy +- Regular automated backups +- Test restore procedures +- Use point-in-time recovery when possible +- Replicate to multiple regions for disaster recovery diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..2fffe12 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,49 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:Dieshen/claude_marketplace:plugins/database-design", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "4d7bb646d96c2f580e2bc6aa57575fb20e4ea1c5", + "treeHash": "6d8032099c271048d260bef2ee78f247af22c94ba8e9efbc0ccbb8c0de264b98", + "generatedAt": "2025-11-28T10:10:23.712449Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "database-design", + "description": "Database design, optimization, and query performance patterns for SQL and NoSQL databases", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "9a1370c64a63c03f0613f35d2a847e74ba7569c950c7e9bc1c5ab10d5386c282" + }, + { + "path": "agents/database-architect.md", + "sha256": "147dce90a2cd13c76bfd778851404cb26d993244e89e8fdcb3dd30121680d212" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "ac210fcd56ea61d621976930fe928acb9667a49bf4ad85d5318fe88410628567" + }, + { + "path": "commands/db-patterns.md", + "sha256": "bd6a9bb583e731de6fa65e27bfd388187e31bd927ffb108efe30e06aec013b63" + } + ], + "dirSha256": "6d8032099c271048d260bef2ee78f247af22c94ba8e9efbc0ccbb8c0de264b98" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file