Initial commit

Zhongwei Li
2025-11-29 18:21:15 +08:00
commit 1b1cbbcdd5
5 changed files with 1555 additions and 0 deletions

.claude-plugin/plugin.json Normal file

@@ -0,0 +1,14 @@
{
  "name": "database-design",
  "description": "Database design, optimization, and query performance patterns for SQL and NoSQL databases",
  "version": "1.0.0",
  "author": {
    "name": "Brock"
  },
  "agents": [
    "./agents"
  ],
  "commands": [
    "./commands"
  ]
}

README.md Normal file

@@ -0,0 +1,3 @@
# database-design
Database design, optimization, and query performance patterns for SQL and NoSQL databases

agents/database-architect.md Normal file

@@ -0,0 +1,711 @@
# Database Architect Agent
You are an autonomous agent specialized in database design, optimization, and performance tuning for SQL and NoSQL databases.
## Your Mission
Design robust, scalable database architectures and optimize database performance for production applications.
## Core Responsibilities
### 1. Analyze Application Data Requirements
- Understand data entities and relationships
- Identify access patterns and query requirements
- Determine data volume and growth projections
- Assess consistency vs availability requirements (CAP theorem)
- Evaluate read/write ratios
### 2. Design Database Schema
#### For SQL Databases
```sql
-- Example: E-commerce schema design
-- Users and authentication
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) UNIQUE NOT NULL,
username VARCHAR(50) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX idx_users_username ON users(username);
-- User profiles (1-to-1)
CREATE TABLE user_profiles (
user_id INTEGER PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
first_name VARCHAR(100),
last_name VARCHAR(100),
phone VARCHAR(20),
bio TEXT,
avatar_url VARCHAR(500)
);
-- Product catalog (referenced by products below)
CREATE TABLE categories (
  id SERIAL PRIMARY KEY,
  name VARCHAR(100) NOT NULL,
  slug VARCHAR(100) UNIQUE NOT NULL
);
-- Products
CREATE TABLE products (
id SERIAL PRIMARY KEY,
name VARCHAR(255) NOT NULL,
description TEXT,
price NUMERIC(10,2) NOT NULL,
stock_quantity INTEGER NOT NULL DEFAULT 0,
category_id INTEGER REFERENCES categories(id),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT positive_price CHECK (price >= 0),
CONSTRAINT positive_stock CHECK (stock_quantity >= 0)
);
CREATE INDEX idx_products_category ON products(category_id);
CREATE INDEX idx_products_price ON products(price);
-- Orders (with proper referential integrity)
CREATE TABLE orders (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id),
status VARCHAR(50) NOT NULL DEFAULT 'pending',
total_amount NUMERIC(10,2) NOT NULL,
shipping_address TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT valid_status CHECK (status IN ('pending', 'processing', 'shipped', 'delivered', 'cancelled'))
);
CREATE INDEX idx_orders_user_id ON orders(user_id);
CREATE INDEX idx_orders_status ON orders(status);
CREATE INDEX idx_orders_created_at ON orders(created_at DESC);
-- Order items (many-to-many with additional data)
CREATE TABLE order_items (
id SERIAL PRIMARY KEY,
order_id INTEGER NOT NULL REFERENCES orders(id) ON DELETE CASCADE,
product_id INTEGER NOT NULL REFERENCES products(id),
quantity INTEGER NOT NULL,
unit_price NUMERIC(10,2) NOT NULL,
subtotal NUMERIC(10,2) NOT NULL,
CONSTRAINT positive_quantity CHECK (quantity > 0)
);
CREATE INDEX idx_order_items_order_id ON order_items(order_id);
CREATE INDEX idx_order_items_product_id ON order_items(product_id);
-- Audit trail
CREATE TABLE audit_log (
id BIGSERIAL PRIMARY KEY,
table_name VARCHAR(50) NOT NULL,
record_id INTEGER NOT NULL,
action VARCHAR(20) NOT NULL,
old_data JSONB,
new_data JSONB,
changed_by INTEGER REFERENCES users(id),
changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_audit_log_table_record ON audit_log(table_name, record_id);
CREATE INDEX idx_audit_log_changed_at ON audit_log(changed_at DESC);
-- Trigger function for audit logging
-- Assumption: changed_by comes from a session variable the application sets
-- (e.g. SET app.user_id = '42'); the tables above have no updated_by column.
CREATE OR REPLACE FUNCTION audit_trigger()
RETURNS TRIGGER AS $$
DECLARE
  actor INTEGER := NULLIF(current_setting('app.user_id', true), '')::INTEGER;
BEGIN
  IF (TG_OP = 'INSERT') THEN
    INSERT INTO audit_log (table_name, record_id, action, new_data, changed_by)
    VALUES (TG_TABLE_NAME, NEW.id, 'INSERT', to_jsonb(NEW), actor);
    RETURN NEW;
  ELSIF (TG_OP = 'UPDATE') THEN
    INSERT INTO audit_log (table_name, record_id, action, old_data, new_data, changed_by)
    VALUES (TG_TABLE_NAME, NEW.id, 'UPDATE', to_jsonb(OLD), to_jsonb(NEW), actor);
    RETURN NEW;
  ELSIF (TG_OP = 'DELETE') THEN
    INSERT INTO audit_log (table_name, record_id, action, old_data, changed_by)
    VALUES (TG_TABLE_NAME, OLD.id, 'DELETE', to_jsonb(OLD), actor);
    RETURN OLD;
  END IF;
  RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER orders_audit
AFTER INSERT OR UPDATE OR DELETE ON orders
FOR EACH ROW EXECUTE FUNCTION audit_trigger();
```
#### For NoSQL Databases (MongoDB)
```javascript
// Design document structure with embedding and referencing
// Users collection
{
_id: ObjectId("..."),
email: "user@example.com",
username: "johndoe",
password_hash: "...",
profile: {
first_name: "John",
last_name: "Doe",
avatar_url: "https://...",
preferences: {
theme: "dark",
notifications: {
email: true,
push: false
}
}
},
created_at: ISODate("2024-01-01"),
updated_at: ISODate("2024-01-01")
}
// Products collection
{
_id: ObjectId("..."),
name: "Product Name",
description: "...",
price: 29.99,
stock_quantity: 100,
category: {
_id: ObjectId("..."),
name: "Electronics",
slug: "electronics"
},
images: [
{ url: "https://...", alt: "Product image 1" },
{ url: "https://...", alt: "Product image 2" }
],
tags: ["featured", "sale", "new"],
created_at: ISODate("2024-01-01"),
updated_at: ISODate("2024-01-01")
}
// Orders collection (with embedded items)
{
_id: ObjectId("..."),
user: {
_id: ObjectId("..."),
email: "user@example.com",
username: "johndoe"
},
status: "processing",
items: [
{
product_id: ObjectId("..."),
name: "Product Name",
quantity: 2,
unit_price: 29.99,
subtotal: 59.98
}
],
total_amount: 59.98,
shipping_address: {
street: "123 Main St",
city: "New York",
zip: "10001"
},
created_at: ISODate("2024-01-01"),
updated_at: ISODate("2024-01-01")
}
// Create indexes
db.users.createIndex({ email: 1 }, { unique: true });
db.users.createIndex({ username: 1 }, { unique: true });
db.products.createIndex({ "category._id": 1, price: -1 });
db.products.createIndex({ tags: 1 });
db.products.createIndex({ name: "text", description: "text" });
db.orders.createIndex({ "user._id": 1, created_at: -1 });
db.orders.createIndex({ status: 1, created_at: -1 });
db.orders.createIndex(
{ status: 1 },
{ partialFilterExpression: { status: { $in: ["pending", "processing"] } } }
);
```
### 3. Optimize Query Performance
#### Identify Slow Queries
```sql
-- PostgreSQL: Enable slow query logging
ALTER SYSTEM SET log_min_duration_statement = 1000; -- Log queries > 1s
SELECT pg_reload_conf();
-- View slow queries
SELECT
calls,
total_exec_time,
mean_exec_time,
max_exec_time,
query
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 20;
```
#### Analyze and Optimize
```sql
-- Use EXPLAIN ANALYZE to understand query execution
EXPLAIN (ANALYZE, BUFFERS, VERBOSE)
SELECT
u.username,
COUNT(o.id) as order_count,
SUM(o.total_amount) as total_spent
FROM users u
JOIN orders o ON u.id = o.user_id -- the WHERE filter on o.created_at makes this effectively an inner join
WHERE o.created_at > NOW() - INTERVAL '30 days'
GROUP BY u.id, u.username
HAVING COUNT(o.id) > 5
ORDER BY total_spent DESC
LIMIT 100;
-- Look for:
-- 1. Sequential Scans - Add indexes
-- 2. High actual time - Optimize query or add indexes
-- 3. Large difference between estimated and actual rows - Update statistics
-- 4. Nested loops with large datasets - Consider hash join instead
-- Update statistics
ANALYZE users;
ANALYZE orders;
-- Add necessary indexes
-- (a partial-index predicate must be immutable, so use a fixed cutoff
-- date rather than NOW(); recreate the index periodically to advance it)
CREATE INDEX idx_orders_user_created ON orders(user_id, created_at)
WHERE created_at > '2024-01-01';
```
#### Optimize Joins
```sql
-- Bad: Implicit join with WHERE clause
SELECT u.username, p.title
FROM users u, posts p
WHERE u.id = p.user_id;
-- Good: Explicit JOIN
SELECT u.username, p.title
FROM users u
INNER JOIN posts p ON u.id = p.user_id;
-- Use appropriate join type
-- INNER JOIN: Only matching rows
-- LEFT JOIN: All from left, matching from right
-- RIGHT JOIN: All from right, matching from left (rare, use LEFT JOIN instead)
-- FULL OUTER JOIN: All rows from both (expensive)
-- Join order: modern planners reorder joins themselves; focus on selective filters and supporting indexes
SELECT p.title, u.username
FROM posts p
INNER JOIN users u ON p.user_id = u.id
WHERE p.published_at > NOW() - INTERVAL '7 days';
```
### 4. Implement Caching Strategy
```typescript
import Redis from 'ioredis';
class CachedRepository {
constructor(
private db: Database,
private cache: Redis
) {}
async getUser(userId: string): Promise<User | null> {
const cacheKey = `user:${userId}`;
// Try cache first (cache-aside pattern)
const cached = await this.cache.get(cacheKey);
if (cached) {
return JSON.parse(cached);
}
// Cache miss - fetch from database
const user = await this.db.users.findById(userId);
if (user) {
// Cache for 1 hour
await this.cache.setex(cacheKey, 3600, JSON.stringify(user));
}
return user;
}
async updateUser(userId: string, data: UserData): Promise<User> {
// Update database
const user = await this.db.users.update(userId, data);
// Invalidate cache
await this.cache.del(`user:${userId}`);
return user;
}
async getUserOrders(userId: string, page: number = 1): Promise<Order[]> {
const cacheKey = `user:${userId}:orders:page:${page}`;
const cached = await this.cache.get(cacheKey);
if (cached) {
return JSON.parse(cached);
}
const orders = await this.db.orders.findByUser(userId, { page, limit: 20 });
// Cache for 5 minutes
await this.cache.setex(cacheKey, 300, JSON.stringify(orders));
return orders;
}
// Pattern: Cache warming (preload frequently accessed data)
async warmCache(): Promise<void> {
const popularProducts = await this.db.products.findPopular(100);
for (const product of popularProducts) {
const cacheKey = `product:${product.id}`;
await this.cache.setex(cacheKey, 3600, JSON.stringify(product));
}
}
// Pattern: Write-through cache (write to cache and DB simultaneously)
async createOrder(orderData: OrderData): Promise<Order> {
const order = await this.db.orders.create(orderData);
const cacheKey = `order:${order.id}`;
await this.cache.setex(cacheKey, 3600, JSON.stringify(order));
return order;
}
}
```
### 5. Design Database Migrations
```typescript
// migration-001-create-users.ts
import { MigrationInterface, QueryRunner, Table, TableIndex } from 'typeorm';
export class CreateUsers1234567890 implements MigrationInterface {
public async up(queryRunner: QueryRunner): Promise<void> {
// Create table
await queryRunner.createTable(
new Table({
name: 'users',
columns: [
{
name: 'id',
type: 'serial',
isPrimary: true,
},
{
name: 'email',
type: 'varchar',
length: '255',
isUnique: true,
isNullable: false,
},
{
name: 'username',
type: 'varchar',
length: '50',
isUnique: true,
isNullable: false,
},
{
name: 'created_at',
type: 'timestamp',
default: 'CURRENT_TIMESTAMP',
},
],
})
);
// Create indexes
await queryRunner.createIndex(
'users',
new TableIndex({
name: 'idx_users_email',
columnNames: ['email'],
})
);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.dropTable('users');
}
}
// migration-002-add-user-status.ts
export class AddUserStatus1234567891 implements MigrationInterface {
public async up(queryRunner: QueryRunner): Promise<void> {
// Safe migration: Add nullable column first
await queryRunner.query(`
ALTER TABLE users ADD COLUMN status VARCHAR(20);
`);
// Backfill data
await queryRunner.query(`
UPDATE users SET status = 'active' WHERE status IS NULL;
`);
// Add NOT NULL constraint
await queryRunner.query(`
ALTER TABLE users ALTER COLUMN status SET NOT NULL;
`);
// Add default
await queryRunner.query(`
ALTER TABLE users ALTER COLUMN status SET DEFAULT 'active';
`);
// Add check constraint
await queryRunner.query(`
ALTER TABLE users ADD CONSTRAINT check_user_status
CHECK (status IN ('active', 'inactive', 'suspended'));
`);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.query(`
ALTER TABLE users DROP CONSTRAINT check_user_status;
`);
await queryRunner.query(`
ALTER TABLE users DROP COLUMN status;
`);
}
}
```
### 6. Implement Connection Pooling
```typescript
import { Pool, PoolClient } from 'pg';
export class DatabasePool {
private pool: Pool;
constructor() {
this.pool = new Pool({
host: process.env.DB_HOST,
port: parseInt(process.env.DB_PORT || '5432'),
database: process.env.DB_NAME,
user: process.env.DB_USER,
password: process.env.DB_PASSWORD,
max: 20, // Maximum pool size
min: 5, // Minimum pool size
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 2000,
// Name shown in pg_stat_activity for easier debugging
application_name: 'myapp',
});
// Handle pool errors
this.pool.on('error', (err) => {
console.error('Unexpected error on idle client', err);
process.exit(-1);
});
// Monitor pool metrics
this.pool.on('connect', () => {
console.log('New client connected to pool');
});
this.pool.on('acquire', () => {
console.log('Client acquired from pool');
});
this.pool.on('remove', () => {
console.log('Client removed from pool');
});
}
async query<T>(sql: string, params?: any[]): Promise<T[]> {
const result = await this.pool.query(sql, params);
return result.rows;
}
async transaction<T>(fn: (client: PoolClient) => Promise<T>): Promise<T> {
const client = await this.pool.connect();
try {
await client.query('BEGIN');
const result = await fn(client);
await client.query('COMMIT');
return result;
} catch (error) {
await client.query('ROLLBACK');
throw error;
} finally {
client.release();
}
}
async healthCheck(): Promise<boolean> {
try {
await this.pool.query('SELECT 1');
return true;
} catch (error) {
console.error('Database health check failed:', error);
return false;
}
}
async getPoolStatus() {
return {
total: this.pool.totalCount,
idle: this.pool.idleCount,
waiting: this.pool.waitingCount,
};
}
async close(): Promise<void> {
await this.pool.end();
}
}
```
### 7. Set Up Monitoring and Alerting
```typescript
// Database monitoring utilities
export class DatabaseMonitor {
constructor(private db: Database) {}
async getSlowQueries(minDuration: number = 1000): Promise<SlowQuery[]> {
return this.db.query(`
SELECT
calls,
total_exec_time,
mean_exec_time,
max_exec_time,
stddev_exec_time,
query
FROM pg_stat_statements
WHERE mean_exec_time > $1
ORDER BY mean_exec_time DESC
LIMIT 50
`, [minDuration]);
}
async getTableSizes(): Promise<TableSize[]> {
return this.db.query(`
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) as size,
pg_total_relation_size(schemaname||'.'||tablename) as bytes
FROM pg_tables
WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC
`);
}
async getIndexUsage(): Promise<IndexUsage[]> {
return this.db.query(`
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch,
pg_size_pretty(pg_relation_size(indexrelid)) as index_size
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC
`);
}
async getConnectionStats(): Promise<ConnectionStats> {
const [stats] = await this.db.query(`
SELECT
count(*) as total,
count(*) FILTER (WHERE state = 'active') as active,
count(*) FILTER (WHERE state = 'idle') as idle,
count(*) FILTER (WHERE state = 'idle in transaction') as idle_in_transaction
FROM pg_stat_activity
WHERE datname = current_database()
`);
return stats;
}
async getCacheHitRatio(): Promise<number> {
const [result] = await this.db.query(`
SELECT
sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) as ratio
FROM pg_statio_user_tables
`);
return result.ratio;
}
}
```
## Best Practices to Follow
### 1. Schema Design
- Normalize to reduce redundancy, denormalize for performance when needed
- Use appropriate data types
- Add constraints for data integrity
- Design for future growth
### 2. Indexing
- Index foreign keys
- Index columns used in WHERE, JOIN, ORDER BY
- Use composite indexes for multi-column queries (see the covering-index sketch below)
- Monitor and remove unused indexes
- Don't over-index (impacts write performance)
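A query that the index alone can answer avoids heap fetches entirely (an index-only scan). A minimal sketch, assuming PostgreSQL 11+ for the `INCLUDE` syntax; the columns follow the orders schema above:
```sql
-- Key columns serve WHERE/ORDER BY; INCLUDE carries payload columns
-- so the query below can be answered from the index alone.
CREATE INDEX idx_orders_user_created_covering
ON orders (user_id, created_at DESC)
INCLUDE (status, total_amount);
SELECT status, total_amount
FROM orders
WHERE user_id = 42
ORDER BY created_at DESC
LIMIT 20;
```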
### 3. Query Optimization
- Always use EXPLAIN ANALYZE
- Avoid SELECT *, fetch only needed columns
- Use prepared statements to prevent SQL injection
- Batch operations when possible
- Use pagination for large result sets (keyset pagination is sketched below)
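OFFSET pagination still scans and discards every skipped row, so deep pages get progressively slower. A keyset (cursor) sketch against the orders table above; the cursor literals are placeholders taken from the last row of the previous page:
```sql
-- First page
SELECT id, user_id, total_amount, created_at
FROM orders
ORDER BY created_at DESC, id DESC
LIMIT 20;
-- Next page: everything strictly after the (created_at, id) cursor
SELECT id, user_id, total_amount, created_at
FROM orders
WHERE (created_at, id) < ('2024-06-01 12:00:00', 123456)
ORDER BY created_at DESC, id DESC
LIMIT 20;
```
An index on (created_at DESC, id DESC) keeps every page a short index scan regardless of depth.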
### 4. Transactions
- Keep transactions short
- Use appropriate isolation levels
- Handle deadlocks gracefully
- Use optimistic locking for better concurrency (version-column sketch below)
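A common way to implement optimistic locking is a version column every writer must match; this sketch assumes a `version INTEGER NOT NULL DEFAULT 0` column added to products (not part of the schema above):
```sql
-- Read the row together with its version
SELECT id, stock_quantity, version FROM products WHERE id = 1;
-- Write back only if no one updated the row in the meantime
UPDATE products
SET stock_quantity = stock_quantity - 1,
    version = version + 1
WHERE id = 1
  AND version = 7; -- the version read above
-- Application checks the affected row count:
-- 1 row => success; 0 rows => concurrent update, re-read and retry
```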
### 5. Caching
- Cache frequently accessed, slowly changing data
- Implement cache invalidation strategy
- Use appropriate TTLs
- Consider cache warming for critical data
### 6. Monitoring
- Track slow queries
- Monitor connection pool usage
- Alert on high resource usage
- Regular performance reviews
### 7. Security
- Use parameterized queries
- Implement row-level security when needed (policy sketch after this list)
- Encrypt sensitive data at rest and in transit
- Regular security audits
- Principle of least privilege for database users
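Row-level security lets the database itself enforce per-user visibility. A minimal PostgreSQL sketch for the orders table; the `app.user_id` session variable is an assumption the application must set per connection:
```sql
ALTER TABLE orders ENABLE ROW LEVEL SECURITY;
-- Each user sees only their own orders
CREATE POLICY orders_owner ON orders
USING (user_id = NULLIF(current_setting('app.user_id', true), '')::integer);
-- Set per connection after authenticating: SET app.user_id = '42';
-- Note: table owners bypass RLS unless FORCE ROW LEVEL SECURITY is set.
```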
## Deliverables
1. **Database Schema**
- ER diagrams
- SQL schema definitions
- Migration scripts
2. **Indexes and Constraints**
- Index definitions with rationale
- Data integrity constraints
3. **Performance Optimization**
- Query optimization recommendations
- Caching strategy
- Connection pooling configuration
4. **Monitoring Setup**
- Slow query logging
- Performance metrics dashboard
- Alerting rules
5. **Documentation**
- Schema documentation
- Query patterns and examples
- Maintenance procedures
- Backup and recovery strategy

commands/db-patterns.md Normal file

@@ -0,0 +1,778 @@
# Database Design & Optimization Patterns
Comprehensive database design, optimization, and performance patterns for SQL and NoSQL databases.
## SQL Database Design Patterns
### Schema Design Best Practices
#### Normalization (3NF)
```sql
-- Properly normalized schema
CREATE TABLE users (
id SERIAL PRIMARY KEY,
email VARCHAR(255) UNIQUE NOT NULL,
username VARCHAR(50) UNIQUE NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE user_profiles (
user_id INTEGER PRIMARY KEY REFERENCES users(id) ON DELETE CASCADE,
first_name VARCHAR(100),
last_name VARCHAR(100),
bio TEXT,
avatar_url VARCHAR(500)
);
CREATE TABLE posts (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL REFERENCES users(id) ON DELETE CASCADE,
title VARCHAR(255) NOT NULL,
content TEXT,
published_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE tags (
id SERIAL PRIMARY KEY,
name VARCHAR(50) UNIQUE NOT NULL,
slug VARCHAR(50) UNIQUE NOT NULL
);
CREATE TABLE post_tags (
post_id INTEGER REFERENCES posts(id) ON DELETE CASCADE,
tag_id INTEGER REFERENCES tags(id) ON DELETE CASCADE,
PRIMARY KEY (post_id, tag_id)
);
```
#### Denormalization for Performance
```sql
-- Denormalized for read performance
CREATE TABLE post_view (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL,
username VARCHAR(50) NOT NULL,
user_avatar VARCHAR(500),
post_id INTEGER NOT NULL,
post_title VARCHAR(255) NOT NULL,
post_content TEXT,
post_published_at TIMESTAMP,
tags TEXT[], -- Array of tag names
comment_count INTEGER DEFAULT 0,
like_count INTEGER DEFAULT 0,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Materialized view for complex aggregations
CREATE MATERIALIZED VIEW user_statistics AS
SELECT
u.id,
u.username,
COUNT(DISTINCT p.id) as post_count,
COUNT(DISTINCT c.id) as comment_count,
COUNT(DISTINCT l.id) as like_count,
MAX(p.created_at) as last_post_at
FROM users u
LEFT JOIN posts p ON u.id = p.user_id
LEFT JOIN comments c ON u.id = c.user_id
LEFT JOIN likes l ON u.id = l.user_id
GROUP BY u.id, u.username;
-- Refresh without blocking readers (CONCURRENTLY requires a unique index on the view)
CREATE UNIQUE INDEX idx_user_statistics_id ON user_statistics(id);
REFRESH MATERIALIZED VIEW CONCURRENTLY user_statistics;
```
### Indexing Strategies
#### B-Tree Indexes (Default)
```sql
-- Single column index
CREATE INDEX idx_posts_user_id ON posts(user_id);
-- Composite index (order matters!)
CREATE INDEX idx_posts_user_published ON posts(user_id, published_at DESC);
-- Partial index (for specific conditions)
CREATE INDEX idx_posts_published ON posts(published_at)
WHERE published_at IS NOT NULL;
-- Unique index
CREATE UNIQUE INDEX idx_users_email_lower ON users(LOWER(email));
```
#### Specialized Indexes
```sql
-- GIN index for full-text search
CREATE INDEX idx_posts_content_fts ON posts
USING GIN(to_tsvector('english', content));
-- Search using full-text index
SELECT * FROM posts
WHERE to_tsvector('english', content) @@ to_tsquery('english', 'database & optimization');
-- JSONB GIN index
CREATE TABLE settings (
user_id INTEGER PRIMARY KEY,
preferences JSONB NOT NULL DEFAULT '{}'
);
CREATE INDEX idx_settings_preferences ON settings USING GIN(preferences);
-- Query JSONB efficiently
SELECT * FROM settings
WHERE preferences @> '{"theme": "dark"}';
-- GiST index for geometric and range types
CREATE INDEX idx_events_date_range ON events USING GIST(date_range);
-- Hash index (PostgreSQL 10+, for equality only)
CREATE INDEX idx_users_uuid ON users USING HASH(uuid);
```
#### Index Maintenance
```sql
-- Analyze index usage
SELECT
schemaname,
tablename,
indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan ASC;
-- Find unused indexes
SELECT
schemaname,
tablename,
indexname
FROM pg_stat_user_indexes
WHERE idx_scan = 0
AND indexname NOT LIKE 'pg_toast%';
-- Reindex to rebuild bloated indexes (CONCURRENTLY requires PostgreSQL 12+)
REINDEX INDEX CONCURRENTLY idx_posts_user_id;
REINDEX TABLE CONCURRENTLY posts;
```
### Query Optimization
#### EXPLAIN ANALYZE
```sql
-- Analyze query execution plan
EXPLAIN ANALYZE
SELECT
u.username,
p.title,
COUNT(c.id) as comment_count
FROM users u
JOIN posts p ON u.id = p.user_id
LEFT JOIN comments c ON p.id = c.post_id
WHERE p.published_at > NOW() - INTERVAL '30 days'
GROUP BY u.username, p.title
ORDER BY comment_count DESC
LIMIT 10;
-- Key metrics to look for:
-- - Seq Scan vs Index Scan
-- - Nested Loop vs Hash Join vs Merge Join
-- - Actual time vs Estimated rows
-- - Buffers (shared hit ratio)
```
#### Avoiding N+1 Queries
```sql
-- Bad: N+1 query problem
-- Query 1: Get all posts
SELECT * FROM posts LIMIT 10;
-- Query 2-11: Get author for each post (N queries)
SELECT * FROM users WHERE id = ?;
-- Good: Use JOIN to fetch in one query
SELECT
p.*,
u.username,
u.email
FROM posts p
JOIN users u ON p.user_id = u.id
LIMIT 10;
-- Good: Use subquery or CTE
WITH post_authors AS (
SELECT DISTINCT user_id FROM posts LIMIT 10
)
SELECT u.* FROM users u
WHERE u.id IN (SELECT user_id FROM post_authors);
```
#### Query Optimization Techniques
```sql
-- Use EXISTS instead of COUNT when checking existence
-- Bad
SELECT * FROM users u
WHERE (SELECT COUNT(*) FROM posts WHERE user_id = u.id) > 0;
-- Good
SELECT * FROM users u
WHERE EXISTS (SELECT 1 FROM posts WHERE user_id = u.id);
-- Use DISTINCT ON for getting first row per group (PostgreSQL)
SELECT DISTINCT ON (user_id)
user_id,
created_at,
content
FROM posts
ORDER BY user_id, created_at DESC;
-- Use window functions instead of correlated subqueries
-- Get each user's latest post (a window alias cannot be used in WHERE,
-- so filter in an outer query)
SELECT user_id, title, created_at
FROM (
  SELECT
    user_id,
    title,
    created_at,
    ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) as rn
  FROM posts
) ranked
WHERE rn = 1;
-- Batch updates instead of row-by-row
-- Bad
UPDATE posts SET view_count = view_count + 1 WHERE id = ?; -- Called N times
-- Good
UPDATE posts
SET view_count = view_count + v.increment
FROM (VALUES (1, 5), (2, 3), (3, 10)) AS v(id, increment)
WHERE posts.id = v.id;
```
### Connection Pooling
#### Node.js (pg pool)
```typescript
import { Pool } from 'pg';
const pool = new Pool({
host: 'localhost',
port: 5432,
database: 'myapp',
user: 'postgres',
password: 'password',
max: 20, // Maximum pool size
idleTimeoutMillis: 30000,
connectionTimeoutMillis: 2000,
});
// Use pool for queries
async function getUserById(id: number) {
const client = await pool.connect();
try {
const result = await client.query('SELECT * FROM users WHERE id = $1', [id]);
return result.rows[0];
} finally {
client.release(); // Always release back to pool
}
}
// Or use pool.query directly (handles acquire/release)
async function getUsers() {
const result = await pool.query('SELECT * FROM users LIMIT 100');
return result.rows;
}
// Transaction with pool
async function transferFunds(fromId: number, toId: number, amount: number) {
const client = await pool.connect();
try {
await client.query('BEGIN');
await client.query(
'UPDATE accounts SET balance = balance - $1 WHERE user_id = $2',
[amount, fromId]
);
await client.query(
'UPDATE accounts SET balance = balance + $1 WHERE user_id = $2',
[amount, toId]
);
await client.query('COMMIT');
} catch (error) {
await client.query('ROLLBACK');
throw error;
} finally {
client.release();
}
}
```
#### Python (SQLAlchemy)
```python
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.pool import QueuePool
# Create engine with connection pool
engine = create_engine(
'postgresql://user:password@localhost/dbname',
poolclass=QueuePool,
pool_size=10,
max_overflow=20,
pool_pre_ping=True, # Verify connections before using
pool_recycle=3600, # Recycle connections after 1 hour
)
Session = sessionmaker(bind=engine)
# Use session
def get_user(user_id: int):
session = Session()
try:
user = session.query(User).filter(User.id == user_id).first()
return user
finally:
session.close()
# Context manager for automatic cleanup
from contextlib import contextmanager
@contextmanager
def get_db_session():
session = Session()
try:
yield session
session.commit()
except Exception:
session.rollback()
raise
finally:
session.close()
# Usage
with get_db_session() as session:
user = session.query(User).filter(User.id == 1).first()
user.name = "Updated Name"
```
### Database Migration Patterns
#### Migrations with TypeORM (Node.js)
```typescript
// migrations/1234567890-CreateUsers.ts
import { MigrationInterface, QueryRunner, Table, TableIndex } from 'typeorm';
export class CreateUsers1234567890 implements MigrationInterface {
public async up(queryRunner: QueryRunner): Promise<void> {
await queryRunner.createTable(
new Table({
name: 'users',
columns: [
{
name: 'id',
type: 'int',
isPrimary: true,
isGenerated: true,
generationStrategy: 'increment',
},
{
name: 'email',
type: 'varchar',
length: '255',
isUnique: true,
},
{
name: 'created_at',
type: 'timestamp',
default: 'CURRENT_TIMESTAMP',
},
],
})
);
await queryRunner.createIndex(
'users',
new TableIndex({
name: 'idx_users_email',
columnNames: ['email'],
})
);
}
public async down(queryRunner: QueryRunner): Promise<void> {
await queryRunner.dropTable('users');
}
}
```
#### Alembic Migrations (Python)
```python
# alembic/versions/001_create_users.py
from alembic import op
import sqlalchemy as sa
def upgrade():
op.create_table(
'users',
sa.Column('id', sa.Integer(), primary_key=True),
sa.Column('email', sa.String(255), unique=True, nullable=False),
sa.Column('username', sa.String(50), unique=True, nullable=False),
sa.Column('created_at', sa.DateTime(), server_default=sa.func.now()),
)
op.create_index('idx_users_email', 'users', ['email'])
def downgrade():
op.drop_index('idx_users_email', table_name='users')
op.drop_table('users')
```
#### Zero-Downtime Migration Strategies
```sql
-- Adding a NOT NULL column safely
-- Step 1: Add column as nullable
ALTER TABLE users ADD COLUMN phone VARCHAR(20);
-- Step 2: Backfill data (batch by id range on large tables to limit lock time)
UPDATE users SET phone = '000-000-0000' WHERE phone IS NULL;
-- Step 3: Add NOT NULL constraint
ALTER TABLE users ALTER COLUMN phone SET NOT NULL;
-- Renaming a column safely
-- Step 1: Add new column
ALTER TABLE users ADD COLUMN full_name VARCHAR(200);
-- Step 2: Dual-write to both columns in application code
-- Step 3: Backfill data
UPDATE users SET full_name = name WHERE full_name IS NULL;
-- Step 4: Switch reads to new column in application
-- Step 5: Drop old column
ALTER TABLE users DROP COLUMN name;
```
## NoSQL Database Patterns
### MongoDB Schema Design
#### Embedding vs Referencing
```javascript
// Embedding (One-to-Few)
{
_id: ObjectId("..."),
username: "johndoe",
email: "john@example.com",
addresses: [
{
type: "home",
street: "123 Main St",
city: "New York",
zip: "10001"
},
{
type: "work",
street: "456 Office Blvd",
city: "New York",
zip: "10002"
}
]
}
// Referencing (One-to-Many or Many-to-Many)
// Users collection
{
_id: ObjectId("user1"),
username: "johndoe",
email: "john@example.com"
}
// Posts collection
{
_id: ObjectId("post1"),
user_id: ObjectId("user1"),
title: "My Post",
content: "...",
created_at: ISODate("2024-01-01")
}
// Extended Reference Pattern (Denormalization)
{
_id: ObjectId("post1"),
user: {
_id: ObjectId("user1"),
username: "johndoe",
avatar: "https://..."
},
title: "My Post",
content: "..."
}
```
#### Compound Indexes
```javascript
// Create compound index
db.posts.createIndex({ user_id: 1, created_at: -1 });
// Index with unique constraint
db.users.createIndex({ email: 1 }, { unique: true });
// Partial index
db.orders.createIndex(
{ status: 1, created_at: -1 },
{ partialFilterExpression: { status: { $in: ["pending", "processing"] } } }
);
// Text index for full-text search
db.articles.createIndex({ title: "text", content: "text" });
// Geospatial index
db.locations.createIndex({ coordinates: "2dsphere" });
```
#### Aggregation Pipeline
```javascript
// Complex aggregation example
db.orders.aggregate([
// Stage 1: Match recent orders
{
$match: {
created_at: { $gte: new Date("2024-01-01") },
status: "completed"
}
},
// Stage 2: Lookup user data
{
$lookup: {
from: "users",
localField: "user_id",
foreignField: "_id",
as: "user"
}
},
// Stage 3: Unwind user array
{ $unwind: "$user" },
// Stage 4: Group by user and calculate totals
{
$group: {
_id: "$user._id",
username: { $first: "$user.username" },
total_orders: { $sum: 1 },
total_revenue: { $sum: "$total_amount" },
avg_order_value: { $avg: "$total_amount" }
}
},
// Stage 5: Sort by revenue
{ $sort: { total_revenue: -1 } },
// Stage 6: Limit results
{ $limit: 10 }
]);
// Use $facet for multiple aggregations in one query
db.products.aggregate([
{
$facet: {
categoryCounts: [
{ $group: { _id: "$category", count: { $sum: 1 } } }
],
priceRanges: [
{
$bucket: {
groupBy: "$price",
boundaries: [0, 25, 50, 100, 500],
default: "500+",
output: { count: { $sum: 1 } }
}
}
],
topRated: [
{ $sort: { rating: -1 } },
{ $limit: 5 }
]
}
}
]);
```
### Redis Patterns
#### Caching Strategy
```typescript
import crypto from 'node:crypto';
import Redis from 'ioredis';
const redis = new Redis({
host: 'localhost',
port: 6379,
retryStrategy: (times) => Math.min(times * 50, 2000),
});
// Cache-aside pattern
async function getUser(userId: string) {
const cacheKey = `user:${userId}`;
// Try cache first
const cached = await redis.get(cacheKey);
if (cached) {
return JSON.parse(cached);
}
// Cache miss - fetch from database
const user = await db.users.findById(userId);
// Store in cache with TTL
await redis.setex(cacheKey, 3600, JSON.stringify(user));
return user;
}
// Invalidate cache on update
async function updateUser(userId: string, data: UserData) {
const user = await db.users.update(userId, data);
// Invalidate cache
await redis.del(`user:${userId}`);
return user;
}
// Rate limiting with Redis
async function checkRateLimit(userId: string, limit: number, window: number) {
const key = `ratelimit:${userId}`;
const current = await redis.incr(key);
if (current === 1) {
await redis.expire(key, window);
}
return current <= limit;
}
// Usage
const allowed = await checkRateLimit('user123', 100, 60); // 100 requests per minute
if (!allowed) {
throw new Error('Rate limit exceeded');
}
// Distributed locking
async function withLock<T>(
lockKey: string,
ttl: number,
fn: () => Promise<T>
): Promise<T> {
const lockValue = crypto.randomUUID();
const acquired = await redis.set(lockKey, lockValue, 'EX', ttl, 'NX');
if (!acquired) {
throw new Error('Could not acquire lock');
}
try {
return await fn();
} finally {
// Release lock only if we still own it
const script = `
if redis.call("get", KEYS[1]) == ARGV[1] then
return redis.call("del", KEYS[1])
else
return 0
end
`;
await redis.eval(script, 1, lockKey, lockValue);
}
}
// Pub/Sub pattern
const publisher = new Redis();
const subscriber = new Redis();
subscriber.subscribe('notifications', (err, count) => {
console.log(`Subscribed to ${count} channels`);
});
subscriber.on('message', (channel, message) => {
console.log(`Received ${message} from ${channel}`);
});
publisher.publish('notifications', JSON.stringify({
type: 'new_message',
userId: '123',
content: 'Hello!'
}));
```
## Database Performance Best Practices
### 1. Use Connection Pooling
Always use a connection pool; opening a new database connection per request adds significant latency and server load.
### 2. Index Strategically
- Index foreign keys and columns used in WHERE, JOIN, ORDER BY
- Avoid over-indexing (impacts write performance)
- Use composite indexes for multi-column queries
- Monitor index usage and remove unused ones
### 3. Optimize Queries
- Use EXPLAIN to analyze query plans
- Avoid SELECT * - fetch only needed columns
- Use pagination for large result sets
- Batch operations when possible
### 4. Cache Frequently Accessed Data
- Use Redis or Memcached for hot data
- Implement cache invalidation strategy
- Consider read replicas for read-heavy workloads (replication sketch below)
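One hedged way to add a read replica in PostgreSQL (10+) is logical replication; the host, user, and password below are placeholders:
```sql
-- On the primary: publish the tables to replicate
CREATE PUBLICATION app_pub FOR ALL TABLES;
-- On the replica (the same schema must already exist there):
CREATE SUBSCRIPTION app_sub
CONNECTION 'host=primary.internal dbname=myapp user=replicator password=secret'
PUBLICATION app_pub;
```
The application then routes SELECTs to the replica and writes to the primary, keeping in mind that replication is asynchronous, so reads may lag slightly.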
### 5. Partition Large Tables
```sql
-- Range partitioning by date
CREATE TABLE events (
id SERIAL,
event_type VARCHAR(50),
data JSONB,
created_at TIMESTAMP
) PARTITION BY RANGE (created_at);
CREATE TABLE events_2024_q1 PARTITION OF events
FOR VALUES FROM ('2024-01-01') TO ('2024-04-01');
CREATE TABLE events_2024_q2 PARTITION OF events
FOR VALUES FROM ('2024-04-01') TO ('2024-07-01');
```
### 6. Monitor and Analyze
- Track slow queries
- Monitor connection pool usage
- Analyze query performance trends
- Set up alerts for anomalies (see the activity probe below)
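Besides pg_stat_statements, pg_stat_activity shows what is running right now; a probe like the sketch below (the 5-minute threshold is illustrative) makes a simple alerting rule for stuck queries:
```sql
-- Queries running longer than 5 minutes, longest first
SELECT
  pid,
  now() - query_start AS duration,
  state,
  left(query, 100) AS query
FROM pg_stat_activity
WHERE state <> 'idle'
  AND query_start < now() - interval '5 minutes'
ORDER BY duration DESC;
-- Cancel a runaway query if needed: SELECT pg_cancel_backend(pid);
```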
### 7. Use Appropriate Data Types
```sql
-- Good
CREATE TABLE products (
id SERIAL PRIMARY KEY,
price NUMERIC(10,2), -- Exact decimal for money
created_at TIMESTAMP WITH TIME ZONE
);
-- Bad
CREATE TABLE products (
id VARCHAR(255) PRIMARY KEY, -- Wasteful for numeric IDs
price FLOAT, -- Floating point for money (precision issues)
created_at VARCHAR(50) -- String for dates
);
```
### 8. Implement Proper Backup Strategy
- Regular automated backups
- Test restore procedures
- Use point-in-time recovery when possible (WAL-archiving sketch below)
- Replicate to multiple regions for disaster recovery
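Point-in-time recovery requires continuous WAL archiving on top of periodic base backups. A minimal PostgreSQL sketch; the archive path is a placeholder, and these settings only take effect after a server restart:
```sql
ALTER SYSTEM SET wal_level = 'replica';
ALTER SYSTEM SET archive_mode = 'on';
ALTER SYSTEM SET archive_command = 'test ! -f /backups/wal/%f && cp %p /backups/wal/%f';
-- Restart the server, then take a base backup, e.g. with:
--   pg_basebackup -D /backups/base -Ft -z -P
-- Restore = base backup + replay of archived WAL up to a target timestamp.
```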

plugin.lock.json Normal file

@@ -0,0 +1,49 @@
{
  "$schema": "internal://schemas/plugin.lock.v1.json",
  "pluginId": "gh:Dieshen/claude_marketplace:plugins/database-design",
  "normalized": {
    "repo": null,
    "ref": "refs/tags/v20251128.0",
    "commit": "4d7bb646d96c2f580e2bc6aa57575fb20e4ea1c5",
    "treeHash": "6d8032099c271048d260bef2ee78f247af22c94ba8e9efbc0ccbb8c0de264b98",
    "generatedAt": "2025-11-28T10:10:23.712449Z",
    "toolVersion": "publish_plugins.py@0.2.0"
  },
  "origin": {
    "remote": "git@github.com:zhongweili/42plugin-data.git",
    "branch": "master",
    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
  },
  "manifest": {
    "name": "database-design",
    "description": "Database design, optimization, and query performance patterns for SQL and NoSQL databases",
    "version": "1.0.0"
  },
  "content": {
    "files": [
      {
        "path": "README.md",
        "sha256": "9a1370c64a63c03f0613f35d2a847e74ba7569c950c7e9bc1c5ab10d5386c282"
      },
      {
        "path": "agents/database-architect.md",
        "sha256": "147dce90a2cd13c76bfd778851404cb26d993244e89e8fdcb3dd30121680d212"
      },
      {
        "path": ".claude-plugin/plugin.json",
        "sha256": "ac210fcd56ea61d621976930fe928acb9667a49bf4ad85d5318fe88410628567"
      },
      {
        "path": "commands/db-patterns.md",
        "sha256": "bd6a9bb583e731de6fa65e27bfd388187e31bd927ffb108efe30e06aec013b63"
      }
    ],
    "dirSha256": "6d8032099c271048d260bef2ee78f247af22c94ba8e9efbc0ccbb8c0de264b98"
  },
  "security": {
    "scannedAt": null,
    "scannerVersion": null,
    "flags": []
  }
}