From adfd3add64127e90a946a8ea98900e97d1e4a9db Mon Sep 17 00:00:00 2001
From: Zhongwei Li
Date: Sun, 30 Nov 2025 08:59:27 +0800
Subject: [PATCH] Initial commit

---
 .claude-plugin/plugin.json | 12 +
 README.md | 3 +
 plugin.lock.json | 89 ++
 skills/using-web-backend/SKILL.md | 135 ++
 .../using-web-backend/api-authentication.md | 1381 +++++++++++++++++
 skills/using-web-backend/api-documentation.md | 944 +++++++++++
 skills/using-web-backend/api-testing.md | 1013 ++++++++++++
 .../using-web-backend/database-integration.md | 1117 +++++++++++++
 .../using-web-backend/django-development.md | 890 +++++++++++
 .../using-web-backend/express-development.md | 872 +++++++++++
 .../using-web-backend/fastapi-development.md | 500 ++++++
 .../using-web-backend/graphql-api-design.md | 954 ++++++++++++
 skills/using-web-backend/message-queues.md | 993 ++++++++++++
 .../microservices-architecture.md | 592 +++++++
 skills/using-web-backend/rest-api-design.md | 523 +++++++
 15 files changed, 10018 insertions(+)
 create mode 100644 .claude-plugin/plugin.json
 create mode 100644 README.md
 create mode 100644 plugin.lock.json
 create mode 100644 skills/using-web-backend/SKILL.md
 create mode 100644 skills/using-web-backend/api-authentication.md
 create mode 100644 skills/using-web-backend/api-documentation.md
 create mode 100644 skills/using-web-backend/api-testing.md
 create mode 100644 skills/using-web-backend/database-integration.md
 create mode 100644 skills/using-web-backend/django-development.md
 create mode 100644 skills/using-web-backend/express-development.md
 create mode 100644 skills/using-web-backend/fastapi-development.md
 create mode 100644 skills/using-web-backend/graphql-api-design.md
 create mode 100644 skills/using-web-backend/message-queues.md
 create mode 100644 skills/using-web-backend/microservices-architecture.md
 create mode 100644 skills/using-web-backend/rest-api-design.md

diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json
new file mode 100644
index 0000000..82d14f1
--- /dev/null
+++ b/.claude-plugin/plugin.json
@@ -0,0 +1,12 @@
+{
+  "name": "axiom-web-backend",
+  "description": "Web backend development expertise across FastAPI, Django, Express, REST/GraphQL APIs, microservices, and production deployment patterns",
+  "version": "1.0.2",
+  "author": {
+    "name": "tachyon-beep",
+    "email": "zhongweili@tubi.tv"
+  },
+  "skills": [
+    "./skills"
+  ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a750686
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# axiom-web-backend
+
+Web backend development expertise across FastAPI, Django, Express, REST/GraphQL APIs, microservices, and production deployment patterns
diff --git a/plugin.lock.json b/plugin.lock.json
new file mode 100644
index 0000000..d9902b4
--- /dev/null
+++ b/plugin.lock.json
@@ -0,0 +1,89 @@
+{
+  "$schema": "internal://schemas/plugin.lock.v1.json",
+  "pluginId": "gh:tachyon-beep/skillpacks:plugins/axiom-web-backend",
+  "normalized": {
+    "repo": null,
+    "ref": "refs/tags/v20251128.0",
+    "commit": "0764e5b504aacbd4da2f48bdec21358663278d74",
+    "treeHash": "364f2c0aaeb1af51d1ccf2720bc551e971fce5f23cd7f2c2b18f63f76a1f6ac7",
+    "generatedAt": "2025-11-28T10:28:31.553642Z",
+    "toolVersion": "publish_plugins.py@0.2.0"
+  },
+  "origin": {
+    "remote": "git@github.com:zhongweili/42plugin-data.git",
+    "branch": "master",
+    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
+    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
+  },
+  "manifest": {
+    "name": "axiom-web-backend",
+    "description": "Web backend development expertise across FastAPI, Django, Express, REST/GraphQL APIs, microservices, and production deployment patterns",
+    "version": "1.0.2"
+  },
+  "content": {
+    "files": [
+      {
+        "path": "README.md",
+        "sha256": "f3e380170674e70ad093e5df7e66f9da0491072355d9e909bd2739bf674f46a2"
+      },
+      {
+        "path": ".claude-plugin/plugin.json",
+        "sha256": "041ef2dbea7ec879a9b9b0ad4c19907669881b319c0833a96ca9ff4c618f6cf8"
+      },
+      {
+        "path": "skills/using-web-backend/database-integration.md",
+        "sha256": "67d46b6ba3e8c054d5b5e2f0a7834de8b8449f2a3f02522bc47176f928efb41a"
+      },
+      {
+        "path": "skills/using-web-backend/api-authentication.md",
+        "sha256": "a39ba1396e008ab506196585cbac3ca1b05d22890db6dce3fee1a74ea346330f"
+      },
+      {
+        "path": "skills/using-web-backend/fastapi-development.md",
+        "sha256": "5625b5728fe03930ddc68fd37457eee2e86037e78b08fed0dd319b11d05eb40d"
+      },
+      {
+        "path": "skills/using-web-backend/rest-api-design.md",
+        "sha256": "612c2c0e04c74868b2f623c1802bcd83b4a4d10958e599a30260eb0be1f477e1"
+      },
+      {
+        "path": "skills/using-web-backend/microservices-architecture.md",
+        "sha256": "b0deaa7b2652ce38dd653402db0feb6c41dc584b3013f2d44d1e68466158d75b"
+      },
+      {
+        "path": "skills/using-web-backend/api-documentation.md",
+        "sha256": "b385ed6a4b78ac2c43cafe90cba81e944574073023a4c6b10d82cf8d48729987"
+      },
+      {
+        "path": "skills/using-web-backend/api-testing.md",
+        "sha256": "bdd989e1ce000b8cff10bfb7a60e68beb12278cb8161805e90855dca16bf4946"
+      },
+      {
+        "path": "skills/using-web-backend/message-queues.md",
+        "sha256": "82069fb6e4d2fea25c60c1ea762a5addf5a8649b67f451fa2c7f626b6601c670"
+      },
+      {
+        "path": "skills/using-web-backend/graphql-api-design.md",
+        "sha256": "5415ec8cec79237f4c60cb1c7949b7091028f669f95853af3baf15c25bf83e55"
+      },
+      {
+        "path": "skills/using-web-backend/SKILL.md",
+        "sha256": "eb0a96a46e863f347512c29ef57624657c3380f7f056023339d3679771659541"
+      },
+      {
+        "path": "skills/using-web-backend/express-development.md",
+        "sha256": "af60cd4f592ea5ba20ff4749e2337932278fb3f4b21fe36834b9a07c6c481fb5"
+      },
+      {
+        "path": "skills/using-web-backend/django-development.md",
+        "sha256": "338e5ac3e7d6cda4138618abe42c0af640497b26e74b1f0c934cf97a493ab078"
+      }
+    ],
+    "dirSha256": "364f2c0aaeb1af51d1ccf2720bc551e971fce5f23cd7f2c2b18f63f76a1f6ac7"
+  },
+  "security": {
+    "scannedAt": null,
+    "scannerVersion": null,
+    "flags": []
+  }
+}
\ No newline at end of file
diff --git a/skills/using-web-backend/SKILL.md b/skills/using-web-backend/SKILL.md
new file mode 100644
index 0000000..aad2b1e
--- /dev/null
+++ b/skills/using-web-backend/SKILL.md
@@ -0,0 +1,135 @@
+---
+name: using-web-backend
+description: Use when building web APIs, backend services, or encountering FastAPI/Django/Express/GraphQL questions, microservices architecture, authentication, or message queues - routes to 11 specialist skills rather than giving surface-level generic advice
+---
+
+# Using Web Backend Skills
+
+## Overview
+
+**This router directs you to specialized web backend skills. Each specialist provides deep expertise in their domain.**
+
+**Core principle:** Different backend challenges require different specialist knowledge. Routing to the right skill gives better results than generic advice.
+
+## When to Use
+
+Use this router when encountering:
+
+- **Framework-specific questions**: FastAPI, Django, Express implementation details
+- **API design**: REST or GraphQL architecture, versioning, schema design
+- **Architecture patterns**: Microservices, message queues, event-driven systems
+- **Backend infrastructure**: Authentication, database integration, deployment
+- **Testing & documentation**: API testing strategies, documentation approaches
+
+## Quick Reference - Routing Table
+
+| User Question Contains | Route To | Why |
+|------------------------|----------|-----|
+| FastAPI, Pydantic, async Python APIs | [fastapi-development.md](fastapi-development.md) | FastAPI-specific patterns, dependency injection, async |
+| Django, ORM, views, middleware | [django-development.md](django-development.md) | Django conventions, ORM optimization, settings |
+| Express, Node.js backend, middleware | [express-development.md](express-development.md) | Express patterns, error handling, async flow |
+| REST API, endpoints, versioning, pagination | [rest-api-design.md](rest-api-design.md) | REST principles, resource design, hypermedia |
+| GraphQL, schema, resolvers, N+1 | [graphql-api-design.md](graphql-api-design.md) | Schema design, query optimization, federation |
+| Microservices, service mesh, boundaries | [microservices-architecture.md](microservices-architecture.md) | Service design, communication, consistency |
+| Message queues, RabbitMQ, Kafka, events | [message-queues.md](message-queues.md) | Queue patterns, reliability, event-driven |
+| JWT, OAuth2, API keys, auth | [api-authentication.md](api-authentication.md) | Auth patterns, token management, security |
+| Database connections, ORM, migrations | [database-integration.md](database-integration.md) | Connection pooling, query optimization, migrations |
+| API testing, integration tests, mocking | [api-testing.md](api-testing.md) | Testing strategies, contract testing, mocking |
+| OpenAPI, Swagger, API docs | [api-documentation.md](api-documentation.md) | API docs (also see: muna-technical-writer) |
+
+## Cross-References to Other Packs
+
+**Before routing, check if these packs are more appropriate:**
+
+- **Security concerns** → `ordis-security-architect` (threat modeling, OWASP, security patterns)
+- **API usability/UX** → `lyra-ux-designer` (error messages, API ergonomics)
+- **Python code patterns** → `axiom-python-engineering` (Python-specific engineering)
+- **Documentation writing** → `muna-technical-writer` (technical writing, clarity)
+
+## How to Route
+
+**STOP: Do not attempt to answer web backend questions yourself.**
+
+**Instead:**
+
+1. **Identify the specialist domain** from the routing table above
+2. **State which specialist skill you're using**: "I'll use the `[skill-name]` skill for this"
+3. **Apply the specialist skill** to provide deep, expert guidance
+
+## Red Flags - Using Generic Knowledge Instead of Specialists
+
+If you catch yourself doing any of these, STOP and route to a specialist:
+
+- ❌ "I'll answer all these diverse backend questions myself"
+- ❌ Giving code examples from "general backend knowledge"
+- ❌ Providing "competent but broad" answers across multiple domains
+- ❌ Not mentioning that specialized skills would give better guidance
+- ❌ "The specialist skill isn't available right now, so I'll answer myself"
+
+**All of these mean: You're being a jack-of-all-trades. Route to the specialist.**
+
+## Rationalization Table
+
+| Excuse | Reality |
+|--------|---------|
+| "I can handle this backend question myself" | Specialists provide deeper expertise than generic knowledge |
+| "These are all backend topics" | Backend spans 11 distinct specialist domains |
+| "I'll just give a quick answer" | Quick answers miss edge cases and production patterns |
+| "The user didn't ask for specialized help" | Better results come from routing to specialists proactively |
+| "Specialist skill not available, I'll answer instead" | State which skill would help, explain routing, guide user to install if needed |
+
+## Example Routing
+
+**User**: "I need to build a REST API with FastAPI. How should I structure my dependency injection?"
+
+**Your response**:
+"I'll use the [fastapi-development.md](fastapi-development.md) skill to help you structure dependency injection properly. This specialist skill covers FastAPI patterns in depth."
+
+[Then apply fastapi-development skill]
+
+**User**: "My GraphQL API has N+1 problems and we're splitting our monolith into microservices."
+
+**Your response**:
+"You have two distinct specialist areas:
+1. GraphQL optimization → I'll use [graphql-api-design.md](graphql-api-design.md)
+2. Microservices architecture → I'll use [microservices-architecture.md](microservices-architecture.md)
+
+Let me address the GraphQL N+1 problem first with the graphql-api-design skill..."
+
+[Apply each specialist skill to its domain]
+
+## Why This Matters
+
+**Without routing**: Surface-level answers covering multiple domains broadly
+**With routing**: Deep expertise addressing edge cases, production patterns, and domain-specific best practices
+
+Specialist skills = better results.
+
+---
+
+## Web Backend Specialist Skills Catalog
+
+After routing, load the appropriate specialist skill for detailed guidance:
+
+### Framework-Specific Skills
+
+1. [fastapi-development.md](fastapi-development.md) - FastAPI patterns, dependency injection, async/await, Pydantic validation, background tasks
+2. [django-development.md](django-development.md) - Django conventions, ORM optimization, middleware, settings, management commands
+3. [express-development.md](express-development.md) - Express patterns, middleware chains, error handling, async flow control
+
+### API Design Skills
+
+4. [rest-api-design.md](rest-api-design.md) - REST principles, resource design, versioning, pagination, HATEOAS, HTTP semantics
+5. [graphql-api-design.md](graphql-api-design.md) - GraphQL schema design, resolver patterns, N+1 query optimization, federation
+
+### Architecture & Infrastructure
+
+6. [microservices-architecture.md](microservices-architecture.md) - Service boundaries, communication patterns, distributed consistency, service mesh
+7. [message-queues.md](message-queues.md) - Queue patterns, reliability guarantees, event-driven architecture, RabbitMQ/Kafka
+
+### Cross-Cutting Concerns
+
+8. [api-authentication.md](api-authentication.md) - JWT, OAuth2, API keys, token management, auth patterns
+9. [database-integration.md](database-integration.md) - Connection pooling, query optimization, migrations, ORM patterns
+10. [api-testing.md](api-testing.md) - Testing strategies, contract testing, integration tests, mocking
+11. [api-documentation.md](api-documentation.md) - OpenAPI/Swagger, API documentation patterns, schema generation
diff --git a/skills/using-web-backend/api-authentication.md b/skills/using-web-backend/api-authentication.md
new file mode 100644
index 0000000..ba0bff0
--- /dev/null
+++ b/skills/using-web-backend/api-authentication.md
@@ -0,0 +1,1381 @@
+
+# API Authentication
+
+## Overview
+
+**API authentication specialist covering token patterns, OAuth2 flows, security hardening, compliance, monitoring, and production operations.**
+
+**Core principle**: Authentication proves identity; authorization controls access - implement defense-in-depth with short-lived tokens, secure storage, rotation, monitoring, and assume breach to minimize blast radius.
+
+## When to Use This Skill
+
+Use when encountering:
+
+- **Authentication strategy**: JWT vs sessions vs OAuth2 vs API keys
+- **OAuth2 flows**: Authorization Code, PKCE, Client Credentials, token exchange
+- **Token security**: Storage, rotation, revocation, theft detection
+- **Service-to-service**: mTLS, service mesh, zero-trust
+- **Mobile auth**: Secure storage, biometrics, certificate pinning
+- **Security hardening**: Rate limiting, abuse prevention, anomaly detection
+- **Monitoring**: Auth metrics, distributed tracing, audit logs
+- **Compliance**: GDPR, PCI-DSS, SOC 2, audit trails
+- **Multi-tenancy**: Tenant isolation, per-tenant policies
+- **Testing**: Mock auth, development workflows
+
+**Do NOT use for**:
+- Application-specific business logic → Use domain skills
+- Infrastructure security (firewalls, IDS) → `ordis-security-architect`
+- Frontend auth UI → `lyra-ux-designer`
+
+## Quick Reference - Authentication Patterns
+
+| Pattern | Use Case | Security | Complexity | Revocation |
+|---------|----------|----------|------------|------------|
+| **JWT** | Mobile apps, APIs | Medium | Low | Hard (requires blacklist) |
+| **Sessions** | Web apps, admin panels | High | Medium | Easy (delete session) |
+| **OAuth2** | Third-party access, SSO | High | High | Medium (refresh rotation) |
+| **API Keys** | Service-to-service, webhooks | Medium | Low | Easy (rotate keys) |
+| **mTLS** | Service mesh, zero-trust | Very High | High | Medium (cert revocation) |
+
+## JWT vs Sessions Decision Matrix
+
+| Factor | JWT | Server-Side Sessions | Winner |
+|--------|-----|---------------------|--------|
+| **Mobile apps** | Excellent (stateless) | Poor (sticky sessions needed) | JWT |
+| **Horizontal scaling** | Excellent (no shared state) | Requires sticky sessions or Redis | JWT |
+| **Revocation** | Poor (need blacklist or short TTL) | Excellent (delete session) | Sessions |
+| **Payload size** | Large (sent every request) | Small (session ID only) | Sessions |
+| **Server memory** | None (stateless) | High (session store) | JWT |
+| **XSS vulnerability** | High (if stored in localStorage) | Low (httpOnly cookies) | Sessions |
+| **CSRF vulnerability** | None (bearer token) | High (requires CSRF tokens) | JWT |
+
+**Production Recommendation**: **Hybrid Approach**
+
+```
+Architecture:
+- Short-lived JWTs (15 min) for API access
+- Long-lived refresh tokens stored server-side (session-like)
+- Refresh endpoint returns new JWT + rotates refresh token
+
+Benefits:
+- Stateless API access (JWT)
+- Secure revocation (server-side refresh tokens)
+- Mobile-friendly (no cookies required)
+- Horizontal scaling (minimal session state)
+```
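+
+A minimal sketch of the hybrid issue flow, assuming an RS256 key pair and a server-side `db.refreshTokens` store (both names are illustrative, not a specific library API):
+
+```javascript
+const crypto = require('crypto');
+const jwt = require('jsonwebtoken');
+
+async function issueTokens(user, privateKey) {
+  // Short-lived, stateless access token (15 min)
+  const accessToken = jwt.sign(
+    { sub: user.id, scopes: user.scopes },
+    privateKey,
+    { algorithm: 'RS256', expiresIn: '15m' }
+  );
+
+  // Opaque refresh token, persisted server-side so it can be revoked
+  const refreshToken = crypto.randomBytes(32).toString('base64url');
+  await db.refreshTokens.insert({ // assumption: your token store
+    tokenHash: crypto.createHash('sha256').update(refreshToken).digest('hex'),
+    userId: user.id,
+    expiresAt: new Date(Date.now() + 7 * 24 * 60 * 60 * 1000) // 7 days
+  });
+
+  return { access_token: accessToken, refresh_token: refreshToken, expires_in: 900 };
+}
+```
+
+The refresh endpoint then validates the stored hash, rotates the refresh token, and mints a new JWT - see the rotation pattern later in this skill.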
+|-------------|-----------|----------|----------| +| **Web app (server-side)** | Authorization Code + PKCE | High | User login with backend | +| **SPA** | Authorization Code + PKCE | Medium-High | React/Vue/Angular apps | +| **Mobile app** | Authorization Code + PKCE | High | iOS/Android apps | +| **Service-to-service** | Client Credentials | High | Background jobs, APIs | +| **Device** | Device Authorization Grant | Medium | Smart TV, IoT devices | +| **Legacy** | ~~Password Grant~~ | DEPRECATED | Don't use | + +### Authorization Code + PKCE (RFC 7636) + +**Why PKCE?** Prevents authorization code interception attacks + +```javascript +// Step 1: Generate PKCE challenge +const codeVerifier = crypto.randomBytes(32).toString('base64url'); +const codeChallenge = crypto + .createHash('sha256') + .update(codeVerifier) + .digest('base64url'); + +// Step 2: Redirect to authorization endpoint +const authUrl = new URL('https://auth.example.com/authorize'); +authUrl.searchParams.set('response_type', 'code'); +authUrl.searchParams.set('client_id', 'your_client_id'); +authUrl.searchParams.set('redirect_uri', 'https://yourapp.com/callback'); +authUrl.searchParams.set('scope', 'read write offline_access'); +authUrl.searchParams.set('code_challenge', codeChallenge); +authUrl.searchParams.set('code_challenge_method', 'S256'); +authUrl.searchParams.set('state', generateStateToken()); // CSRF protection + +// Step 3: Exchange code for token +const tokenResponse = await fetch('https://auth.example.com/token', { + method: 'POST', + headers: { 'Content-Type': 'application/x-www-form-urlencoded' }, + body: new URLSearchParams({ + grant_type: 'authorization_code', + code: receivedCode, + redirect_uri: 'https://yourapp.com/callback', + client_id: 'your_client_id', + code_verifier: codeVerifier // Proves you initiated the flow + }) +}); + +// Response +{ + "access_token": "eyJhbGc...", + "token_type": "Bearer", + "expires_in": 900, + "refresh_token": "zxcvbnm...", + "scope": "read write offline_access" +} +``` + +### Client Credentials (Service-to-Service) + +```javascript +const tokenResponse = await fetch('https://auth.example.com/token', { + method: 'POST', + headers: { + 'Content-Type': 'application/x-www-form-urlencoded', + 'Authorization': `Basic ${base64(client_id + ':' + client_secret)}` + }, + body: new URLSearchParams({ + grant_type: 'client_credentials', + scope: 'api.read api.write', + audience: 'https://api.example.com' + }) +}); +``` + +## Token Storage Security + +### Storage Security Matrix + +| Storage Location | XSS Risk | CSRF Risk | Accessible to JS | Production Use | +|------------------|----------|-----------|------------------|----------------| +| **localStorage** | ❌ HIGH | ✅ None | Yes | NEVER for tokens | +| **sessionStorage** | ❌ HIGH | ✅ None | Yes | NEVER for tokens | +| **Memory only** | ✅ None | ✅ None | Yes (in-app) | ✅ Access tokens (SPA) | +| **httpOnly cookie** | ✅ None | ❌ HIGH | No | ✅ Refresh tokens (+SameSite) | +| **Secure + httpOnly + SameSite=Strict** | ✅ None | ✅ Low | No | ✅ BEST for web | +| **iOS Keychain** | ✅ None | ✅ N/A | No (secure enclave) | ✅ Mobile apps | +| **Android Keystore** | ✅ None | ✅ N/A | No (hardware-backed) | ✅ Mobile apps | + +### Web App Pattern (BFF - Backend For Frontend) + +```javascript +// Frontend - access token in memory only +class AuthService { + #accessToken = null; // Private field, lost on refresh + + async callAPI(endpoint) { + if (!this.#accessToken || this.isExpired(this.#accessToken)) { + this.#accessToken = await 
this.refreshAccessToken(); + } + + return fetch(endpoint, { + headers: { 'Authorization': `Bearer ${this.#accessToken}` } + }); + } + + async refreshAccessToken() { + // Calls BFF, which reads httpOnly cookie + const response = await fetch('/api/auth/refresh', { + method: 'POST', + credentials: 'include' // Send httpOnly cookie + }); + + const { access_token } = await response.json(); + return access_token; + } +} + +// Backend (BFF) - refresh endpoint +app.post('/api/auth/refresh', async (req, res) => { + const refreshToken = req.cookies.refresh_token; // httpOnly cookie + + // Validate and rotate refresh token + const newTokens = await rotateRefreshToken(refreshToken); + + // Set new httpOnly cookie + res.cookie('refresh_token', newTokens.refresh_token, { + httpOnly: true, + secure: true, + sameSite: 'strict', + maxAge: 7 * 24 * 60 * 60 * 1000 // 7 days + }); + + res.json({ access_token: newTokens.access_token, expires_in: 900 }); +}); +``` + +### Mobile App Pattern + +```swift +// iOS - Keychain storage +import Security + +class TokenStorage { + func saveToken(_ token: String, forKey key: String) { + let data = token.data(using: .utf8)! + + let query: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrAccount as String: key, + kSecValueData as String: data, + kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlockedThisDeviceOnly + ] + + SecItemDelete(query as CFDictionary) // Delete old + SecItemAdd(query as CFDictionary, nil) // Add new + } + + func getToken(forKey key: String) -> String? { + let query: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrAccount as String: key, + kSecReturnData as String: true + ] + + var result: AnyObject? + SecItemCopyMatching(query as CFDictionary, &result) + + guard let data = result as? Data else { return nil } + return String(data: data, encoding: .utf8) + } +} +``` + +```kotlin +// Android - EncryptedSharedPreferences +import androidx.security.crypto.EncryptedSharedPreferences +import androidx.security.crypto.MasterKey + +class TokenStorage(context: Context) { + private val masterKey = MasterKey.Builder(context) + .setKeyScheme(MasterKey.KeyScheme.AES256_GCM) + .build() + + private val prefs = EncryptedSharedPreferences.create( + context, + "secure_prefs", + masterKey, + EncryptedSharedPreferences.PrefKeyEncryptionScheme.AES256_SIV, + EncryptedSharedPreferences.PrefValueEncryptionScheme.AES256_GCM + ) + + fun saveToken(key: String, token: String) { + prefs.edit().putString(key, token).apply() + } + + fun getToken(key: String): String? 
{ + return prefs.getString(key, null) + } +} +``` + +## Refresh Token Rotation + +### Pattern: Token Families with Replay Detection + +```javascript +// Database schema +CREATE TABLE refresh_tokens ( + token_hash VARCHAR(64) PRIMARY KEY, + user_id UUID NOT NULL, + family_id UUID NOT NULL, + parent_token_hash VARCHAR(64), + device_id VARCHAR(255), + ip_address INET, + user_agent TEXT, + created_at TIMESTAMP NOT NULL, + expires_at TIMESTAMP NOT NULL, + revoked BOOLEAN DEFAULT false, + revoked_at TIMESTAMP, + revoked_reason TEXT, + INDEX idx_family (family_id), + INDEX idx_user (user_id), + INDEX idx_expires (expires_at) +); + +// Refresh endpoint with rotation +async function refreshTokens(refreshToken, clientInfo) { + const tokenHash = sha256(refreshToken); + const dbToken = await db.query( + 'SELECT * FROM refresh_tokens WHERE token_hash = $1', + [tokenHash] + ); + + // Case 1: Token not found or already revoked + if (!dbToken || dbToken.revoked) { + // Check if this token existed in history + const historical = await db.query( + 'SELECT family_id FROM refresh_tokens WHERE token_hash = $1', + [tokenHash] + ); + + if (historical.length > 0) { + // REPLAY ATTACK DETECTED! + // Revoke entire token family + await db.query( + 'UPDATE refresh_tokens SET revoked = true, revoked_at = NOW(), ' + + 'revoked_reason = $1 WHERE family_id = $2', + ['Replay attack detected', historical[0].family_id] + ); + + await auditLog.critical({ + event: 'token_replay_attack', + user_id: historical[0].user_id, + family_id: historical[0].family_id, + ip: clientInfo.ip + }); + + throw new SecurityError('Token reuse detected - all sessions revoked'); + } + + throw new AuthError('Invalid refresh token'); + } + + // Case 2: Token expired + if (dbToken.expires_at < new Date()) { + throw new AuthError('Refresh token expired'); + } + + // Case 3: Valid token - rotate it + const newRefreshToken = crypto.randomBytes(32).toString('base64url'); + const newAccessToken = generateJWT({ + sub: dbToken.user_id, + scopes: ['read', 'write'], + exp: Math.floor(Date.now() / 1000) + 900 // 15 min + }); + + // Revoke current token + await db.query( + 'UPDATE refresh_tokens SET revoked = true WHERE token_hash = $1', + [tokenHash] + ); + + // Create new token in same family + await db.query( + 'INSERT INTO refresh_tokens ' + + '(token_hash, user_id, family_id, parent_token_hash, device_id, ' + + 'ip_address, user_agent, created_at, expires_at) ' + + 'VALUES ($1, $2, $3, $4, $5, $6, $7, NOW(), NOW() + INTERVAL \'7 days\')', + [ + sha256(newRefreshToken), + dbToken.user_id, + dbToken.family_id, // Same family + tokenHash, // Track lineage + clientInfo.device_id, + clientInfo.ip, + clientInfo.user_agent + ] + ); + + return { + access_token: newAccessToken, + refresh_token: newRefreshToken, + expires_in: 900, + token_type: 'Bearer' + }; +} +``` + +### Advanced Refresh Patterns + +**Absolute expiry** (max lifetime regardless of rotation): + +```javascript +// Add max_family_age to family tracking +CREATE TABLE token_families ( + family_id UUID PRIMARY KEY, + user_id UUID NOT NULL, + created_at TIMESTAMP NOT NULL, + max_lifetime_hours INT DEFAULT 720, // 30 days max + INDEX idx_user (user_id) +); + +// Check absolute expiry +const familyAge = Date.now() - family.created_at; +const maxAge = family.max_lifetime_hours * 60 * 60 * 1000; + +if (familyAge > maxAge) { + throw new AuthError('Session expired - please re-authenticate'); +} +``` + +**Grace period for concurrent requests**: + +```javascript +// Allow small window for race conditions +const 
ROTATION_GRACE_PERIOD_MS = 5000; // 5 seconds + +if (dbToken.revoked && dbToken.revoked_at) { + const timeSinceRevocation = Date.now() - dbToken.revoked_at; + + if (timeSinceRevocation < ROTATION_GRACE_PERIOD_MS) { + // Within grace period - might be concurrent refresh + // Return cached new tokens instead of replay alert + const newTokens = await getChildToken(tokenHash); + if (newTokens) return newTokens; + } + + // Outside grace period - likely replay attack + await revokeTokenFamily(dbToken.family_id); +} +``` + +## Rate Limiting & Abuse Prevention + +### Authentication Endpoint Rate Limits + +```javascript +const rateLimit = require('express-rate-limit'); +const RedisStore = require('rate-limit-redis'); + +// Login endpoint - strict limits +const loginLimiter = rateLimit({ + store: new RedisStore({ client: redisClient }), + windowMs: 15 * 60 * 1000, // 15 minutes + max: 5, // Max 5 attempts + message: 'Too many login attempts, please try again later', + keyGenerator: (req) => { + // Rate limit by IP + username combination + return `login:${req.ip}:${req.body.username}`; + }, + handler: (req, res) => { + auditLog.warning({ + event: 'rate_limit_exceeded', + endpoint: '/auth/login', + ip: req.ip, + username: req.body.username + }); + + res.status(429).json({ + error: 'rate_limit_exceeded', + retry_after: res.getHeader('Retry-After') + }); + } +}); + +app.post('/auth/login', loginLimiter, async (req, res) => { + // Login logic +}); + +// Refresh endpoint - moderate limits +const refreshLimiter = rateLimit({ + store: new RedisStore({ client: redisClient }), + windowMs: 60 * 1000, // 1 minute + max: 10, // 10 refreshes per minute + keyGenerator: (req) => `refresh:${req.ip}` +}); + +app.post('/auth/refresh', refreshLimiter, async (req, res) => { + // Refresh logic +}); +``` + +### Account Lockout After Failed Attempts + +```javascript +async function attemptLogin(username, password, clientInfo) { + const lockoutKey = `lockout:${username}`; + const attemptsKey = `attempts:${username}`; + + // Check if account is locked + const lockedUntil = await redis.get(lockoutKey); + if (lockedUntil && Date.now() < parseInt(lockedUntil)) { + throw new AuthError('Account temporarily locked due to failed login attempts'); + } + + // Verify credentials + const user = await db.findUser(username); + const valid = await bcrypt.compare(password, user.password_hash); + + if (!valid) { + // Increment failed attempts + const attempts = await redis.incr(attemptsKey); + await redis.expire(attemptsKey, 15 * 60); // 15 min window + + if (attempts >= 5) { + // Lock account for 30 minutes + const lockUntil = Date.now() + 30 * 60 * 1000; + await redis.set(lockoutKey, lockUntil.toString(), 'EX', 30 * 60); + + await auditLog.warning({ + event: 'account_locked', + user_id: user.id, + attempts, + ip: clientInfo.ip + }); + + throw new AuthError('Account locked due to too many failed attempts'); + } + + throw new AuthError('Invalid credentials'); + } + + // Success - clear attempts + await redis.del(attemptsKey); + + // Check for anomalies + await detectAnomalies(user.id, clientInfo); + + return generateTokens(user); +} +``` + +### Anomaly Detection + +```javascript +async function detectAnomalies(userId, clientInfo) { + // Get user's login history + const recentLogins = await db.query( + 'SELECT ip_address, country, city FROM login_history ' + + 'WHERE user_id = $1 AND created_at > NOW() - INTERVAL \'30 days\' ' + + 'ORDER BY created_at DESC LIMIT 100', + [userId] + ); + + // Check for new location + const knownLocations = new 
Set(recentLogins.map(l => `${l.country}:${l.city}`)); + const currentLocation = `${clientInfo.country}:${clientInfo.city}`; + + if (!knownLocations.has(currentLocation)) { + // New location - require additional verification + await sendSecurityAlert(userId, { + type: 'new_location', + location: currentLocation, + ip: clientInfo.ip + }); + + // Could require: + // - Email verification + // - 2FA challenge + // - Security question + // - Temporary session with limited access + } + + // Check for impossible travel + if (recentLogins.length > 0) { + const lastLogin = recentLogins[0]; + const timeDiff = Date.now() - lastLogin.created_at; + const distance = calculateDistance( + lastLogin.country, + clientInfo.country + ); + + // If 500+ km traveled in < 1 hour, flag as suspicious + if (distance > 500 && timeDiff < 60 * 60 * 1000) { + await auditLog.warning({ + event: 'impossible_travel', + user_id: userId, + from: lastLogin.country, + to: clientInfo.country, + time_diff_minutes: timeDiff / 60000 + }); + + // Require step-up authentication + return { require_2fa: true }; + } + } +} +``` + +## Monitoring & Observability + +### Key Metrics to Track + +| Metric | Alert Threshold | Why It Matters | +|--------|----------------|----------------| +| **Login success rate** | < 80% | Credentials issues, attacks | +| **Token refresh failures** | > 5% | Rotation bugs, clock skew | +| **Rate limit hits** | > 100/hour | Brute force attempts | +| **Account lockouts** | > 10/hour | Credential stuffing attack | +| **Token replay attempts** | > 0 | Security breach | +| **Failed 2FA attempts** | > 3/user/day | Account compromise | +| **New device logins** | Monitor trends | Unusual activity | +| **p99 auth latency** | > 500ms | Performance degradation | + +### Distributed Tracing for Auth Flows + +```javascript +const { trace, context } = require('@opentelemetry/api'); + +const tracer = trace.getTracer('auth-service'); + +async function handleLogin(req, res) { + return tracer.startActiveSpan('auth.login', async (span) => { + span.setAttribute('user.username', req.body.username); + span.setAttribute('client.ip', req.ip); + span.setAttribute('client.user_agent', req.headers['user-agent']); + + try { + // Nested span for credential validation + const user = await tracer.startActiveSpan('auth.validate_credentials', async (validateSpan) => { + const result = await validateCredentials(req.body.username, req.body.password); + validateSpan.setAttribute('validation.success', !!result); + validateSpan.end(); + return result; + }); + + if (!user) { + span.setAttribute('auth.result', 'invalid_credentials'); + throw new AuthError('Invalid credentials'); + } + + // Nested span for token generation + const tokens = await tracer.startActiveSpan('auth.generate_tokens', async (tokenSpan) => { + const result = await generateTokens(user); + tokenSpan.setAttribute('tokens.access_expiry', result.expires_in); + tokenSpan.end(); + return result; + }); + + span.setAttribute('auth.result', 'success'); + span.setAttribute('user.id', user.id); + + res.json(tokens); + } catch (error) { + span.recordException(error); + span.setAttribute('auth.result', 'error'); + throw error; + } finally { + span.end(); + } + }); +} + +// Trace shows: +// auth.login (500ms) +// ├── auth.validate_credentials (300ms) // DB query +// ├── auth.generate_tokens (50ms) // JWT signing +// └── auth.audit_log (150ms) // Logging + +// Can identify bottlenecks: +// - Slow password hashing (increase bcrypt rounds?) +// - Slow DB queries (add indexes?) 
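+
+One way to emit these metrics, sketched with the `prom-client` library (metric and label names are illustrative; `attemptLogin` is the function from the lockout example above and `clientInfo` is an assumed request-parsing helper):
+
+```javascript
+const client = require('prom-client');
+
+const loginAttempts = new client.Counter({
+  name: 'auth_login_attempts_total',
+  help: 'Login attempts by result',
+  labelNames: ['result'] // success | failure
+});
+
+const authLatency = new client.Histogram({
+  name: 'auth_request_duration_seconds',
+  help: 'Auth endpoint latency',
+  buckets: [0.05, 0.1, 0.25, 0.5, 1, 2] // p99 alert threshold sits at 0.5
+});
+
+async function timedLogin(req, res) {
+  const end = authLatency.startTimer(); // observe on completion
+  try {
+    const tokens = await attemptLogin(req.body.username, req.body.password, clientInfo(req));
+    loginAttempts.inc({ result: 'success' });
+    res.json(tokens);
+  } catch (err) {
+    loginAttempts.inc({ result: 'failure' });
+    throw err;
+  } finally {
+    end();
+  }
+}
+```
+
+Alerting rules (success rate below 80%, replay attempts above zero) can then be expressed over these series in your monitoring stack.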
+
+### Distributed Tracing for Auth Flows
+
+```javascript
+const { trace, context } = require('@opentelemetry/api');
+
+const tracer = trace.getTracer('auth-service');
+
+async function handleLogin(req, res) {
+  return tracer.startActiveSpan('auth.login', async (span) => {
+    span.setAttribute('user.username', req.body.username);
+    span.setAttribute('client.ip', req.ip);
+    span.setAttribute('client.user_agent', req.headers['user-agent']);
+
+    try {
+      // Nested span for credential validation
+      const user = await tracer.startActiveSpan('auth.validate_credentials', async (validateSpan) => {
+        const result = await validateCredentials(req.body.username, req.body.password);
+        validateSpan.setAttribute('validation.success', !!result);
+        validateSpan.end();
+        return result;
+      });
+
+      if (!user) {
+        span.setAttribute('auth.result', 'invalid_credentials');
+        throw new AuthError('Invalid credentials');
+      }
+
+      // Nested span for token generation
+      const tokens = await tracer.startActiveSpan('auth.generate_tokens', async (tokenSpan) => {
+        const result = await generateTokens(user);
+        tokenSpan.setAttribute('tokens.access_expiry', result.expires_in);
+        tokenSpan.end();
+        return result;
+      });
+
+      span.setAttribute('auth.result', 'success');
+      span.setAttribute('user.id', user.id);
+
+      res.json(tokens);
+    } catch (error) {
+      span.recordException(error);
+      span.setAttribute('auth.result', 'error');
+      throw error;
+    } finally {
+      span.end();
+    }
+  });
+}
+
+// Trace shows:
+// auth.login (500ms)
+// ├── auth.validate_credentials (300ms)  // DB query
+// ├── auth.generate_tokens (50ms)        // JWT signing
+// └── auth.audit_log (150ms)             // Logging
+
+// Can identify bottlenecks:
+// - Slow password hashing (increase bcrypt rounds?)
+// - Slow DB queries (add indexes?)
+// - Network latency to Redis
+```
+
+### Audit Logging
+
+```javascript
+class AuditLogger {
+  async log(event) {
+    const entry = {
+      timestamp: new Date().toISOString(),
+      event_type: event.type,
+      user_id: event.user_id,
+      ip_address: event.ip,
+      user_agent: event.user_agent,
+      resource: event.resource,
+      action: event.action,
+      result: event.result,
+      metadata: event.metadata,
+      trace_id: trace.getActiveSpan()?.spanContext().traceId // via @opentelemetry/api
+    };
+
+    // Write to multiple destinations
+    await Promise.all([
+      // 1. Append-only audit table (compliance)
+      db.query('INSERT INTO audit_log (...) VALUES (...)', entry),
+
+      // 2. Time-series database (analytics)
+      influxdb.write('auth_events', entry),
+
+      // 3. SIEM (security monitoring)
+      siem.send(entry),
+
+      // 4. Compliance log (immutable, encrypted)
+      complianceLog.append(encrypt(entry))
+    ]);
+  }
+
+  async critical(event) {
+    await this.log({ ...event, severity: 'critical' });
+
+    // Alert on critical events
+    await alerting.send({
+      title: `Critical Auth Event: ${event.event_type}`,
+      details: event,
+      severity: 'critical'
+    });
+  }
+}
+
+// Usage
+await auditLog.log({
+  type: 'login_success',
+  user_id: user.id,
+  ip: req.ip,
+  user_agent: req.headers['user-agent'],
+  result: 'success'
+});
+
+await auditLog.critical({
+  type: 'token_replay_attack',
+  user_id: user.id,
+  family_id: token.family_id,
+  ip: req.ip
+});
+```
+
+## Multi-Tenancy Patterns
+
+### Tenant Isolation in Tokens
+
+```javascript
+// JWT with tenant claim
+const accessToken = jwt.sign({
+  sub: user.id,
+  tenant_id: user.tenant_id, // Tenant isolation
+  tenant_tier: tenant.tier, // For rate limiting
+  roles: user.roles, // ['admin', 'user']
+  scopes: ['read:orders', 'write:orders'],
+  iss: 'https://auth.example.com',
+  aud: 'https://api.example.com',
+  exp: Math.floor(Date.now() / 1000) + 900
+}, privateKey, { algorithm: 'RS256' });
+
+// Middleware to enforce tenant isolation
+function tenantIsolation(req, res, next) {
+  const token = verifyJWT(req.headers.authorization);
+
+  // Extract tenant from token
+  req.tenant_id = token.tenant_id;
+
+  // Add tenant filter to all DB queries
+  req.dbFilter = { tenant_id: req.tenant_id };
+
+  next();
+}
+
+// All queries automatically filtered
+app.get('/orders', tenantIsolation, async (req, res) => {
+  // Automatically filtered by tenant
+  const orders = await db.query(
+    'SELECT * FROM orders WHERE tenant_id = $1',
+    [req.tenant_id]
+  );
+  res.json(orders);
+});
+```
+
+### Per-Tenant Rate Limits
+
+```javascript
+const getTenantRateLimit = (tier) => {
+  const limits = {
+    free: { windowMs: 60000, max: 100 }, // 100/min
+    pro: { windowMs: 60000, max: 1000 }, // 1000/min
+    enterprise: { windowMs: 60000, max: 10000 } // 10k/min
+  };
+  return limits[tier] || limits.free;
+};
+
+app.use(async (req, res, next) => {
+  const token = verifyJWT(req.headers.authorization);
+  const tenant = await getTenant(token.tenant_id);
+
+  const limit = getTenantRateLimit(tenant.tier);
+
+  // Apply tenant-specific rate limit
+  // (in production, cache one limiter instance per tier rather than
+  // constructing a new one on every request)
+  const limiter = rateLimit({
+    ...limit,
+    keyGenerator: () => `api:${tenant.id}`
+  });
+
+  limiter(req, res, next);
+});
+```
+
+## Service-to-Service Authentication
+
+### Zero-Trust Architecture
+
+```
+Principles:
+1. Never trust, always verify
+2. Assume breach
+3. Verify explicitly (identity + device + location)
+4. Least privilege access
+5. Micro-segmentation
+```
+
+### Mutual TLS (mTLS) Pattern
+
+```yaml
+# Kubernetes with cert-manager
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: service-a-cert
+spec:
+  secretName: service-a-tls
+  issuerRef:
+    name: internal-ca
+    kind: ClusterIssuer
+  dnsNames:
+    - service-a.default.svc.cluster.local
+  usages:
+    - digital signature
+    - key encipherment
+    - client auth # Client authentication
+    - server auth # Server authentication
+---
+# Service configuration
+apiVersion: v1
+kind: Service
+metadata:
+  name: service-b
+  annotations:
+    service.alpha.kubernetes.io/app-protocols: '{"https":"HTTPS"}'
+spec:
+  ports:
+    - port: 443
+      protocol: TCP
+      targetPort: 8443
+---
+# Pod configuration
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: service-a
+spec:
+  template:
+    spec:
+      containers:
+        - name: app
+          volumeMounts:
+            - name: tls
+              mountPath: /etc/tls
+              readOnly: true
+      volumes:
+        - name: tls
+          secret:
+            secretName: service-a-tls
+```
+
+```javascript
+// Node.js client with mTLS
+const https = require('https');
+const fs = require('fs');
+
+const options = {
+  hostname: 'service-b.default.svc.cluster.local',
+  port: 443,
+  path: '/api/orders',
+  method: 'GET',
+
+  // Client certificate
+  cert: fs.readFileSync('/etc/tls/tls.crt'),
+  key: fs.readFileSync('/etc/tls/tls.key'),
+
+  // CA certificate to verify server
+  ca: fs.readFileSync('/etc/tls/ca.crt'),
+
+  // Verify server identity
+  checkServerIdentity: (hostname, cert) => {
+    // Custom verification logic
+    if (cert.subject.CN !== 'service-b.default.svc.cluster.local') {
+      throw new Error('Server identity mismatch');
+    }
+  }
+};
+
+https.get(options, (res) => {
+  // Handle response
+});
+```
+
+### Service Mesh (Istio) Pattern
+
+```yaml
+# Automatic mTLS for all services
+apiVersion: security.istio.io/v1beta1
+kind: PeerAuthentication
+metadata:
+  name: default
+  namespace: default
+spec:
+  mtls:
+    mode: STRICT # Require mTLS
+---
+# Authorization policy
+apiVersion: security.istio.io/v1beta1
+kind: AuthorizationPolicy
+metadata:
+  name: service-b-policy
+spec:
+  selector:
+    matchLabels:
+      app: service-b
+  rules:
+    - from:
+        - source:
+            principals: ["cluster.local/ns/default/sa/service-a"]
+      to:
+        - operation:
+            methods: ["GET", "POST"]
+            paths: ["/api/orders/*"]
+    - from:
+        - source:
+            principals: ["cluster.local/ns/default/sa/service-c"]
+      to:
+        - operation:
+            methods: ["GET"]
+            paths: ["/api/orders/*/status"]
+---
+# Request authentication (JWT validation)
+apiVersion: security.istio.io/v1beta1
+kind: RequestAuthentication
+metadata:
+  name: jwt-auth
+spec:
+  selector:
+    matchLabels:
+      app: service-b
+  jwtRules:
+    - issuer: "https://auth.example.com"
+      jwksUri: "https://auth.example.com/.well-known/jwks.json"
+      audiences:
+        - "service-b"
+```
+
+## Mobile-Specific Patterns
+
+### Certificate Pinning
+
+```swift
+// iOS - Certificate pinning with URLSession
+class CertificatePinner: NSObject, URLSessionDelegate {
+    let pinnedCertificates: [SecCertificate]
+
+    init(pinnedCertificates: [SecCertificate]) {
+        self.pinnedCertificates = pinnedCertificates
+    }
+
+    func urlSession(
+        _ session: URLSession,
+        didReceive challenge: URLAuthenticationChallenge,
+        completionHandler: @escaping (URLSession.AuthChallengeDisposition, URLCredential?) -> Void
+    ) {
+        guard challenge.protectionSpace.authenticationMethod == NSURLAuthenticationMethodServerTrust,
+              let serverTrust = challenge.protectionSpace.serverTrust else {
+            completionHandler(.cancelAuthenticationChallenge, nil)
+            return
+        }
+
+        // Get server certificate
+        guard let serverCertificate = SecTrustGetCertificateAtIndex(serverTrust, 0) else {
+            completionHandler(.cancelAuthenticationChallenge, nil)
+            return
+        }
+
+        // Check if server cert matches any pinned cert
+        let serverCertData = SecCertificateCopyData(serverCertificate) as Data
+
+        for pinnedCert in pinnedCertificates {
+            let pinnedCertData = SecCertificateCopyData(pinnedCert) as Data
+
+            if serverCertData == pinnedCertData {
+                // Certificate matches - allow connection
+                let credential = URLCredential(trust: serverTrust)
+                completionHandler(.useCredential, credential)
+                return
+            }
+        }
+
+        // Certificate not pinned - reject connection
+        completionHandler(.cancelAuthenticationChallenge, nil)
+    }
+}
+```
+
+### Biometric Authentication
+
+```swift
+// iOS - Biometric auth (Face ID / Touch ID)
+import LocalAuthentication
+
+class BiometricAuth {
+    func authenticate(reason: String, completion: @escaping (Bool, Error?) -> Void) {
+        let context = LAContext()
+        var error: NSError?
+
+        // Check if biometric auth is available
+        guard context.canEvaluatePolicy(.deviceOwnerAuthenticationWithBiometrics, error: &error) else {
+            completion(false, error)
+            return
+        }
+
+        // Attempt biometric authentication
+        context.evaluatePolicy(
+            .deviceOwnerAuthenticationWithBiometrics,
+            localizedReason: reason
+        ) { success, error in
+            DispatchQueue.main.async {
+                if success {
+                    // Biometric auth successful - retrieve token from Keychain
+                    let token = TokenStorage().getToken(forKey: "refresh_token")
+                    completion(token != nil, nil)
+                } else {
+                    completion(false, error)
+                }
+            }
+        }
+    }
+}
+```
+
+## Compliance & Regulations
+
+### GDPR Considerations
+
+```javascript
+// Right to be forgotten - token revocation
+async function deleteUserData(userId) {
+  await db.transaction(async (tx) => {
+    // 1. Revoke all active tokens
+    await tx.query(
+      'UPDATE refresh_tokens SET revoked = true, ' +
+      'revoked_reason = $1 WHERE user_id = $2',
+      ['GDPR deletion request', userId]
+    );
+
+    // 2. Anonymize audit logs (keep for compliance)
+    await tx.query(
+      'UPDATE audit_log SET user_id = NULL, ' +
+      'ip_address = NULL, user_agent = NULL WHERE user_id = $1',
+      [userId]
+    );
+
+    // 3. Delete user data
+    await tx.query('DELETE FROM users WHERE id = $1', [userId]);
+  });
+}
+
+// Data portability - export auth history
+async function exportAuthData(userId) {
+  const data = {
+    login_history: await db.query(
+      'SELECT created_at, ip_address, user_agent, result ' +
+      'FROM login_history WHERE user_id = $1',
+      [userId]
+    ),
+    active_sessions: await db.query(
+      'SELECT created_at, device_id, ip_address, expires_at ' +
+      'FROM refresh_tokens WHERE user_id = $1 AND revoked = false',
+      [userId]
+    )
+  };
+
+  return JSON.stringify(data, null, 2);
+}
+```
+
+### PCI-DSS for Payment Systems
+
+```javascript
+// Requirements for authentication in payment systems
+
+// 1. Strong access control (8.2)
+const PASSWORD_REQUIREMENTS = {
+  minLength: 12,
+  requireUppercase: true,
+  requireLowercase: true,
+  requireNumbers: true,
+  requireSpecialChars: true,
+  preventReuse: 4, // Can't reuse last 4 passwords
+  maxAge: 90 * 24 * 60 * 60 * 1000 // 90 days
+};
+
+// 2. Multi-factor authentication (8.3)
+async function loginWithMFA(username, password, mfaCode) {
+  const user = await validateCredentials(username, password);
+  if (!user) throw new AuthError('Invalid credentials');
+
+  // Require MFA for all administrative access
+  if (user.roles.includes('admin')) {
+    const validMFA = await validateTOTP(user.id, mfaCode);
+    if (!validMFA) throw new AuthError('Invalid MFA code');
+  }
+
+  return generateTokens(user);
+}
+
+// 3. Session timeout (8.1.8)
+const SESSION_TIMEOUT = 15 * 60 * 1000; // 15 minutes idle
+
+// 4. Audit logging (10.2)
+await auditLog.log({
+  type: 'cardholder_data_access',
+  user_id: user.id,
+  resource: 'payment_methods',
+  action: 'read',
+  result: 'success',
+  timestamp: new Date().toISOString()
+});
+```
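+
+A sketch of actually enforcing the idle timeout above, using a per-session last-activity timestamp in Redis (the `req.session_id` field and `revokeSession` helper are assumptions, not a specific framework API):
+
+```javascript
+async function enforceIdleTimeout(req, res, next) {
+  const sessionId = req.session_id; // assumption: set by upstream auth middleware
+  const key = `last_activity:${sessionId}`;
+
+  const last = await redis.get(key);
+  if (last && Date.now() - parseInt(last, 10) > SESSION_TIMEOUT) {
+    await revokeSession(sessionId); // assumption: invalidates the refresh token family
+    return res.status(401).json({ error: 'session_timeout' });
+  }
+
+  // Record activity; expire the marker after 24h of silence
+  await redis.set(key, Date.now().toString(), 'EX', 24 * 60 * 60);
+  next();
+}
+```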
expect(res1.status).toBe(200); + refreshToken1 = res1.body.refresh_token; + + // Second refresh with new token + const res2 = await request(app) + .post('/auth/refresh') + .send({ refresh_token: refreshToken1 }); + + expect(res2.status).toBe(200); + refreshToken2 = res2.body.refresh_token; + + expect(refreshToken1).not.toBe(refreshToken2); + }); + + it('should detect refresh token replay', async () => { + // Try to reuse first refresh token (already rotated) + const res = await request(app) + .post('/auth/refresh') + .send({ refresh_token: refreshToken1 }); + + expect(res.status).toBe(401); + expect(res.body.error).toContain('replay'); + + // Entire family should be revoked + const familyCheck = await request(app) + .post('/auth/refresh') + .send({ refresh_token: refreshToken2 }); + + expect(familyCheck.status).toBe(401); // Also revoked + }); +}); +``` + +## Token Validation Patterns + +### JWT Validation with Caching + +```javascript +const jwt = require('jsonwebtoken'); +const { NodeCache } = require('node-cache'); + +const publicKeyCache = new NodeCache({ stdTTL: 3600 }); // 1 hour + +async function validateJWT(token) { + // Decode without verification to get header + const decoded = jwt.decode(token, { complete: true }); + if (!decoded) throw new AuthError('Invalid token format'); + + const keyId = decoded.header.kid; + + // Try cache first + let publicKey = publicKeyCache.get(keyId); + + if (!publicKey) { + // Fetch from JWKS endpoint + const jwks = await fetch('https://auth.example.com/.well-known/jwks.json'); + const keys = await jwks.json(); + + const key = keys.keys.find(k => k.kid === keyId); + if (!key) throw new AuthError('Public key not found'); + + publicKey = jwkToPem(key); + publicKeyCache.set(keyId, publicKey); + } + + // Verify signature and claims + try { + const payload = jwt.verify(token, publicKey, { + algorithms: ['RS256'], + issuer: 'https://auth.example.com', + audience: 'https://api.example.com' + }); + + // Additional validation + if (!payload.sub) throw new AuthError('Missing subject claim'); + if (!payload.scopes || !Array.isArray(payload.scopes)) { + throw new AuthError('Missing or invalid scopes'); + } + + return payload; + } catch (error) { + if (error.name === 'TokenExpiredError') { + throw new AuthError('Token expired'); + } + throw new AuthError('Token validation failed'); + } +} +``` + +### Key Rotation Without Downtime + +```javascript +// Support multiple signing keys simultaneously +const CURRENT_KEY_ID = 'key-2024-11'; +const PREVIOUS_KEY_ID = 'key-2024-10'; + +const signingKeys = new Map([ + [CURRENT_KEY_ID, fs.readFileSync('/keys/current-private.pem')], + [PREVIOUS_KEY_ID, fs.readFileSync('/keys/previous-private.pem')] +]); + +// Sign with current key +function generateJWT(payload) { + return jwt.sign(payload, signingKeys.get(CURRENT_KEY_ID), { + algorithm: 'RS256', + keyid: CURRENT_KEY_ID, + expiresIn: '15m' + }); +} + +// Validate with either key (grace period) +function validateJWT(token) { + const decoded = jwt.decode(token, { complete: true }); + const keyId = decoded.header.kid; + + if (!signingKeys.has(keyId)) { + throw new AuthError('Unknown signing key'); + } + + return jwt.verify(token, signingKeys.get(keyId), { + algorithms: ['RS256'] + }); +} + +// Key rotation process: +// 1. Generate new key pair → key-2024-12 +// 2. Add to signingKeys map (validation now accepts 3 keys) +// 3. Update CURRENT_KEY_ID to key-2024-12 (new tokens use new key) +// 4. Wait for old tokens to expire (15 min) +// 5. 
Remove key-2024-10 from signingKeys map +``` + +## Anti-Patterns + +| Anti-Pattern | Why Bad | Fix | +|--------------|---------|-----| +| **Long-lived JWTs** | Can't revoke, security risk | Max 15-60 min, use refresh tokens | +| **Tokens in localStorage** | XSS vulnerability | httpOnly cookies or memory-only | +| **No refresh rotation** | Stolen token = permanent access | Rotate on every use, detect replay | +| **Password Grant** | App handles credentials, no MFA | Authorization Code + PKCE | +| **Shared secrets across services** | One breach = all compromised | Per-service secrets, rotate regularly | +| **No rate limiting** | Brute force attacks | Rate limit login, refresh, sensitive endpoints | +| **Ignoring anomalies** | Account takeover undetected | Monitor location, device, behavior | +| **No audit logging** | Can't investigate breaches | Log all auth events, immutable storage | +| **Weak password requirements** | Easy to crack | 12+ chars, complexity, no common passwords | +| **No MFA for admins** | Privileged account compromise | Require MFA for elevated access | + +## Cross-References + +**Related skills**: +- **Security architecture** → `ordis-security-architect` (threat modeling, defense-in-depth) +- **FastAPI implementation** → `fastapi-development` (FastAPI auth middleware) +- **REST API design** → `rest-api-design` (Bearer tokens, auth headers) +- **GraphQL auth** → `graphql-api-design` (context-based auth, directives) +- **Microservices** → `microservices-architecture` (service mesh, mTLS) + +## Further Reading + +- **OAuth 2.1**: Latest OAuth spec (consolidates best practices) +- **RFC 7636**: PKCE specification +- **RFC 8693**: Token exchange for delegation +- **OWASP Auth Cheat Sheet**: https://cheatsheetseries.owasp.org/cheatsheets/Authentication_Cheat_Sheet.html +- **JWT Best Practices**: https://datatracker.ietf.org/doc/html/rfc8725 +- **Zero Trust Architecture**: NIST SP 800-207 diff --git a/skills/using-web-backend/api-documentation.md b/skills/using-web-backend/api-documentation.md new file mode 100644 index 0000000..458d1ba --- /dev/null +++ b/skills/using-web-backend/api-documentation.md @@ -0,0 +1,944 @@ + +# API Documentation + +## Overview + +**API documentation specialist covering OpenAPI specs, documentation-as-code, testing docs, SDK generation, and preventing documentation debt.** + +**Core principle**: Documentation is a product feature that directly impacts developer adoption - invest in keeping it accurate, tested, and discoverable. 
+ +## When to Use This Skill + +Use when encountering: + +- **OpenAPI/Swagger**: Auto-generating docs, customizing Swagger UI, maintaining specs +- **Documentation testing**: Ensuring examples work, preventing stale docs +- **Versioning**: Managing multi-version docs, deprecation notices +- **Documentation-as-code**: Keeping docs in sync with code changes +- **SDK generation**: Generating client libraries from OpenAPI specs +- **Documentation debt**: Detecting and preventing outdated documentation +- **Metrics**: Tracking documentation usage and effectiveness +- **Community docs**: Managing contributions, improving discoverability + +**Do NOT use for**: +- General technical writing (see `muna-technical-writer` skill) +- API design principles (see `rest-api-design`, `graphql-api-design`) +- Authentication implementation (see `api-authentication`) + +## OpenAPI Specification Best Practices + +### Production-Quality OpenAPI Specs + +**Complete FastAPI example**: + +```python +from fastapi import FastAPI, Path, Query, Body +from pydantic import BaseModel, Field +from typing import Optional, List + +app = FastAPI( + title="Payment Processing API", + description=""" + # Payment API + + Process payments with PCI-DSS compliance. + + ## Features + - Multiple payment methods (cards, ACH, digital wallets) + - Fraud detection + - Webhook notifications + - Test mode for development + + ## Rate Limits + - Standard: 100 requests/minute + - Premium: 1000 requests/minute + + ## Support + - Documentation: https://docs.example.com + - Status: https://status.example.com + - Support: api-support@example.com + """, + version="2.1.0", + terms_of_service="https://example.com/terms", + contact={ + "name": "API Support", + "url": "https://example.com/support", + "email": "api-support@example.com" + }, + license_info={ + "name": "Apache 2.0", + "url": "https://www.apache.org/licenses/LICENSE-2.0.html" + }, + servers=[ + {"url": "https://api.example.com", "description": "Production"}, + {"url": "https://sandbox-api.example.com", "description": "Sandbox"} + ] +) + +# Tag organization +tags_metadata = [ + { + "name": "payments", + "description": "Payment operations", + "externalDocs": { + "description": "Payment Guide", + "url": "https://docs.example.com/guides/payments" + } + } +] + +app = FastAPI(openapi_tags=tags_metadata) + +# Rich schema with examples +class PaymentRequest(BaseModel): + amount: float = Field( + ..., + gt=0, + le=999999.99, + description="Payment amount in USD", + example=99.99 + ) + currency: str = Field( + default="USD", + pattern="^[A-Z]{3}$", + description="ISO 4217 currency code", + example="USD" + ) + + class Config: + schema_extra = { + "examples": [ + { + "amount": 149.99, + "currency": "USD", + "payment_method": "card_visa_4242", + "description": "Premium subscription" + }, + { + "amount": 29.99, + "currency": "EUR", + "payment_method": "paypal_account", + "description": "Monthly plan" + } + ] + } + +# Comprehensive error documentation +@app.post( + "/payments", + summary="Create payment", + description=""" + Creates a new payment transaction. + + ## Processing Time + Typically 2-5 seconds for card payments. + + ## Idempotency + Use `Idempotency-Key` header to prevent duplicates. + + ## Test Mode + Use test payment methods in sandbox environment. 
+ """, + responses={ + 201: {"description": "Payment created", "model": PaymentResponse}, + 400: { + "description": "Invalid request", + "content": { + "application/json": { + "examples": { + "invalid_amount": { + "summary": "Amount validation failed", + "value": { + "error_code": "INVALID_AMOUNT", + "message": "Amount must be between 0.01 and 999999.99" + } + } + } + } + } + }, + 402: {"description": "Payment declined"}, + 429: {"description": "Rate limit exceeded"} + }, + tags=["payments"] +) +async def create_payment(payment: PaymentRequest): + pass +``` + +### Custom OpenAPI Generation + +**Add security schemes, custom extensions**: + +```python +from fastapi.openapi.utils import get_openapi + +def custom_openapi(): + if app.openapi_schema: + return app.openapi_schema + + openapi_schema = get_openapi( + title=app.title, + version=app.version, + description=app.description, + routes=app.routes, + ) + + # Security schemes + openapi_schema["components"]["securitySchemes"] = { + "ApiKeyAuth": { + "type": "apiKey", + "in": "header", + "name": "X-API-Key", + "description": "Get your API key at https://dashboard.example.com/api-keys" + }, + "OAuth2": { + "type": "oauth2", + "flows": { + "authorizationCode": { + "authorizationUrl": "https://auth.example.com/oauth/authorize", + "tokenUrl": "https://auth.example.com/oauth/token", + "scopes": { + "payments:read": "Read payment data", + "payments:write": "Create payments" + } + }, + "clientCredentials": { + "tokenUrl": "https://auth.example.com/oauth/token", + "scopes": { + "payments:read": "Read payment data", + "payments:write": "Create payments" + } + } + } + } + } + + # Global security requirement + openapi_schema["security"] = [{"ApiKeyAuth": []}] + + # Custom extensions for tooling + openapi_schema["x-api-id"] = "payments-api-v2" + openapi_schema["x-audience"] = "external" + openapi_schema["x-ratelimit-default"] = 100 + + # Add code samples extension (for Swagger UI) + for path_data in openapi_schema["paths"].values(): + for operation in path_data.values(): + if isinstance(operation, dict) and "operationId" in operation: + operation["x-code-samples"] = [ + { + "lang": "curl", + "source": generate_curl_example(operation) + }, + { + "lang": "python", + "source": generate_python_example(operation) + } + ] + + app.openapi_schema = openapi_schema + return app.openapi_schema + +app.openapi = custom_openapi +``` + +## Documentation-as-Code + +### Keep Docs in Sync with Code + +**Anti-pattern**: Docs in separate repo, manually updated, always stale + +**Pattern**: Co-locate docs with code, auto-generate from source + +**Implementation**: + +```python +# Source of truth: Pydantic models +class PaymentRequest(BaseModel): + """ + Payment request model. 
+
+    Examples:
+        Basic payment:
+        ```python
+        payment = PaymentRequest(
+            amount=99.99,
+            currency="USD",
+            payment_method="pm_card_visa"
+        )
+        ```
+    """
+    amount: float = Field(..., description="Amount in USD")
+    currency: str = Field(default="USD", description="ISO 4217 currency code")
+
+    class Config:
+        schema_extra = {
+            "examples": [
+                {"amount": 99.99, "currency": "USD", "payment_method": "pm_card_visa"}
+            ]
+        }
+
+# Docs auto-generated from model
+# - OpenAPI spec from Field descriptions
+# - Examples from schema_extra
+# - Code samples from docstring examples
+```
+
+**Prevent schema drift**:
+
+```python
+import json
+
+import pytest
+from fastapi.testclient import TestClient
+
+def test_openapi_schema_matches_committed():
+    """Ensure OpenAPI spec is committed and up-to-date"""
+    client = TestClient(app)
+
+    # Get current OpenAPI spec
+    current_spec = client.get("/openapi.json").json()
+
+    # Load committed spec
+    with open("docs/openapi.json") as f:
+        committed_spec = json.load(f)
+
+    # Fail if specs don't match
+    assert current_spec == committed_spec, \
+        "OpenAPI spec has changed. Run 'make update-openapi-spec' and commit"
+
+def test_all_endpoints_have_examples():
+    """Ensure all endpoints have request/response examples"""
+    client = TestClient(app)
+    spec = client.get("/openapi.json").json()
+
+    for path, methods in spec["paths"].items():
+        for method, details in methods.items():
+            if method in ["get", "post", "put", "patch", "delete"]:
+                # Check request body has example
+                if "requestBody" in details:
+                    assert "examples" in details["requestBody"]["content"]["application/json"], \
+                        f"{method.upper()} {path} missing request examples"
+
+                # Check responses have examples
+                for status_code, response in details.get("responses", {}).items():
+                    if "content" in response and "application/json" in response["content"]:
+                        assert "examples" in response["content"]["application/json"] or \
+                            "example" in response["content"]["application/json"]["schema"], \
+                            f"{method.upper()} {path} response {status_code} missing examples"
+```
+
+### Documentation Pre-Commit Hook
+
+```bash
+#!/bin/bash
+# .git/hooks/pre-commit
+set -e  # abort the commit if any step below fails
+
+# Regenerate OpenAPI spec
+python -c "
+from app.main import app
+import json
+
+with open('docs/openapi.json', 'w') as f:
+    json.dump(app.openapi(), f, indent=2)
+"
+
+# Stage the regenerated spec
+git add docs/openapi.json
+
+# Validate spec
+npm run validate:openapi
+
+# Run doc tests
+pytest tests/test_documentation.py
+```
+
+## Documentation Testing
+
+### Ensure Examples Actually Work
+
+**Problem**: Examples in docs become stale, don't work
+
+**Solution**: Test every code example automatically
+
+```python
+# Extract examples from OpenAPI spec
+import pytest
+from app.main import app
+
+def get_all_examples_from_openapi():
+    """Extract all examples from OpenAPI spec"""
+    spec = app.openapi()
+    examples = []
+
+    for path, methods in spec["paths"].items():
+        for method, details in methods.items():
+            if "examples" in details.get("requestBody", {}).get("content", {}).get("application/json", {}):
+                for example_name, example_data in details["requestBody"]["content"]["application/json"]["examples"].items():
+                    examples.append({
+                        "path": path,
+                        "method": method,
+                        "example_name": example_name,
+                        "data": example_data["value"]
+                    })
+
+    return examples
+
+@pytest.mark.parametrize("example", get_all_examples_from_openapi(), ids=lambda e: f"{e['method']}_{e['path']}_{e['example_name']}")
+def test_openapi_examples_are_valid(example, client):
+    """Test that all OpenAPI examples are
valid requests""" + method = example["method"] + path = example["path"] + data = example["data"] + + response = client.request(method, path, json=data) + + # Examples should either succeed or fail with expected error + assert response.status_code in [200, 201, 400, 401, 402, 403, 404], \ + f"Example {example['example_name']} for {method.upper()} {path} returned unexpected status {response.status_code}" +``` + +**Test markdown code samples**: + +```python +import pytest +import re +import tempfile +import subprocess + +def extract_code_blocks_from_markdown(markdown_file): + """Extract code blocks from markdown""" + with open(markdown_file) as f: + content = f.read() + + # Find code blocks with language + pattern = r'```(\w+)\n(.*?)```' + return re.findall(pattern, content, re.DOTALL) + +def test_python_examples_in_quickstart(): + """Test that Python examples in quickstart.md execute without errors""" + code_blocks = extract_code_blocks_from_markdown("docs/quickstart.md") + + for lang, code in code_blocks: + if lang == "python": + # Write code to temp file + with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: + # Replace placeholders + code = code.replace("sk_test_abc123...", "test_api_key") + code = code.replace("https://api.example.com", "http://localhost:8000") + f.write(code) + f.flush() + + # Run code + result = subprocess.run( + ["python", f.name], + capture_output=True, + text=True, + timeout=5 + ) + + assert result.returncode == 0, \ + f"Python example failed:\n{code}\n\nError:\n{result.stderr}" +``` + +### Documentation Coverage Metrics + +```python +def test_documentation_coverage(): + """Ensure all endpoints are documented""" + from fastapi.openapi.utils import get_openapi + + spec = get_openapi(title="Test", version="1.0.0", routes=app.routes) + + missing_docs = [] + + for path, methods in spec["paths"].items(): + for method, details in methods.items(): + # Check summary + if not details.get("summary"): + missing_docs.append(f"{method.upper()} {path}: Missing summary") + + # Check description + if not details.get("description"): + missing_docs.append(f"{method.upper()} {path}: Missing description") + + # Check examples + if "requestBody" in details: + content = details["requestBody"].get("content", {}).get("application/json", {}) + if "examples" not in content and "example" not in content.get("schema", {}): + missing_docs.append(f"{method.upper()} {path}: Missing request example") + + assert not missing_docs, \ + f"Documentation incomplete:\n" + "\n".join(missing_docs) +``` + +## Interactive Documentation + +### Swagger UI Customization + +**Custom Swagger UI with branding**: + +```python +from fastapi import FastAPI +from fastapi.openapi.docs import get_swagger_ui_html +from fastapi.staticfiles import StaticFiles + +app = FastAPI(docs_url=None) # Disable default docs +app.mount("/static", StaticFiles(directory="static"), name="static") + +@app.get("/docs", include_in_schema=False) +async def custom_swagger_ui_html(): + return get_swagger_ui_html( + openapi_url=app.openapi_url, + title=f"{app.title} - API Documentation", + oauth2_redirect_url=app.swagger_ui_oauth2_redirect_url, + swagger_js_url="/static/swagger-ui-bundle.js", + swagger_css_url="/static/swagger-ui.css", + swagger_favicon_url="/static/favicon.png", + swagger_ui_parameters={ + "deepLinking": True, + "displayRequestDuration": True, + "filter": True, + "showExtensions": True, + "tryItOutEnabled": True, + "persistAuthorization": True, + "defaultModelsExpandDepth": 1, + 
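+            # defaultModelsExpandDepth: -1 hides the "Models" section entirely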
"defaultModelExpandDepth": 1 + } + ) +``` + +**Add "Try It Out" authentication**: + +```python +from fastapi.openapi.docs import get_swagger_ui_html + +@app.get("/docs") +async def custom_swagger_ui(): + return get_swagger_ui_html( + openapi_url="/openapi.json", + title="API Docs", + init_oauth={ + "clientId": "swagger-ui-client", + "appName": "API Documentation", + "usePkceWithAuthorizationCodeGrant": True + } + ) +``` + +### ReDoc Customization + +```python +from fastapi.openapi.docs import get_redoc_html + +@app.get("/redoc", include_in_schema=False) +async def redoc_html(): + return get_redoc_html( + openapi_url="/openapi.json", + title="API Documentation - ReDoc", + redoc_js_url="/static/redoc.standalone.js", + redoc_favicon_url="/static/favicon.png", + with_google_fonts=True + ) +``` + +**ReDoc configuration options**: + +```html + + +``` + +## SDK Generation + +### Generate Client SDKs from OpenAPI + +**OpenAPI Generator**: + +```bash +# Install openapi-generator +npm install -g @openapitools/openapi-generator-cli + +# Generate Python SDK +openapi-generator-cli generate \ + -i docs/openapi.json \ + -g python \ + -o sdks/python \ + --additional-properties=packageName=payment_api,projectName=payment-api-python + +# Generate TypeScript SDK +openapi-generator-cli generate \ + -i docs/openapi.json \ + -g typescript-fetch \ + -o sdks/typescript \ + --additional-properties=npmName=@example/payment-api,supportsES6=true + +# Generate Go SDK +openapi-generator-cli generate \ + -i docs/openapi.json \ + -g go \ + -o sdks/go \ + --additional-properties=packageName=paymentapi +``` + +**Automate SDK generation in CI**: + +```yaml +# .github/workflows/generate-sdks.yml +name: Generate SDKs + +on: + push: + branches: [main] + paths: + - 'docs/openapi.json' + +jobs: + generate: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Generate Python SDK + run: | + docker run --rm \ + -v ${PWD}:/local \ + openapitools/openapi-generator-cli generate \ + -i /local/docs/openapi.json \ + -g python \ + -o /local/sdks/python + + - name: Test Python SDK + run: | + cd sdks/python + pip install -e . 
+          pytest
+
+      - name: Publish to PyPI
+        if: github.ref == 'refs/heads/main'
+        run: |
+          cd sdks/python
+          python -m build
+          twine upload dist/*
+        env:
+          TWINE_USERNAME: __token__
+          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
+```
+
+**Custom SDK templates**:
+
+```
+templates/
+├── python/
+│   ├── api.mustache       # Custom API client template
+│   ├── model.mustache     # Custom model template
+│   └── README.mustache    # Custom README
+```
+
+```bash
+# Generate with custom templates
+openapi-generator-cli generate \
+  -i docs/openapi.json \
+  -g python \
+  -o sdks/python \
+  -t templates/python \
+  --additional-properties=packageName=payment_api
+```
+
+## Documentation Versioning
+
+### Version Documentation Separately from API
+
+**Documentation versions**:
+
+```
+docs/
+├── v1/
+│   ├── quickstart.md
+│   ├── api-reference.md
+│   └── migration-to-v2.md    ← Deprecation notice
+├── v2/
+│   ├── quickstart.md
+│   ├── api-reference.md
+│   └── whats-new.md
+└── latest -> v2/             # Symlink to current version
+```
+
+**Documentation routing**:
+
+```python
+from fastapi import HTTPException
+from fastapi.responses import HTMLResponse, RedirectResponse
+from jinja2 import Environment, FileSystemLoader
+
+env = Environment(loader=FileSystemLoader("docs"))
+
+@app.get("/docs")
+async def docs_redirect():
+    """Redirect to latest docs"""
+    return RedirectResponse(url="/docs/v2/")
+
+@app.get("/docs/{version}/{page}")
+async def serve_docs(version: str, page: str):
+    """Serve versioned documentation"""
+    if version not in ["v1", "v2"]:
+        raise HTTPException(404)
+
+    # Add deprecation warning for v1
+    deprecated = version == "v1"
+
+    template = env.get_template(f"{version}/{page}.md")
+    content = template.render(deprecated=deprecated)
+
+    return HTMLResponse(content)
+```
+
+**Deprecation banner**:
+
+```html
+{% if deprecated %}
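+<!-- `deprecated` is set by serve_docs() above whenever version == "v1" -->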
+<div class="deprecation-banner">
+  ⚠️ <strong>Deprecated:</strong> This documentation is for API v1,
+  which will be sunset on June 1, 2025.
+  <a href="/docs/v2/">Migrate to v2</a>
+</div>
+{% endif %}
+```
+
+## Documentation Debt Detection
+
+### Prevent Stale Documentation
+
+**Detect outdated docs**:
+
+```python
+import os
+from datetime import datetime, timedelta
+
+import pytest
+
+def test_documentation_freshness():
+    """Ensure docs have been updated recently"""
+    docs_modified = datetime.fromtimestamp(
+        os.path.getmtime("docs/api-reference.md")
+    )
+
+    # Fail if docs haven't been updated in 90 days
+    max_age = timedelta(days=90)
+    age = datetime.now() - docs_modified
+
+    assert age < max_age, \
+        f"API docs are {age.days} days old. Review and update or add exemption comment."
+```
+
+**Track documentation TODOs**:
+
+```python
+def test_no_documentation_todos():
+    """Ensure no TODO comments in docs"""
+    import glob
+    import re
+
+    doc_files = glob.glob("docs/**/*.md", recursive=True)
+    todos = []
+
+    for doc_file in doc_files:
+        with open(doc_file) as f:
+            for line_num, line in enumerate(f, 1):
+                if re.search(r'TODO|FIXME|XXX', line):
+                    todos.append(f"{doc_file}:{line_num}: {line.strip()}")
+
+    assert not todos, \
+        f"Documentation has {len(todos)} TODOs:\n" + "\n".join(todos)
+```
+
+**Broken link detection**:
+
+```python
+import glob
+import re
+
+import pytest
+import requests
+
+def extract_links_from_markdown(markdown_file):
+    """Extract all HTTP(S) links from markdown"""
+    with open(markdown_file) as f:
+        content = f.read()
+
+    # Find markdown links [text](url)
+    links = re.findall(r'\[([^\]]+)\]\(([^)]+)\)', content)
+    return [(text, url) for text, url in links if url.startswith('http')]
+
+def test_no_broken_links_in_docs():
+    """Ensure all external links in docs are valid"""
+    doc_files = glob.glob("docs/**/*.md", recursive=True)
+    broken_links = []
+
+    for doc_file in doc_files:
+        for text, url in extract_links_from_markdown(doc_file):
+            try:
+                response = requests.head(url, timeout=5, allow_redirects=True)
+                if response.status_code >= 400:
+                    broken_links.append(f"{doc_file}: {url} ({response.status_code})")
+            except requests.RequestException as e:
+                broken_links.append(f"{doc_file}: {url} (error: {e})")
+
+    assert not broken_links, \
+        f"Found {len(broken_links)} broken links:\n" + "\n".join(broken_links)
+```
+
+## Documentation Metrics
+
+### Track Documentation Usage
+
+**Analytics integration**:
+
+```python
+from fastapi import Request
+import analytics
+
+@app.middleware("http")
+async def track_doc_views(request: Request, call_next):
+    if request.url.path.startswith("/docs"):
+        # Track page view
+        analytics.track(
+            user_id="anonymous",
+            event="Documentation Viewed",
+            properties={
+                "page": request.url.path,
+                "version": request.url.path.split("/")[2] if len(request.url.path.split("/")) > 2 else "latest",
+                "referrer": request.headers.get("referer")
+            }
+        )
+
+    return await call_next(request)
+```
+
+**Track "Try It Out" usage**:
+
+```javascript
+// Inject into Swagger UI
+const originalExecute = swagger.presets.apis.execute;
+swagger.presets.apis.execute = function(spec) {
+    // Track API call from docs
+    analytics.track('API Call from Docs', {
+        endpoint: spec.path,
+        method: spec.method,
+        success: spec.response.status < 400
+    });
+
+    return originalExecute(spec);
+};
+```
+
+**Documentation health dashboard**:
+
+```python
+from datetime import datetime, timedelta
+
+import analytics
+from fastapi import APIRouter, Depends
+from sqlalchemy.orm import Session
+
+# get_db is the app's session dependency (defined elsewhere in this skill)
+router = APIRouter()
+
+@router.get("/admin/docs-metrics")
+async def get_doc_metrics(db: Session = Depends(get_db)):
+    """Dashboard for documentation health"""
+
+    # Page views by version
+    views_by_version = analytics.query(
+        "Documentation Viewed",
+
group_by="version", + since=datetime.now() - timedelta(days=30) + ) + + # Most viewed pages + top_pages = analytics.query( + "Documentation Viewed", + group_by="page", + since=datetime.now() - timedelta(days=30), + limit=10 + ) + + # Try it out usage + api_calls = analytics.query( + "API Call from Docs", + since=datetime.now() - timedelta(days=30) + ) + + # Documentation freshness + freshness = { + "quickstart.md": get_file_age("docs/quickstart.md"), + "api-reference.md": get_file_age("docs/api-reference.md") + } + + return { + "views_by_version": views_by_version, + "top_pages": top_pages, + "api_calls_from_docs": api_calls, + "freshness": freshness, + "health_score": calculate_doc_health_score() + } + +def calculate_doc_health_score(): + """Calculate documentation health (0-100)""" + score = 100 + + # Deduct for stale docs (>90 days old) + for doc_file in glob.glob("docs/**/*.md", recursive=True): + age_days = (datetime.now() - datetime.fromtimestamp(os.path.getmtime(doc_file))).days + if age_days > 90: + score -= 10 + + # Deduct for broken links + broken_links = count_broken_links() + score -= min(broken_links * 5, 30) + + # Deduct for missing examples + endpoints_without_examples = count_endpoints_without_examples() + score -= min(endpoints_without_examples * 3, 20) + + return max(score, 0) +``` + +## Anti-Patterns + +| Anti-Pattern | Why Bad | Fix | +|--------------|---------|-----| +| **Docs in separate repo** | Always out of sync | Co-locate with code | +| **Manual example updates** | Examples become stale | Test examples in CI | +| **No deprecation notices** | Breaking changes surprise users | Document deprecation 6+ months ahead | +| **Generic descriptions** | Doesn't help developers | Specific use cases, edge cases | +| **No versioned docs** | Can't reference old versions | Version docs separately | +| **Untested SDKs** | Generated SDKs don't work | Test generated SDKs in CI | +| **No documentation metrics** | Can't measure effectiveness | Track page views, usage | +| **Single example per endpoint** | Doesn't show edge cases | Multiple examples (success, errors) | + +## Cross-References + +**Related skills**: +- **Technical writing** → `muna-technical-writer` (writing style, organization) +- **API design** → `rest-api-design`, `graphql-api-design` (design patterns) +- **API testing** → `api-testing` (contract testing, examples) +- **Authentication** → `api-authentication` (auth flow documentation) + +## Further Reading + +- **OpenAPI Specification**: https://spec.openapis.org/oas/v3.1.0 +- **FastAPI docs**: https://fastapi.tiangolo.com/tutorial/metadata/ +- **Swagger UI**: https://swagger.io/docs/open-source-tools/swagger-ui/ +- **ReDoc**: https://redoc.ly/docs/ +- **Write the Docs**: https://www.writethedocs.org/ diff --git a/skills/using-web-backend/api-testing.md b/skills/using-web-backend/api-testing.md new file mode 100644 index 0000000..6c09b41 --- /dev/null +++ b/skills/using-web-backend/api-testing.md @@ -0,0 +1,1013 @@ + +# API Testing + +## Overview + +**API testing specialist covering test organization, integration testing, performance testing, security testing, and production test strategies.** + +**Core principle**: Tests are executable documentation that verify correctness, prevent regressions, and enable confident refactoring - invest in test quality as you would production code. 
+
+## When to Use This Skill
+
+Use when encountering:
+
+- **Test organization**: Structuring test suites, fixtures, test discovery
+- **Integration testing**: Testing with databases, external APIs, authentication
+- **Performance testing**: Load testing, stress testing, benchmarking
+- **Security testing**: Auth testing, injection testing, CORS validation
+- **Test quality**: Coverage analysis, mutation testing, flaky test detection
+- **CI/CD integration**: Running tests in pipelines, test reporting
+- **Test debugging**: Debugging failing tests, using pytest features
+
+**Do NOT use for**:
+- Unit testing business logic (use general Python testing resources)
+- Frontend testing (use frontend testing tools)
+- Database-specific patterns (see `database-integration` skill)
+
+## Test Organization
+
+### Test Structure Conventions
+
+**Directory layout**:
+
+```
+project/
+├── app/
+│   ├── __init__.py
+│   ├── main.py
+│   ├── routes/
+│   │   ├── users.py
+│   │   └── orders.py
+│   └── services/
+│       └── payment.py
+└── tests/
+    ├── __init__.py
+    ├── conftest.py              # Shared fixtures
+    ├── unit/                    # Fast, isolated tests
+    │   ├── test_services.py
+    │   └── test_schemas.py
+    ├── integration/             # Tests with database/external deps
+    │   ├── test_users_api.py
+    │   └── test_orders_api.py
+    ├── e2e/                     # End-to-end tests
+    │   └── test_checkout_flow.py
+    ├── performance/             # Load/stress tests
+    │   └── test_load.py
+    └── security/                # Security-specific tests
+        └── test_auth.py
+```
+
+**Naming conventions**:
+- Test files: `test_*.py` or `*_test.py`
+- Test functions: `test_<action>_<condition>_<expected_result>`
+- Test classes: `Test<FeatureName>`
+
+```python
+# Good naming
+def test_create_user_with_valid_data_returns_201():
+    pass
+
+def test_create_user_with_duplicate_email_returns_409():
+    pass
+
+def test_get_user_when_not_found_returns_404():
+    pass
+
+# Bad naming
+def test_user():  # Too vague
+    pass
+
+def test_1():  # No context
+    pass
+```
+
+### Test Markers for Organization
+
+**Define markers in pytest.ini**:
+
+```ini
+# pytest.ini
+[pytest]
+markers =
+    unit: Unit tests (fast, no external dependencies)
+    integration: Integration tests (database, external APIs)
+    e2e: End-to-end tests (full system)
+    slow: Tests that take > 1 second
+    security: Security-focused tests
+    smoke: Critical smoke tests (run first)
+    wip: Work in progress (skip in CI)
+```
+
+**Apply markers**:
+
+```python
+import pytest
+
+@pytest.mark.unit
+def test_calculate_discount():
+    """Unit test - no dependencies"""
+    assert calculate_discount(100, 0.1) == 90
+
+@pytest.mark.integration
+@pytest.mark.slow
+def test_create_order_end_to_end(client, test_db):
+    """Integration test with database"""
+    response = client.post("/orders", json={...})
+    assert response.status_code == 201
+
+@pytest.mark.security
+def test_unauthorized_access_returns_401(client):
+    """Security test for auth"""
+    response = client.get("/admin/users")
+    assert response.status_code == 401
+
+@pytest.mark.smoke
+def test_health_endpoint(client):
+    """Critical smoke test"""
+    response = client.get("/health")
+    assert response.status_code == 200
+```
+
+**Run specific test categories**:
+
+```bash
+# Run only unit tests (fast)
+pytest -m unit
+
+# Run only integration tests
+pytest -m integration
+
+# Run everything except slow tests
+pytest -m "not slow"
+
+# Run smoke tests first, then rest
+pytest -m smoke && pytest -m "not smoke"
+
+# Run security tests
+pytest -m security
+
+# Skip work-in-progress tests
+pytest -m "not wip"
+```
+
+### Parametrized Testing
+
+**Test same logic with multiple inputs**:
+
+```python
+import
pytest + +@pytest.mark.parametrize("email,expected_valid", [ + ("user@example.com", True), + ("user+tag@example.co.uk", True), + ("invalid.email", False), + ("@example.com", False), + ("user@", False), + ("", False), +]) +def test_email_validation(email, expected_valid): + """Test email validation with multiple cases""" + assert is_valid_email(email) == expected_valid + +@pytest.mark.parametrize("status_code,expected_retry", [ + (500, True), # Internal error - retry + (502, True), # Bad gateway - retry + (503, True), # Service unavailable - retry + (400, False), # Bad request - don't retry + (401, False), # Unauthorized - don't retry + (404, False), # Not found - don't retry +]) +def test_should_retry_request(status_code, expected_retry): + """Test retry logic for different status codes""" + assert should_retry(status_code) == expected_retry + +@pytest.mark.parametrize("role,endpoint,expected_status", [ + ("admin", "/admin/users", 200), + ("user", "/admin/users", 403), + ("guest", "/admin/users", 401), + ("admin", "/users/me", 200), + ("user", "/users/me", 200), + ("guest", "/users/me", 401), +]) +def test_authorization_matrix(client, role, endpoint, expected_status): + """Test authorization for different role/endpoint combinations""" + token = create_token_with_role(role) + response = client.get(endpoint, headers={"Authorization": f"Bearer {token}"}) + assert response.status_code == expected_status +``` + +**Parametrize with IDs for readability**: + +```python +@pytest.mark.parametrize("input_data,expected_error", [ + ({"email": ""}, "Email is required"), + ({"email": "invalid"}, "Invalid email format"), + ({"email": "user@example.com", "age": -1}, "Age must be positive"), +], ids=["missing_email", "invalid_email", "negative_age"]) +def test_validation_errors(input_data, expected_error): + with pytest.raises(ValidationError, match=expected_error): + validate_user(input_data) +``` + +## Test Doubles: Mocks, Stubs, Fakes, Spies + +### Taxonomy and When to Use Each + +| Type | Purpose | Use When | Example | +|------|---------|----------|---------| +| **Mock** | Verify interactions (method calls) | Testing behavior, not state | Verify email service was called | +| **Stub** | Return predefined responses | Testing with controlled inputs | Return fake user data | +| **Fake** | Working implementation (simpler) | Need real behavior without dependencies | In-memory database | +| **Spy** | Record calls while preserving real behavior | Testing interactions + real logic | Count cache hits | + +### Mocks (Verify Behavior) + +```python +from unittest.mock import Mock, patch, call + +def test_send_welcome_email_called_on_registration(client, mocker): + """Mock to verify email service was called""" + mock_send_email = mocker.patch("app.services.email.send_email") + + response = client.post("/register", json={ + "email": "user@example.com", + "name": "Alice" + }) + + assert response.status_code == 201 + + # Verify email service was called with correct arguments + mock_send_email.assert_called_once_with( + to="user@example.com", + template="welcome", + context={"name": "Alice"} + ) + +def test_payment_failure_triggers_rollback(client, mocker): + """Mock to verify rollback is called on payment failure""" + mock_payment = mocker.patch("app.services.payment.charge") + mock_payment.side_effect = PaymentError("Card declined") + + mock_rollback = mocker.patch("app.database.rollback") + + response = client.post("/orders", json={"total": 100}) + + assert response.status_code == 402 + 
mock_rollback.assert_called_once() +``` + +### Stubs (Return Predefined Data) + +```python +def test_user_profile_with_stubbed_external_api(client, mocker): + """Stub external API to return controlled data""" + # Stub returns predefined response + mock_external_api = mocker.patch("app.services.profile.fetch_profile_data") + mock_external_api.return_value = { + "avatar_url": "https://example.com/avatar.jpg", + "bio": "Test bio" + } + + response = client.get("/users/123/profile") + + assert response.status_code == 200 + data = response.json() + assert data["avatar_url"] == "https://example.com/avatar.jpg" + +def test_payment_processing_with_different_responses(client, mocker): + """Test different payment responses using stubs""" + mock_payment = mocker.patch("app.services.payment.charge") + + # Test success + mock_payment.return_value = {"status": "success", "id": "pay_123"} + response = client.post("/orders", json={"total": 100}) + assert response.status_code == 201 + + # Test failure + mock_payment.return_value = {"status": "declined", "reason": "insufficient_funds"} + response = client.post("/orders", json={"total": 100}) + assert response.status_code == 402 +``` + +### Fakes (Working Implementation) + +```python +class FakePaymentGateway: + """Fake payment gateway with working implementation""" + def __init__(self): + self.charges = [] + self.fail_next = False + + def charge(self, amount, customer_id): + """Fake charge that tracks calls""" + if self.fail_next: + self.fail_next = False + raise PaymentError("Simulated failure") + + charge_id = f"fake_charge_{len(self.charges) + 1}" + self.charges.append({ + "id": charge_id, + "amount": amount, + "customer_id": customer_id, + "status": "success" + }) + return {"id": charge_id, "status": "success"} + + def refund(self, charge_id): + """Fake refund""" + for charge in self.charges: + if charge["id"] == charge_id: + charge["status"] = "refunded" + return True + return False + +@pytest.fixture +def fake_payment(): + return FakePaymentGateway() + +def test_order_with_fake_payment(client, fake_payment): + """Test using fake payment gateway""" + app.dependency_overrides[get_payment_gateway] = lambda: fake_payment + + # Create order + response = client.post("/orders", json={"total": 100}) + assert response.status_code == 201 + + # Verify payment was charged + assert len(fake_payment.charges) == 1 + assert fake_payment.charges[0]["amount"] == 100 + + # Test refund + charge_id = fake_payment.charges[0]["id"] + response = client.post(f"/orders/{charge_id}/refund") + + assert response.status_code == 200 + assert fake_payment.charges[0]["status"] == "refunded" +``` + +### Spies (Record Calls + Real Behavior) + +```python +def test_cache_hit_rate_with_spy(client, mocker): + """Spy on cache to measure hit rate""" + real_cache_get = cache.get + + call_count = {"hits": 0, "misses": 0} + + def spy_cache_get(key): + result = real_cache_get(key) + if result is not None: + call_count["hits"] += 1 + else: + call_count["misses"] += 1 + return result + + mocker.patch("app.cache.get", side_effect=spy_cache_get) + + # Make requests + for _ in range(10): + client.get("/users/123") + + # Verify cache behavior + assert call_count["hits"] > 5 # Most should hit cache + assert call_count["misses"] <= 1 # Only first miss +``` + +## Performance Testing + +### Load Testing with Locust + +**Setup Locust test**: + +```python +# tests/performance/locustfile.py +from locust import HttpUser, task, between +import random + +class APIUser(HttpUser): + """Simulate API user 
behavior""" + wait_time = between(1, 3) # Wait 1-3 seconds between requests + + def on_start(self): + """Login once per user""" + response = self.client.post("/login", json={ + "email": "test@example.com", + "password": "password123" + }) + self.token = response.json()["access_token"] + + @task(3) # Weight: 3x more likely than other tasks + def get_users(self): + """GET /users (most common operation)""" + self.client.get( + "/users", + headers={"Authorization": f"Bearer {self.token}"} + ) + + @task(2) + def get_user_detail(self): + """GET /users/{id}""" + user_id = random.randint(1, 1000) + self.client.get( + f"/users/{user_id}", + headers={"Authorization": f"Bearer {self.token}"} + ) + + @task(1) + def create_order(self): + """POST /orders (less common)""" + self.client.post( + "/orders", + json={"total": 99.99, "items": ["item1", "item2"]}, + headers={"Authorization": f"Bearer {self.token}"} + ) +``` + +**Run load test**: + +```bash +# Start Locust +locust -f tests/performance/locustfile.py --host=http://localhost:8000 + +# Command-line load test (no web UI) +locust -f tests/performance/locustfile.py \ + --host=http://localhost:8000 \ + --users 100 \ + --spawn-rate 10 \ + --run-time 60s \ + --headless +``` + +**Performance thresholds in tests**: + +```python +import pytest +from locust import stats +from locust.env import Environment + +def test_api_handles_load(): + """Test API handles 100 concurrent users""" + env = Environment(user_classes=[APIUser]) + runner = env.create_local_runner() + + # Run load test + runner.start(user_count=100, spawn_rate=10) + runner.greenlet.join(timeout=60) + + # Assert performance requirements + stats_dict = runner.stats.total + + assert stats_dict.avg_response_time < 200, "Average response time too high" + assert stats_dict.fail_ratio < 0.01, "Error rate above 1%" + assert stats_dict.get_response_time_percentile(0.95) < 500, "95th percentile too high" +``` + +### Benchmark Testing with pytest-benchmark + +```python +import pytest + +def test_user_query_performance(benchmark, test_db): + """Benchmark user query performance""" + # Setup test data + UserFactory.create_batch(1000) + + # Benchmark the query + result = benchmark(lambda: test_db.query(User).filter(User.is_active == True).all()) + + # Assertions on benchmark + assert len(result) == 1000 + assert benchmark.stats["mean"] < 0.1, "Query too slow (>100ms)" + +def test_endpoint_response_time(benchmark, client): + """Benchmark endpoint response time""" + def make_request(): + return client.get("/users") + + result = benchmark(make_request) + + assert result.status_code == 200 + assert benchmark.stats["mean"] < 0.050, "Endpoint too slow (>50ms)" +``` + +**Benchmark comparison** (track performance over time): + +```bash +# Save benchmark results +pytest tests/performance/ --benchmark-save=baseline + +# Compare against baseline +pytest tests/performance/ --benchmark-compare=baseline + +# Fail if performance degrades >10% +pytest tests/performance/ --benchmark-compare=baseline --benchmark-compare-fail=mean:10% +``` + +## Security Testing + +### Authentication Testing + +**Test auth flows**: + +```python +import pytest + +def test_login_with_valid_credentials(client): + """Test successful login""" + response = client.post("/login", json={ + "email": "user@example.com", + "password": "correct_password" + }) + + assert response.status_code == 200 + data = response.json() + assert "access_token" in data + assert "refresh_token" in data + +def test_login_with_invalid_credentials(client): + """Test failed 
login""" + response = client.post("/login", json={ + "email": "user@example.com", + "password": "wrong_password" + }) + + assert response.status_code == 401 + assert "invalid credentials" in response.json()["detail"].lower() + +def test_access_protected_endpoint_without_token(client): + """Test unauthorized access""" + response = client.get("/users/me") + assert response.status_code == 401 + +def test_access_protected_endpoint_with_valid_token(client, auth_token): + """Test authorized access""" + response = client.get( + "/users/me", + headers={"Authorization": f"Bearer {auth_token}"} + ) + assert response.status_code == 200 + +def test_access_with_expired_token(client): + """Test expired token rejection""" + expired_token = create_expired_token(user_id=1) + + response = client.get( + "/users/me", + headers={"Authorization": f"Bearer {expired_token}"} + ) + + assert response.status_code == 401 + assert "expired" in response.json()["detail"].lower() + +def test_token_refresh(client, refresh_token): + """Test refresh token flow""" + response = client.post("/refresh", json={ + "refresh_token": refresh_token + }) + + assert response.status_code == 200 + data = response.json() + assert "access_token" in data + assert data["access_token"] != refresh_token +``` + +### Authorization Testing + +```python +@pytest.mark.parametrize("role,endpoint,expected_status", [ + ("admin", "/admin/users", 200), + ("admin", "/admin/settings", 200), + ("user", "/admin/users", 403), + ("user", "/admin/settings", 403), + ("user", "/users/me", 200), + ("guest", "/users/me", 401), +]) +def test_role_based_access_control(client, role, endpoint, expected_status): + """Test RBAC for different roles""" + if role == "guest": + response = client.get(endpoint) + else: + token = create_token_with_role(role) + response = client.get(endpoint, headers={"Authorization": f"Bearer {token}"}) + + assert response.status_code == expected_status +``` + +### Injection Testing + +**SQL injection testing**: + +```python +def test_sql_injection_in_query_params(client): + """Test SQL injection is prevented""" + malicious_input = "1' OR '1'='1" + + response = client.get(f"/users?name={malicious_input}") + + # Should return empty or error, not all users + assert response.status_code in [200, 400] + if response.status_code == 200: + assert len(response.json()) == 0 + +def test_sql_injection_in_json_body(client): + """Test SQL injection in request body""" + response = client.post("/users", json={ + "name": "'; DROP TABLE users; --", + "email": "test@example.com" + }) + + # Should succeed (string is escaped) or fail validation + assert response.status_code in [201, 400] + + # Verify table still exists + verify_response = client.get("/users") + assert verify_response.status_code == 200 +``` + +**Command injection testing**: + +```python +def test_command_injection_in_file_path(client): + """Test command injection is prevented""" + malicious_path = "../../etc/passwd" + + response = client.get(f"/files/{malicious_path}") + + assert response.status_code in [400, 404] + assert "etc/passwd" not in response.text +``` + +### CORS Testing + +```python +def test_cors_headers_present(client): + """Test CORS headers are set""" + response = client.options( + "/users", + headers={"Origin": "https://example.com"} + ) + + assert response.headers.get("Access-Control-Allow-Origin") == "https://example.com" + assert "GET" in response.headers.get("Access-Control-Allow-Methods", "") + assert "POST" in response.headers.get("Access-Control-Allow-Methods", "") + +def 
test_cors_blocks_unauthorized_origin(client): + """Test CORS blocks unauthorized origins""" + response = client.options( + "/users", + headers={"Origin": "https://malicious.com"} + ) + + # Should not include CORS headers for unauthorized origin + assert response.headers.get("Access-Control-Allow-Origin") is None +``` + +### Rate Limiting Testing + +```python +def test_rate_limit_enforced(client): + """Test rate limiting blocks excessive requests""" + # Make requests up to limit (e.g., 100/minute) + for _ in range(100): + response = client.get("/users") + assert response.status_code == 200 + + # Next request should be rate limited + response = client.get("/users") + assert response.status_code == 429 + assert "rate limit" in response.json()["detail"].lower() + +def test_rate_limit_reset_after_window(client, mocker): + """Test rate limit resets after time window""" + # Exhaust rate limit + for _ in range(100): + client.get("/users") + + # Fast-forward time + mocker.patch("time.time", return_value=time.time() + 61) # 61 seconds + + # Should work again + response = client.get("/users") + assert response.status_code == 200 +``` + +## Test Quality and Coverage + +### Coverage Analysis + +**Run tests with coverage**: + +```bash +# Generate coverage report +pytest --cov=app --cov-report=html --cov-report=term + +# Fail if coverage below threshold +pytest --cov=app --cov-fail-under=80 + +# Show missing lines +pytest --cov=app --cov-report=term-missing +``` + +**Coverage configuration (.coveragerc)**: + +```ini +[run] +source = app +omit = + */tests/* + */migrations/* + */__pycache__/* + */venv/* + +[report] +exclude_lines = + pragma: no cover + def __repr__ + raise AssertionError + raise NotImplementedError + if __name__ == .__main__.: + if TYPE_CHECKING: + @abstractmethod +``` + +**Branch coverage** (more thorough): + +```bash +# Test both branches of conditionals +pytest --cov=app --cov-branch +``` + +### Mutation Testing + +**Install mutation testing tools**: + +```bash +pip install mutpy cosmic-ray +``` + +**Run mutation tests** (test the tests): + +```bash +# Using mutpy +mut.py --target app.services.payment \ + --unit-test tests.test_payment \ + --report-html mutation-report + +# Using cosmic-ray +cosmic-ray init cosmic-ray.conf payment_session +cosmic-ray exec payment_session +cosmic-ray report payment_session +``` + +**Mutation testing concept**: +- Introduces bugs into code (mutations) +- Runs tests against mutated code +- If tests still pass, they didn't catch the mutation (weak tests) +- Goal: 100% mutation score (all mutations caught) + +### Detecting Flaky Tests + +**Repeat tests to find flakiness**: + +```bash +# Run tests 100 times to detect flaky tests +pytest --count=100 tests/test_orders.py + +# Run with pytest-flakefinder +pytest --flake-finder --flake-runs=50 +``` + +**Common flaky test causes**: +- Non-deterministic data (random, timestamps) +- Async race conditions +- Test order dependencies +- External service dependencies +- Shared test state + +**Fix flaky tests**: + +```python +# BAD: Non-deterministic timestamp +def test_user_creation_time(client): + response = client.post("/users", json={...}) + # Flaky: timestamp might differ by milliseconds + assert response.json()["created_at"] == datetime.now().isoformat() + +# GOOD: Relative time check +def test_user_creation_time(client): + before = datetime.now() + response = client.post("/users", json={...}) + after = datetime.now() + + created_at = datetime.fromisoformat(response.json()["created_at"]) + assert before <= 
created_at <= after + +# BAD: Random data without seed +def test_user_name(): + name = random.choice(["Alice", "Bob", "Charlie"]) + # Flaky: different name each run + assert create_user(name).name == name + +# GOOD: Seeded random or fixed data +def test_user_name(): + random.seed(42) # Deterministic + name = random.choice(["Alice", "Bob", "Charlie"]) + assert create_user(name).name == name +``` + +## CI/CD Integration + +### GitHub Actions Workflow + +```yaml +# .github/workflows/test.yml +name: Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + + services: + postgres: + image: postgres:15 + env: + POSTGRES_PASSWORD: postgres + POSTGRES_DB: test_db + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + redis: + image: redis:7 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 6379:6379 + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' + + - name: Install dependencies + run: | + pip install -r requirements.txt + pip install -r requirements-dev.txt + + - name: Run migrations + run: alembic upgrade head + env: + DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db + + - name: Run unit tests + run: pytest -m unit --cov=app --cov-report=xml + + - name: Run integration tests + run: pytest -m integration --cov=app --cov-append --cov-report=xml + env: + DATABASE_URL: postgresql://postgres:postgres@localhost:5432/test_db + REDIS_URL: redis://localhost:6379 + + - name: Run security tests + run: pytest -m security + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v3 + with: + files: ./coverage.xml + fail_ci_if_error: true + + - name: Check coverage threshold + run: pytest --cov=app --cov-fail-under=80 +``` + +### Test Reporting + +**Generate JUnit XML for CI**: + +```bash +pytest --junitxml=test-results.xml +``` + +**HTML test report**: + +```bash +pytest --html=test-report.html --self-contained-html +``` + +## Debugging Failing Tests + +### pytest Debugging Flags + +```bash +# Stop on first failure +pytest -x + +# Show local variables on failure +pytest -l + +# Enter debugger on failure +pytest --pdb + +# Enter debugger on first failure +pytest -x --pdb + +# Show print statements +pytest -s + +# Verbose output +pytest -v + +# Very verbose output (show full diff) +pytest -vv + +# Run last failed tests only +pytest --lf + +# Run failed tests first, then rest +pytest --ff + +# Show slowest 10 tests +pytest --durations=10 +``` + +### Using pdb for Interactive Debugging + +```python +import pytest + +def test_complex_calculation(client): + """Debug this test interactively""" + response = client.post("/calculate", json={"x": 10, "y": 20}) + + # Set breakpoint + import pdb; pdb.set_trace() + + # Interactive debugging from here + result = response.json() + assert result["sum"] == 30 +``` + +**pdb commands**: +- `n` (next): Execute next line +- `s` (step): Step into function +- `c` (continue): Continue execution +- `p variable`: Print variable value +- `pp variable`: Pretty-print variable +- `l` (list): Show current location in code +- `w` (where): Show stack trace +- `q` (quit): Exit debugger + +### Debugging with pytest-timeout + +```python +import pytest + +@pytest.mark.timeout(5) # Fail if test takes >5 seconds +def test_slow_operation(client): + """This test might hang - timeout prevents infinite 
wait""" + response = client.get("/slow-endpoint") + assert response.status_code == 200 +``` + +## Anti-Patterns + +| Anti-Pattern | Why Bad | Fix | +|--------------|---------|-----| +| **Tests depend on each other** | Brittle, can't run in parallel | Use fixtures for shared setup | +| **Testing implementation details** | Breaks when refactoring | Test behavior/outcomes, not internals | +| **No test isolation** | One test affects another | Use transaction rollback, clean state | +| **Mocking too much** | Tests don't reflect reality | Use real dependencies where feasible | +| **No performance tests** | Production slowdowns surprise you | Add load/benchmark tests | +| **Ignoring flaky tests** | Erodes trust in test suite | Fix or remove flaky tests | +| **Low coverage with poor tests** | False confidence | Focus on quality, not just coverage | +| **Testing private methods** | Couples tests to implementation | Test public interface only | + +## Cross-References + +**Related skills**: +- **Database testing** → `database-integration` (test database setup, query testing) +- **FastAPI patterns** → `fastapi-development` (dependency injection for tests) +- **Security** → `ordis-security-architect` (security testing strategies) +- **Authentication** → `api-authentication` (auth testing patterns) + +## Further Reading + +- **pytest documentation**: https://docs.pytest.org/ +- **Testing FastAPI**: https://fastapi.tiangolo.com/tutorial/testing/ +- **Locust load testing**: https://docs.locust.io/ +- **Test Driven Development** by Kent Beck +- **Growing Object-Oriented Software, Guided by Tests** by Freeman & Pryce diff --git a/skills/using-web-backend/database-integration.md b/skills/using-web-backend/database-integration.md new file mode 100644 index 0000000..656ee8b --- /dev/null +++ b/skills/using-web-backend/database-integration.md @@ -0,0 +1,1117 @@ + +# Database Integration + +## Overview + +**Database integration specialist covering SQLAlchemy, connection pooling, query optimization, migrations, transactions, and production patterns.** + +**Core principle**: Databases are stateful, high-latency external systems requiring careful connection management, query optimization, and migration strategies to maintain performance and reliability at scale. 
+ +## When to Use This Skill + +Use when encountering: + +- **Connection pooling**: Pool exhaustion, "too many connections" errors, pool configuration +- **Query optimization**: N+1 queries, slow endpoints, eager loading strategies +- **Migrations**: Schema changes, zero-downtime deployments, data backfills +- **Transactions**: Multi-step operations, rollback strategies, isolation levels +- **ORM vs Raw SQL**: Complex queries, performance optimization, query readability +- **Testing**: Database test strategies, fixtures, test isolation +- **Monitoring**: Query performance tracking, connection pool health + +**Do NOT use for**: +- Database selection (PostgreSQL vs MySQL vs MongoDB) +- Database administration (backup, replication, sharding) +- Schema design principles (see general architecture resources) + +## Connection Pool Configuration + +### Pool Sizing Formula + +**Calculate pool size based on deployment architecture**: + +```python +# Formula: pool_size × num_workers ≤ (postgres_max_connections - reserved) +# Example: 10 workers × 5 connections = 50 total ≤ (100 - 10) reserved + +from sqlalchemy import create_engine +from sqlalchemy.pool import QueuePool + +DATABASE_URL = "postgresql://user:pass@host/db" + +engine = create_engine( + DATABASE_URL, + poolclass=QueuePool, + pool_size=5, # Connections per worker + max_overflow=10, # Additional connections during spikes + pool_pre_ping=True, # CRITICAL: Verify connection before use + pool_recycle=3600, # Recycle after 1 hour (prevent stale connections) + pool_timeout=30, # Wait max 30s for connection from pool + echo_pool=False, # Enable for debugging pool issues + connect_args={ + "connect_timeout": 10, + "options": "-c statement_timeout=30000" # 30s query timeout + } +) +``` + +**Environment-based configuration**: + +```python +import os +from pydantic import BaseSettings + +class DatabaseSettings(BaseSettings): + database_url: str + pool_size: int = 5 + max_overflow: int = 10 + pool_pre_ping: bool = True + pool_recycle: int = 3600 + + class Config: + env_file = ".env" + +settings = DatabaseSettings() + +engine = create_engine( + settings.database_url, + pool_size=settings.pool_size, + max_overflow=settings.max_overflow, + pool_pre_ping=settings.pool_pre_ping, + pool_recycle=settings.pool_recycle +) +``` + +**Async configuration** (asyncpg): + +```python +from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession +from sqlalchemy.orm import sessionmaker + +engine = create_async_engine( + "postgresql+asyncpg://user:pass@host/db", + pool_size=20, # Async handles more concurrent connections + max_overflow=0, # No overflow - fail fast + pool_pre_ping=False, # asyncpg handles internally + pool_recycle=3600 +) + +async_session = sessionmaker( + engine, class_=AsyncSession, expire_on_commit=False +) +``` + +### Pool Health Monitoring + +**Health check endpoint**: + +```python +from fastapi import FastAPI, HTTPException +from sqlalchemy import text + +app = FastAPI() + +@app.get("/health/database") +async def database_health(db: Session = Depends(get_db)): + """Check database connectivity and pool status""" + try: + # Simple query to verify connection + result = db.execute(text("SELECT 1")) + + # Check pool statistics + pool = db.get_bind().pool + pool_status = { + "size": pool.size(), + "checked_in": pool.checkedin(), + "checked_out": pool.checkedout(), + "overflow": pool.overflow(), + "total_connections": pool.size() + pool.overflow() + } + + return { + "status": "healthy", + "pool": pool_status + } + except Exception as e: + raise 
HTTPException(status_code=503, detail=f"Database unhealthy: {e}") +``` + +**Pool exhaustion debugging**: + +```python +import logging + +logger = logging.getLogger(__name__) + +# Enable pool event logging +from sqlalchemy import event + +@event.listens_for(engine, "connect") +def receive_connect(dbapi_conn, connection_record): + logger.info(f"New connection created: {id(dbapi_conn)}") + +@event.listens_for(engine, "checkout") +def receive_checkout(dbapi_conn, connection_record, connection_proxy): + logger.debug(f"Connection checked out: {id(dbapi_conn)}") + + pool = connection_proxy._pool + logger.debug( + f"Pool status - size: {pool.size()}, " + f"checked_out: {pool.checkedout()}, " + f"overflow: {pool.overflow()}" + ) + +@event.listens_for(engine, "checkin") +def receive_checkin(dbapi_conn, connection_record): + logger.debug(f"Connection checked in: {id(dbapi_conn)}") +``` + +### Testing with NullPool + +**Disable pooling in tests**: + +```python +from sqlalchemy.pool import NullPool + +# Test configuration - no connection pooling +test_engine = create_engine( + "postgresql://user:pass@localhost/test_db", + poolclass=NullPool, # No pooling - each query gets new connection + echo=True # Log all SQL queries +) +``` + +## Query Optimization + +### N+1 Query Detection + +**Automatic detection in tests**: + +```python +from sqlalchemy import event +from sqlalchemy.engine import Engine +import pytest + +class QueryCounter: + """Count queries executed during test""" + def __init__(self): + self.queries = [] + + def __enter__(self): + event.listen(Engine, "before_cursor_execute", self._before_cursor_execute) + return self + + def __exit__(self, *args): + event.remove(Engine, "before_cursor_execute", self._before_cursor_execute) + + def _before_cursor_execute(self, conn, cursor, statement, *args): + self.queries.append(statement) + + @property + def count(self): + return len(self.queries) + +# Test usage +def test_no_n_plus_1(): + with QueryCounter() as counter: + users = get_users_with_posts() # Should use eager loading + + # Access posts (should not trigger additional queries) + for user in users: + _ = [post.title for post in user.posts] + + # Should be 1-2 queries, not 101 + assert counter.count <= 2, f"N+1 detected: {counter.count} queries" +``` + +### Eager Loading Strategies + +**Decision matrix**: + +| Pattern | Queries | Use When | Example | +|---------|---------|----------|---------| +| `joinedload()` | 1 (JOIN) | One-to-one, small one-to-many | User → Profile | +| `selectinload()` | 2 (IN clause) | One-to-many with many rows | User → Posts | +| `subqueryload()` | 2 (subquery) | Legacy alternative | Use selectinload instead | +| `raiseload()` | 0 (raises error) | Prevent lazy loading | Production safety | + +**joinedload() - Single query with JOIN**: + +```python +from sqlalchemy.orm import joinedload + +# Single query: SELECT * FROM users LEFT OUTER JOIN posts ON ... +users = db.query(User).options( + joinedload(User.posts) +).all() + +# Best for one-to-one or small one-to-many +user = db.query(User).options( + joinedload(User.profile) # One-to-one +).filter(User.id == user_id).first() +``` + +**selectinload() - Two queries (more efficient for many rows)**: + +```python +from sqlalchemy.orm import selectinload + +# Query 1: SELECT * FROM users +# Query 2: SELECT * FROM posts WHERE user_id IN (1, 2, 3, ...) 
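+# Two round trips, but no duplicated parent rows from a JOIN -
+# the result set stays compact even when each user has many posts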
+users = db.query(User).options(
+    selectinload(User.posts)
+).all()
+
+# Best for one-to-many with many related rows
+```
+
+**Nested eager loading**:
+
+```python
+# Load users → posts → comments (3 queries total)
+users = db.query(User).options(
+    selectinload(User.posts).selectinload(Post.comments)
+).all()
+```
+
+**Conditional eager loading**:
+
+```python
+from sqlalchemy.orm import selectinload, with_loader_criteria
+
+# Only load published posts - with_loader_criteria applies the filter
+# to the eager-load query (SQLAlchemy 1.4+)
+users = db.query(User).options(
+    selectinload(User.posts),
+    with_loader_criteria(Post, Post.published == True)
+).all()
+```
+
+**Prevent lazy loading in production** (raiseload):
+
+```python
+from sqlalchemy.orm import raiseload
+
+# Raise error if any relationship accessed without eager loading
+users = db.query(User).options(
+    raiseload('*')  # Disable all lazy loading
+).all()
+
+# This will raise an error:
+# user.posts  # InvalidRequestError: 'User.posts' is not available due to lazy='raise'
+```
+
+### Query Performance Measurement
+
+**Log slow queries**:
+
+```python
+from sqlalchemy import event
+from sqlalchemy.engine import Engine
+import time
+import logging
+
+logger = logging.getLogger(__name__)
+
+SLOW_QUERY_THRESHOLD = 1.0  # seconds
+
+@event.listens_for(Engine, "before_cursor_execute")
+def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
+    conn.info.setdefault('query_start_time', []).append(time.time())
+
+@event.listens_for(Engine, "after_cursor_execute")
+def after_cursor_execute(conn, cursor, statement, parameters, context, executemany):
+    total_time = time.time() - conn.info['query_start_time'].pop()
+
+    if total_time > SLOW_QUERY_THRESHOLD:
+        logger.warning(
+            f"Slow query ({total_time:.2f}s): {statement[:200]}",
+            extra={
+                "duration": total_time,
+                "statement": statement,
+                "parameters": parameters
+            }
+        )
+```
+
+**EXPLAIN ANALYZE for query optimization**:
+
+```python
+from sqlalchemy import text
+
+def explain_query(db: Session, query):
+    """Get query execution plan"""
+    compiled = query.statement.compile(
+        compile_kwargs={"literal_binds": True}
+    )
+
+    explain_result = db.execute(
+        text(f"EXPLAIN ANALYZE {compiled}")
+    ).fetchall()
+
+    return "\n".join([row[0] for row in explain_result])
+
+# Usage
+query = db.query(User).join(Post).filter(Post.published == True)
+plan = explain_query(db, query)
+print(plan)
+```
+
+### Deferred Column Loading
+
+**Exclude large columns from initial query**:
+
+```python
+from sqlalchemy.orm import defer, undefer
+
+# Don't load large 'bio' column initially
+users = db.query(User).options(
+    defer(User.bio),            # Skip this column
+    defer(User.profile_image)   # Skip binary data
+).all()
+
+# Load specific user's bio when needed
+user = db.query(User).options(
+    undefer(User.bio)  # Load this column
+).filter(User.id == user_id).first()
+```
+
+**Load only specific columns**:
+
+```python
+from sqlalchemy.orm import load_only
+
+# Only load id and name (ignore all other columns)
+users = db.query(User).options(
+    load_only(User.id, User.name)
+).all()
+```
+
+## Zero-Downtime Migrations
+
+### Migration Decision Matrix
+
+| Operation | Locking | Approach | Downtime |
+|-----------|---------|----------|----------|
+| Add nullable column | None | Single migration | No |
+| Add NOT NULL column | Table lock | Multi-phase (nullable → backfill → NOT NULL) | No |
+| Add index | Share lock | `CREATE INDEX CONCURRENTLY` | No |
+| Add foreign key | Share lock | `NOT VALID` → `VALIDATE` | No |
+| Drop column | None | Multi-phase (stop using → drop) | No |
+| Rename
 column | None | Multi-phase (add new → dual write → drop old) | No |
+| Alter column type | Table lock | Multi-phase or rebuild table | Maybe |
+
+### Multi-Phase NOT NULL Migration
+
+**Phase 1: Add nullable column**:
+
+```python
+# migrations/versions/001_add_email_verified.py
+def upgrade():
+    # Fast: no table rewrite
+    op.add_column('users', sa.Column('email_verified', sa.Boolean(), nullable=True))
+
+    # Set default for new rows
+    op.execute("ALTER TABLE users ALTER COLUMN email_verified SET DEFAULT false")
+
+def downgrade():
+    op.drop_column('users', 'email_verified')
+```
+
+**Phase 2: Backfill in batches**:
+
+```python
+# migrations/versions/002_backfill_email_verified.py
+from alembic import op
+import sqlalchemy as sa
+
+def upgrade():
+    """Backfill existing rows in batches"""
+    connection = op.get_bind()
+
+    # Process in batches to avoid long transactions
+    batch_size = 10000
+    total_updated = 0
+
+    while True:
+        result = connection.execute(sa.text("""
+            UPDATE users
+            SET email_verified = false
+            WHERE email_verified IS NULL
+            AND id IN (
+                SELECT id FROM users
+                WHERE email_verified IS NULL
+                ORDER BY id
+                LIMIT :batch_size
+            )
+        """), {"batch_size": batch_size})
+
+        rows_updated = result.rowcount
+        total_updated += rows_updated
+
+        if rows_updated == 0:
+            break
+
+    print(f"Backfilled {total_updated} rows")
+
+def downgrade():
+    pass  # No rollback needed
+```
+
+**Phase 3: Add NOT NULL constraint**:
+
+```python
+# migrations/versions/003_make_email_verified_not_null.py
+def upgrade():
+    # Verify no NULLs remain
+    connection = op.get_bind()
+    result = connection.execute(sa.text(
+        "SELECT COUNT(*) FROM users WHERE email_verified IS NULL"
+    ))
+    null_count = result.scalar()
+
+    if null_count > 0:
+        raise Exception(f"Cannot add NOT NULL: {null_count} NULL values remain")
+
+    # Add NOT NULL constraint (fast since all values are set)
+    op.alter_column('users', 'email_verified', nullable=False)
+
+def downgrade():
+    op.alter_column('users', 'email_verified', nullable=True)
+```
+
+### Concurrent Index Creation
+
+**Without CONCURRENTLY (blocks writes)**:
+
+```python
+# BAD: Locks table during index creation
+def upgrade():
+    op.create_index('idx_users_email', 'users', ['email'])
+```
+
+**With CONCURRENTLY (no locks)**:
+
+```python
+# GOOD: No blocking, safe for production
+def upgrade():
+    # CONCURRENTLY cannot run inside a transaction block, and Alembic
+    # wraps migrations in one by default - use an autocommit block
+    with op.get_context().autocommit_block():
+        op.execute("""
+            CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_users_email
+            ON users (email)
+        """)
+
+def downgrade():
+    with op.get_context().autocommit_block():
+        op.execute("DROP INDEX CONCURRENTLY IF EXISTS idx_users_email")
+```
+
+**Partial index for efficiency**:
+
+```python
+def upgrade():
+    with op.get_context().autocommit_block():
+        op.execute("""
+            CREATE INDEX CONCURRENTLY idx_users_active_email
+            ON users (email)
+            WHERE deleted_at IS NULL
+        """)
+```
+
+### Adding Foreign Keys Without Blocking
+
+**Using NOT VALID constraint**:
+
+```python
+# migrations/versions/004_add_foreign_key.py
+def upgrade():
+    # Phase 1: Add constraint without validating existing rows (fast)
+    op.execute("""
+        ALTER TABLE posts
+        ADD CONSTRAINT fk_posts_user_id
+        FOREIGN KEY (user_id)
+        REFERENCES users (id)
+        NOT VALID
+    """)
+
+    # Phase 2: Validate constraint in background (can be canceled/restarted)
+    op.execute("""
+        ALTER TABLE posts
+        VALIDATE CONSTRAINT fk_posts_user_id
+    """)
+
+def downgrade():
+    op.drop_constraint('fk_posts_user_id', 'posts', type_='foreignkey')
+```
+
+### Migration Monitoring
+
+**Track migration progress**:
+
+```sql
+-- Check backfill progress
+SELECT
+    COUNT(*) FILTER (WHERE email_verified IS NULL) as null_count,
+
COUNT(*) as total_count, + ROUND(100.0 * COUNT(*) FILTER (WHERE email_verified IS NOT NULL) / COUNT(*), 2) as pct_complete +FROM users; + +-- Check index creation progress (PostgreSQL 12+) +SELECT + phase, + ROUND(100.0 * blocks_done / NULLIF(blocks_total, 0), 2) as pct_complete +FROM pg_stat_progress_create_index +WHERE relid = 'users'::regclass; +``` + +## Transaction Management + +### Basic Transaction Pattern + +**Context manager with automatic rollback**: + +```python +from contextlib import contextmanager +from sqlalchemy.orm import Session + +@contextmanager +def transactional_session(db: Session): + """Context manager for automatic rollback on error""" + try: + yield db + db.commit() + except Exception as e: + db.rollback() + raise + finally: + db.close() + +# Usage +with transactional_session(db) as session: + user = User(name="Alice") + session.add(user) + # Automatic commit on success, rollback on exception +``` + +### Savepoints for Partial Rollback + +**Nested transactions with savepoints**: + +```python +def create_order_with_retry(db: Session, order_data: dict): + """Use savepoints to retry failed steps without losing entire transaction""" + # Start main transaction + order = Order(**order_data) + db.add(order) + db.flush() # Get order.id + + # Try payment with savepoint + sp = db.begin_nested() # Create savepoint + try: + payment = process_payment(order.total) + order.payment_id = payment.id + except PaymentError as e: + sp.rollback() # Rollback to savepoint (keep order) + + # Try alternative payment method + sp = db.begin_nested() + try: + payment = process_backup_payment(order.total) + order.payment_id = payment.id + except PaymentError: + sp.rollback() + raise HTTPException(status_code=402, detail="All payment methods failed") + + db.commit() # Commit entire transaction + return order +``` + +### Locking Strategies + +**Optimistic locking with version column**: + +```python +from sqlalchemy import Column, Integer, String + +class Product(Base): + __tablename__ = "products" + id = Column(Integer, primary_key=True) + name = Column(String) + inventory = Column(Integer) + version = Column(Integer, nullable=False, default=1) # Version column + +# Usage +def decrement_inventory(db: Session, product_id: int, quantity: int): + product = db.query(Product).filter(Product.id == product_id).first() + + if product.inventory < quantity: + raise ValueError("Insufficient inventory") + + # Update with version check + rows_updated = db.execute( + sa.update(Product) + .where(Product.id == product_id) + .where(Product.version == product.version) # Check version hasn't changed + .values( + inventory=Product.inventory - quantity, + version=Product.version + 1 + ) + ).rowcount + + if rows_updated == 0: + # Version mismatch - another transaction modified this row + raise HTTPException(status_code=409, detail="Product was modified by another transaction") + + db.commit() +``` + +**Pessimistic locking with SELECT FOR UPDATE**: + +```python +def decrement_inventory_with_lock(db: Session, product_id: int, quantity: int): + """Acquire row lock to prevent concurrent modifications""" + # Lock the row (blocks other transactions) + product = db.query(Product).filter( + Product.id == product_id + ).with_for_update().first() # SELECT ... 
FOR UPDATE + + if not product: + raise HTTPException(status_code=404, detail="Product not found") + + if product.inventory < quantity: + raise HTTPException(status_code=400, detail="Insufficient inventory") + + product.inventory -= quantity + db.commit() + # Lock released after commit +``` + +**Lock timeout to prevent deadlocks**: + +```python +from sqlalchemy import text + +def with_lock_timeout(db: Session, timeout_ms: int = 5000): + """Set lock timeout for this transaction""" + db.execute(text(f"SET LOCAL lock_timeout = '{timeout_ms}ms'")) + +# Usage +try: + with_lock_timeout(db, 3000) # 3 second timeout + product = db.query(Product).with_for_update().filter(...).first() +except Exception as e: + if "lock timeout" in str(e).lower(): + raise HTTPException(status_code=409, detail="Resource locked by another transaction") + raise +``` + +### Isolation Levels + +**Configure isolation level**: + +```python +from sqlalchemy import create_engine + +# Default: READ COMMITTED +engine = create_engine( + DATABASE_URL, + isolation_level="REPEATABLE READ" # Options: READ UNCOMMITTED, READ COMMITTED, REPEATABLE READ, SERIALIZABLE +) + +# Per-transaction isolation +from sqlalchemy.orm import Session + +with Session(engine) as session: + session.connection(execution_options={"isolation_level": "SERIALIZABLE"}) + # ... transaction logic ... +``` + +## Raw SQL vs ORM + +### Decision Matrix + +| Use ORM When | Use Raw SQL When | +|--------------|------------------| +| CRUD operations | Complex CTEs (Common Table Expressions) | +| Simple joins (<3 tables) | Window functions with PARTITION BY | +| Type safety critical | Performance-critical queries | +| Database portability needed | Database-specific optimizations (PostgreSQL arrays, JSONB) | +| Code readability with ORM is good | ORM query becomes unreadable (>10 lines) | + +### Raw SQL with Type Safety + +**Parameterized queries with Pydantic results**: + +```python +from sqlalchemy import text +from pydantic import BaseModel +from typing import List + +class CustomerReport(BaseModel): + id: int + name: str + region: str + total_spent: float + order_count: int + rank_in_region: int + +@app.get("/reports/top-customers") +def get_top_customers( + db: Session = Depends(get_db), + region: str = None, + limit: int = 100 +) -> List[CustomerReport]: + """Complex report with CTEs and window functions""" + query = text(""" + WITH customer_totals AS ( + SELECT + u.id, + u.name, + u.region, + COUNT(o.id) as order_count, + COALESCE(SUM(o.total), 0) as total_spent + FROM users u + LEFT JOIN orders o ON u.id = o.user_id + WHERE u.deleted_at IS NULL + AND (:region IS NULL OR u.region = :region) + GROUP BY u.id, u.name, u.region + ), + ranked AS ( + SELECT + *, + ROW_NUMBER() OVER ( + PARTITION BY region + ORDER BY total_spent DESC + ) as rank_in_region + FROM customer_totals + ) + SELECT * FROM ranked + WHERE total_spent > 0 + ORDER BY total_spent DESC + LIMIT :limit + """) + + result = db.execute(query, {"region": region, "limit": limit}) + + # Type-safe results with Pydantic + return [CustomerReport(**dict(row._mapping)) for row in result] +``` + +### Hybrid Approach + +**Combine ORM and raw SQL**: + +```python +def get_user_analytics(db: Session, user_id: int): + """Use raw SQL for complex aggregation, ORM for simple queries""" + + # Complex aggregation in raw SQL + analytics_query = text(""" + SELECT + COUNT(*) as total_orders, + SUM(total) as lifetime_value, + AVG(total) as avg_order_value, + MAX(created_at) as last_order_date, + MIN(created_at) as first_order_date 
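            -- SUM/AVG/MAX/MIN return NULL when the user has no orders; the caller maps these to 0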
+ FROM orders + WHERE user_id = :user_id + """) + + analytics = db.execute(analytics_query, {"user_id": user_id}).first() + + # Simple ORM query for user details + user = db.query(User).filter(User.id == user_id).first() + + return { + "user": { + "id": user.id, + "name": user.name, + "email": user.email + }, + "analytics": { + "total_orders": analytics.total_orders, + "lifetime_value": float(analytics.lifetime_value or 0), + "avg_order_value": float(analytics.avg_order_value or 0), + "first_order": analytics.first_order_date, + "last_order": analytics.last_order_date + } + } +``` + +### Query Optimization Checklist + +**Before optimizing**: + +1. **Measure with EXPLAIN ANALYZE**: + ```sql + EXPLAIN ANALYZE + SELECT * FROM users JOIN orders ON users.id = orders.user_id; + ``` + +2. **Look for**: + - Sequential scans on large tables → Add index + - High loop counts → N+1 query problem + - Hash joins on small tables → Consider nested loop + - Sort operations → Consider index on ORDER BY columns + +3. **Optimize**: + - Add indexes on foreign keys, WHERE clauses, ORDER BY columns + - Use LIMIT for pagination + - Use EXISTS instead of IN for large subqueries + - Denormalize for read-heavy workloads + +**Index usage verification**: + +```sql +-- Check if index is being used +EXPLAIN SELECT * FROM users WHERE email = 'test@example.com'; +-- Look for "Index Scan using idx_users_email" + +-- Check index statistics +SELECT + schemaname, + tablename, + indexname, + idx_scan as index_scans, + idx_tup_read as tuples_read, + idx_tup_fetch as tuples_fetched +FROM pg_stat_user_indexes +WHERE tablename = 'users'; +``` + +## Testing Strategies + +### Test Database Setup + +**Separate test database with fixtures**: + +```python +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from sqlalchemy.pool import NullPool + +@pytest.fixture(scope="session") +def test_engine(): + """Create test database engine""" + engine = create_engine( + "postgresql://user:pass@localhost/test_db", + poolclass=NullPool, # No pooling in tests + echo=True # Log all queries + ) + + # Create all tables + Base.metadata.create_all(engine) + + yield engine + + # Drop all tables after tests + Base.metadata.drop_all(engine) + +@pytest.fixture(scope="function") +def db_session(test_engine): + """Create fresh database session for each test""" + connection = test_engine.connect() + transaction = connection.begin() + + Session = sessionmaker(bind=connection) + session = Session() + + yield session + + # Rollback transaction (undo all changes) + session.close() + transaction.rollback() + connection.close() +``` + +### Factory Pattern for Test Data + +**Use factories for consistent test data**: + +```python +from factory import Factory, Faker, SubFactory +from factory.alchemy import SQLAlchemyModelFactory + +class UserFactory(SQLAlchemyModelFactory): + class Meta: + model = User + sqlalchemy_session = db_session + + name = Faker('name') + email = Faker('email') + created_at = Faker('date_time') + +class PostFactory(SQLAlchemyModelFactory): + class Meta: + model = Post + sqlalchemy_session = db_session + + title = Faker('sentence') + content = Faker('text') + user = SubFactory(UserFactory) # Auto-create related user + +# Test usage +def test_get_user_posts(db_session): + user = UserFactory.create() + PostFactory.create_batch(5, user=user) # Create 5 posts for user + + posts = db_session.query(Post).filter(Post.user_id == user.id).all() + assert len(posts) == 5 +``` + +### Testing Transactions + +**Test 
rollback behavior**:

```python
from sqlalchemy.exc import IntegrityError

def test_transaction_rollback(db_session):
    """Verify rollback on error"""
    user = User(name="Alice", email="alice@example.com")
    db_session.add(user)

    with pytest.raises(IntegrityError):
        # This should fail (duplicate email)
        user2 = User(name="Bob", email="alice@example.com")
        db_session.add(user2)
        db_session.commit()

    # Verify rollback occurred
    db_session.rollback()
    assert db_session.query(User).count() == 0
```

### Testing Migrations

**Test migration up and down**:

```python
from alembic import command
from alembic.config import Config

def test_migration_upgrade_downgrade():
    """Test migration can be applied and reversed"""
    alembic_cfg = Config("alembic.ini")
    alembic_cfg.set_main_option("sqlalchemy.url", TEST_DATABASE_URL)

    # Apply migration
    command.upgrade(alembic_cfg, "head")

    # Verify schema changes
    # ... assertions ...

    # Rollback migration
    command.downgrade(alembic_cfg, "-1")

    # Verify rollback
    # ... assertions ...
```

## Monitoring and Observability

### Query Performance Tracking

**Track slow queries with middleware**:

```python
from fastapi import Request
from sqlalchemy import event
from sqlalchemy.engine import Engine
import time
import logging

logger = logging.getLogger(__name__)

@app.middleware("http")
async def track_db_queries(request: Request, call_next):
    """Track database query performance per request"""
    query_count = 0
    total_query_time = 0.0

    def before_query(conn, cursor, statement, parameters, context, executemany):
        # Record start time only - the Engine executes the statement itself;
        # calling cursor.execute() in a listener would run every query twice
        conn.info.setdefault('request_query_start', []).append(time.time())

    def after_query(conn, cursor, statement, parameters, context, executemany):
        nonlocal query_count, total_query_time
        duration = time.time() - conn.info['request_query_start'].pop()
        query_count += 1
        total_query_time += duration

        if duration > 1.0:  # Log slow queries
            logger.warning(
                f"Slow query ({duration:.2f}s): {statement[:200]}",
                extra={
                    "duration": duration,
                    "path": request.url.path
                }
            )

    # Attach listeners (Engine-wide, so under concurrent requests the
    # per-request counts can include queries from other in-flight requests)
    event.listen(Engine, "before_cursor_execute", before_query)
    event.listen(Engine, "after_cursor_execute", after_query)

    response = await call_next(request)

    # Remove listeners
    event.remove(Engine, "before_cursor_execute", before_query)
    event.remove(Engine, "after_cursor_execute", after_query)

    # Add headers
    response.headers["X-DB-Query-Count"] = str(query_count)
    response.headers["X-DB-Query-Time"] = f"{total_query_time:.3f}s"

    return response
```

### Connection Pool Metrics

**Expose pool metrics for monitoring**:

```python
from prometheus_client import Gauge

pool_size_gauge = Gauge('db_pool_size', 'Number of connections in pool')
pool_checked_out_gauge = Gauge('db_pool_checked_out', 'Connections currently checked out')
pool_overflow_gauge = Gauge('db_pool_overflow', 'Overflow connections')

@app.on_event("startup")
async def start_pool_metrics():
    """Collect pool metrics periodically"""
    import asyncio

    async def collect_metrics():
        while True:
            pool = engine.pool
            pool_size_gauge.set(pool.size())
            pool_checked_out_gauge.set(pool.checkedout())
            pool_overflow_gauge.set(pool.overflow())

            await asyncio.sleep(10)  # Every 10 seconds

    asyncio.create_task(collect_metrics())
```

## Anti-Patterns

| Anti-Pattern | Why Bad | Fix |
|--------------|---------|-----|
| **No connection pooling** | Creates new connection per request (slow) | Use `create_engine()` with pool |
| **pool_pre_ping=False** | Fails on stale connections | Always `pool_pre_ping=True` in production |
| **Lazy loading in loops** | N+1 query problem | Use `joinedload()` or `selectinload()` |
| **No query timeout** | Slow queries block workers | Set `statement_timeout` in connect_args |
| 
**Large transactions** | Locks held too long, blocking | Break into smaller transactions | +| **No migration rollback** | Can't undo bad migrations | Always test downgrade path | +| **String interpolation in SQL** | SQL injection vulnerability | Use parameterized queries with `text()` | +| **No index on foreign keys** | Slow joins | Add index on all foreign key columns | +| **Blocking migrations** | Downtime during deployment | Use `CONCURRENTLY`, `NOT VALID` patterns | + +## Cross-References + +**Related skills**: +- **FastAPI dependency injection** → `fastapi-development` (database dependencies) +- **API testing** → `api-testing` (testing database code) +- **Microservices** → `microservices-architecture` (per-service databases) +- **Security** → `ordis-security-architect` (SQL injection, connection security) + +## Further Reading + +- **SQLAlchemy docs**: https://docs.sqlalchemy.org/ +- **Alembic migrations**: https://alembic.sqlalchemy.org/ +- **PostgreSQL performance**: https://www.postgresql.org/docs/current/performance-tips.html +- **Database Reliability Engineering** by Laine Campbell diff --git a/skills/using-web-backend/django-development.md b/skills/using-web-backend/django-development.md new file mode 100644 index 0000000..ce33b6e --- /dev/null +++ b/skills/using-web-backend/django-development.md @@ -0,0 +1,890 @@ + +# Django Development + +## Overview + +**Django development specialist covering Django ORM optimization, DRF best practices, caching strategies, migrations, testing, and production deployment.** + +**Core principle**: Django's "batteries included" philosophy is powerful but requires understanding which battery to use when - master Django's tools to avoid reinventing wheels or choosing wrong patterns. + +## When to Use This Skill + +Use when encountering: + +- **ORM optimization**: N+1 queries, select_related vs prefetch_related, query performance +- **DRF patterns**: Serializers, ViewSets, permissions, nested relationships +- **Caching**: Cache framework, per-view caching, template fragment caching +- **Migrations**: Zero-downtime migrations, data migrations, squashing +- **Testing**: Django TestCase, fixtures, factories, mocking +- **Deployment**: Gunicorn, static files, database pooling +- **Async Django**: Channels, async views, WebSockets +- **Admin customization**: Custom admin actions, list filters, inlines + +**Do NOT use for**: +- General Python patterns (use `axiom-python-engineering`) +- API design principles (use `rest-api-design`) +- Database-agnostic patterns (use `database-integration`) +- Authentication flows (use `api-authentication`) + +## Django ORM Optimization + +### select_related vs prefetch_related + +**Decision matrix**: + +| Relationship | Method | SQL Strategy | Use When | +|--------------|--------|--------------|----------| +| ForeignKey (many-to-one) | `select_related` | JOIN | Book → Author | +| OneToOneField | `select_related` | JOIN | User → Profile | +| Reverse ForeignKey (one-to-many) | `prefetch_related` | Separate query + IN | Author → Books | +| ManyToManyField | `prefetch_related` | Separate query + IN | Book → Tags | + +**Example - select_related (JOIN)**: + +```python +# BAD: N+1 queries (1 + N) +books = Book.objects.all() +for book in books: + print(book.author.name) # Additional query per book + +# GOOD: Single JOIN query +books = Book.objects.select_related('author').all() +for book in books: + print(book.author.name) # No additional queries + +# SQL generated: +# SELECT book.*, author.* FROM book JOIN author ON 
book.author_id = author.id +``` + +**Example - prefetch_related (IN query)**: + +```python +# BAD: N+1 queries +authors = Author.objects.all() +for author in authors: + print(author.books.count()) # Query per author + +# GOOD: 2 queries total +authors = Author.objects.prefetch_related('books').all() +for author in authors: + print(author.books.count()) # No additional queries + +# SQL generated: +# Query 1: SELECT * FROM author +# Query 2: SELECT * FROM book WHERE author_id IN (1, 2, 3, ...) +``` + +**Nested prefetching**: + +```python +from django.db.models import Prefetch + +# Fetch authors → books → reviews (3 queries) +authors = Author.objects.prefetch_related( + Prefetch('books', queryset=Book.objects.prefetch_related('reviews')) +) + +# Custom filtering on prefetch +recent_books = Book.objects.filter( + published_date__gte=timezone.now() - timedelta(days=30) +).order_by('-published_date') + +authors = Author.objects.prefetch_related( + Prefetch('books', queryset=recent_books, to_attr='recent_books') +) + +# Access via custom attribute +for author in authors: + for book in author.recent_books: # Only recent books + print(book.title) +``` + +### Query Debugging + +```python +from django.db import connection, reset_queries +from django.conf import settings + +# Enable in settings.py: DEBUG = True +# Or use django-debug-toolbar + +def debug_queries(func): + """Decorator to debug query counts""" + def wrapper(*args, **kwargs): + reset_queries() + result = func(*args, **kwargs) + print(f"Queries: {len(connection.queries)}") + for query in connection.queries: + print(f" {query['time']}s: {query['sql'][:100]}") + return result + return wrapper + +@debug_queries +def get_books(): + return list(Book.objects.select_related('author').prefetch_related('tags')) +``` + +**Django Debug Toolbar** (production alternative - django-silk): + +```python +# settings.py +INSTALLED_APPS = [ + 'debug_toolbar', + # ... +] + +MIDDLEWARE = [ + 'debug_toolbar.middleware.DebugToolbarMiddleware', + # ... 
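    # note: keep DebugToolbarMiddleware as early as possible, after any
    # middleware that encodes the response body (e.g. GZipMiddleware)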
+] + +INTERNAL_IPS = ['127.0.0.1'] + +# For production: use django-silk for profiling +INSTALLED_APPS += ['silk'] +MIDDLEWARE += ['silk.middleware.SilkyMiddleware'] +``` + +### Annotation and Aggregation + +**Annotate** (add computed fields): + +```python +from django.db.models import Count, Avg, Sum, F, Q + +# Add book count to each author +authors = Author.objects.annotate( + book_count=Count('books'), + avg_rating=Avg('books__rating'), + total_sales=Sum('books__sales') +) + +for author in authors: + print(f"{author.name}: {author.book_count} books, avg rating {author.avg_rating}") +``` + +**Aggregate** (single value across queryset): + +```python +from django.db.models import Avg + +# Get average rating across all books +avg_rating = Book.objects.aggregate(Avg('rating')) +# Returns: {'rating__avg': 4.2} + +# Multiple aggregations +stats = Book.objects.aggregate( + avg_rating=Avg('rating'), + total_sales=Sum('sales'), + book_count=Count('id') +) +``` + +**Conditional aggregation with Q**: + +```python +from django.db.models import Q, Count + +# Count books by rating category +Author.objects.annotate( + high_rated_books=Count('books', filter=Q(books__rating__gte=4.0)), + low_rated_books=Count('books', filter=Q(books__rating__lt=3.0)) +) +``` + +## Django REST Framework Patterns + +### ViewSet vs APIView + +**Decision matrix**: + +| Use | Pattern | When | +|-----|---------|------| +| Standard CRUD | `ModelViewSet` | Full REST API for model | +| Custom actions only | `ViewSet` | Non-standard endpoints | +| Read-only API | `ReadOnlyModelViewSet` | GET/LIST only | +| Fine control | `APIView` or `@api_view` | Custom business logic | + +**ModelViewSet** (full CRUD): + +```python +from rest_framework import viewsets, filters +from rest_framework.decorators import action +from rest_framework.response import Response + +class BookViewSet(viewsets.ModelViewSet): + """ + Provides: list, create, retrieve, update, partial_update, destroy + """ + queryset = Book.objects.select_related('author').prefetch_related('tags') + serializer_class = BookSerializer + permission_classes = [IsAuthenticatedOrReadOnly] + filter_backends = [filters.SearchFilter, filters.OrderingFilter] + search_fields = ['title', 'author__name'] + ordering_fields = ['published_date', 'rating'] + + def get_queryset(self): + """Optimize queryset based on action""" + queryset = super().get_queryset() + + if self.action == 'list': + # List doesn't need full detail + return queryset.only('id', 'title', 'author__name') + + return queryset + + @action(detail=True, methods=['post']) + def publish(self, request, pk=None): + """Custom action: POST /books/123/publish/""" + book = self.get_object() + book.status = 'published' + book.published_date = timezone.now() + book.save() + return Response({'status': 'published'}) + + @action(detail=False, methods=['get']) + def bestsellers(self, request): + """Custom list action: GET /books/bestsellers/""" + books = self.get_queryset().filter(sales__gte=10000).order_by('-sales')[:10] + serializer = self.get_serializer(books, many=True) + return Response(serializer.data) +``` + +### Serializer Patterns + +**Basic serializer with validation**: + +```python +from rest_framework import serializers +from django.contrib.auth.password_validation import validate_password + +class UserSerializer(serializers.ModelSerializer): + password = serializers.CharField( + write_only=True, + required=True, + validators=[validate_password] + ) + password_confirm = serializers.CharField(write_only=True, required=True) + + class 
Meta: + model = User + fields = ['id', 'username', 'email', 'password', 'password_confirm'] + read_only_fields = ['id'] + + # Field-level validation + def validate_email(self, value): + if User.objects.filter(email__iexact=value).exists(): + raise serializers.ValidationError("Email already in use") + return value.lower() + + # Object-level validation (cross-field) + def validate(self, attrs): + if attrs['password'] != attrs['password_confirm']: + raise serializers.ValidationError({ + 'password_confirm': "Passwords don't match" + }) + attrs.pop('password_confirm') + return attrs + + def create(self, validated_data): + password = validated_data.pop('password') + user = User.objects.create(**validated_data) + user.set_password(password) + user.save() + return user +``` + +**Nested serializers (read-only)**: + +```python +class AuthorSerializer(serializers.ModelSerializer): + book_count = serializers.IntegerField(read_only=True) + + class Meta: + model = Author + fields = ['id', 'name', 'bio', 'book_count'] + +class BookSerializer(serializers.ModelSerializer): + author = AuthorSerializer(read_only=True) + author_id = serializers.PrimaryKeyRelatedField( + queryset=Author.objects.all(), + source='author', + write_only=True + ) + + class Meta: + model = Book + fields = ['id', 'title', 'author', 'author_id', 'published_date'] +``` + +**Dynamic fields** (include/exclude fields via query params): + +```python +class DynamicFieldsModelSerializer(serializers.ModelSerializer): + """ + Usage: /api/books/?fields=id,title,author + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + request = self.context.get('request') + if request: + fields = request.query_params.get('fields') + if fields: + fields = fields.split(',') + allowed = set(fields) + existing = set(self.fields.keys()) + for field_name in existing - allowed: + self.fields.pop(field_name) + +class BookSerializer(DynamicFieldsModelSerializer): + class Meta: + model = Book + fields = '__all__' +``` + +## Django Caching + +### Cache Framework Setup + +```python +# settings.py + +# Redis cache (production) +CACHES = { + 'default': { + 'BACKEND': 'django_redis.cache.RedisCache', + 'LOCATION': 'redis://127.0.0.1:6379/1', + 'OPTIONS': { + 'CLIENT_CLASS': 'django_redis.client.DefaultClient', + 'CONNECTION_POOL_KWARGS': {'max_connections': 50}, + 'PARSER_CLASS': 'redis.connection.HiredisParser', + }, + 'KEY_PREFIX': 'myapp', + 'TIMEOUT': 300, # Default 5 minutes + } +} + +# Memcached (alternative) +CACHES = { + 'default': { + 'BACKEND': 'django.core.cache.backends.memcached.PyMemcacheCache', + 'LOCATION': '127.0.0.1:11211', + } +} + +# Local memory (development only) +CACHES = { + 'default': { + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', + 'LOCATION': 'unique-snowflake', + } +} +``` + +### Per-View Caching + +```python +from django.views.decorators.cache import cache_page +from django.utils.decorators import method_decorator + +# Function-based view +@cache_page(60 * 15) # Cache for 15 minutes +def book_list(request): + books = Book.objects.all() + return render(request, 'books/list.html', {'books': books}) + +# Class-based view +class BookListView(ListView): + model = Book + + @method_decorator(cache_page(60 * 15)) + def dispatch(self, *args, **kwargs): + return super().dispatch(*args, **kwargs) + +# DRF ViewSet +from rest_framework_extensions.cache.decorators import cache_response + +class BookViewSet(viewsets.ModelViewSet): + @cache_response(timeout=60*15, key_func='calculate_cache_key') + def list(self, 
request, *args, **kwargs):
        return super().list(request, *args, **kwargs)

    def calculate_cache_key(self, view_instance, view_method, request, args, kwargs):
        # Custom cache key including user, filters
        return f"books:list:{request.user.id}:{request.GET.urlencode()}"
```

### Low-Level Cache API

```python
from django.core.cache import cache

# Set cache
cache.set('my_key', 'my_value', timeout=300)

# Get cache
value = cache.get('my_key')
if value is None:
    value = expensive_computation()
    cache.set('my_key', value, timeout=300)

# Get or set (atomic)
value = cache.get_or_set('my_key', lambda: expensive_computation(), timeout=300)

# Delete cache
cache.delete('my_key')

# Clear all
cache.clear()

# Multiple keys
cache.set_many({'key1': 'value1', 'key2': 'value2'}, timeout=300)
values = cache.get_many(['key1', 'key2'])

# Increment/decrement
cache.set('counter', 0)
cache.incr('counter')  # 1
cache.incr('counter', delta=5)  # 6
```

### Cache Invalidation Patterns

```python
from django.db.models.signals import post_save, post_delete
from django.dispatch import receiver

@receiver([post_save, post_delete], sender=Book)
def invalidate_book_cache(sender, instance, **kwargs):
    """Invalidate cache when book changes"""
    cache.delete(f'book:{instance.id}')
    cache.delete('books:list')  # Invalidate list cache
    cache.delete(f'author:{instance.author_id}:books')

# Pattern: Cache with version tags
def get_books():
    version = cache.get('books:version', 0)
    cache_key = f'books:list:v{version}'
    books = cache.get(cache_key)

    if books is None:
        books = list(Book.objects.all())
        cache.set(cache_key, books, timeout=3600)

    return books

def invalidate_books():
    """Bump version to invalidate all book caches"""
    version = cache.get('books:version', 0)
    cache.set('books:version', version + 1)
```

## Django Migrations

### Zero-Downtime Migration Pattern

**Adding NOT NULL column to large table**:

```python
# Step 1: Add nullable field (migration 0002)
class Migration(migrations.Migration):
    operations = [
        migrations.AddField(
            model_name='user',
            name='department',
            field=models.CharField(max_length=100, null=True, blank=True),
        ),
    ]

# Step 2: Populate data in batches (migration 0003)
from django.db import migrations

def populate_department(apps, schema_editor):
    User = apps.get_model('myapp', 'User')

    # Batch update for performance. Always take the FIRST batch of
    # still-NULL rows: updating them shrinks the filtered queryset,
    # so offset-based paging would skip records.
    batch_size = 10000

    while True:
        users = list(User.objects.filter(department__isnull=True)[:batch_size])
        if not users:
            break
        for user in users:
            user.department = determine_department(user)  # Your logic
        User.objects.bulk_update(users, ['department'], batch_size=batch_size)

class Migration(migrations.Migration):
    dependencies = [('myapp', '0002_add_department')]
    operations = [
        migrations.RunPython(populate_department, migrations.RunPython.noop),
    ]

# Step 3: Make NOT NULL (migration 0004)
class Migration(migrations.Migration):
    dependencies = [('myapp', '0003_populate_department')]
    operations = [
        migrations.AlterField(
            model_name='user',
            name='department',
            field=models.CharField(max_length=100),  # NOT NULL
        ),
    ]
```

### Concurrent Index Creation (PostgreSQL)

```python
from django.contrib.postgres.operations import AddIndexConcurrently
from django.db import migrations, models

class Migration(migrations.Migration):
    atomic = False  # Required for 
CONCURRENTLY operations + + operations = [ + AddIndexConcurrently( + model_name='book', + index=models.Index(fields=['published_date'], name='book_published_idx'), + ), + ] +``` + +### Squashing Migrations + +```bash +# Squash migrations 0001 through 0020 into single migration +python manage.py squashmigrations myapp 0001 0020 + +# This creates migrations/0001_squashed_0020.py +# After deploying squashed migration, delete originals: +# migrations/0001.py through migrations/0020.py +``` + +## Django Testing + +### TestCase vs TransactionTestCase + +| Feature | TestCase | TransactionTestCase | +|---------|----------|---------------------| +| Speed | Fast (no DB reset between tests) | Slow (resets DB each test) | +| Transactions | Wrapped in transaction, rolled back | No automatic transaction | +| Use for | Most tests | Testing transaction behavior, signals | + +**Example - TestCase**: + +```python +from django.test import TestCase +from myapp.models import Book + +class BookModelTest(TestCase): + @classmethod + def setUpTestData(cls): + """Run once for entire test class (fast)""" + cls.author = Author.objects.create(name="Test Author") + + def setUp(self): + """Run before each test method""" + self.book = Book.objects.create( + title="Test Book", + author=self.author + ) + + def test_book_str(self): + self.assertEqual(str(self.book), "Test Book") + + def test_book_author_relationship(self): + self.assertEqual(self.book.author.name, "Test Author") +``` + +### API Testing with DRF + +```python +from rest_framework.test import APITestCase, APIClient +from rest_framework import status +from django.contrib.auth.models import User + +class BookAPITest(APITestCase): + def setUp(self): + self.client = APIClient() + self.user = User.objects.create_user( + username='testuser', + password='testpass123' + ) + self.book = Book.objects.create(title="Test Book") + + def test_list_books_unauthenticated(self): + response = self.client.get('/api/books/') + self.assertEqual(response.status_code, status.HTTP_200_OK) + + def test_create_book_authenticated(self): + self.client.force_authenticate(user=self.user) + data = {'title': 'New Book', 'author': self.author.id} + response = self.client.post('/api/books/', data) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + self.assertEqual(Book.objects.count(), 2) + + def test_update_book_unauthorized(self): + other_user = User.objects.create_user(username='other', password='pass') + self.client.force_authenticate(user=other_user) + data = {'title': 'Updated Title'} + response = self.client.patch(f'/api/books/{self.book.id}/', data) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) +``` + +### Factory Pattern with factory_boy + +```python +# tests/factories.py +import factory +from myapp.models import Author, Book + +class AuthorFactory(factory.django.DjangoModelFactory): + class Meta: + model = Author + + name = factory.Faker('name') + bio = factory.Faker('text', max_nb_chars=200) + +class BookFactory(factory.django.DjangoModelFactory): + class Meta: + model = Book + + title = factory.Faker('sentence', nb_words=4) + author = factory.SubFactory(AuthorFactory) + published_date = factory.Faker('date_this_decade') + isbn = factory.Sequence(lambda n: f'978-0-{n:09d}') + +# Usage in tests +class BookTest(TestCase): + def test_book_creation(self): + book = BookFactory.create() # Creates Author too + self.assertIsNotNone(book.id) + + def test_multiple_books(self): + books = BookFactory.create_batch(10) # Create 10 books + 
self.assertEqual(len(books), 10) + + def test_author_with_books(self): + author = AuthorFactory.create() + BookFactory.create_batch(5, author=author) + self.assertEqual(author.books.count(), 5) +``` + +## Django Settings Organization + +### Multiple Environment Configs + +``` +myproject/ +└── settings/ + ├── __init__.py + ├── base.py # Common settings + ├── development.py # Dev overrides + ├── production.py # Prod overrides + └── test.py # Test overrides +``` + +**settings/base.py**: + +```python +import os +from pathlib import Path + +BASE_DIR = Path(__file__).resolve().parent.parent.parent + +SECRET_KEY = os.environ.get('DJANGO_SECRET_KEY') + +INSTALLED_APPS = [ + 'django.contrib.admin', + # ... + 'rest_framework', + 'myapp', +] + +DATABASES = { + 'default': { + 'ENGINE': 'django.db.backends.postgresql', + 'NAME': os.environ.get('DB_NAME'), + 'USER': os.environ.get('DB_USER'), + 'PASSWORD': os.environ.get('DB_PASSWORD'), + 'HOST': os.environ.get('DB_HOST', 'localhost'), + 'PORT': os.environ.get('DB_PORT', '5432'), + } +} +``` + +**settings/development.py**: + +```python +from .base import * + +DEBUG = True + +ALLOWED_HOSTS = ['localhost', '127.0.0.1'] + +# Use console email backend +EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' + +# Local cache +CACHES = { + 'default': { + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', + } +} + +# Debug toolbar +INSTALLED_APPS += ['debug_toolbar'] +MIDDLEWARE += ['debug_toolbar.middleware.DebugToolbarMiddleware'] +INTERNAL_IPS = ['127.0.0.1'] +``` + +**settings/production.py**: + +```python +from .base import * + +DEBUG = False + +ALLOWED_HOSTS = [os.environ.get('ALLOWED_HOST')] + +# Security settings +SECURE_SSL_REDIRECT = True +SESSION_COOKIE_SECURE = True +CSRF_COOKIE_SECURE = True +SECURE_HSTS_SECONDS = 31536000 +SECURE_HSTS_INCLUDE_SUBDOMAINS = True +SECURE_HSTS_PRELOAD = True + +# Redis cache +CACHES = { + 'default': { + 'BACKEND': 'django_redis.cache.RedisCache', + 'LOCATION': os.environ.get('REDIS_URL'), + } +} + +# Real email +EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' +EMAIL_HOST = os.environ.get('EMAIL_HOST') +EMAIL_PORT = int(os.environ.get('EMAIL_PORT', 587)) +EMAIL_USE_TLS = True +``` + +**Usage**: + +```bash +# Development +export DJANGO_SETTINGS_MODULE=myproject.settings.development +python manage.py runserver + +# Production +export DJANGO_SETTINGS_MODULE=myproject.settings.production +gunicorn myproject.wsgi:application +``` + +## Django Deployment + +### Gunicorn Configuration + +```python +# gunicorn_config.py +import multiprocessing + +bind = "0.0.0.0:8000" +workers = multiprocessing.cpu_count() * 2 + 1 +worker_class = "sync" # or "gevent" for async +worker_connections = 1000 +max_requests = 1000 # Restart workers after N requests (prevent memory leaks) +max_requests_jitter = 100 +timeout = 30 +keepalive = 2 + +# Logging +accesslog = "-" # stdout +errorlog = "-" # stderr +loglevel = "info" + +# Process naming +proc_name = "myproject" + +# Server mechanics +daemon = False +pidfile = "/var/run/gunicorn.pid" +``` + +**Systemd service**: + +```ini +# /etc/systemd/system/myproject.service +[Unit] +Description=MyProject Django Application +After=network.target + +[Service] +Type=notify +User=www-data +Group=www-data +WorkingDirectory=/var/www/myproject +Environment="DJANGO_SETTINGS_MODULE=myproject.settings.production" +ExecStart=/var/www/myproject/venv/bin/gunicorn \ + --config /var/www/myproject/gunicorn_config.py \ + myproject.wsgi:application +ExecReload=/bin/kill -s HUP $MAINPID 
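# SIGHUP makes gunicorn re-read its config and gracefully restart workers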
+Restart=always + +[Install] +WantedBy=multi-user.target +``` + +### Static and Media Files + +```python +# settings/production.py +STATIC_URL = '/static/' +STATIC_ROOT = BASE_DIR / 'staticfiles' + +MEDIA_URL = '/media/' +MEDIA_ROOT = BASE_DIR / 'media' + +# Use WhiteNoise for static files +MIDDLEWARE = [ + 'django.middleware.security.SecurityMiddleware', + 'whitenoise.middleware.WhiteNoiseMiddleware', # After SecurityMiddleware + # ... +] + +STATICFILES_STORAGE = 'whitenoise.storage.CompressedManifestStaticFilesStorage' +``` + +**Collect static files**: + +```bash +python manage.py collectstatic --noinput +``` + +## Anti-Patterns + +| Anti-Pattern | Why Bad | Fix | +|--------------|---------|-----| +| **Lazy loading in loops** | N+1 queries | Use `select_related`/`prefetch_related` | +| **No database indexing** | Slow queries | Add `db_index=True` or Meta indexes | +| **Signals for async work** | Blocks requests | Use Celery tasks instead | +| **Generic serializers for everything** | Over-fetching data | Create optimized serializers per use case | +| **No caching** | Repeated expensive queries | Cache querysets, views, template fragments | +| **Migrations in production without testing** | Downtime, data loss | Test on production-sized datasets first | +| **DEBUG=True in production** | Security risk, slow | Always DEBUG=False in production | +| **No connection pooling** | Exhausts DB connections | Use pgBouncer or django-db-geventpool | + +## Cross-References + +**Related skills**: +- **Database optimization** → `database-integration` (connection pooling, migrations) +- **API testing** → `api-testing` (DRF testing patterns) +- **Authentication** → `api-authentication` (DRF token auth, JWT) +- **REST API design** → `rest-api-design` (API patterns) + +## Further Reading + +- **Django docs**: https://docs.djangoproject.com/ +- **DRF docs**: https://www.django-rest-framework.org/ +- **Two Scoops of Django**: Best practices book +- **Classy Class-Based Views**: https://ccbv.co.uk/ +- **Classy Django REST Framework**: https://www.cdrf.co/ diff --git a/skills/using-web-backend/express-development.md b/skills/using-web-backend/express-development.md new file mode 100644 index 0000000..ee1e673 --- /dev/null +++ b/skills/using-web-backend/express-development.md @@ -0,0 +1,872 @@ + +# Express Development + +## Overview + +**Express.js development specialist covering middleware organization, error handling, validation, database integration, testing, and production deployment.** + +**Core principle**: Express's minimalist philosophy requires disciplined patterns - without structure, Express apps become tangled middleware chains with inconsistent error handling and poor testability. 
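
For orientation, a minimal sketch of the layered layout the sections below assume - the file names (`routes/userRoutes`, `middleware/errorHandler`) are illustrative, not prescribed:

```typescript
// src/app.ts - composition root: parsers, routes, then error handling
import express from 'express';
import userRoutes from './routes/userRoutes';              // route definitions only
import { errorHandler } from './middleware/errorHandler';  // centralized error handling

const app = express();

app.use(express.json());            // body parsing before any route
app.use('/api/users', userRoutes);  // routes delegate business logic to services
app.use(errorHandler);              // error handler registered last

export default app;
```

Keeping composition in one place makes the middleware order visible and each layer testable in isolation.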
+ +## When to Use This Skill + +Use when encountering: + +- **Middleware organization**: Ordering, async error handling, custom middleware +- **Error handling**: Centralized handlers, custom error classes, async/await errors +- **Request validation**: Zod, express-validator, type-safe validation +- **Database patterns**: Connection pooling, transactions, graceful shutdown +- **Testing**: Supertest, mocking, middleware isolation +- **Production deployment**: PM2, clustering, Docker, environment management +- **Performance**: Compression, caching, clustering +- **Security**: Helmet, rate limiting, CORS, input sanitization + +**DO NOT use for**: +- General TypeScript patterns (use `axiom-python-engineering` equivalents) +- API design principles (use `rest-api-design`) +- Database-agnostic patterns (use `database-integration`) + +## Middleware Organization + +### Correct Middleware Order + +**Order matters** - middleware executes top to bottom: + +```typescript +import express from 'express'; +import helmet from 'helmet'; +import cors from 'cors'; +import compression from 'compression'; + +const app = express(); + +// 1. Security (FIRST - before any parsing) +app.use(helmet({ + contentSecurityPolicy: { + directives: { + defaultSrc: ["'self'"], + styleSrc: ["'self'", "'unsafe-inline'"], + }, + }, +})); + +// 2. CORS (before routes) +app.use(cors({ + origin: process.env.ALLOWED_ORIGINS?.split(','), + credentials: true, + maxAge: 86400, // 24 hours +})); + +// 3. Parsing +app.use(express.json({ limit: '10mb' })); +app.use(express.urlencoded({ extended: true, limit: '10mb' })); + +// 4. Compression +app.use(compression()); + +// 5. Logging +app.use(morgan('combined', { stream: logger.stream })); + +// 6. Authentication (before routes that need it) +app.use('/api', authenticationMiddleware); + +// 7. Routes +app.use('/api/users', userRoutes); +app.use('/api/posts', postRoutes); + +// 8. 404 handler (AFTER all routes) +app.use((req, res) => { + res.status(404).json({ + status: 'error', + message: 'Route not found', + path: req.path, + }); +}); + +// 9. 
Error handler (LAST) +app.use(errorHandler); +``` + +### Async Error Wrapper + +**Problem**: Express doesn't catch async errors automatically + +```typescript +// src/middleware/asyncHandler.ts +import { Request, Response, NextFunction } from 'express'; + +export const asyncHandler = ( + fn: (req: Request, res: Response, next: NextFunction) => Promise +) => { + return (req: Request, res: Response, next: NextFunction) => { + Promise.resolve(fn(req, res, next)).catch(next); + }; +}; + +// Usage +router.get('/:id', asyncHandler(async (req, res) => { + const user = await userService.findById(req.params.id); + if (!user) throw new NotFoundError('User not found'); + res.json(user); +})); +``` + +**Alternative**: Use express-async-errors (automatic) + +```typescript +// At top of app.ts (BEFORE routes) +import 'express-async-errors'; + +// Now all async route handlers auto-catch errors +router.get('/:id', async (req, res) => { + const user = await userService.findById(req.params.id); + res.json(user); +}); // Errors automatically forwarded to error handler +``` + +## Error Handling + +### Custom Error Classes + +```typescript +// src/errors/AppError.ts +export class AppError extends Error { + constructor( + public readonly message: string, + public readonly statusCode: number, + public readonly isOperational: boolean = true + ) { + super(message); + Error.captureStackTrace(this, this.constructor); + } +} + +// src/errors/HttpErrors.ts +export class BadRequestError extends AppError { + constructor(message: string) { + super(message, 400); + } +} + +export class UnauthorizedError extends AppError { + constructor(message = 'Unauthorized') { + super(message, 401); + } +} + +export class ForbiddenError extends AppError { + constructor(message = 'Forbidden') { + super(message, 403); + } +} + +export class NotFoundError extends AppError { + constructor(message: string) { + super(message, 404); + } +} + +export class ConflictError extends AppError { + constructor(message: string) { + super(message, 409); + } +} + +export class TooManyRequestsError extends AppError { + constructor(message = 'Too many requests', public retryAfter?: number) { + super(message, 429); + } +} +``` + +### Centralized Error Handler + +```typescript +// src/middleware/errorHandler.ts +import { Request, Response, NextFunction } from 'express'; +import { AppError } from '../errors/AppError'; +import { logger } from '../config/logger'; + +export const errorHandler = ( + err: Error, + req: Request, + res: Response, + next: NextFunction +) => { + // Log error with context + logger.error('Error occurred', { + error: { + message: err.message, + stack: err.stack, + name: err.name, + }, + request: { + method: req.method, + url: req.url, + ip: req.ip, + userAgent: req.get('user-agent'), + }, + }); + + // Operational errors (expected) + if (err instanceof AppError && err.isOperational) { + const response: any = { + status: 'error', + message: err.message, + }; + + // Add retry-after for rate limiting + if (err instanceof TooManyRequestsError && err.retryAfter) { + res.setHeader('Retry-After', err.retryAfter); + response.retryAfter = err.retryAfter; + } + + return res.status(err.statusCode).json(response); + } + + // Validation errors (Zod, express-validator) + if (err.name === 'ZodError') { + return res.status(400).json({ + status: 'error', + message: 'Validation failed', + errors: (err as any).errors, + }); + } + + // Database constraint violations + if ((err as any).code === '23505') { // PostgreSQL unique violation + return 
res.status(409).json({
      status: 'error',
      message: 'Resource already exists',
    });
  }

  if ((err as any).code === '23503') { // Foreign key violation
    return res.status(400).json({
      status: 'error',
      message: 'Invalid reference',
    });
  }

  // Unexpected errors (don't leak details in production)
  res.status(500).json({
    status: 'error',
    message: process.env.NODE_ENV === 'production'
      ? 'Internal server error'
      : err.message,
    ...(process.env.NODE_ENV !== 'production' && { stack: err.stack }),
  });
};
```

### Global Error Handlers

```typescript
// src/server.ts
process.on('unhandledRejection', (reason: Error) => {
  logger.error('Unhandled Rejection', { reason });
  // Graceful shutdown
  server.close(() => process.exit(1));
});

process.on('uncaughtException', (error: Error) => {
  logger.error('Uncaught Exception', { error });
  process.exit(1);
});
```

## Request Validation

### Zod Integration (Type-Safe)

```typescript
// src/schemas/userSchema.ts
import { z } from 'zod';

export const createUserSchema = z.object({
  body: z.object({
    email: z.string().email('Invalid email'),
    password: z.string()
      .min(8, 'Password must be at least 8 characters')
      .regex(/[A-Z]/, 'Password must contain uppercase')
      .regex(/[0-9]/, 'Password must contain number'),
    name: z.string().min(2).max(100),
    age: z.number().int().positive().max(150).optional(),
  }),
});

export const getUserSchema = z.object({
  params: z.object({
    id: z.string().regex(/^\d+$/, 'ID must be numeric'),
  }),
});

export const getUsersSchema = z.object({
  query: z.object({
    page: z.string().regex(/^\d+$/).transform(Number).default('1'),
    limit: z.string().regex(/^\d+$/).transform(Number).default('10'),
    search: z.string().optional(),
    sortBy: z.enum(['name', 'created_at', 'updated_at']).optional(),
    order: z.enum(['asc', 'desc']).optional(),
  }),
});

// Type inference
export type CreateUserInput = z.infer<typeof createUserSchema>['body'];
export type GetUserParams = z.infer<typeof getUserSchema>['params'];
export type GetUsersQuery = z.infer<typeof getUsersSchema>['query'];
```

**Validation middleware**:

```typescript
// src/middleware/validate.ts
import { Request, Response, NextFunction } from 'express';
import { AnyZodObject, ZodError } from 'zod';

export const validate = (schema: AnyZodObject) => {
  return async (req: Request, res: Response, next: NextFunction) => {
    try {
      const validated = await schema.parseAsync({
        body: req.body,
        query: req.query,
        params: req.params,
      });

      // Replace with validated data (transforms applied)
      req.body = validated.body || req.body;
      req.query = validated.query || req.query;
      req.params = validated.params || req.params;

      next();
    } catch (error) {
      if (error instanceof ZodError) {
        return res.status(400).json({
          status: 'error',
          message: 'Validation failed',
          errors: error.errors.map(err => ({
            field: err.path.join('.'),
            message: err.message,
            code: err.code,
          })),
        });
      }
      next(error);
    }
  };
};
```

**Usage in routes**:

```typescript
import { Router } from 'express';
import { validate } from '../middleware/validate';
import * as schemas from '../schemas/userSchema';

const router = Router();

router.post('/', validate(schemas.createUserSchema), async (req, res) => {
  // req.body is now typed as CreateUserInput
  const user = await userService.create(req.body);
  res.status(201).json(user);
});

router.get('/:id', validate(schemas.getUserSchema), async (req, res) => {
  // req.params.id is validated
  const user = await 
userService.findById(req.params.id); + if (!user) throw new NotFoundError('User not found'); + res.json(user); +}); +``` + +## Database Connection Pooling + +### PostgreSQL with pg + +```typescript +// src/config/database.ts +import { Pool, PoolConfig } from 'pg'; +import { logger } from './logger'; + +const config: PoolConfig = { + host: process.env.DB_HOST || 'localhost', + port: Number(process.env.DB_PORT) || 5432, + database: process.env.DB_NAME, + user: process.env.DB_USER, + password: process.env.DB_PASSWORD, + max: Number(process.env.DB_POOL_MAX) || 20, + idleTimeoutMillis: 30000, + connectionTimeoutMillis: 2000, + statement_timeout: 30000, // 30s query timeout +}; + +export const pool = new Pool(config); + +// Event handlers +pool.on('connect', (client) => { + logger.debug('Database client connected'); +}); + +pool.on('acquire', (client) => { + logger.debug('Client acquired from pool'); +}); + +pool.on('error', (err, client) => { + logger.error('Unexpected pool error', { error: err }); + process.exit(-1); +}); + +// Health check +export const testConnection = async () => { + try { + const client = await pool.connect(); + const result = await client.query('SELECT NOW()'); + client.release(); + logger.info('Database connection successful', { + serverTime: result.rows[0].now, + }); + } catch (err) { + logger.error('Database connection failed', { error: err }); + throw err; + } +}; + +// Graceful shutdown +export const closePool = async () => { + logger.info('Closing database pool'); + await pool.end(); + logger.info('Database pool closed'); +}; +``` + +### Transaction Helper + +```typescript +// src/utils/transaction.ts +import { Pool, PoolClient } from 'pg'; + +export async function withTransaction( + pool: Pool, + callback: (client: PoolClient) => Promise +): Promise { + const client = await pool.connect(); + + try { + await client.query('BEGIN'); + const result = await callback(client); + await client.query('COMMIT'); + return result; + } catch (error) { + await client.query('ROLLBACK'); + throw error; + } finally { + client.release(); + } +} + +// Usage +import { pool } from '../config/database'; + +async function createUserWithProfile(userData, profileData) { + return withTransaction(pool, async (client) => { + const userResult = await client.query( + 'INSERT INTO users (email, name) VALUES ($1, $2) RETURNING id', + [userData.email, userData.name] + ); + const userId = userResult.rows[0].id; + + await client.query( + 'INSERT INTO profiles (user_id, bio) VALUES ($1, $2)', + [userId, profileData.bio] + ); + + return userId; + }); +} +``` + +## Testing + +### Integration Tests with Supertest + +```typescript +// tests/integration/userRoutes.test.ts +import request from 'supertest'; +import app from '../../src/app'; +import { pool } from '../../src/config/database'; + +describe('User Routes', () => { + beforeAll(async () => { + await pool.query('CREATE TABLE IF NOT EXISTS users (...)'); + }); + + afterEach(async () => { + await pool.query('TRUNCATE TABLE users CASCADE'); + }); + + afterAll(async () => { + await pool.end(); + }); + + describe('POST /api/users', () => { + it('should create user with valid data', async () => { + const response = await request(app) + .post('/api/users') + .send({ + email: 'test@example.com', + name: 'Test User', + password: 'Password123', + }) + .expect(201); + + expect(response.body).toHaveProperty('id'); + expect(response.body.email).toBe('test@example.com'); + expect(response.body).not.toHaveProperty('password'); + }); + + it('should return 400 for 
invalid email', async () => { + const response = await request(app) + .post('/api/users') + .send({ + email: 'invalid', + name: 'Test', + password: 'Password123', + }) + .expect(400); + + expect(response.body.status).toBe('error'); + expect(response.body.errors).toContainEqual( + expect.objectContaining({ + field: 'body.email', + message: expect.stringContaining('email'), + }) + ); + }); + }); + + describe('GET /api/users/:id', () => { + it('should return user by ID', async () => { + const createRes = await request(app) + .post('/api/users') + .send({ + email: 'test@example.com', + name: 'Test User', + password: 'Password123', + }); + + const response = await request(app) + .get(`/api/users/${createRes.body.id}`) + .expect(200); + + expect(response.body.id).toBe(createRes.body.id); + }); + + it('should return 404 for non-existent user', async () => { + await request(app) + .get('/api/users/99999') + .expect(404); + }); + }); +}); +``` + +### Unit Tests with Mocks + +```typescript +// tests/unit/userService.test.ts +import { userService } from '../../src/services/userService'; +import { pool } from '../../src/config/database'; + +jest.mock('../../src/config/database'); + +const mockPool = pool as jest.Mocked; + +describe('UserService', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + describe('findById', () => { + it('should return user when found', async () => { + mockPool.query.mockResolvedValue({ + rows: [{ id: 1, email: 'test@example.com', name: 'Test' }], + command: 'SELECT', + rowCount: 1, + oid: 0, + fields: [], + }); + + const result = await userService.findById('1'); + + expect(result).toEqual( + expect.objectContaining({ id: 1, email: 'test@example.com' }) + ); + }); + + it('should return null when not found', async () => { + mockPool.query.mockResolvedValue({ + rows: [], + command: 'SELECT', + rowCount: 0, + oid: 0, + fields: [], + }); + + const result = await userService.findById('999'); + expect(result).toBeNull(); + }); + }); +}); +``` + +## Production Deployment + +### PM2 Configuration + +```javascript +// ecosystem.config.js +module.exports = { + apps: [{ + name: 'api', + script: './dist/server.js', + instances: 'max', // Use all CPU cores + exec_mode: 'cluster', + env: { + NODE_ENV: 'production', + PORT: 3000, + }, + error_file: './logs/err.log', + out_file: './logs/out.log', + log_date_format: 'YYYY-MM-DD HH:mm:ss Z', + merge_logs: true, + max_memory_restart: '500M', + wait_ready: true, + listen_timeout: 10000, + kill_timeout: 5000, + }], +}; +``` + +**Graceful shutdown with PM2**: + +```typescript +// src/server.ts +const server = app.listen(PORT, () => { + logger.info(`Server started on port ${PORT}`); + + // Signal PM2 ready + if (process.send) { + process.send('ready'); + } +}); + +// Graceful shutdown +process.on('SIGINT', async () => { + logger.info('SIGINT received, closing server'); + + server.close(async () => { + await closePool(); + logger.info('Server closed'); + process.exit(0); + }); + + // Force shutdown after 10s + setTimeout(() => { + logger.error('Forcing shutdown'); + process.exit(1); + }, 10000); +}); +``` + +### Dockerfile + +```dockerfile +# Multi-stage build +FROM node:18-alpine AS builder + +WORKDIR /app + +# Copy package files +COPY package*.json ./ +COPY tsconfig.json ./ + +# Install dependencies +RUN npm ci + +# Copy source +COPY src ./src + +# Build TypeScript +RUN npm run build + +# Production image +FROM node:18-alpine + +WORKDIR /app + +# Install production dependencies only +COPY package*.json ./ +RUN npm ci --omit=dev && npm 
cache clean --force + +# Copy built files +COPY --from=builder /app/dist ./dist + +# Create non-root user +RUN addgroup -g 1001 -S nodejs && \ + adduser -S nodejs -u 1001 + +USER nodejs + +EXPOSE 3000 + +CMD ["node", "dist/server.js"] +``` + +### Health Check Endpoint + +```typescript +// src/routes/healthRoutes.ts +import { Router } from 'express'; +import { pool } from '../config/database'; + +const router = Router(); + +router.get('/health', async (req, res) => { + const health = { + uptime: process.uptime(), + message: 'OK', + timestamp: Date.now(), + }; + + try { + await pool.query('SELECT 1'); + health.database = 'connected'; + } catch (error) { + health.database = 'disconnected'; + return res.status(503).json(health); + } + + res.json(health); +}); + +router.get('/health/ready', async (req, res) => { + // Readiness check + try { + await pool.query('SELECT 1'); + res.status(200).json({ status: 'ready' }); + } catch (error) { + res.status(503).json({ status: 'not ready' }); + } +}); + +router.get('/health/live', (req, res) => { + // Liveness check (simpler) + res.status(200).json({ status: 'alive' }); +}); + +export default router; +``` + +## Performance Optimization + +### Response Caching + +```typescript +import Redis from 'ioredis'; + +const redis = new Redis({ + host: process.env.REDIS_HOST, + port: Number(process.env.REDIS_PORT), +}); + +export const cacheMiddleware = (duration: number) => { + return async (req: Request, res: Response, next: NextFunction) => { + if (req.method !== 'GET') return next(); + + const key = `cache:${req.originalUrl}`; + + try { + const cached = await redis.get(key); + if (cached) { + return res.json(JSON.parse(cached)); + } + + // Capture response + const originalJson = res.json.bind(res); + res.json = (body: any) => { + redis.setex(key, duration, JSON.stringify(body)); + return originalJson(body); + }; + + next(); + } catch (error) { + next(); + } + }; +}; + +// Usage +router.get('/users', cacheMiddleware(300), async (req, res) => { + const users = await userService.findAll(); + res.json(users); +}); +``` + +## Security + +### Rate Limiting + +```typescript +import rateLimit from 'express-rate-limit'; +import RedisStore from 'rate-limit-redis'; +import Redis from 'ioredis'; + +const redis = new Redis(); + +export const apiLimiter = rateLimit({ + store: new RedisStore({ client: redis }), + windowMs: 15 * 60 * 1000, // 15 minutes + max: 100, // 100 requests per window + message: 'Too many requests, please try again later', + standardHeaders: true, + legacyHeaders: false, +}); + +export const authLimiter = rateLimit({ + store: new RedisStore({ client: redis }), + windowMs: 15 * 60 * 1000, + max: 5, // 5 attempts + skipSuccessfulRequests: true, +}); + +// Usage +app.use('/api/', apiLimiter); +app.use('/api/auth/login', authLimiter); +``` + +## Anti-Patterns + +| Anti-Pattern | Why Bad | Fix | +|--------------|---------|-----| +| **No async error handling** | Crashes server | Use asyncHandler or express-async-errors | +| **Inconsistent error responses** | Poor DX | Centralized error handler | +| **New DB connection per request** | Exhausts connections | Use connection pool | +| **No graceful shutdown** | Data loss, broken requests | Handle SIGTERM/SIGINT | +| **Logging to console in production** | Lost logs, no structure | Use Winston/Pino with transports | +| **No request validation** | Security vulnerabilities | Zod/express-validator | +| **Synchronous operations in routes** | Blocks event loop | Use async/await | +| **No health checks** | Can't monitor 

## Cross-References

**Related skills**:
- **Database patterns** → `database-integration` (pooling, transactions)
- **API testing** → `api-testing` (supertest patterns)
- **REST design** → `rest-api-design` (endpoint patterns)
- **Authentication** → `api-authentication` (JWT, sessions)

## Further Reading

- **Express docs**: https://expressjs.com/
- **Express.js Best Practices**: https://expressjs.com/en/advanced/best-practice-performance.html
- **Node.js Production Best Practices**: https://github.com/goldbergyoni/nodebestpractices
diff --git a/skills/using-web-backend/fastapi-development.md b/skills/using-web-backend/fastapi-development.md
new file mode 100644
index 0000000..ccade1a
--- /dev/null
+++ b/skills/using-web-backend/fastapi-development.md
@@ -0,0 +1,500 @@

# FastAPI Development

## Overview

**FastAPI specialist skill providing production-ready patterns, anti-patterns to avoid, and testing strategies.**

**Core principle**: FastAPI's type hints, dependency injection, and async-first design enable fast, maintainable APIs - but require understanding async/sync boundaries, proper dependency management, and production hardening patterns.

## When to Use This Skill

Use when encountering:

- **Dependency injection**: Database connections, auth, shared resources, testing overrides
- **Async/sync boundaries**: Mixing blocking I/O with async endpoints, performance issues
- **Background tasks**: Choosing between BackgroundTasks, Celery, or other task queues
- **File uploads**: Streaming large files, memory management
- **Testing**: Dependency overrides, async test clients, fixture patterns
- **Production deployment**: ASGI servers, lifespan management, connection pooling
- **Security**: SQL injection, CORS, authentication patterns
- **Performance**: Connection pooling, query optimization, caching

## Quick Reference - Common Patterns

| Pattern | Use Case | Code Snippet |
|---------|----------|--------------|
| **DB dependency with pooling** | Per-request database access | `def get_db(): db = SessionLocal(); try: yield db; finally: db.close()` |
| **Dependency override for testing** | Test with mock/test DB | `app.dependency_overrides[get_db] = override_get_db` |
| **Lifespan events** | Startup/shutdown resources | `@asynccontextmanager async def lifespan(app): ... yield ...` |
| **Streaming file upload** | Large files without memory issues | `async with aiofiles.open(...) as f: while chunk := await file.read(CHUNK_SIZE): await f.write(chunk)` |
| **Background tasks (short)** | < 30 sec tasks | `background_tasks.add_task(func, args)` |
| **Task queue (long)** | > 1 min tasks, retries needed | Use Celery/Arq with Redis |
| **Parameterized queries** | Prevent SQL injection | `cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))` |

## Core Patterns

### 1. 
Dependency Injection Architecture + +**Pattern: Connection pooling with yield dependencies** + +```python +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker, Session +from fastapi import Depends, FastAPI + +# One-time pool creation at module level +engine = create_engine( + "postgresql://user:pass@localhost/db", + pool_size=20, # Max connections + max_overflow=0, # No overflow beyond pool_size + pool_pre_ping=True, # Verify connection health before use + pool_recycle=3600 # Recycle connections every hour +) +SessionLocal = sessionmaker(bind=engine, expire_on_commit=False) + +# Dependency pattern with automatic cleanup +def get_db() -> Session: + """ + Yields database session from pool. + Ensures cleanup even if endpoint raises exception. + """ + db = SessionLocal() + try: + yield db + finally: + db.close() + +# Usage in endpoints +@app.get("/items/{item_id}") +def get_item(item_id: int, db: Session = Depends(get_db)): + return db.query(Item).filter(Item.id == item_id).first() +``` + +**Why this pattern**: +- Pool created once (expensive operation) +- Per-request connections from pool (cheap) +- `yield` ensures cleanup on success AND exceptions +- `pool_pre_ping` prevents stale connection errors +- `pool_recycle` prevents long-lived connection issues + +**Testing pattern**: + +```python +# conftest.py +import pytest +from fastapi.testclient import TestClient + +@pytest.fixture +def test_db(): + """Test database fixture""" + db = TestSessionLocal() + try: + yield db + finally: + db.rollback() + db.close() + +@pytest.fixture +def client(test_db): + """Test client with overridden dependencies""" + def override_get_db(): + yield test_db + + app.dependency_overrides[get_db] = override_get_db + with TestClient(app) as c: + yield c + app.dependency_overrides.clear() + +# test_items.py +def test_get_item(client, test_db): + # Setup test data + test_db.add(Item(id=1, name="Test")) + test_db.commit() + + # Test endpoint + response = client.get("/items/1") + assert response.status_code == 200 +``` + +### 2. Async/Sync Boundary Management + +**❌ Anti-pattern: Blocking calls in async endpoints** + +```python +# BAD - Blocks event loop +@app.get("/users/{user_id}") +async def get_user(user_id: int): + conn = psycopg2.connect(...) # Blocking! + cursor = conn.cursor() + cursor.execute(...) # Blocking! + return cursor.fetchone() +``` + +**✅ Pattern: Use async libraries or run_in_threadpool** + +```python +# GOOD Option 1: Async database library +from databases import Database + +database = Database("postgresql://...") + +@app.get("/users/{user_id}") +async def get_user(user_id: int): + query = "SELECT * FROM users WHERE id = :user_id" + return await database.fetch_one(query=query, values={"user_id": user_id}) + +# GOOD Option 2: Run blocking code in thread pool +from fastapi.concurrency import run_in_threadpool + +def blocking_db_call(user_id: int): + conn = psycopg2.connect(...) 
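    # Opens a fresh connection per call to keep the example short; production
    # code would draw from a connection pool instead of reconnecting each time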
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
    return cursor.fetchone()

@app.get("/users/{user_id}")
async def get_user(user_id: int):
    return await run_in_threadpool(blocking_db_call, user_id)
```

**Decision table**:

| Scenario | Use |
|----------|-----|
| PostgreSQL with async needed | `asyncpg` or `databases` library |
| PostgreSQL, sync is fine | `psycopg2` with `def` (not `async def`) endpoints |
| MySQL with async | `aiomysql` |
| SQLite | `aiosqlite` (async) or sync with `def` endpoints |
| External API calls | `httpx.AsyncClient` |
| CPU-intensive work | `run_in_threadpool` or Celery |

### 3. Lifespan Management (Modern Pattern)

**✅ Use lifespan context manager** (replaces deprecated `@app.on_event`)

```python
from contextlib import asynccontextmanager

import aioredis
import asyncpg
from fastapi import FastAPI

# Global resources
resources = {}

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Startup
    resources["db_pool"] = await asyncpg.create_pool(  # asyncpg shown; any async driver's pool works
        "postgresql://...",
        min_size=10,
        max_size=20
    )
    resources["redis"] = await aioredis.create_redis_pool("redis://...")
    resources["ml_model"] = load_ml_model()  # Can be sync or async

    yield  # Application runs

    # Shutdown
    await resources["db_pool"].close()
    resources["redis"].close()
    await resources["redis"].wait_closed()
    resources.clear()

app = FastAPI(lifespan=lifespan)

# Access resources in endpoints
@app.get("/predict")
async def predict(data: dict):
    model = resources["ml_model"]
    return {"prediction": model.predict(data)}
```

### 4. File Upload Patterns

**For 100MB+ files: Stream to disk, never load into memory**

```python
import os
import uuid

import aiofiles
from fastapi import UploadFile, File, HTTPException

UPLOAD_DIR = "/var/uploads"
CHUNK_SIZE = 1024 * 1024  # 1MB chunks
MAX_FILE_SIZE = 500 * 1024 * 1024  # 500MB

@app.post("/upload")
async def upload_large_file(file: UploadFile = File(...)):
    # Validate content type
    if not file.content_type.startswith("video/"):
        raise HTTPException(400, "Only video files accepted")

    filepath = os.path.join(UPLOAD_DIR, f"{uuid.uuid4()}_{file.filename}")
    size = 0

    try:
        async with aiofiles.open(filepath, 'wb') as f:
            while chunk := await file.read(CHUNK_SIZE):
                size += len(chunk)
                if size > MAX_FILE_SIZE:
                    raise HTTPException(413, "File too large")
                await f.write(chunk)
    except Exception as e:
        # Cleanup on failure
        if os.path.exists(filepath):
            os.remove(filepath)
        raise

    return {"filename": file.filename, "size": size}
```

**For very large files (1GB+): Direct S3 upload with presigned URLs**

```python
import uuid

import boto3

@app.post("/upload/presigned-url")
async def get_presigned_upload_url(filename: str):
    s3_client = boto3.client('s3')
    presigned_post = s3_client.generate_presigned_post(
        Bucket='my-bucket',
        Key=f'uploads/{uuid.uuid4()}_{filename}',
        ExpiresIn=3600
    )
    return presigned_post  # Client uploads directly to S3
```

### 5. Background Task Decision Matrix

| Task Duration | Needs Retries? | Needs Monitoring? | Solution |
|---------------|----------------|-------------------|----------|
| < 30 seconds | No | No | `BackgroundTasks` |
| < 30 seconds | Yes | Maybe | Celery/Arq |
| > 1 minute | Don't care | Don't care | Celery/Arq |
| Any | Yes | Yes | Celery/Arq with monitoring |

**BackgroundTasks pattern** (simple, in-process):

```python
import asyncio

from fastapi import BackgroundTasks

async def send_email(email: str):
    await asyncio.sleep(2)  # Async work
    print(f"Email sent to {email}")

@app.post("/register")
async def register(email: str, background_tasks: BackgroundTasks):
    # ... save user ...
    background_tasks.add_task(send_email, email)
    return {"status": "registered"}  # Returns immediately
```

**Celery pattern** (distributed, persistent):

```python
# celery_app.py
from celery import Celery

celery_app = Celery('tasks', broker='redis://localhost:6379/0')

@celery_app.task(bind=True, max_retries=3)
def process_video(self, filepath: str):
    try:
        # Long-running work
        extract_frames(filepath)
    except Exception as exc:
        raise self.retry(exc=exc, countdown=60)

# main.py
from celery_app import process_video

@app.post("/upload")
async def upload(file: UploadFile):
    filepath = await save_file(file)
    task = process_video.delay(filepath)
    return {"task_id": task.id}

@app.get("/status/{task_id}")
async def get_status(task_id: str):
    from celery_app import celery_app
    result = celery_app.AsyncResult(task_id)
    return {"status": result.state, "result": result.result}
```

## Security Patterns

### SQL Injection Prevention

**❌ NEVER use f-strings or string concatenation**

```python
# DANGEROUS
cursor.execute(f"SELECT * FROM users WHERE id = {user_id}")
cursor.execute("SELECT * FROM users WHERE email = '" + email + "'")
```

**✅ ALWAYS use parameterized queries**

```python
# SQLAlchemy ORM (safe)
db.query(User).filter(User.id == user_id).first()

# Raw SQL (safe with parameters)
cursor.execute("SELECT * FROM users WHERE id = %s", (user_id,))
cursor.execute("SELECT * FROM users WHERE email = :email", {"email": email})
```

### CORS Configuration

```python
from fastapi.middleware.cors import CORSMiddleware

app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://yourdomain.com"],  # Specific origins, not "*" in production
    allow_credentials=True,
    allow_methods=["GET", "POST", "PUT", "DELETE"],
    allow_headers=["*"],
)
```

### Authentication Pattern

```python
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from jose import jwt  # python-jose; exposes jwt.JWTError

security = HTTPBearer()

async def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
    token = credentials.credentials
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
        user_id = payload.get("sub")
        if not user_id:
            raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid token")
        return await get_user_by_id(user_id)
    except jwt.JWTError:
        raise HTTPException(status.HTTP_401_UNAUTHORIZED, "Invalid token")

@app.get("/protected")
async def protected_route(current_user = Depends(get_current_user)):
    return {"user": current_user}
```

## Middleware Ordering

**Critical: Middleware wraps in order added, executes in reverse for responses**

```python
# Correct order:
app.add_middleware(CORSMiddleware, ...)        # 1. FIRST - handles preflight
app.add_middleware(RequestLoggingMiddleware)   # 2. Logs entire request
app.add_middleware(ErrorHandlingMiddleware)    # 3. 
Catches errors from auth/routes +app.add_middleware(AuthenticationMiddleware) # 4. LAST - closest to routes +``` + +## Common Anti-Patterns + +| Anti-Pattern | Why Bad | Fix | +|--------------|---------|-----| +| Global database connection | Not thread-safe, connection leaks | Use connection pool with dependency injection | +| `async def` with blocking I/O | Blocks event loop, kills performance | Use async libraries or `run_in_threadpool` | +| `time.sleep()` in async code | Blocks entire event loop | Use `asyncio.sleep()` | +| Loading large files into memory | Memory exhaustion, OOM crashes | Stream with `aiofiles` and chunks | +| BackgroundTasks for long work | Lost on restart, no retries | Use Celery/Arq | +| String formatting in SQL | SQL injection vulnerability | Parameterized queries only | +| `allow_origins=["*"]` with credentials | Security vulnerability | Specify exact origins | +| Not closing database connections | Connection pool exhaustion | Use `yield` in dependencies | + +## Testing Best Practices + +```python +import pytest +from fastapi.testclient import TestClient +from httpx import AsyncClient + +# Sync tests (simpler, faster for most cases) +def test_read_item(client): + response = client.get("/items/1") + assert response.status_code == 200 + +# Async tests (needed for testing async endpoints with real async operations) +@pytest.mark.asyncio +async def test_async_endpoint(): + async with AsyncClient(app=app, base_url="http://test") as ac: + response = await ac.get("/items/1") + assert response.status_code == 200 + +# Dependency override pattern +def test_with_mock_db(client): + def override_get_db(): + yield mock_db + + app.dependency_overrides[get_db] = override_get_db + response = client.get("/items/1") + app.dependency_overrides.clear() + assert response.status_code == 200 +``` + +## Production Deployment + +**ASGI server configuration** (Uvicorn + Gunicorn): + +```bash +# gunicorn with uvicorn workers (production) +gunicorn main:app \ + --workers 4 \ + --worker-class uvicorn.workers.UvicornWorker \ + --bind 0.0.0.0:8000 \ + --timeout 120 \ + --graceful-timeout 30 \ + --keep-alive 5 +``` + +**Environment-based configuration**: + +```python +from pydantic_settings import BaseSettings + +class Settings(BaseSettings): + database_url: str + redis_url: str + secret_key: str + debug: bool = False + + class Config: + env_file = ".env" + +settings = Settings() + +# Use in app +engine = create_engine(settings.database_url) +``` + +## Cross-References + +**Related skills**: +- **Security** → `ordis-security-architect` (threat modeling, OWASP top 10) +- **Python patterns** → `axiom-python-engineering` (async patterns, type hints) +- **API testing** → `api-testing` (contract testing, integration tests) +- **API documentation** → `api-documentation` or `muna-technical-writer` +- **Database optimization** → `database-integration` (query optimization, migrations) +- **Authentication deep dive** → `api-authentication` (OAuth2, JWT patterns) +- **GraphQL alternative** → `graphql-api-design` + +## Performance Tips + +1. **Use connection pooling** - Create pool once, not per-request +2. **Enable response caching** - Use `fastapi-cache2` for expensive queries +3. **Limit response size** - Paginate large result sets +4. **Use async for I/O** - Database, HTTP calls, file operations +5. **Profile slow endpoints** - Use `starlette-prometheus` for monitoring +6. 
**Enable gzip compression** - `GZipMiddleware` for large JSON responses + +## When NOT to Use FastAPI + +- **Simple CRUD with admin panel** → Django (has built-in admin) +- **Heavy template rendering** → Django or Flask +- **Mature ecosystem needed** → Django (more third-party packages) +- **Team unfamiliar with async** → Flask or Django (simpler mental model) + +FastAPI excels at: Modern APIs, microservices, ML model serving, real-time features, high performance requirements. diff --git a/skills/using-web-backend/graphql-api-design.md b/skills/using-web-backend/graphql-api-design.md new file mode 100644 index 0000000..97f4c95 --- /dev/null +++ b/skills/using-web-backend/graphql-api-design.md @@ -0,0 +1,954 @@ + +# GraphQL API Design + +## Overview + +**GraphQL API specialist covering schema design, query optimization, real-time subscriptions, federation, and production patterns.** + +**Core principle**: GraphQL enables clients to request exactly the data they need in a single query - but requires careful schema design, batching strategies, and security measures to prevent performance and security issues. + +## When to Use This Skill + +Use when encountering: + +- **N+1 query problems**: Too many database queries for nested resolvers +- **Schema design**: Types, interfaces, unions, input types, directives +- **Pagination**: Connections, cursors, offset patterns +- **Performance**: Query complexity, caching, batching, persisted queries +- **Real-time**: Subscriptions, WebSocket patterns, live queries +- **Federation**: Splitting schema across multiple services +- **Security**: Query depth limiting, cost analysis, allowlisting +- **Testing**: Schema validation, resolver testing, integration tests +- **Migrations**: Schema evolution, deprecation, versioning + +**Do NOT use for**: +- REST API design → `rest-api-design` +- Framework-specific implementation → `fastapi-development`, `express-development` +- Microservices architecture → `microservices-architecture` (use with Federation) + +## GraphQL vs REST Decision Matrix + +| Factor | Choose GraphQL | Choose REST | +|--------|----------------|-------------| +| **Client needs** | Mobile apps, varying data needs | Uniform data requirements | +| **Over/under-fetching** | Problem | Not a problem | +| **Real-time features** | Subscriptions built-in | Need SSE/WebSockets separately | +| **Schema-first** | Strong typing required | Flexible, schema optional | +| **Caching** | Complex (field-level) | Simple (HTTP caching) | +| **File uploads** | Non-standard (multipart) | Native (multipart/form-data) | +| **Team expertise** | GraphQL experience | REST experience | +| **API consumers** | Known clients | Public/third-party | +| **Rate limiting** | Complex (field-level) | Simple (endpoint-level) | + +**Hybrid approach**: GraphQL for internal/mobile, REST for public APIs + +## Quick Reference - Core Patterns + +| Pattern | Use Case | Key Concept | +|---------|----------|-------------| +| **DataLoader** | N+1 queries | Batch and cache within request | +| **Connection** | Pagination | Cursor-based with edges/nodes | +| **Union** | Heterogeneous results | Search, activity feeds | +| **Interface** | Shared fields | Polymorphic types with guarantees | +| **Directive** | Field behavior | @auth, @deprecated, custom logic | +| **Input types** | Mutations | Type-safe input validation | +| **Federation** | Microservices | Distributed schema composition | +| **Subscription** | Real-time | WebSocket-based live updates | + +## N+1 Query Optimization + +### The Problem + 
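A minimal sketch of the failure mode, assuming Apollo-style resolvers over a hypothetical `db` access layer: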
+```javascript +// Schema +type Post { + id: ID! + title: String! + author: User! // Requires fetching user +} + +type Query { + posts: [Post!]! +} + +// Naive resolver (N+1 problem) +const resolvers = { + Query: { + posts: () => db.posts.findAll() // 1 query + }, + Post: { + author: (post) => db.users.findOne(post.authorId) // N queries! + } +}; + +// Result: 100 posts = 101 database queries +``` + +### DataLoader Solution + +```javascript +const DataLoader = require('dataloader'); + +// Batch loading function +const batchUsers = async (userIds) => { + const users = await db.users.findMany({ + where: { id: { in: userIds } } + }); + + // CRITICAL: Return in same order as requested IDs + const userMap = new Map(users.map(u => [u.id, u])); + return userIds.map(id => userMap.get(id) || null); +}; + +// Create loader per-request (avoid stale cache) +const createLoaders = () => ({ + user: new DataLoader(batchUsers), + post: new DataLoader(batchPosts), + // ... other loaders +}); + +// Add to context +const server = new ApolloServer({ + typeDefs, + resolvers, + context: () => ({ + loaders: createLoaders(), + db, + user: getCurrentUser() + }) +}); + +// Use in resolver +const resolvers = { + Post: { + author: (post, args, { loaders }) => { + return loaders.user.load(post.authorId); // Batched! + } + } +}; +``` + +**Result**: 100 posts = 2 queries (1 for posts, 1 batched for unique authors) + +### Advanced DataLoader Patterns + +**Composite Keys**: + +```javascript +// For multi-field lookups +const batchUsersByEmail = async (keys) => { + // keys = [{domain: 'example.com', email: 'user@example.com'}, ...] + const users = await db.users.findMany({ + where: { + OR: keys.map(k => ({ email: k.email, domain: k.domain })) + } + }); + + const userMap = new Map( + users.map(u => [`${u.domain}:${u.email}`, u]) + ); + + return keys.map(k => userMap.get(`${k.domain}:${k.email}`)); +}; + +const userByEmailLoader = new DataLoader(batchUsersByEmail, { + cacheKeyFn: (key) => `${key.domain}:${key.email}` +}); +``` + +**Priming Cache**: + +```javascript +// After fetching posts, prime user loader +const posts = await db.posts.findAll(); +posts.forEach(post => { + if (post.authorData) { + loaders.user.prime(post.authorId, post.authorData); + } +}); +return posts; +``` + +**Error Handling in Batch**: + +```javascript +const batchUsers = async (userIds) => { + const users = await db.users.findMany({ + where: { id: { in: userIds } } + }); + + const userMap = new Map(users.map(u => [u.id, u])); + + return userIds.map(id => { + const user = userMap.get(id); + if (!user) { + return new Error(`User ${id} not found`); // Per-item error + } + return user; + }); +}; +``` + +## Schema Design Patterns + +### Interface vs Union + +**Interface** (shared fields enforced): + +```graphql +interface Node { + id: ID! +} + +interface Timestamped { + createdAt: DateTime! + updatedAt: DateTime! +} + +type User implements Node & Timestamped { + id: ID! + createdAt: DateTime! + updatedAt: DateTime! + email: String! + name: String! +} + +type Post implements Node & Timestamped { + id: ID! + createdAt: DateTime! + updatedAt: DateTime! + title: String! + content: String! +} + +type Query { + node(id: ID!): Node # Can return any Node implementer + nodes(ids: [ID!]!): [Node!]! +} +``` + +**Query**: +```graphql +{ + node(id: "user_123") { + id + ... on User { + email + name + } + ... 
on Post { + title + } + } +} +``` + +**Union** (no shared fields required): + +```graphql +union SearchResult = User | Post | Comment + +type Query { + search(query: String!): [SearchResult!]! +} +``` + +**When to use each**: + +| Use Case | Pattern | Why | +|----------|---------|-----| +| Global ID lookup | Interface (Node) | Guarantees `id` field | +| Polymorphic lists with shared fields | Interface | Can query shared fields without fragments | +| Heterogeneous results | Union | No shared field requirements | +| Activity feeds | Union | Different event types | +| Search results | Union | Mixed content types | + +### Input Types and Validation + +```graphql +input CreatePostInput { + title: String! + content: String! + tags: [String!] + publishedAt: DateTime +} + +input UpdatePostInput { + title: String + content: String + tags: [String!] +} + +type Mutation { + createPost(input: CreatePostInput!): Post! + updatePost(id: ID!, input: UpdatePostInput!): Post! +} +``` + +**Benefits**: +- Reusable across multiple mutations +- Clear separation of create vs update requirements +- Type-safe in generated code +- Can add descriptions per field + +### Custom Directives + +```graphql +directive @auth(requires: Role = USER) on FIELD_DEFINITION +directive @rateLimit(limit: Int!, window: Int!) on FIELD_DEFINITION +directive @deprecated(reason: String) on FIELD_DEFINITION | ENUM_VALUE + +enum Role { + USER + ADMIN + SUPER_ADMIN +} + +type Query { + publicData: String + userData: User @auth(requires: USER) + adminData: String @auth(requires: ADMIN) + expensiveQuery: Result @rateLimit(limit: 10, window: 60) +} + +type User { + id: ID! + email: String! @auth(requires: USER) # Only authenticated users + internalId: String @deprecated(reason: "Use `id` instead") +} +``` + +## Pagination Patterns + +### Relay Connection Specification + +**Standard connection pattern**: + +```graphql +type PostConnection { + edges: [PostEdge!]! + pageInfo: PageInfo! + totalCount: Int # Optional +} + +type PostEdge { + node: Post! + cursor: String! +} + +type PageInfo { + hasNextPage: Boolean! + hasPreviousPage: Boolean! + startCursor: String + endCursor: String +} + +type Query { + posts( + first: Int + after: String + last: Int + before: String + ): PostConnection! +} +``` + +**Implementation**: + +```javascript +const resolvers = { + Query: { + posts: async (parent, { first, after, last, before }) => { + const limit = first || last || 10; + const cursor = after || before; + + // Decode cursor + const offset = cursor ? decodeCursor(cursor) : 0; + + // Fetch one extra to determine hasNextPage + const posts = await db.posts.findMany({ + skip: offset, + take: limit + 1, + orderBy: { createdAt: 'desc' } + }); + + const hasNextPage = posts.length > limit; + const edges = posts.slice(0, limit).map((post, index) => ({ + node: post, + cursor: encodeCursor(offset + index) + })); + + return { + edges, + pageInfo: { + hasNextPage, + hasPreviousPage: offset > 0, + startCursor: edges[0]?.cursor, + endCursor: edges[edges.length - 1]?.cursor + } + }; + } + } +}; + +// Opaque cursor encoding +const encodeCursor = (offset) => + Buffer.from(`arrayconnection:${offset}`).toString('base64'); +const decodeCursor = (cursor) => + parseInt(Buffer.from(cursor, 'base64').toString().split(':')[1]); +``` + +**Alternative: Offset pagination** (simpler but less robust): + +```graphql +type PostPage { + items: [Post!]! + total: Int! + page: Int! + pageSize: Int! +} + +type Query { + posts(page: Int = 1, pageSize: Int = 20): PostPage! 
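  # Offset pages drift when rows are inserted mid-pagination; prefer the
  # cursor-based connection above for user-facing feeds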
+} +``` + +## Performance Optimization + +### Query Complexity Analysis + +**Prevent expensive queries**: + +```javascript +const depthLimit = require('graphql-depth-limit'); +const { createComplexityLimitRule } = require('graphql-validation-complexity'); + +const server = new ApolloServer({ + typeDefs, + resolvers, + validationRules: [ + depthLimit(10), // Max 10 levels deep + createComplexityLimitRule(1000, { + scalarCost: 1, + objectCost: 2, + listFactor: 10 + }) + ] +}); +``` + +**Custom complexity**: + +```graphql +type Query { + posts(first: Int!): [Post!]! @cost(complexity: 10, multipliers: ["first"]) + expensiveAnalytics: AnalyticsReport! @cost(complexity: 1000) +} +``` + +### Automatic Persisted Queries (APQ) + +**Client sends hash instead of full query**: + +```javascript +// Client +const query = gql` + query GetUser($id: ID!) { + user(id: $id) { name email } + } +`; + +const queryHash = sha256(query); + +// First request: Send hash only +fetch('/graphql', { + body: JSON.stringify({ + extensions: { + persistedQuery: { + version: 1, + sha256Hash: queryHash + } + }, + variables: { id: '123' } + }) +}); + +// If server doesn't have it (PersistedQueryNotFound) +// Second request: Send full query + hash +fetch('/graphql', { + body: JSON.stringify({ + query, + extensions: { + persistedQuery: { + version: 1, + sha256Hash: queryHash + } + }, + variables: { id: '123' } + }) +}); + +// Future requests: Just send hash +``` + +**Benefits**: +- Reduced bandwidth (hash << full query) +- CDN caching of GET requests +- Query allowlisting (if configured) + +### Field-Level Caching + +```javascript +const resolvers = { + Query: { + user: async (parent, { id }, { cache }) => { + const cacheKey = `user:${id}`; + const cached = await cache.get(cacheKey); + if (cached) return JSON.parse(cached); + + const user = await db.users.findOne(id); + await cache.set(cacheKey, JSON.stringify(user), { ttl: 300 }); + return user; + } + } +}; +``` + +## Subscriptions (Real-Time) + +### Basic Subscription + +```graphql +type Subscription { + postAdded: Post! + commentAdded(postId: ID!): Comment! +} + +type Mutation { + createPost(input: CreatePostInput!): Post! 
+} +``` + +**Implementation (Apollo Server)**: + +```javascript +const { PubSub } = require('graphql-subscriptions'); +const pubsub = new PubSub(); + +const resolvers = { + Mutation: { + createPost: async (parent, { input }) => { + const post = await db.posts.create(input); + pubsub.publish('POST_ADDED', { postAdded: post }); + return post; + } + }, + Subscription: { + postAdded: { + subscribe: () => pubsub.asyncIterator(['POST_ADDED']) + }, + commentAdded: { + subscribe: (parent, { postId }) => + pubsub.asyncIterator([`COMMENT_ADDED_${postId}`]) + } + } +}; + +// Client +subscription { + postAdded { + id + title + author { name } + } +} +``` + +### Scaling Subscriptions + +**Problem**: In-memory PubSub doesn't work across servers + +**Solution**: Redis PubSub + +```javascript +const { RedisPubSub } = require('graphql-redis-subscriptions'); +const Redis = require('ioredis'); + +const pubsub = new RedisPubSub({ + publisher: new Redis(), + subscriber: new Redis() +}); + +// Now works across multiple server instances +``` + +### Subscription Authorization + +```javascript +const resolvers = { + Subscription: { + secretDataUpdated: { + subscribe: withFilter( + () => pubsub.asyncIterator(['SECRET_DATA']), + (payload, variables, context) => { + // Only admin users can subscribe + return context.user?.role === 'ADMIN'; + } + ) + } + } +}; +``` + +## Federation (Distributed Schema) + +**Split schema across multiple services**: + +### User Service + +```graphql +# user-service schema +type User @key(fields: "id") { + id: ID! + email: String! + name: String! +} + +type Query { + user(id: ID!): User +} +``` + +### Post Service + +```graphql +# post-service schema +extend type User @key(fields: "id") { + id: ID! @external + posts: [Post!]! +} + +type Post { + id: ID! + title: String! + content: String! + authorId: ID! + author: User! +} +``` + +### Gateway + +Composes schemas and routes requests: + +```javascript +const { ApolloGateway } = require('@apollo/gateway'); + +const gateway = new ApolloGateway({ + serviceList: [ + { name: 'users', url: 'http://user-service:4001/graphql' }, + { name: 'posts', url: 'http://post-service:4002/graphql' } + ] +}); + +const server = new ApolloServer({ + gateway, + subscriptions: false // Not yet supported in federation +}); +``` + +**Reference Resolver** (fetch extended fields): + +```javascript +// post-service resolvers +const resolvers = { + User: { + __resolveReference: async (user) => { + // Receive { __typename: 'User', id: '123' } + // Don't need to fetch user, just return it for field resolution + return user; + }, + posts: async (user) => { + return db.posts.findMany({ where: { authorId: user.id } }); + } + } +}; +``` + +## Security Patterns + +### Query Depth Limiting + +```javascript +const depthLimit = require('graphql-depth-limit'); + +const server = new ApolloServer({ + validationRules: [depthLimit(7)] // Max 7 levels deep +}); + +// Prevents: user { posts { author { posts { author { ... } } } } +``` + +### Query Allowlisting (Production) + +```javascript +const allowedQueries = new Map([ + ['GetUser', 'query GetUser($id: ID!) 
{ user(id: $id) { name } }'], + ['ListPosts', 'query ListPosts { posts { title } }'] +]); + +const server = new ApolloServer({ + validationRules: [ + (context) => ({ + Document(node) { + const queryName = node.definitions[0]?.name?.value; + if (!allowedQueries.has(queryName)) { + context.reportError( + new GraphQLError('Query not allowed') + ); + } + } + }) + ] +}); +``` + +### Rate Limiting (Field-Level) + +```javascript +const { shield, rule, and } = require('graphql-shield'); + +const isRateLimited = rule({ cache: 'contextual' })( + async (parent, args, ctx, info) => { + const key = `rate:${ctx.user.id}:${info.fieldName}`; + const count = await redis.incr(key); + if (count === 1) { + await redis.expire(key, 60); // 1 minute window + } + return count <= 10; // 10 requests per minute + } +); + +const permissions = shield({ + Query: { + expensiveQuery: isRateLimited + } +}); +``` + +## Schema Evolution + +### Deprecation + +```graphql +type User { + id: ID! + username: String @deprecated(reason: "Use `name` instead") + name: String! +} +``` + +**Tooling shows warnings to clients** + +### Breaking Changes (Avoid) + +❌ **Breaking**: +- Removing fields +- Changing field types +- Making nullable → non-nullable +- Removing enum values +- Changing arguments + +✅ **Non-breaking**: +- Adding fields +- Adding types +- Deprecating fields +- Making non-nullable → nullable +- Adding arguments with defaults + +### Versioning Strategy + +**Don't version schema** - evolve incrementally: + +1. Add new field +2. Deprecate old field +3. Monitor usage +4. Remove old field in next major version (if removing) + +## Testing Strategies + +### Schema Validation + +```javascript +const { buildSchema, validateSchema } = require('graphql'); + +test('schema is valid', () => { + const schema = buildSchema(typeDefs); + const errors = validateSchema(schema); + expect(errors).toHaveLength(0); +}); +``` + +### Resolver Testing + +```javascript +const resolvers = require('./resolvers'); + +test('user resolver fetches user', async () => { + const mockDb = { + users: { findOne: jest.fn().mockResolvedValue({ id: '1', name: 'Alice' }) } + }; + + const result = await resolvers.Query.user( + null, + { id: '1' }, + { db: mockDb, loaders: { user: mockDataLoader() } } + ); + + expect(result).toEqual({ id: '1', name: 'Alice' }); + expect(mockDb.users.findOne).toHaveBeenCalledWith('1'); +}); +``` + +### Integration Testing + +```javascript +const { ApolloServer } = require('apollo-server'); +const { createTestClient } = require('apollo-server-testing'); + +const server = new ApolloServer({ typeDefs, resolvers }); +const { query } = createTestClient(server); + +test('GetUser query', async () => { + const GET_USER = gql` + query GetUser($id: ID!) 
{ + user(id: $id) { + name + email + } + } + `; + + const res = await query({ query: GET_USER, variables: { id: '1' } }); + + expect(res.errors).toBeUndefined(); + expect(res.data.user).toMatchObject({ + name: 'Alice', + email: 'alice@example.com' + }); +}); +``` + +## Anti-Patterns + +| Anti-Pattern | Why Bad | Fix | +|--------------|---------|-----| +| **No DataLoader** | N+1 queries kill performance | Use DataLoader for all entity fetching | +| **Offset pagination** | Breaks with real-time data | Use cursor-based connections | +| **No query complexity** | DoS via deeply nested queries | Set depth/complexity limits | +| **Shared DataLoader instances** | Stale cache across requests | Create new loaders per request | +| **No error masking** | Leaks internal errors to clients | Mask in production, log internally | +| **mutations returning Boolean** | Can't extend response | Return object type | +| **Nullable IDs** | IDs should never be null | Use `ID!` not `ID` | +| **Over-fetching in resolvers** | Selecting * wastes bandwidth | Select only requested fields | + +## Common Mistakes + +### 1. DataLoader Return Order + +```javascript +// ❌ WRONG - Returns in database order +const batchUsers = async (ids) => { + return await db.users.findMany({ where: { id: { in: ids } } }); +}; + +// ✅ CORRECT - Returns in requested order +const batchUsers = async (ids) => { + const users = await db.users.findMany({ where: { id: { in: ids } } }); + const userMap = new Map(users.map(u => [u.id, u])); + return ids.map(id => userMap.get(id)); +}; +``` + +### 2. Mutations Returning Primitives + +```graphql +# ❌ BAD - Can't extend +type Mutation { + deletePost(id: ID!): Boolean! +} + +# ✅ GOOD - Extensible +type DeletePostPayload { + success: Boolean! + deletedPostId: ID + message: String +} + +type Mutation { + deletePost(id: ID!): DeletePostPayload! +} +``` + +### 3. No Context in Subscriptions + +```javascript +// ❌ Missing auth context +const server = new ApolloServer({ + subscriptions: { + onConnect: () => { + return {}; // No user context! 
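      // WebSocket connections bypass HTTP auth middleware entirely,
      // so the user must be resolved here in onConnect (see fix below)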
+ } + } +}); + +// ✅ Include auth +const server = new ApolloServer({ + subscriptions: { + onConnect: (connectionParams) => { + const token = connectionParams.authToken; + const user = verifyToken(token); + return { user }; + } + } +}); +``` + +## Tooling Ecosystem + +**Schema Management**: +- **Apollo Studio**: Schema registry, operation tracking, metrics +- **GraphQL Inspector**: Schema diffing, breaking change detection +- **Graphql-eslint**: Linting for schema and queries + +**Code Generation**: +- **GraphQL Code Generator**: TypeScript types from schema +- **Apollo Codegen**: Client types for queries + +**Development**: +- **GraphiQL**: In-browser IDE +- **Apollo Sandbox**: Modern GraphQL explorer +- **Altair**: Desktop GraphQL client + +**Testing**: +- **EasyGraphQL Test**: Schema mocking +- **GraphQL Tools**: Schema stitching, mocking + +## Cross-References + +**Related skills**: +- **REST comparison** → `rest-api-design` (when to use each) +- **FastAPI implementation** → `fastapi-development` (Strawberry, Graphene) +- **Express implementation** → `express-development` (Apollo Server, GraphQL Yoga) +- **Microservices** → `microservices-architecture` (use with Federation) +- **Security** → `ordis-security-architect` (OWASP API Security) +- **Testing** → `api-testing` (integration testing strategies) +- **Authentication** → `api-authentication` (JWT, OAuth2 with GraphQL) + +## Further Reading + +- **GraphQL Spec**: https://spec.graphql.org/ +- **Apollo Docs**: Federation, caching, tooling +- **Relay Spec**: Connection specification +- **DataLoader GitHub**: facebook/dataloader +- **Production Ready GraphQL**: Book by Marc-André Giroux diff --git a/skills/using-web-backend/message-queues.md b/skills/using-web-backend/message-queues.md new file mode 100644 index 0000000..75b44db --- /dev/null +++ b/skills/using-web-backend/message-queues.md @@ -0,0 +1,993 @@ + +# Message Queues + +## Overview + +**Message queue specialist covering technology selection, reliability patterns, ordering guarantees, schema evolution, and production operations.** + +**Core principle**: Message queues decouple producers from consumers, enabling async processing, load leveling, and resilience - but require careful design for reliability, ordering, monitoring, and operational excellence. 
+ +## When to Use This Skill + +Use when encountering: + +- **Technology selection**: RabbitMQ vs Kafka vs SQS vs SNS +- **Reliability**: Guaranteed delivery, acknowledgments, retries, DLQ +- **Ordering**: Partition keys, FIFO queues, ordered processing +- **Scaling**: Consumer groups, parallelism, backpressure +- **Schema evolution**: Message versioning, Avro, Protobuf +- **Monitoring**: Lag tracking, alerting, distributed tracing +- **Advanced patterns**: Outbox, saga, CQRS, event sourcing +- **Security**: Encryption, IAM, Kafka authentication +- **Testing**: Local testing, chaos engineering, load testing + +**Do NOT use for**: +- Request/response APIs → Use REST or GraphQL instead +- Strong consistency required → Use database transactions +- Real-time streaming analytics → See if streaming-specific skill exists + +## Technology Selection Matrix + +| Factor | RabbitMQ | Apache Kafka | AWS SQS | AWS SNS | +|--------|----------|--------------|---------|---------| +| **Use Case** | Task queues, routing | Event streaming, logs | Simple queues | Pub/sub fanout | +| **Throughput** | 10k-50k msg/s | 100k+ msg/s | 3k msg/s (std), 300 msg/s (FIFO) | 100k+ msg/s | +| **Ordering** | Queue-level | Partition-level (strong) | FIFO queues only | None | +| **Persistence** | Durable queues | Log-based (default) | Managed | Ephemeral (SNS → SQS for durability) | +| **Retention** | Until consumed | Days to weeks | 4 days (std), 14 days max | None (delivery only) | +| **Routing** | Exchanges (topic, fanout, headers) | Topics only | None | Topic-based filtering | +| **Message size** | Up to 128 MB | Up to 1 MB (configurable) | 256 KB | 256 KB | +| **Ops complexity** | Medium (clustering) | High (partitions, replication) | Low (managed) | Low (managed) | +| **Cost** | EC2 self-hosted | Self-hosted or MSK | Pay-per-request | Pay-per-request | + +### Decision Tree + +``` +Are you on AWS and need simple async processing? + → Yes → **AWS SQS** (start simple) + → No → Continue... + +Do you need event replay or stream processing? + → Yes → **Kafka** (log-based, replayable) + → No → Continue... + +Do you need complex routing (topic exchange, headers)? + → Yes → **RabbitMQ** (rich exchange types) + → No → Continue... + +Do you need pub/sub fanout to multiple subscribers? 
+ → Yes → **SNS** (or Kafka topics with multiple consumer groups) + → No → **SQS** or **RabbitMQ** for task queues +``` + +### Migration Path + +| Current State | Next Step | Why | +|---------------|-----------|-----| +| No queue | Start with SQS (if AWS) or RabbitMQ | Lowest operational complexity | +| SQS → 1k+ msg/s | Consider Kafka or sharded SQS | SQS throttles at 3k msg/s | +| RabbitMQ → Event sourcing needed | Migrate to Kafka | Kafka's log retention enables replay | +| Kafka → Simple task queue | Consider RabbitMQ or SQS | Kafka is overkill for simple queues | + +## Reliability Patterns + +### Acknowledgment Modes + +| Mode | When Ack Sent | Reliability | Performance | Use Case | +|------|---------------|-------------|-------------|----------| +| **Auto-ack** | On receive | Low (lost on crash) | High | Logs, analytics, best-effort | +| **Manual ack (after processing)** | After success | High (at-least-once) | Medium | Standard production pattern | +| **Transactional** | In transaction | Highest (exactly-once) | Low | Financial, critical data | + +### At-Least-Once Delivery Pattern + +**SQS**: + +```python +# WRONG: Delete before processing +message = sqs.receive_message(QueueUrl=queue_url)['Messages'][0] +sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=message['ReceiptHandle']) +process(message['Body']) # ❌ If this fails, message is lost + +# CORRECT: Process, then delete +message = sqs.receive_message( + QueueUrl=queue_url, + VisibilityTimeout=300 # 5 minutes to process +)['Messages'][0] + +try: + process(json.loads(message['Body'])) + sqs.delete_message(QueueUrl=queue_url, ReceiptHandle=message['ReceiptHandle']) +except Exception as e: + # Message becomes visible again after timeout + logger.error(f"Processing failed, will retry: {e}") +``` + +**Kafka**: + +```python +# WRONG: Auto-commit before processing +consumer = KafkaConsumer( + 'orders', + enable_auto_commit=True, # ❌ Commits offset before processing + auto_commit_interval_ms=5000 +) + +for msg in consumer: + process(msg.value) # Crash here = message lost + +# CORRECT: Manual commit after processing +consumer = KafkaConsumer( + 'orders', + enable_auto_commit=False +) + +for msg in consumer: + try: + process(msg.value) + consumer.commit() # ✓ Commit only after success + except Exception as e: + logger.error(f"Processing failed, will retry: {e}") + # Don't commit - message will be reprocessed +``` + +**RabbitMQ**: + +```python +import pika + +connection = pika.BlockingConnection(pika.ConnectionParameters('localhost')) +channel = connection.channel() + +def callback(ch, method, properties, body): + try: + process(json.loads(body)) + ch.basic_ack(delivery_tag=method.delivery_tag) # ✓ Ack after success + except Exception as e: + logger.error(f"Processing failed: {e}") + ch.basic_nack(delivery_tag=method.delivery_tag, requeue=True) # Requeue + +channel.basic_consume( + queue='orders', + on_message_callback=callback, + auto_ack=False # ✓ Manual acknowledgment +) + +channel.start_consuming() +``` + +### Idempotency (Critical for At-Least-Once) + +Since at-least-once delivery guarantees duplicates, **all processing must be idempotent**: + +```python +# Pattern 1: Database unique constraint +def process_order(order_id, data): + db.execute( + "INSERT INTO orders (id, user_id, amount, created_at) " + "VALUES (%s, %s, %s, NOW()) " + "ON CONFLICT (id) DO NOTHING", # Idempotent + (order_id, data['user_id'], data['amount']) + ) + +# Pattern 2: Distributed lock (Redis) +def process_order_with_lock(order_id, data): + lock_key = 
f"lock:order:{order_id}" + + # Try to acquire lock (60s TTL) + if not redis.set(lock_key, "1", nx=True, ex=60): + logger.info(f"Order {order_id} already being processed") + return # Duplicate, skip + + try: + # Process order + create_order(data) + charge_payment(data['amount']) + finally: + redis.delete(lock_key) + +# Pattern 3: Idempotency key table +def process_with_idempotency_key(message_id, data): + with db.transaction(): + # Check if already processed + result = db.execute( + "SELECT 1 FROM processed_messages WHERE message_id = %s FOR UPDATE", + (message_id,) + ) + + if result: + return # Already processed + + # Process + record atomically + process_order(data) + db.execute( + "INSERT INTO processed_messages (message_id, processed_at) VALUES (%s, NOW())", + (message_id,) + ) +``` + +## Ordering Guarantees + +### Kafka: Partition-Level Ordering + +**Kafka guarantees ordering within a partition**, not across partitions. + +```python +from kafka import KafkaProducer + +producer = KafkaProducer( + bootstrap_servers=['kafka:9092'], + key_serializer=str.encode, + value_serializer=lambda v: json.dumps(v).encode() +) + +# ✓ Partition key ensures ordering +def publish_order_event(user_id, event_type, data): + producer.send( + 'orders', + key=str(user_id), # All user_id events go to same partition + value={ + 'event_type': event_type, + 'user_id': user_id, + 'data': data, + 'timestamp': time.time() + } + ) + +# User 123's events all go to partition 2 → strict ordering +publish_order_event(123, 'order_placed', {...}) +publish_order_event(123, 'payment_processed', {...}) +publish_order_event(123, 'shipped', {...}) +``` + +**Partition count determines max parallelism**: + +``` +Topic: orders (4 partitions) +Consumer group: order-processors + +2 consumers → Each processes 2 partitions +4 consumers → Each processes 1 partition (max parallelism) +5 consumers → 1 consumer idle (wasted) + +Rule: partition_count >= max_consumers_needed +``` + +### SQS FIFO: MessageGroupId Ordering + +```python +import boto3 + +sqs = boto3.client('sqs') + +# FIFO queue guarantees ordering per MessageGroupId +sqs.send_message( + QueueUrl='orders.fifo', + MessageBody=json.dumps(event), + MessageGroupId=f"user-{user_id}", # Like Kafka partition key + MessageDeduplicationId=f"{event_id}-{timestamp}" # Prevent duplicates +) + +# Throughput limit: 300 msg/s per MessageGroupId +# Workaround: Use multiple MessageGroupIds if possible +``` + +### RabbitMQ: Single Consumer Ordering + +```python +# RabbitMQ guarantees ordering if single consumer +channel.basic_qos(prefetch_count=1) # Process one at a time + +channel.basic_consume( + queue='orders', + on_message_callback=callback, + auto_ack=False +) + +# Multiple consumers break ordering unless using consistent hashing +``` + +## Dead Letter Queues (DLQ) + +### Retry Strategy with Exponential Backoff + +**SQS with DLQ**: + +```python +# Infrastructure setup +main_queue = sqs.create_queue( + QueueName='orders', + Attributes={ + 'RedrivePolicy': json.dumps({ + 'deadLetterTargetArn': dlq_arn, + 'maxReceiveCount': '3' # After 3 failures → DLQ + }), + 'VisibilityTimeout': '300' + } +) + +# Consumer with retry logic +def process_with_retry(message): + attempt = int(message.attributes.get('ApproximateReceiveCount', 0)) + + try: + process_order(json.loads(message.body)) + message.delete() + + except RetriableError as e: + # Exponential backoff: 10s, 20s, 40s, 80s, ... 
+ backoff = min(300, 2 ** attempt * 10) + message.change_visibility(VisibilityTimeout=backoff) + logger.warning(f"Retriable error (attempt {attempt}), retry in {backoff}s") + + except PermanentError as e: + # Send to DLQ immediately + logger.error(f"Permanent error: {e}") + send_to_dlq(message, error=str(e)) + message.delete() + +# Error classification +class RetriableError(Exception): + """Network timeout, rate limit, DB unavailable""" + pass + +class PermanentError(Exception): + """Invalid data, missing field, business rule violation""" + pass +``` + +**Kafka DLQ Pattern**: + +```python +from kafka import KafkaConsumer, KafkaProducer + +consumer = KafkaConsumer('orders', group_id='processor') +dlq_producer = KafkaProducer(bootstrap_servers=['kafka:9092']) + +def process_with_dlq(message): + retry_count = message.headers.get('retry_count', 0) + + try: + process_order(message.value) + consumer.commit() + + except RetriableError as e: + if retry_count < 3: + # Send to retry topic with delay + delay_minutes = 2 ** retry_count # 1min, 2min, 4min + retry_producer.send( + f'orders-retry-{delay_minutes}min', + value=message.value, + headers={'retry_count': retry_count + 1} + ) + else: + # Max retries → DLQ + dlq_producer.send( + 'orders-dlq', + value=message.value, + headers={'error': str(e), 'retry_count': retry_count} + ) + consumer.commit() # Don't reprocess from main topic + + except PermanentError as e: + # Immediate DLQ + dlq_producer.send('orders-dlq', value=message.value, headers={'error': str(e)}) + consumer.commit() +``` + +### DLQ Monitoring & Recovery + +```python +# Alert on DLQ depth +def check_dlq_depth(): + attrs = sqs.get_queue_attributes( + QueueUrl=dlq_url, + AttributeNames=['ApproximateNumberOfMessages'] + ) + depth = int(attrs['Attributes']['ApproximateNumberOfMessages']) + + if depth > 10: + alert(f"DLQ has {depth} messages - investigate!") + +# Manual recovery +def replay_from_dlq(): + """Fix root cause, then replay""" + messages = dlq.receive_messages(MaxNumberOfMessages=10) + + for msg in messages: + data = json.loads(msg.body) + + # Fix data issue + if 'customer_email' not in data: + data['customer_email'] = lookup_email(data['user_id']) + + # Replay to main queue + main_queue.send_message(MessageBody=json.dumps(data)) + msg.delete() +``` + +## Message Schema Evolution + +### Versioning Strategies + +**Pattern 1: Version field in message**: + +```python +# v1 message +{ + "version": "1.0", + "order_id": "123", + "amount": 99.99 +} + +# v2 message (added currency) +{ + "version": "2.0", + "order_id": "123", + "amount": 99.99, + "currency": "USD" +} + +# Consumer handles both versions +def process_order(message): + if message['version'] == "1.0": + amount = message['amount'] + currency = "USD" # Default for v1 + elif message['version'] == "2.0": + amount = message['amount'] + currency = message['currency'] + else: + raise ValueError(f"Unsupported version: {message['version']}") +``` + +**Pattern 2: Apache Avro (Kafka best practice)**: + +```python +from confluent_kafka import avro +from confluent_kafka.avro import AvroProducer, AvroConsumer + +# Define schema +value_schema = avro.loads(''' +{ + "type": "record", + "name": "Order", + "fields": [ + {"name": "order_id", "type": "string"}, + {"name": "amount", "type": "double"}, + {"name": "currency", "type": "string", "default": "USD"} # Backward compatible + ] +} +''') + +# Producer +producer = AvroProducer({ + 'bootstrap.servers': 'kafka:9092', + 'schema.registry.url': 'http://schema-registry:8081' +}, 
default_value_schema=value_schema) + +producer.produce(topic='orders', value={ + 'order_id': '123', + 'amount': 99.99, + 'currency': 'USD' +}) + +# Consumer automatically validates schema +consumer = AvroConsumer({ + 'bootstrap.servers': 'kafka:9092', + 'group.id': 'processor', + 'schema.registry.url': 'http://schema-registry:8081' +}) +``` + +**Avro Schema Evolution Rules**: + +| Change | Compatible? | Notes | +|--------|-------------|-------| +| Add field with default | ✓ Backward compatible | Old consumers ignore new field | +| Remove field | ✓ Forward compatible | New consumers must handle missing field | +| Rename field | ❌ Breaking | Requires migration | +| Change field type | ❌ Breaking | Requires new topic or migration | + +**Pattern 3: Protobuf (alternative to Avro)**: + +```protobuf +syntax = "proto3"; + +message Order { + string order_id = 1; + double amount = 2; + string currency = 3; // New field, backward compatible +} +``` + +### Schema Registry (Kafka) + +``` +Producer → Schema Registry (validate) → Kafka +Consumer → Kafka → Schema Registry (deserialize) + +Benefits: +- Centralized schema management +- Automatic validation +- Schema evolution enforcement +- Type safety +``` + +## Monitoring & Observability + +### Key Metrics + +| Metric | Alert Threshold | Why It Matters | +|--------|----------------|----------------| +| **Queue depth** | > 1000 (or 5min processing time) | Consumers can't keep up | +| **Consumer lag** (Kafka) | > 100k messages or > 5 min | Consumers falling behind | +| **DLQ depth** | > 10 | Messages failing repeatedly | +| **Processing time p99** | > 5 seconds | Slow processing blocks queue | +| **Error rate** | > 5% | Widespread failures | +| **Redelivery rate** | > 10% | Idempotency issues or transient errors | + +### Consumer Lag Monitoring (Kafka) + +```python +from kafka import KafkaAdminClient, TopicPartition + +admin = KafkaAdminClient(bootstrap_servers=['kafka:9092']) + +def check_consumer_lag(group_id, topic): + # Get committed offsets + committed = admin.list_consumer_group_offsets(group_id) + + # Get latest offsets (highwater mark) + consumer = KafkaConsumer(bootstrap_servers=['kafka:9092']) + partitions = [TopicPartition(topic, p) for p in range(partition_count)] + latest = consumer.end_offsets(partitions) + + # Calculate lag + total_lag = 0 + for partition in partitions: + committed_offset = committed[partition].offset + latest_offset = latest[partition] + lag = latest_offset - committed_offset + total_lag += lag + + if lag > 10000: + alert(f"Partition {partition.partition} lag: {lag}") + + return total_lag + +# Alert if total lag > 100k +if check_consumer_lag('order-processor', 'orders') > 100000: + alert("Consumer lag critical!") +``` + +### Distributed Tracing Across Queues + +```python +from opentelemetry import trace +from opentelemetry.propagate import inject, extract + +tracer = trace.get_tracer(__name__) + +# Producer: Inject trace context +def publish_with_trace(topic, message): + with tracer.start_as_current_span("publish-order") as span: + headers = {} + inject(headers) # Inject trace context into headers + + producer.send( + topic, + value=message, + headers=list(headers.items()) + ) + +# Consumer: Extract trace context +def consume_with_trace(message): + context = extract(dict(message.headers)) + + with tracer.start_as_current_span("process-order", context=context) as span: + process_order(message.value) + span.set_attribute("order.id", message.value['order_id']) + +# Trace spans: API → Producer → Queue → Consumer → DB +# Shows 
end-to-end latency including queue wait time +``` + +## Backpressure & Circuit Breakers + +### Rate Limiting Consumers + +```python +import time +from collections import deque + +class RateLimitedConsumer: + def __init__(self, max_per_second=100): + self.max_per_second = max_per_second + self.requests = deque() + + def consume(self, message): + now = time.time() + + # Remove requests older than 1 second + while self.requests and self.requests[0] < now - 1: + self.requests.popleft() + + # Check rate limit + if len(self.requests) >= self.max_per_second: + sleep_time = 1 - (now - self.requests[0]) + time.sleep(sleep_time) + + self.requests.append(time.time()) + process(message) +``` + +### Circuit Breaker for Downstream Dependencies + +```python +from circuitbreaker import circuit + +@circuit(failure_threshold=5, recovery_timeout=60) +def call_payment_service(order_id, amount): + response = requests.post( + 'https://payment-service/charge', + json={'order_id': order_id, 'amount': amount}, + timeout=5 + ) + + if response.status_code >= 500: + raise ServiceUnavailableError() + + return response.json() + +def process_order(message): + try: + result = call_payment_service(message['order_id'], message['amount']) + # ... continue processing + except CircuitBreakerError: + # Circuit open - don't overwhelm failing service + logger.warning("Payment service circuit open, requeueing message") + raise RetriableError("Circuit breaker open") +``` + +## Advanced Patterns + +### Outbox Pattern (Reliable Publishing) + +**Problem**: How to atomically update database AND publish message? + +```python +# ❌ WRONG: Dual write (can fail between DB and queue) +def create_order(data): + db.execute("INSERT INTO orders (...) VALUES (...)") + producer.send('orders', data) # ❌ If this fails, DB updated but no event + +# ✓ CORRECT: Outbox pattern +def create_order_with_outbox(data): + with db.transaction(): + # 1. Insert order + db.execute("INSERT INTO orders (id, user_id, amount) VALUES (%s, %s, %s)", + (data['id'], data['user_id'], data['amount'])) + + # 2. Insert into outbox (same transaction) + db.execute("INSERT INTO outbox (event_type, payload) VALUES (%s, %s)", + ('order.created', json.dumps(data))) + + # Separate process reads outbox and publishes + +# Outbox processor (separate worker) +def process_outbox(): + while True: + events = db.execute("SELECT * FROM outbox WHERE published_at IS NULL LIMIT 10") + + for event in events: + try: + producer.send(event['event_type'], json.loads(event['payload'])) + db.execute("UPDATE outbox SET published_at = NOW() WHERE id = %s", (event['id'],)) + except Exception as e: + logger.error(f"Failed to publish event {event['id']}: {e}") + # Will retry on next iteration + + time.sleep(1) +``` + +### Saga Pattern (Distributed Transactions) + +See `microservices-architecture` skill for full saga patterns (choreography vs orchestration). 
### Saga Pattern (Distributed Transactions)

See `microservices-architecture` skill for full saga patterns (choreography vs orchestration).

**Quick reference for message-based saga**:

```python
# Order saga coordinator publishes commands
def create_order_saga(order_data):
    saga_id = str(uuid.uuid4())

    # Step 1: Reserve inventory
    producer.send('inventory-commands', {
        'command': 'reserve',
        'saga_id': saga_id,
        'order_id': order_data['order_id'],
        'items': order_data['items']
    })

    # Inventory service responds on 'inventory-events'
    # If success → proceed to step 2
    # If failure → compensate (cancel order)
```

## Security

### Message Encryption

**SQS**: Server-side encryption (SSE) with KMS

```python
sqs.create_queue(
    QueueName='orders-encrypted',
    Attributes={
        'KmsMasterKeyId': 'alias/my-key',  # AWS KMS
        'KmsDataKeyReusePeriodSeconds': '300'
    }
)
```

**Kafka**: Encryption in transit + at rest

```python
# SSL/TLS for in-transit encryption
producer = KafkaProducer(
    bootstrap_servers=['kafka:9093'],
    security_protocol='SSL',
    ssl_cafile='/path/to/ca-cert',
    ssl_certfile='/path/to/client-cert',
    ssl_keyfile='/path/to/client-key'
)

# Encryption at rest (Kafka broker config)
# log.dirs=/encrypted-volume  # Use encrypted EBS volumes
```

### Authentication & Authorization

**SQS**: IAM policies

```json
{
  "Version": "2012-10-17",
  "Statement": [{
    "Effect": "Allow",
    "Principal": {"AWS": "arn:aws:iam::123456789012:role/OrderService"},
    "Action": ["sqs:SendMessage"],
    "Resource": "arn:aws:sqs:us-east-1:123456789012:orders"
  }]
}
```

**Kafka**: SASL/SCRAM authentication

```python
producer = KafkaProducer(
    bootstrap_servers=['kafka:9093'],
    security_protocol='SASL_SSL',
    sasl_mechanism='SCRAM-SHA-512',
    sasl_plain_username='order-service',
    sasl_plain_password='secret'
)
```

**Kafka ACLs** (authorization):

```bash
# Grant order-service permission to write to orders topic
kafka-acls --add \
  --allow-principal User:order-service \
  --operation Write \
  --topic orders
```

## Testing Strategies

### Local Testing

**LocalStack for SQS/SNS**:

```yaml
# docker-compose.yml
services:
  localstack:
    image: localstack/localstack
    environment:
      - SERVICES=sqs,sns
```

```python
# Test code
import boto3

sqs = boto3.client(
    'sqs',
    endpoint_url='http://localhost:4566',  # LocalStack
    region_name='us-east-1'
)

queue_url = sqs.create_queue(QueueName='test-orders')['QueueUrl']
sqs.send_message(QueueUrl=queue_url, MessageBody='test')
```

**Kafka in Docker**:

```yaml
# docker-compose.yml
services:
  zookeeper:
    image: confluentinc/cp-zookeeper:latest
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181

  kafka:
    image: confluentinc/cp-kafka:latest
    ports:
      - "9092:9092"
    environment:
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092
```

### Integration Testing

```python
import json

import pytest
from kafka import KafkaConsumer, KafkaProducer
from testcontainers.kafka import KafkaContainer

@pytest.fixture
def kafka():
    with KafkaContainer() as container:
        yield container.get_bootstrap_server()

def test_order_processing(kafka):
    producer = KafkaProducer(bootstrap_servers=kafka)
    consumer = KafkaConsumer('orders', bootstrap_servers=kafka, auto_offset_reset='earliest')

    # Publish message
    producer.send('orders', value=b'{"order_id": "123"}')
    producer.flush()

    # Consume and verify
    message = next(consumer)
    assert json.loads(message.value)['order_id'] == '123'
```
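The broker round-trip above proves delivery; idempotency itself is cheaper to pin down at the handler level. A sketch, where `handle_order` and `db` are hypothetical stand-ins for your consumer handler and database helper:

```python
def test_duplicate_delivery_is_idempotent():
    message = {'order_id': '123', 'amount': 99.99, 'currency': 'USD'}

    handle_order(message)  # hypothetical handler under test
    handle_order(message)  # simulate at-least-once redelivery

    # Second delivery must not create a second row
    count = db.execute("SELECT COUNT(*) FROM orders WHERE id = %s",
                       (message['order_id'],))[0]
    assert count == 1
```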
### Chaos Engineering

```python
import subprocess
import time

# Test consumer failure recovery
# Assumes `producer`, `test_order`, and `db` come from the surrounding test module
def test_consumer_crash_recovery():
    # Start consumer
    consumer_process = subprocess.Popen(['python', 'consumer.py'])
    time.sleep(2)

    # Publish message
    producer.send('orders', value=test_order)
    producer.flush()

    # Kill consumer mid-processing
    consumer_process.kill()

    # Restart consumer
    consumer_process = subprocess.Popen(['python', 'consumer.py'])
    time.sleep(5)

    # Verify message was reprocessed exactly once (idempotency!)
    assert db.execute("SELECT COUNT(*) FROM orders WHERE id = %s",
                      (test_order['id'],))[0] == 1
```

## Anti-Patterns

| Anti-Pattern | Why Bad | Fix |
|--------------|---------|-----|
| **Auto-ack before processing** | Messages lost on crash | Manual ack after processing |
| **No idempotency** | Duplicates cause data corruption | Unique constraints, locks, or idempotency keys |
| **No DLQ** | Poison messages block queue | Configure DLQ with maxReceiveCount |
| **No monitoring** | Can't detect consumer lag or failures | Monitor lag, depth, error rate |
| **Synchronous message processing** | Low throughput | Batch processing, parallel consumers |
| **Large messages** | Exceeds queue limits, slow transfer | Store in S3, send reference in message (sketched below) |
| **No schema versioning** | Breaking changes break consumers | Use Avro/Protobuf with schema registry |
| **Shared consumer instances** | Race conditions, duplicate processing | Use consumer groups (Kafka) or visibility timeout (SQS) |
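The **Large messages** row points at the claim-check pattern: park the payload in object storage and enqueue only a reference. A minimal sketch, assuming boto3 and a pre-created `order-payloads` bucket (both illustrative):

```python
import json
import uuid

import boto3

s3 = boto3.client('s3')
BUCKET = 'order-payloads'  # assumed: bucket created out of band

def publish_large_message(producer, topic, payload: bytes):
    # Store the payload in S3; the queue only carries a pointer
    key = f'{topic}/{uuid.uuid4()}'
    s3.put_object(Bucket=BUCKET, Key=key, Body=payload)
    producer.send(topic, value=json.dumps(
        {'s3_bucket': BUCKET, 's3_key': key}).encode())

def consume_large_message(message) -> bytes:
    # Resolve the pointer back to the real payload
    ref = json.loads(message.value)
    obj = s3.get_object(Bucket=ref['s3_bucket'], Key=ref['s3_key'])
    return obj['Body'].read()
```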
## Technology-Specific Patterns

### RabbitMQ Exchanges

```python
# Topic exchange for routing
channel.exchange_declare(exchange='orders', exchange_type='topic')

# Bind queues with patterns
channel.queue_bind(exchange='orders', queue='us-orders', routing_key='order.us.*')
channel.queue_bind(exchange='orders', queue='eu-orders', routing_key='order.eu.*')

# Publish with routing key
channel.basic_publish(
    exchange='orders',
    routing_key='order.us.california',  # Goes to us-orders queue
    body=json.dumps(order)
)

# Fanout exchange for pub/sub
channel.exchange_declare(exchange='analytics', exchange_type='fanout')
# All bound queues receive every message
```

### Kafka Connect (Data Integration)

```json
{
  "name": "mysql-source",
  "config": {
    "connector.class": "io.confluent.connect.jdbc.JdbcSourceConnector",
    "connection.url": "jdbc:mysql://localhost:3306/mydb",
    "table.whitelist": "orders",
    "mode": "incrementing",
    "incrementing.column.name": "id",
    "topic.prefix": "mysql-"
  }
}
```

**Use cases**:
- Stream DB changes to Kafka (CDC)
- Sink Kafka to Elasticsearch, S3, databases
- No custom code needed for common integrations

## Batching Optimizations

### Batch Size Tuning

```python
# SQS batch receiving (boto3 client API: receive_message, up to 10 per call)
response = sqs.receive_message(
    QueueUrl=queue_url,
    MaxNumberOfMessages=10,  # Fetch 10 at once
    WaitTimeSeconds=20       # Long polling (reduces empty receives)
)
messages = response.get('Messages', [])

# Process in parallel
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = [executor.submit(process, msg) for msg in messages]
    for future in futures:
        future.result()

# Kafka batch consuming: iterating a KafkaConsumer yields one record at a
# time, so use poll(), which returns {TopicPartition: [records]}
consumer = KafkaConsumer(
    'orders',
    bootstrap_servers=['kafka:9092'],
    max_poll_records=500,  # Fetch up to 500 messages per poll
    fetch_min_bytes=1024   # Wait for at least 1KB before returning
)

while True:
    records = consumer.poll(timeout_ms=1000)
    for partition, batch in records.items():
        batch_process(batch)  # Process up to 500 at once
```

**Batch size tradeoffs**:

| Batch Size | Throughput | Latency | Memory |
|------------|------------|---------|--------|
| 1 | Low | Low | Low |
| 10-100 | Medium | Medium | Medium |
| 500+ | High | High | High |

**Recommendation**: Start with 10-100, increase for higher throughput if latency allows.

## Cross-References

**Related skills**:
- **Microservices communication** → `microservices-architecture` (saga, event-driven)
- **FastAPI async** → `fastapi-development` (consuming queues in FastAPI)
- **REST vs async** → `rest-api-design` (when to use queues vs HTTP)
- **Security** → `ordis-security-architect` (encryption, IAM, compliance)
- **Testing** → `api-testing` (integration testing strategies)

## Further Reading

- **Enterprise Integration Patterns** by Gregor Hohpe (message patterns)
- **Designing Data-Intensive Applications** by Martin Kleppmann (Kafka internals)
- **RabbitMQ in Action** by Alvaro Videla
- **Kafka: The Definitive Guide** by Neha Narkhede, Gwen Shapira, and Todd Palino
- **AWS SQS Best Practices**: https://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSDeveloperGuide/sqs-best-practices.html
diff --git a/skills/using-web-backend/microservices-architecture.md b/skills/using-web-backend/microservices-architecture.md
new file mode 100644
index 0000000..645513e
--- /dev/null
+++ b/skills/using-web-backend/microservices-architecture.md
@@ -0,0 +1,592 @@

# Microservices Architecture

## Overview

**Microservices architecture specialist covering service boundaries, communication patterns, data consistency, and operational concerns.**

**Core principle**: Microservices decompose applications into independently deployable services organized around business capabilities - enabling team autonomy and technology diversity at the cost of operational complexity and distributed system challenges.

## When to Use This Skill

Use when encountering:

- **Service boundaries**: Defining service scope, applying domain-driven design
- **Monolith decomposition**: Strategies for splitting existing systems
- **Data consistency**: Sagas, event sourcing, eventual consistency patterns
- **Communication**: Sync (REST/gRPC) vs async (events/messages)
- **API gateways**: Routing, authentication, rate limiting
- **Service discovery**: Registry patterns, DNS, configuration
- **Resilience**: Circuit breakers, retries, timeouts, bulkheads
- **Observability**: Distributed tracing, logging aggregation, metrics
- **Deployment**: Containers, orchestration, blue-green deployments

**Do NOT use for**:
- Monolithic architectures (microservices aren't always better)
- Single-team projects < 5 services (overhead exceeds benefits)
- Simple CRUD applications (microservices add unnecessary complexity)

## When NOT to Use Microservices

**Stay monolithic if**:
- Team < 10 engineers
- Domain is not well understood yet
- Strong consistency required everywhere
- Network latency is critical
- You can't invest in observability/DevOps infrastructure

**Microservices require**: Mature DevOps, monitoring, distributed systems expertise, organizational support.

## Service Boundary Patterns (Domain-Driven Design)

### 1. Bounded Contexts

**Pattern: One microservice = One bounded context**

```
❌ Too fine-grained (anemic services):
- UserService (just CRUD)
- OrderService (just CRUD)
- PaymentService (just CRUD)

✅ Business capability alignment:
- CustomerManagementService (user profiles, preferences, history)
- OrderFulfillmentService (order lifecycle, inventory, shipping)
- PaymentProcessingService (payment, billing, invoicing, refunds)
```

**Identifying boundaries**:
1. **Ubiquitous language** - Different terms for the same concept = different contexts
2. **Change patterns** - Services that change together should stay together
3. **Team ownership** - One team should own one service
4. **Data autonomy** - Each service owns its data, no shared databases
### 2. Strategic DDD Patterns

| Pattern | Use When | Example |
|---------|----------|---------|
| **Separate Ways** | Contexts are independent | Analytics service, main app service |
| **Partnership** | Teams must collaborate closely | Order + Inventory services |
| **Customer-Supplier** | Upstream/downstream relationship | Payment gateway (upstream) → Order service |
| **Conformist** | Accept upstream model as-is | Third-party API integration |
| **Anti-Corruption Layer** | Isolate from legacy/external systems | ACL between new microservices and legacy monolith |

### 3. Service Sizing Guidelines

**Too small (Nanoservices)**:
- Excessive network calls
- Distributed monolith
- Coordination overhead exceeds benefits

**Too large (Minimonoliths)**:
- Multiple teams modifying same service
- Mixed deployment frequencies
- Tight coupling re-emerges

**Right size indicators**:
- Single team can own it
- Deployable independently
- Changes don't ripple to other services
- Clear business capability
- 100-10,000 LOC (highly variable)

## Communication Patterns

### Synchronous Communication

**REST APIs**:

```python
# Order service calling Payment service
async def create_order(order: Order):
    # Synchronous REST call
    payment = await payment_service.charge(
        amount=order.total,
        customer_id=order.customer_id
    )

    if payment.status == "success":
        order.status = "confirmed"
        await db.save(order)
        return order
    else:
        raise PaymentFailedException()
```

**Pros**: Simple, request-response, easy to debug
**Cons**: Tight coupling, availability dependency, latency cascades

**gRPC**:

```protobuf
// Proto definition
service OrderService {
  rpc CreateOrder (OrderRequest) returns (OrderResponse);
}
```

```python
# Implementation
class OrderServicer(order_pb2_grpc.OrderServiceServicer):
    async def CreateOrder(self, request, context):
        # Type-safe, efficient binary protocol
        payment = await payment_stub.Charge(
            PaymentRequest(amount=request.total)
        )
        return OrderResponse(order_id=order.id)
```

**Pros**: Type-safe, efficient, streaming support
**Cons**: HTTP/2 required, less human-readable, proto dependencies

### Asynchronous Communication

**Event-Driven (Pub/Sub)**:

```python
# Order service publishes event
await event_bus.publish("order.created", {
    "order_id": order.id,
    "customer_id": customer.id,
    "total": order.total
})

# Inventory service subscribes
@event_bus.subscribe("order.created")
async def reserve_inventory(event):
    await inventory.reserve(event["order_id"])
    await event_bus.publish("inventory.reserved", {...})

# Notification service subscribes
@event_bus.subscribe("order.created")
async def send_confirmation(event):
    await email.send_order_confirmation(event)
```

**Pros**: Loose coupling, services independent, scalable
**Cons**: Eventual consistency, harder to trace, ordering challenges

**Message Queues (Point-to-Point)**:

```python
# Producer
await queue.send("payment-processing", {
    "order_id": order.id,
    "amount": order.total
})

# Consumer
@queue.consumer("payment-processing")
async def process_payment(message):
    result = await payment_gateway.charge(message["amount"])
    if result.success:
        await message.ack()
    else:
        await message.nack(requeue=True)
```

**Pros**:
Guaranteed delivery, work distribution, retry handling +**Cons**: Queue becomes bottleneck, requires message broker + +### Communication Pattern Decision Matrix + +| Scenario | Pattern | Why | +|----------|---------|-----| +| User-facing request/response | Sync (REST/gRPC) | Low latency, immediate feedback | +| Background processing | Async (queue) | Don't block user, retry support | +| Cross-service notifications | Async (pub/sub) | Loose coupling, multiple consumers | +| Real-time updates | WebSocket/SSE | Bidirectional, streaming | +| Data replication | Event sourcing | Audit trail, rebuild state | +| High throughput | Async (messaging) | Buffer spikes, backpressure | + +## Data Consistency Patterns + +### 1. Saga Pattern (Distributed Transactions) + +**Choreography (Event-Driven)**: + +```python +# Order Service +async def create_order(order): + order.status = "pending" + await db.save(order) + await events.publish("order.created", order) + +# Payment Service +@events.subscribe("order.created") +async def handle_order(event): + try: + await charge_customer(event["total"]) + await events.publish("payment.completed", event) + except PaymentError: + await events.publish("payment.failed", event) + +# Inventory Service +@events.subscribe("payment.completed") +async def reserve_items(event): + try: + await reserve(event["items"]) + await events.publish("inventory.reserved", event) + except InventoryError: + await events.publish("inventory.failed", event) + +# Order Service (Compensation) +@events.subscribe("payment.failed") +async def cancel_order(event): + order = await db.get(event["order_id"]) + order.status = "cancelled" + await db.save(order) + +@events.subscribe("inventory.failed") +async def refund_payment(event): + await payment.refund(event["order_id"]) + await cancel_order(event) +``` + +**Orchestration (Coordinator)**: + +```python +class OrderSaga: + def __init__(self, order): + self.order = order + self.completed_steps = [] + + async def execute(self): + try: + # Step 1: Reserve inventory + await self.reserve_inventory() + self.completed_steps.append("inventory") + + # Step 2: Process payment + await self.process_payment() + self.completed_steps.append("payment") + + # Step 3: Confirm order + await self.confirm_order() + + except Exception as e: + # Compensate in reverse order + await self.compensate() + raise + + async def compensate(self): + for step in reversed(self.completed_steps): + if step == "inventory": + await inventory_service.release(self.order.id) + elif step == "payment": + await payment_service.refund(self.order.id) +``` + +**Choreography vs Orchestration**: + +| Aspect | Choreography | Orchestration | +|--------|--------------|---------------| +| Coordination | Decentralized (events) | Centralized (orchestrator) | +| Coupling | Loose | Tight to orchestrator | +| Complexity | Distributed across services | Concentrated in orchestrator | +| Tracing | Harder (follow events) | Easier (single coordinator) | +| Failure handling | Implicit (event handlers) | Explicit (orchestrator logic) | +| Best for | Simple workflows | Complex workflows | + +### 2. 
Event Sourcing

**Pattern: Store events, not state**

```python
# Traditional approach (storing state)
class Order:
    id: int
    status: str  # "pending" → "confirmed" → "shipped"
    total: float

# Event sourcing (storing events)
class OrderCreated(Event):
    order_id: int
    total: float

class OrderConfirmed(Event):
    order_id: int

class OrderShipped(Event):
    order_id: int

# Rebuild state from events
def rebuild_order(order_id):
    events = event_store.get_events(order_id)
    order = Order()
    for event in events:
        order.apply(event)  # Apply each event to rebuild state
    return order
```

**Pros**: Complete audit trail, time travel, event replay
**Cons**: Complexity, eventual consistency, schema evolution challenges

### 3. CQRS (Command Query Responsibility Segregation)

**Separate read and write models**:

```python
# Write model (commands)
class CreateOrder:
    async def execute(self, data):  # async so the awaits below are valid
        order = Order(**data)
        await db.save(order)
        await event_bus.publish("order.created", order)

# Read model (projections)
class OrderReadModel:
    # Denormalized for fast reads
    def __init__(self):
        self.cache = {}

    @event_bus.subscribe("order.created")
    async def on_order_created(self, event):
        self.cache[event["order_id"]] = {
            "id": event["order_id"],
            "customer_name": await get_customer_name(event["customer_id"]),
            "status": "pending",
            "total": event["total"]
        }

    def get_order(self, order_id):
        return self.cache.get(order_id)  # Fast read, no joins
```

**Use when**: Read/write patterns differ significantly (e.g., analytics dashboards)

## Resilience Patterns

### 1. Circuit Breaker

```python
from circuitbreaker import circuit

@circuit(failure_threshold=5, recovery_timeout=60)
async def call_payment_service(amount):
    response = await http.post("http://payment-service/charge", json={"amount": amount})
    if response.status >= 500:
        raise PaymentServiceError()
    return response.json()

# Circuit states:
# CLOSED → normal operation
# OPEN → fails fast after threshold
# HALF_OPEN → test if service recovered
```

### 2. Retry with Exponential Backoff

```python
from tenacity import retry, stop_after_attempt, wait_exponential

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=2, max=10)
)
async def call_with_retry(url):
    return await http.get(url)

# Waits between attempts: 2s → 4s (3 attempts total, capped at 10s)
```

### 3. Timeout

```python
import asyncio

async def call_with_timeout(url):
    try:
        return await asyncio.wait_for(
            http.get(url),
            timeout=5.0  # 5 second timeout
        )
    except asyncio.TimeoutError:
        return {"error": "Service timeout"}
```

### 4.
Bulkhead + +**Isolate resources to prevent cascade failures**: + +```python +# Separate thread pools for different services +payment_pool = ThreadPoolExecutor(max_workers=10) +inventory_pool = ThreadPoolExecutor(max_workers=5) + +async def call_payment(): + return await asyncio.get_event_loop().run_in_executor( + payment_pool, + payment_service.call + ) + +# If payment service is slow, it only exhausts payment_pool, +# inventory calls still work +``` + +## API Gateway Pattern + +**Centralized entry point for client requests**: + +``` +Client → API Gateway → [Order, Payment, Inventory services] +``` + +**Responsibilities**: +- Routing requests to services +- Authentication/authorization +- Rate limiting +- Request/response transformation +- Caching +- Logging/monitoring + +**Example (Kong, AWS API Gateway, Nginx)**: + +```yaml +# API Gateway config +routes: + - path: /orders + service: order-service + auth: jwt + ratelimit: 100/minute + + - path: /payments + service: payment-service + auth: oauth2 + ratelimit: 50/minute +``` + +**Backend for Frontend (BFF) Pattern**: + +``` +Web Client → Web BFF → Services +Mobile App → Mobile BFF → Services +``` + +Each client type has optimized gateway. + +## Service Discovery + +### 1. Client-Side Discovery + +```python +# Service registry (Consul, Eureka) +registry = ServiceRegistry("http://consul:8500") + +# Client looks up service +instances = registry.get_instances("payment-service") +instance = load_balancer.choose(instances) +response = await http.get(f"http://{instance.host}:{instance.port}/charge") +``` + +### 2. Server-Side Discovery (Load Balancer) + +``` +Client → Load Balancer → [Service Instance 1, Instance 2, Instance 3] +``` + +**DNS-based**: Kubernetes services, AWS ELB + +## Observability + +### Distributed Tracing + +```python +from opentelemetry import trace + +tracer = trace.get_tracer(__name__) + +async def create_order(order): + with tracer.start_as_current_span("create-order") as span: + span.set_attribute("order.id", order.id) + span.set_attribute("order.total", order.total) + + # Trace propagates to payment service + payment = await payment_service.charge( + amount=order.total, + trace_context=span.context + ) + + span.add_event("payment-completed") + return order +``` + +**Tools**: Jaeger, Zipkin, AWS X-Ray, Datadog APM + +### Log Aggregation + +**Structured logging with correlation IDs**: + +```python +import logging +import uuid + +logger = logging.getLogger(__name__) + +async def handle_request(request): + correlation_id = request.headers.get("X-Correlation-ID") or str(uuid.uuid4()) + + logger.info("Processing request", extra={ + "correlation_id": correlation_id, + "service": "order-service", + "user_id": request.user_id + }) +``` + +**Tools**: ELK stack (Elasticsearch, Logstash, Kibana), Splunk, Datadog + +## Monolith Decomposition Strategies + +### 1. Strangler Fig Pattern + +**Gradually replace monolith with microservices**: + +``` +Phase 1: Monolith handles everything +Phase 2: Extract service, proxy some requests to it +Phase 3: More services extracted, proxy more requests +Phase 4: Monolith retired +``` + +### 2. Branch by Abstraction + +1. Create abstraction layer in monolith +2. Implement new service +3. Gradually migrate code behind abstraction +4. Remove old implementation +5. Extract as microservice + +### 3. Extract by Bounded Context + +Priority order: +1. Services with clear boundaries (authentication, payments) +2. Services changing frequently +3. Services with different scaling needs +4. 
Services with technology mismatches (e.g., Java monolith, Python ML service) + +## Anti-Patterns + +| Anti-Pattern | Why Bad | Fix | +|--------------|---------|-----| +| **Distributed Monolith** | Services share database, deploy together | One DB per service, independent deployment | +| **Nanoservices** | Too fine-grained, excessive network calls | Merge related services, follow DDD | +| **Shared Database** | Tight coupling, schema changes break multiple services | Database per service | +| **Synchronous Chains** | A→B→C→D, latency adds up, cascading failures | Async events, parallelize where possible | +| **Chatty Services** | N+1 calls, excessive network overhead | Batch APIs, caching, coarser boundaries | +| **No Circuit Breakers** | Cascading failures bring down system | Circuit breakers + timeouts + retries | +| **No Distributed Tracing** | Impossible to debug cross-service issues | OpenTelemetry, correlation IDs | + +## Cross-References + +**Related skills**: +- **Message queues** → `message-queues` (RabbitMQ, Kafka patterns) +- **REST APIs** → `rest-api-design` (service interface design) +- **gRPC** → Check if gRPC skill exists +- **Security** → `ordis-security-architect` (service-to-service auth, zero trust) +- **Database** → `database-integration` (per-service databases, migrations) +- **Testing** → `api-testing` (contract testing, integration testing) + +## Further Reading + +- **Building Microservices** by Sam Newman +- **Domain-Driven Design** by Eric Evans +- **Release It!** by Michael Nygard (resilience patterns) +- **Microservices Patterns** by Chris Richardson diff --git a/skills/using-web-backend/rest-api-design.md b/skills/using-web-backend/rest-api-design.md new file mode 100644 index 0000000..7106a14 --- /dev/null +++ b/skills/using-web-backend/rest-api-design.md @@ -0,0 +1,523 @@ + +# REST API Design + +## Overview + +**REST API design specialist covering resource modeling, HTTP semantics, versioning, pagination, and API evolution.** + +**Core principle**: REST is an architectural style based on resources, HTTP semantics, and stateless communication. Good REST API design makes resources discoverable, operations predictable, and evolution manageable. + +## When to Use This Skill + +Use when encountering: + +- **Resource modeling**: Designing URL structures, choosing singular vs plural, handling relationships +- **HTTP methods**: GET, POST, PUT, PATCH, DELETE semantics and idempotency +- **Status codes**: Choosing correct 2xx, 4xx, 5xx codes +- **Versioning**: URI vs header versioning, managing API evolution +- **Pagination**: Offset, cursor, or page-based pagination strategies +- **Filtering/sorting**: Query parameter design for collections +- **Error responses**: Standardized error formats +- **HATEOAS**: Hypermedia-driven APIs and discoverability + +**Do NOT use for**: +- GraphQL API design → `graphql-api-design` +- Framework-specific implementation → `fastapi-development`, `django-development`, `express-development` +- Authentication patterns → `api-authentication` + +## Quick Reference - HTTP Methods + +| Method | Semantics | Idempotent? | Safe? 
| Request Body | Response Body | +|--------|-----------|-------------|-------|--------------|---------------| +| GET | Retrieve resource | ✅ Yes | ✅ Yes | ❌ No | ✅ Yes | +| POST | Create resource | ❌ No | ❌ No | ✅ Yes | ✅ Yes | +| PUT | Replace resource | ✅ Yes | ❌ No | ✅ Yes | ✅ Optional | +| PATCH | Partial update | ❌ No* | ❌ No | ✅ Yes | ✅ Optional | +| DELETE | Remove resource | ✅ Yes | ❌ No | ❌ Optional | ✅ Optional | +| HEAD | Retrieve headers | ✅ Yes | ✅ Yes | ❌ No | ❌ No | +| OPTIONS | Supported methods | ✅ Yes | ✅ Yes | ❌ No | ✅ Yes | + +*PATCH can be designed to be idempotent but often isn't + +## Quick Reference - Status Codes + +| Code | Meaning | Use When | +|------|---------|----------| +| 200 OK | Success | GET, PUT, PATCH succeeded with response body | +| 201 Created | Resource created | POST created new resource | +| 202 Accepted | Async processing | Request accepted, processing continues async | +| 204 No Content | Success, no body | DELETE succeeded, PUT/PATCH succeeded without response | +| 400 Bad Request | Invalid input | Validation failed, malformed request | +| 401 Unauthorized | Authentication failed | Missing or invalid credentials | +| 403 Forbidden | Authorization failed | User authenticated but lacks permission | +| 404 Not Found | Resource missing | Resource doesn't exist | +| 409 Conflict | State conflict | Resource already exists, version conflict | +| 422 Unprocessable Entity | Semantic error | Valid syntax but business logic failed | +| 429 Too Many Requests | Rate limited | User exceeded rate limit | +| 500 Internal Server Error | Server error | Unexpected server failure | +| 503 Service Unavailable | Temporary outage | Maintenance, overload | + +## Resource Modeling Patterns + +### 1. URL Structure + +**✅ Good patterns**: + +``` +GET /users # List users +POST /users # Create user +GET /users/{id} # Get specific user +PUT /users/{id} # Replace user +PATCH /users/{id} # Update user +DELETE /users/{id} # Delete user + +GET /users/{id}/orders # User's orders (nested resource) +POST /users/{id}/orders # Create order for user +GET /orders/{id} # Get specific order (top-level for direct access) + +GET /search/users?q=john # Search endpoint +``` + +**❌ Anti-patterns**: + +``` +GET /getUsers # Verb in URL (use HTTP method instead) +POST /users/create # Redundant verb +GET /users/123/delete # DELETE operation via GET +POST /api?action=createUser # RPC-style, not REST +GET /users/{id}/orders/{id} # Ambiguous - which {id}? +``` + +### 2. Singular vs Plural + +**Convention: Use plural for collections, even for single-item endpoints** + +``` +✅ /users/{id} # Consistent plural +✅ /orders/{id} # Consistent plural + +❌ /user/{id} # Inconsistent singular +❌ /users/{id}/order/{id} # Mixed singular/plural +``` + +**Exception**: Non-countable resources can be singular + +``` +✅ /me # Current user context +✅ /config # Application config (single resource) +✅ /health # Health check endpoint +``` + +### 3. 
Nested Resources vs Top-Level

**Nested when showing relationship**:

```
GET /users/{userId}/orders          # "Orders belonging to this user"
POST /users/{userId}/orders         # "Create order for this user"
```

**Top-level when resource has independent identity**:

```
GET /orders/{orderId}               # Direct access to order
DELETE /orders/{orderId}            # Delete order directly
```

**Guidelines**:
- Nest ≤ 2 levels deep (`/users/{id}/orders/{id}` is max)
- Provide top-level access for resources that exist independently
- Use query parameters for filtering instead of deep nesting

```
✅ GET /orders?userId=123                    # Better than /users/123/orders/{id}
❌ GET /users/{id}/orders/{id}/items/{id}    # Too deep
```

## Pagination Patterns

### Offset Pagination

**Good for**: Small datasets, page numbers, SQL databases

```
GET /users?limit=20&offset=40

Response:
{
  "data": [...],
  "pagination": {
    "limit": 20,
    "offset": 40,
    "total": 1000,
    "hasMore": true
  }
}
```

**Pros**: Simple, allows jumping to any page
**Cons**: Performance degrades with large offsets, inconsistent with concurrent modifications

### Cursor Pagination

**Good for**: Large datasets, real-time data, NoSQL databases

```
GET /users?limit=20&after=eyJpZCI6MTIzfQ

Response:
{
  "data": [...],
  "pagination": {
    "nextCursor": "eyJpZCI6MTQzfQ",
    "hasMore": true
  }
}
```

**Pros**: Consistent results, efficient for large datasets
**Cons**: Can't jump to arbitrary page, cursors are opaque

### Page-Based Pagination

**Good for**: UIs with page numbers

```
GET /users?page=3&pageSize=20

Response:
{
  "data": [...],
  "pagination": {
    "page": 3,
    "pageSize": 20,
    "totalPages": 50,
    "totalCount": 1000
  }
}
```

**Choice matrix**:

| Use Case | Pattern |
|----------|---------|
| Admin dashboards, small datasets | Offset or Page |
| Infinite scroll feeds | Cursor |
| Real-time data (chat, notifications) | Cursor |
| Need page numbers in UI | Page |
| Large datasets (millions of rows) | Cursor |
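An opaque cursor is usually just an encoded keyset position; the example cursor above, `eyJpZCI6MTIzfQ`, is base64 for `{"id":123}`. A minimal sketch of issuing and consuming such cursors, assuming integer primary keys and an illustrative `db` helper:

```python
import base64
import json

def encode_cursor(last_id):
    return base64.urlsafe_b64encode(json.dumps({'id': last_id}).encode()).decode()

def decode_cursor(cursor):
    return json.loads(base64.urlsafe_b64decode(cursor))['id']

def list_users(limit=20, after=None):
    last_id = decode_cursor(after) if after else 0

    # Keyset query: seek past the cursor instead of OFFSET-scanning
    rows = db.execute(
        "SELECT * FROM users WHERE id > %s ORDER BY id LIMIT %s",
        (last_id, limit + 1),  # fetch one extra row to compute hasMore
    )

    has_more = len(rows) > limit
    rows = rows[:limit]
    next_cursor = encode_cursor(rows[-1]['id']) if has_more else None
    return {'data': rows,
            'pagination': {'nextCursor': next_cursor, 'hasMore': has_more}}
```

Because the query seeks on the indexed key, page N+1 costs the same as page 1, which is why the choice matrix favors cursors for large datasets.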
## Filtering and Sorting

### Query Parameter Conventions

```
GET /users?status=active&role=admin          # Simple filtering
GET /users?createdAfter=2024-01-01           # Date filtering
GET /users?search=john                       # Full-text search
GET /users?sort=createdAt&order=desc         # Sorting
GET /users?sort=-createdAt                   # Alternative: prefix for descending
GET /users?fields=id,name,email              # Sparse fieldsets
GET /users?include=orders,profile            # Relationship inclusion
```

### Advanced Filtering Patterns

**LHS Brackets (Rails-style)**:

```
GET /users?filter[status]=active&filter[role]=admin
```

**RHS Colon (JSON API style)**:

```
GET /users?filter=status:active,role:admin
```

**Comparison operators**:

```
GET /products?price[gte]=100&price[lte]=500   # Price between 100-500
GET /users?createdAt[gt]=2024-01-01           # Created after date
```

## API Versioning Strategies

### 1. URI Versioning

```
GET /v1/users
GET /v2/users
```

**Pros**: Explicit, easy to route, clear in logs
**Cons**: Violates REST principles (resource identity changes), URL proliferation

**Best for**: Public APIs, major breaking changes

### 2. Header Versioning

```
GET /users
Accept: application/vnd.myapi.v2+json
```

**Pros**: Clean URLs, follows REST principles
**Cons**: Less visible, harder to test in browser

**Best for**: Internal APIs, clients with header control

### 3. Query Parameter Versioning

```
GET /users?version=2
```

**Pros**: Easy to test, optional (can default to latest)
**Cons**: Pollutes query parameters, not semantic

**Best for**: Minor version variants, opt-in features

### Version Deprecation Process

1. **Announce**: Document deprecation timeline (6-12 months recommended)
2. **Warn**: Add `Deprecated` header to responses
3. **Sunset**: Add `Sunset` header with end date (RFC 8594)
4. **Migrate**: Provide migration guides and tooling
5. **Remove**: After sunset date, return 410 Gone

```
HTTP/1.1 200 OK
Deprecated: true
Sunset: Sat, 31 Dec 2024 23:59:59 GMT
Link: <https://api.example.com/v2/users>; rel="successor-version"
```

## Error Response Format

**Standard JSON error format**:

```json
{
  "error": {
    "code": "VALIDATION_ERROR",
    "message": "One or more fields failed validation",
    "details": [
      {
        "field": "email",
        "message": "Invalid email format",
        "code": "INVALID_FORMAT"
      },
      {
        "field": "age",
        "message": "Must be at least 18",
        "code": "OUT_OF_RANGE"
      }
    ],
    "requestId": "req_abc123",
    "timestamp": "2024-11-14T10:30:00Z"
  }
}
```

**Problem Details (RFC 7807)**:

```json
{
  "type": "https://api.example.com/errors/validation-error",
  "title": "Validation Error",
  "status": 400,
  "detail": "The request body contains invalid data",
  "instance": "/users",
  "invalid-params": [
    {
      "name": "email",
      "reason": "Invalid email format"
    }
  ]
}
```

## HATEOAS (Hypermedia)

**Level 3 REST includes hypermedia links**:

```json
{
  "id": 123,
  "name": "John Doe",
  "status": "active",
  "_links": {
    "self": { "href": "/users/123" },
    "orders": { "href": "/users/123/orders" },
    "deactivate": {
      "href": "/users/123/deactivate",
      "method": "POST"
    }
  }
}
```

**Benefits**:
- Self-documenting API
- Clients discover available actions
- Server controls workflow
- Reduces client-server coupling

**Tradeoffs**:
- Increased response size
- Complexity for simple APIs
- Limited client library support

**When to use**: Complex workflows, long-lived APIs, discoverability requirements

## Idempotency Keys

**For POST operations that should be safely retryable**:

```
POST /orders
Idempotency-Key: key_abc123xyz

{
  "items": [...],
  "total": 99.99
}
```

**Server behavior**:
1. First request with key → Process and store result
2. Duplicate request with same key → Return stored result (do not reprocess)
3. Different request with same key → Return 409 Conflict

**Implementation**:

```python
import json

from fastapi import Header
from fastapi.responses import JSONResponse

@app.post("/orders")
def create_order(order: Order, idempotency_key: str = Header(None)):
    if idempotency_key:
        # Check if key was used before
        cached = redis.get(f"idempotency:{idempotency_key}")
        if cached:
            return JSONResponse(content=json.loads(cached), status_code=200)

    # Process order
    result = process_order(order)

    if idempotency_key:
        # Cache result for 24 hours (store as JSON string)
        redis.setex(f"idempotency:{idempotency_key}", 86400, json.dumps(result))

    return result
```

## API Evolution Patterns

### Adding Fields (Non-Breaking)

**✅ Safe changes**:
- Add optional request fields
- Add response fields
- Add new endpoints
- Add new query parameters

**Client requirements**: Ignore unknown fields

### Removing Fields (Breaking)

**Strategies**:
1. **Deprecation period**: Mark field as deprecated, remove in next major version
2. **Versioning**: Create v2 without field
3.
**Optional → Required**: Never safe, always breaking + +### Changing Field Types (Breaking) + +**❌ Breaking**: +- String → Number +- Number → String +- Boolean → String +- Flat → Nested object + +**✅ Non-breaking**: +- Number → String (if client coerces) +- Adding nullability (required → optional) + +**Strategy**: Add new field with correct type, deprecate old field + +## Richardson Maturity Model + +| Level | Description | Example | +|-------|-------------|---------| +| 0 | POX (Plain Old XML) | Single endpoint, all operations via POST | +| 1 | Resources | Multiple endpoints, still using POST for everything | +| 2 | HTTP Verbs | Proper HTTP methods (GET, POST, PUT, DELETE) | +| 3 | Hypermedia (HATEOAS) | Responses include links to related resources | + +**Most APIs target Level 2** (HTTP verbs + status codes). +**Level 3 is optional** but valuable for complex domains. + +## Common Anti-Patterns + +| Anti-Pattern | Why Bad | Fix | +|--------------|---------|-----| +| Verbs in URLs (`/createUser`) | Not RESTful, redundant with HTTP methods | Use POST /users | +| GET with side effects | Violates HTTP semantics, not safe | Use POST/PUT/DELETE | +| POST for everything | Loses HTTP semantics, not idempotent | Use appropriate method | +| 200 for errors | Breaks HTTP contract | Use correct 4xx/5xx codes | +| Deeply nested URLs | Hard to navigate, brittle | Max 2 levels, use query params | +| Binary response flags | Unclear semantics | Use proper HTTP status codes | +| Timestamps without timezone | Ambiguous | Use ISO 8601 with timezone | +| Pagination without total | Can't show "Page X of Y" | Include total count or hasMore | + +## Best Practices Checklist + +**Resource Design**: +- [ ] Resources are nouns, not verbs +- [ ] Plural names for collections +- [ ] Max 2 levels of nesting +- [ ] Consistent naming conventions (snake_case or camelCase) + +**HTTP Semantics**: +- [ ] Correct HTTP methods for operations +- [ ] Proper status codes (not just 200/500) +- [ ] Idempotent operations are actually idempotent +- [ ] GET/HEAD have no side effects + +**API Evolution**: +- [ ] Versioning strategy defined +- [ ] Backward compatibility maintained within version +- [ ] Deprecation headers for sunset features +- [ ] Migration guides for breaking changes + +**Error Handling**: +- [ ] Consistent error response format +- [ ] Detailed field-level validation errors +- [ ] Request IDs for tracing +- [ ] Human-readable error messages + +**Performance**: +- [ ] Pagination for large collections +- [ ] ETags for caching +- [ ] Gzip compression enabled +- [ ] Rate limiting implemented + +## Cross-References + +**Related skills**: +- **GraphQL alternative** → `graphql-api-design` +- **FastAPI implementation** → `fastapi-development` +- **Django implementation** → `django-development` +- **Express implementation** → `express-development` +- **Authentication** → `api-authentication` +- **API testing** → `api-testing` +- **API documentation** → `api-documentation` or `muna-technical-writer` +- **Security** → `ordis-security-architect` (OWASP API Security) + +## Further Reading + +- **REST Dissertation**: Roy Fielding's original thesis +- **RFC 7807**: Problem Details for HTTP APIs +- **RFC 8594**: Sunset HTTP Header +- **JSON:API**: Opinionated REST specification +- **OpenAPI 3.0**: API documentation standard