Initial commit
This commit is contained in:
@@ -0,0 +1,135 @@
|
||||
{
|
||||
"name": "Architecture Decision Record Quality Rubric",
|
||||
"scale": {
|
||||
"min": 1,
|
||||
"max": 5,
|
||||
"description": "1=Poor, 2=Fair, 3=Good, 4=Very Good, 5=Excellent"
|
||||
},
|
||||
"criteria": [
|
||||
{
|
||||
"name": "Context Clarity",
|
||||
"description": "Context section clearly explains WHY this decision is needed, without proposing solutions",
|
||||
"scoring": {
|
||||
"1": "No context or context is vague/unhelpful",
|
||||
"2": "Some context but missing key requirements or constraints",
|
||||
"3": "Context explains situation with main requirements/constraints",
|
||||
"4": "Comprehensive context with background, requirements, and constraints",
|
||||
"5": "Exceptional context that future readers with no knowledge can fully understand"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Decision Specificity",
|
||||
"description": "Decision statement is specific, actionable, and unambiguous",
|
||||
"scoring": {
|
||||
"1": "Vague or no clear decision stated",
|
||||
"2": "Decision stated but lacks specifics (versions, scope, approach)",
|
||||
"3": "Decision is clear with main specifics",
|
||||
"4": "Decision is very specific with technical details and scope",
|
||||
"5": "Exceptionally detailed decision with configuration, versions, scope, and implementation approach"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Alternatives Quality",
|
||||
"description": "Real alternatives documented with honest, balanced pros/cons",
|
||||
"scoring": {
|
||||
"1": "No alternatives or only straw man options",
|
||||
"2": "1-2 alternatives but unfairly presented or minimal analysis",
|
||||
"3": "2-3 alternatives with basic pros/cons",
|
||||
"4": "3+ alternatives with honest, balanced analysis and specific reasons not chosen",
|
||||
"5": "Multiple well-researched alternatives with nuanced trade-offs and fair representation"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Consequence Honesty",
|
||||
"description": "Consequences include both benefits AND drawbacks with realistic assessment",
|
||||
"scoring": {
|
||||
"1": "Only benefits listed or consequences are vague",
|
||||
"2": "Mostly benefits with token mention of downsides",
|
||||
"3": "Balanced benefits and drawbacks but somewhat general",
|
||||
"4": "Honest assessment of benefits, drawbacks, and risks with specifics",
|
||||
"5": "Exceptionally honest and nuanced consequences with quantified trade-offs and mitigation strategies"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Technical Accuracy",
|
||||
"description": "Technical details are accurate, current, and specific",
|
||||
"scoring": {
|
||||
"1": "Technical errors or outdated information",
|
||||
"2": "Some technical details but lacking accuracy or currency",
|
||||
"3": "Technically sound with accurate information",
|
||||
"4": "High technical accuracy with specific versions, configurations, and current best practices",
|
||||
"5": "Exceptional technical depth with precise details, performance characteristics, and expert-level accuracy"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Future Comprehension",
|
||||
"description": "Someone unfamiliar with current context can understand the decision",
|
||||
"scoring": {
|
||||
"1": "Requires insider knowledge to understand",
|
||||
"2": "Some context but many gaps for outsiders",
|
||||
"3": "Mostly understandable with some background",
|
||||
"4": "Clear to future readers with minimal context needed",
|
||||
"5": "Perfectly self-contained; any future reader can fully understand"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Trade-off Transparency",
|
||||
"description": "Trade-offs are explicitly stated and downsides acknowledged",
|
||||
"scoring": {
|
||||
"1": "No acknowledgment of trade-offs or downsides",
|
||||
"2": "Minimal mention of trade-offs",
|
||||
"3": "Trade-offs mentioned but not deeply explored",
|
||||
"4": "Clear articulation of trade-offs and what's being sacrificed",
|
||||
"5": "Exceptional transparency about trade-offs with explicit acceptance of costs"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Structure and Organization",
|
||||
"description": "ADR follows standard structure and is well-organized",
|
||||
"scoring": {
|
||||
"1": "No clear structure or missing major sections",
|
||||
"2": "Basic structure but disorganized or incomplete sections",
|
||||
"3": "Follows standard ADR format with all key sections",
|
||||
"4": "Well-organized with clear sections and good flow",
|
||||
"5": "Exemplary structure with logical flow, clear headings, and easy navigation"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Actionability",
|
||||
"description": "Decision is implementable; clear what to do next",
|
||||
"scoring": {
|
||||
"1": "Not clear what action to take",
|
||||
"2": "General direction but unclear how to implement",
|
||||
"3": "Clear decision that can be implemented",
|
||||
"4": "Actionable decision with implementation guidance",
|
||||
"5": "Exceptionally actionable with rollout plan, success criteria, and next steps"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "Appropriate Scope",
|
||||
"description": "ADR covers one decision at appropriate level of detail",
|
||||
"scoring": {
|
||||
"1": "Too broad (multiple unrelated decisions) or too narrow (trivial)",
|
||||
"2": "Scope issues but decision is identifiable",
|
||||
"3": "Appropriate scope for a single significant decision",
|
||||
"4": "Well-scoped decision with clear boundaries",
|
||||
"5": "Perfect scope; focused on one decision with appropriate detail level"
|
||||
}
|
||||
}
|
||||
],
|
||||
"overall_assessment": {
|
||||
"thresholds": {
|
||||
"excellent": "Average score ≥ 4.5 (high-stakes decisions should aim for this)",
|
||||
"very_good": "Average score ≥ 4.0 (most ADRs should achieve this)",
|
||||
"good": "Average score ≥ 3.5 (minimum for acceptance)",
|
||||
"acceptable": "Average score ≥ 3.0 and < 3.5 (below the acceptance minimum; usable as a draft but needs improvement)",
|
||||
"needs_rework": "Average score < 3.0 (should be revised before finalizing)"
|
||||
},
|
||||
"decision_stakes_guidance": {
|
||||
"low_stakes": "Reversible decisions, low cost to change: aim for ≥ 3.5",
|
||||
"medium_stakes": "Some migration cost, affects multiple teams: aim for ≥ 4.0",
|
||||
"high_stakes": "Expensive to reverse, organization-wide impact: aim for ≥ 4.5"
|
||||
}
|
||||
},
|
||||
"usage_instructions": "Rate each criterion independently on the 1-5 scale, then calculate the average score. For high-stakes decisions (affecting the entire organization, expensive to reverse), aim for an average ≥ 4.5. For medium-stakes decisions, aim for ≥ 4.0. For low-stakes decisions, aim for ≥ 3.5, which is also the minimum average for acceptance. Identify the lowest-scoring criteria and improve those sections before delivering the ADR to the user."
|
||||
}
|
||||
285
skills/adr-architecture/resources/examples/database-selection.md
Normal file
285
skills/adr-architecture/resources/examples/database-selection.md
Normal file
@@ -0,0 +1,285 @@
|
||||
# ADR-042: Use PostgreSQL for Primary Application Database
|
||||
|
||||
**Status:** Accepted
|
||||
**Date:** 2024-01-15
|
||||
**Deciders:** Backend team (Sarah, James, Alex), CTO (Michael), DevOps lead (Christine)
|
||||
**Related ADRs:** ADR-015 (Data Model Design), ADR-051 (Read Replica Strategy - pending)
|
||||
|
||||
## Context
|
||||
|
||||
### Background
|
||||
Our new SaaS platform for project management is scheduled to launch Q2 2024. We need to select a primary database that will store user data, projects, tasks, and collaboration information for the next 3-5 years.
|
||||
|
||||
Current situation:
|
||||
- Prototype uses SQLite (clearly insufficient for production)
|
||||
- Expected launch: 500 organizations, ~5,000 users
|
||||
- Growth projection: 10,000 organizations, ~100,000 users within 18 months
|
||||
- Data model is relational with complex queries (projects → tasks → subtasks → comments → attachments)
|
||||
|
||||
### Requirements
|
||||
|
||||
**Functional:**
|
||||
- Support for complex relational queries with JOINs across 4-6 tables
|
||||
- ACID transactions (critical for billing and permissions)
|
||||
- Full-text search across project content
|
||||
- JSON support for flexible metadata fields
|
||||
- Row-level security for multi-tenant isolation
|
||||
|
||||
**Non-Functional:**
|
||||
- Handle 10,000 QPS at launch (mostly reads)
|
||||
- < 100ms p95 latency for queries
|
||||
- 99.9% uptime SLA
|
||||
- Support for read replicas (anticipated need at 50k+ QPS)
|
||||
- Point-in-time recovery for disaster recovery
|
||||
|
||||
### Constraints
|
||||
- Budget: $5,000/month maximum for database infrastructure
|
||||
- Team expertise: Strong SQL experience, limited NoSQL experience
|
||||
- Timeline: Must finalize in 2 weeks to stay on schedule
|
||||
- Compliance: SOC 2 Type II required (data encryption at rest/transit)
|
||||
- Existing stack: Node.js backend, React frontend, deploying on AWS
|
||||
|
||||
## Decision
|
||||
|
||||
We will use **PostgreSQL 15+** as our primary application database, hosted on AWS RDS with the following configuration:
|
||||
|
||||
**Infrastructure:**
|
||||
- AWS RDS PostgreSQL 15.x
|
||||
- Initially: db.r6g.xlarge instance (4 vCPU, 32GB RAM)
|
||||
- Multi-AZ deployment for high availability
|
||||
- Automated daily backups with 7-day retention
|
||||
- Point-in-time recovery enabled
|
||||
|
||||
**Architecture:**
|
||||
- Single primary database initially
|
||||
- Prepared for read replicas when QPS exceeds 40k (anticipated 12-18 months)
|
||||
- Connection pooling via PgBouncer (deployed on application servers)
|
||||
- Row-Level Security (RLS) policies for multi-tenancy
|
||||
|
||||
**Scope:**
|
||||
- All application data (users, organizations, projects, tasks)
|
||||
- Session storage (using pgSession)
|
||||
- Background job queue (using pg-boss)
|
||||
- Excludes: Analytics data (separate data warehouse), file metadata (DynamoDB)
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### MySQL 8.0
|
||||
**Description:** Popular open-source relational database, strong AWS RDS support
|
||||
|
||||
**Pros:**
|
||||
- Team has some MySQL experience
|
||||
- Excellent AWS RDS integration
|
||||
- Strong replication support
|
||||
- Lower cost than commercial databases
|
||||
|
||||
**Cons:**
|
||||
- Weaker JSON support compared to PostgreSQL (JSON functions less mature)
|
||||
- Less robust constraint enforcement (e.g., CHECK constraints are only enforced from 8.0.16 onward; no deferrable or exclusion constraints)
|
||||
- Full-text search less powerful than PostgreSQL's
|
||||
- InnoDB gap and next-key locking (under the default REPEATABLE READ isolation) can cause lock contention and deadlocks under high write concurrency
|
||||
|
||||
**Why not chosen:** PostgreSQL's superior JSON support is critical for our flexible metadata requirements. Our data model has complex constraints that PostgreSQL handles more elegantly.
|
||||
|
||||
### MongoDB Atlas
|
||||
**Description:** Managed NoSQL document database with flexible schema
|
||||
|
||||
**Pros:**
|
||||
- Excellent horizontal scalability
|
||||
- Flexible schema for evolving data model
|
||||
- Strong JSON/document support
|
||||
- Good full-text search
|
||||
|
||||
**Cons:**
|
||||
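- Multi-document ACID transactions exist (MongoDB 4.0+) but incur a greater performance cost than single-document writes and are not the idiomatic access pattern (critical for our billing logic)
|
||||
- Team has limited NoSQL experience (learning curve risk)
|
||||
- Reads from secondaries are eventually consistent; our requirements would force all critical reads to the primary
|
||||
- JOIN-like operations ($lookup) are slow and cumbersome
|
||||
- More expensive at our scale (~$7k/month vs $3k for PostgreSQL)
|
||||
|
||||
**Why not chosen:** Billing and permission changes depend on frequent multi-document ACID transactions, which MongoDB supports but treats as the exception rather than the default path; that mismatch is a dealbreaker. Our relational data model doesn't fit the document paradigm well.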
|
||||
|
||||
### Amazon Aurora PostgreSQL
|
||||
**Description:** AWS's PostgreSQL-compatible database with performance enhancements
|
||||
|
||||
**Pros:**
|
||||
- PostgreSQL compatibility with AWS optimizations
|
||||
- Better read scaling (15 read replicas vs 5)
|
||||
- Faster failover (< 30s vs 60-120s)
|
||||
- Continuous backup to S3
|
||||
|
||||
**Cons:**
|
||||
- 20-30% more expensive than RDS PostgreSQL
|
||||
- Some PostgreSQL extensions not supported
|
||||
- Vendor lock-in to AWS (harder to migrate to other clouds)
|
||||
- Adds complexity we don't need yet
|
||||
|
||||
**Why not chosen:** Premium cost not justified at our current scale. Standard RDS PostgreSQL meets our needs. We can migrate to Aurora later if needed (minimal code changes).
|
||||
|
||||
### CockroachDB
|
||||
**Description:** Distributed SQL database with PostgreSQL compatibility
|
||||
|
||||
**Pros:**
|
||||
- Horizontal scalability built-in
|
||||
- Multi-region support for global deployment
|
||||
- PostgreSQL wire protocol compatibility
|
||||
- Strong consistency guarantees
|
||||
|
||||
**Cons:**
|
||||
- Significantly more complex to operate (distributed systems expertise needed)
|
||||
- Higher latency for single-region workloads (consensus overhead)
|
||||
- Limited ecosystem compared to PostgreSQL
|
||||
- Team has zero distributed database experience
|
||||
- More expensive (~2-3x cost of RDS PostgreSQL)
|
||||
|
||||
**Why not chosen:** Operational complexity far exceeds our current needs. We're a single-region deployment for the foreseeable future. Can revisit if we expand globally.
|
||||
|
||||
## Consequences
|
||||
|
||||
### Benefits
|
||||
|
||||
**Strong Data Integrity:**
|
||||
- ACID transactions ensure billing accuracy and permission consistency
|
||||
- Robust constraint enforcement catches data errors at write-time
|
||||
- Foreign keys prevent orphaned records
|
||||
|
||||
**Excellent Query Capabilities:**
|
||||
- Complex JOINs perform well with proper indexing
|
||||
- Window functions enable sophisticated analytics
|
||||
- CTEs (Common Table Expressions) simplify complex query logic
|
||||
- Full-text search with GIN indexes for project content search
|
||||
|
||||
**JSON Flexibility:**
|
||||
- JSONB type allows flexible metadata without schema migrations
|
||||
- JSON operators enable querying nested structures efficiently
|
||||
- Balances schema enforcement (relations) with flexibility (JSON)
|
||||
|
||||
**Team Productivity:**
|
||||
- Team's SQL expertise means fast development velocity
|
||||
- Mature ORM support (Sequelize, TypeORM) accelerates development
|
||||
- Extensive community resources and documentation
|
||||
- Familiar debugging and optimization tools
|
||||
|
||||
**Operational Maturity:**
|
||||
- AWS RDS handles backups, patching, monitoring automatically
|
||||
- Point-in-time recovery provides disaster recovery
|
||||
- Multi-AZ deployment ensures high availability
|
||||
- Well-understood scaling path (read replicas, connection pooling)
|
||||
|
||||
**Cost Efficiency:**
|
||||
- Estimated $3,000/month at launch scale (db.r6g.xlarge + storage)
|
||||
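- Scales to ~$8,000/month with read replicas (at 100k users), which exceeds the current $5k/month budget constraint and will require budget approval before replicas are added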
|
||||
- Well within $5k/month budget initially
|
||||
|
||||
### Drawbacks
|
||||
|
||||
**Vertical Scaling Limits:**
|
||||
- Single primary database limits write throughput to one instance
|
||||
- At ~50-60k QPS, will need read replicas (adds operational complexity)
|
||||
- Ultimate write limit around 100k QPS even with largest instance
|
||||
- Mitigation: Implement caching (Redis) for read-heavy workloads
|
||||
|
||||
**Sharding Complexity:**
|
||||
- Horizontal partitioning (sharding) is manual and complex
|
||||
- If we exceed single-instance limits, migration to sharded setup is expensive
|
||||
- Not as straightforward as DynamoDB or Cassandra for horizontal scaling
|
||||
- Mitigation: Monitor growth carefully; consider Aurora or CockroachDB if needed
|
||||
|
||||
**Replication Lag:**
|
||||
- Read replicas have eventual consistency (typically 10-100ms lag)
|
||||
- Application must handle stale reads if using replicas
|
||||
- Some queries must route to primary for consistency
|
||||
- Mitigation: Use replicas only for analytics and non-critical reads
|
||||
|
||||
**Backup Window:**
|
||||
- Automated backups cause brief I/O pause (usually < 5s)
|
||||
- Scheduled during low-traffic window (3-4 AM PST)
|
||||
- Multi-AZ deployment minimizes impact
|
||||
- Mitigation: Accept brief latency spike during backup window
|
||||
|
||||
### Risks
|
||||
|
||||
**Performance Bottleneck:**
|
||||
- **Risk:** Single database becomes bottleneck before we implement read replicas
|
||||
- **Likelihood:** Medium (depends on growth rate)
|
||||
- **Mitigation:** Implement aggressive caching (Redis) for frequently accessed data; monitor QPS weekly; prepare read replica configuration in advance
|
||||
|
||||
**Data Migration Challenges:**
|
||||
- **Risk:** If we need to migrate to different database, data size makes migration slow
|
||||
- **Likelihood:** Low (PostgreSQL should serve us for 3-5 years)
|
||||
- **Mitigation:** Regularly test backup/restore procedures; maintain clear data export processes
|
||||
|
||||
**Team Scaling:**
|
||||
- **Risk:** As team grows, need to train new hires on PostgreSQL specifics (RLS, JSONB)
|
||||
- **Likelihood:** High (we plan to grow team)
|
||||
- **Mitigation:** Document database patterns; create onboarding materials; conduct code reviews
|
||||
|
||||
### Trade-offs Accepted
|
||||
|
||||
**Trading horizontal scalability for operational simplicity:** We're choosing a database that's simple to operate now but harder to scale horizontally later, accepting that we may need to re-architect in 3-5 years if we grow beyond single-instance limits.
|
||||
|
||||
**Trading NoSQL flexibility for data integrity:** We're prioritizing ACID guarantees and relational integrity over schema flexibility, accepting that schema migrations will be required for data model changes.
|
||||
|
||||
**Trading vendor portability for convenience:** AWS RDS lock-in is acceptable given the operational benefits. We could migrate to other managed PostgreSQL services (Google Cloud SQL, Azure) if needed, though with effort.
|
||||
|
||||
## Implementation
|
||||
|
||||
### Rollout Plan
|
||||
|
||||
**Phase 1: Setup (Week 1-2)**
|
||||
- Provision AWS RDS PostgreSQL instance
|
||||
- Configure VPC security groups and IAM roles
|
||||
- Set up automated backups and monitoring
|
||||
- Configure PgBouncer connection pooling
|
||||
|
||||
**Phase 2: Migration (Week 3-4)**
|
||||
- Migrate schema from SQLite prototype
|
||||
- Load seed data and test data
|
||||
- Performance test with simulated load
|
||||
- Configure monitoring alerts (CloudWatch, Datadog)
|
||||
|
||||
**Phase 3: Launch (Q2 2024)**
|
||||
- Deploy to production
|
||||
- Monitor query performance and optimize slow queries
|
||||
- Weekly capacity review for first 3 months
|
||||
|
||||
### Success Criteria
|
||||
|
||||
**Technical:**
|
||||
- p95 query latency < 100ms (measured via APM)
|
||||
- Zero data integrity issues in first 6 months
|
||||
- 99.9% uptime achieved
|
||||
|
||||
**Operational:**
|
||||
- Team can confidently make schema changes
|
||||
- Backup/restore tested and verified monthly
|
||||
- On-call incidents < 2 per month related to database
|
||||
|
||||
**Business:**
|
||||
- Database costs remain under $5k/month through 10k users
|
||||
- Support 100k users without re-architecture
|
||||
|
||||
### Future Considerations
|
||||
|
||||
**Short-term (3-6 months):**
|
||||
- Implement Redis caching for hot data paths
|
||||
- Tune connection pool settings based on actual load
|
||||
- Create read-only database user for analytics
|
||||
|
||||
**Medium-term (6-18 months):**
|
||||
- Add read replicas when QPS exceeds 40k
|
||||
- Implement query result caching
|
||||
- Consider Aurora migration if cost-benefit justifies
|
||||
|
||||
**Long-term (18+ months):**
|
||||
- Evaluate sharding strategy if approaching single-instance limits
|
||||
- Consider multi-region deployment for global users
|
||||
- Explore specialized databases for specific workloads (e.g., time-series data)
|
||||
|
||||
## References
|
||||
|
||||
- [PostgreSQL 15 Release Notes](https://www.postgresql.org/docs/15/release-15.html)
|
||||
- [AWS RDS PostgreSQL Best Practices](https://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/CHAP_BestPractices.html)
|
||||
- [Internal: Database Performance Requirements Doc](https://docs.internal/db-requirements)
|
||||
- [Internal: Load Testing Results](https://docs.internal/load-test-2024-01)
|
||||
- [Benchmark: PostgreSQL vs MySQL JSON Performance](https://www.enterprisedb.com/postgres-tutorials/postgresql-vs-mysql-json-performance)
|
||||
325
skills/adr-architecture/resources/methodology.md
Normal file
325
skills/adr-architecture/resources/methodology.md
Normal file
@@ -0,0 +1,325 @@
|
||||
# ADR Methodology for Complex Decisions
|
||||
|
||||
## Complex ADR Workflow
|
||||
|
||||
Copy this checklist and track your progress:
|
||||
|
||||
```
|
||||
ADR Progress (Complex Decisions):
|
||||
- [ ] Step 1: Identify decision pattern and scope
|
||||
- [ ] Step 2: Conduct detailed analysis for each concern
|
||||
- [ ] Step 3: Engage stakeholders and gather input
|
||||
- [ ] Step 4: Build decision tree for related choices
|
||||
- [ ] Step 5: Perform quantitative analysis and create ADR
|
||||
```
|
||||
|
||||
**Step 1: Identify decision pattern and scope**
|
||||
|
||||
Determine which pattern applies to your decision (cascading decisions, competing concerns, or phased decisions). See [Complex Decision Patterns](#complex-decision-patterns) for patterns and approaches.
|
||||
|
||||
**Step 2: Conduct detailed analysis for each concern**
|
||||
|
||||
For each competing concern (security, scalability, cost, compliance), analyze how alternatives address it. See [Extended ADR Sections](#extended-adr-sections) for analysis templates.
|
||||
|
||||
**Step 3: Engage stakeholders and gather input**
|
||||
|
||||
Identify all affected parties and gather their perspectives systematically. See [Stakeholder Management](#stakeholder-management) for mapping and engagement techniques.
|
||||
|
||||
**Step 4: Build decision tree for related choices**
|
||||
|
||||
Map out cascading or interdependent decisions. See [Decision Trees for Related Choices](#decision-trees-for-related-choices) for structuring related ADRs.
|
||||
|
||||
**Step 5: Perform quantitative analysis and create ADR**
|
||||
|
||||
Use scoring matrices, cost modeling, or load testing to support decision. See [Quantitative Analysis](#quantitative-analysis) for methods and examples.
|
||||
|
||||
## Complex Decision Patterns
|
||||
|
||||
### Pattern 1: Cascading Decisions
|
||||
|
||||
When one architectural choice forces or constrains subsequent decisions.
|
||||
|
||||
**Approach:**
|
||||
1. Create primary ADR for the main architectural decision
|
||||
2. Create child ADRs for cascading decisions, referencing parent
|
||||
3. Use "Related ADRs" field to link the chain
|
||||
|
||||
**Example:**
|
||||
- ADR-100: Adopt Microservices Architecture (parent)
|
||||
- ADR-101: Use gRPC for Inter-Service Communication (child - follows from ADR-100)
|
||||
- ADR-102: Implement Service Mesh with Istio (child - follows from ADR-101)
|
||||
|
||||
### Pattern 2: Competing Concerns
|
||||
|
||||
When decision must balance multiple competing priorities (cost vs performance, security vs usability).
|
||||
|
||||
**Approach:**
|
||||
Add **Analysis Sections** to standard ADR:
|
||||
|
||||
```markdown
|
||||
## Detailed Analysis
|
||||
|
||||
### Security Analysis
|
||||
{How each alternative addresses security requirements}
|
||||
|
||||
### Performance Analysis
|
||||
{Benchmarks, load tests, scalability projections}
|
||||
|
||||
### Cost Analysis
|
||||
{TCO over 3 years, including hidden costs}
|
||||
|
||||
### Operational Complexity Analysis
|
||||
{Team skill requirements, monitoring needs, on-call burden}
|
||||
```
|
||||
|
||||
### Pattern 3: Phased Decisions
|
||||
|
||||
When the full solution is too complex to decide at once and an interim decision is needed.
|
||||
|
||||
**Approach:**
|
||||
1. Create ADR for Phase 1 decision
|
||||
2. Add "Future Decisions" section listing what's deferred
|
||||
3. Set review date to revisit (e.g., "Review in 6 months")
|
||||
|
||||
**Example:**
|
||||
```markdown
|
||||
## Decision (Phase 1)
|
||||
Start with managed PostgreSQL on RDS. Evaluate sharding vs Aurora vs NewSQL in 12 months.
|
||||
|
||||
## Future Decisions Needed
|
||||
- ADR-XXX: Horizontal scaling strategy (by Q3 2025)
|
||||
- ADR-XXX: Multi-region deployment approach (by Q4 2025)
|
||||
```
|
||||
|
||||
## Extended ADR Sections
|
||||
|
||||
### When to Add Detailed Analysis Sections
|
||||
|
||||
**Security Analysis** - Add when:
|
||||
- Decision affects authentication, authorization, or data protection
|
||||
- Compliance requirements involved (SOC2, HIPAA, GDPR)
|
||||
- Handling sensitive data
|
||||
|
||||
**Performance Analysis** - Add when:
|
||||
- SLA commitments at stake
|
||||
- Significant performance differences between alternatives
|
||||
- Scalability is critical concern
|
||||
|
||||
**Cost Analysis** - Add when:
|
||||
- Multi-year TCO differs significantly (>20%) between alternatives
|
||||
- Hidden costs exist (operational overhead, training, vendor lock-in)
|
||||
- Budget constraints are tight
|
||||
|
||||
**Operational Complexity Analysis** - Add when:
|
||||
- Team skill gaps exist for some alternatives
|
||||
- On-call burden varies significantly
|
||||
- Monitoring/debugging complexity differs
|
||||
|
||||
**Migration Analysis** - Add when:
|
||||
- Replacing existing system
|
||||
- Need to maintain backward compatibility
|
||||
- Rollback strategy is complex
|
||||
|
||||
### Template for Extended Sections
|
||||
|
||||
```markdown
|
||||
## Security Analysis
|
||||
|
||||
### {Alternative A}
|
||||
- **Threat model**: {What threats does this mitigate?}
|
||||
- **Attack surface**: {What new vulnerabilities introduced?}
|
||||
- **Compliance**: {How does this meet regulatory requirements?}
|
||||
- **Score**: {1-5 rating}
|
||||
|
||||
### {Alternative B}
|
||||
{Same structure}
|
||||
|
||||
### Security Recommendation
|
||||
{Which alternative is strongest on security, and any mitigations needed}
|
||||
```
|
||||
|
||||
## Stakeholder Management
|
||||
|
||||
### Identifying Stakeholders
|
||||
|
||||
**Technical stakeholders:**
|
||||
- Engineering teams affected by the decision
|
||||
- DevOps/SRE teams who will operate the solution
|
||||
- Security team for compliance/security decisions
|
||||
- Architecture review board (if exists)
|
||||
|
||||
**Business stakeholders:**
|
||||
- Product managers (feature impact)
|
||||
- Finance (budget implications)
|
||||
- Legal/compliance (regulatory requirements)
|
||||
- Executive sponsors (strategic alignment)
|
||||
|
||||
### Getting Input
|
||||
|
||||
**Pre-ADR phase:**
|
||||
1. Conduct stakeholder interviews to gather requirements and constraints
|
||||
2. Share draft alternatives for early feedback
|
||||
3. Identify concerns and dealbreakers
|
||||
|
||||
**ADR draft phase:**
|
||||
1. Share draft ADR with key stakeholders for review
|
||||
2. Hold working session to discuss controversial points
|
||||
3. Revise based on feedback
|
||||
|
||||
**Approval phase:**
|
||||
1. Present ADR at architecture review (if applicable)
|
||||
2. Get sign-off from decision-makers
|
||||
3. Communicate decision to broader team
|
||||
|
||||
### Recording Stakeholder Positions
|
||||
|
||||
For controversial decisions, add:
|
||||
|
||||
```markdown
|
||||
## Stakeholder Positions
|
||||
|
||||
**Backend Team (Support):**
|
||||
"PostgreSQL aligns with our SQL expertise and provides ACID guarantees we need."
|
||||
|
||||
**DevOps Team (Concerns):**
|
||||
"Concerned about operational complexity of read replicas. Request 2-week training before launch."
|
||||
|
||||
**Finance (Neutral):**
|
||||
"Within budget. Request quarterly cost reviews to ensure no overruns."
|
||||
```
|
||||
|
||||
## Decision Trees for Related Choices
|
||||
|
||||
When decision involves multiple related questions, use decision tree approach.
|
||||
|
||||
**Example: Cloud Provider + Database Decision**
|
||||
|
||||
```
|
||||
Q1: Which cloud provider?
|
||||
├─ AWS
|
||||
│ ├─ Q2: Which database?
|
||||
│ │ ├─ RDS PostgreSQL → ADR-042
|
||||
│ │ ├─ Aurora → ADR-043
|
||||
│ │ └─ DynamoDB → ADR-044
|
||||
│
|
||||
├─ GCP
|
||||
│ └─ Q2: Which database?
|
||||
│ ├─ Cloud SQL → ADR-045
|
||||
│ └─ Spanner → ADR-046
|
||||
```
|
||||
|
||||
**Approach:**
|
||||
1. Create ADR for Q1 (cloud provider selection)
|
||||
2. Create separate ADRs for Q2 based on Q1 outcome
|
||||
3. Link ADRs using "Related ADRs" field
|
||||
|
||||
## Quantitative Analysis
|
||||
|
||||
### Cost-Benefit Matrix
|
||||
|
||||
For decisions with measurable trade-offs:
|
||||
|
||||
| Alternative | Setup Cost | Annual Cost | Performance (QPS) | Team Velocity Impact | Risk Score |
|
||||
|-------------|-----------|-------------|-------------------|---------------------|------------|
|
||||
| PostgreSQL | $10k | $36k | 50k | +10% (familiar) | Low |
|
||||
| MongoDB | $15k | $84k | 100k | -20% (learning) | Medium |
|
||||
| DynamoDB | $5k | $60k | 200k | -15% (new patterns) | Medium |
|
||||
|
||||
### Decision Matrix with Weighted Criteria
|
||||
|
||||
When multiple factors matter with different importance:
|
||||
|
||||
```markdown
|
||||
## Weighted Decision Matrix
|
||||
|
||||
Criteria weights:
|
||||
- Performance: 30%
|
||||
- Cost: 25%
|
||||
- Team Expertise: 20%
|
||||
- Operational Simplicity: 15%
|
||||
- Ecosystem Maturity: 10%
|
||||
|
||||
| Alternative | Performance | Cost | Team | Ops | Ecosystem | Weighted Score |
|
||||
|-------------|-------------|------|------|-----|-----------|----------------|
|
||||
| PostgreSQL | 7/10 | 9/10 | 10/10| 8/10| 10/10 | **8.55** |
|
||||
| MongoDB | 9/10 | 6/10 | 5/10 | 7/10| 8/10 | 7.05 |
|
||||
| DynamoDB | 10/10 | 7/10 | 4/10 | 9/10| 7/10 | 7.60 |
|
||||
|
||||
PostgreSQL scores highest on weighted criteria.
|
||||
```
|
||||
|
||||
### Scenario Analysis
|
||||
|
||||
For decisions under uncertainty, model different futures:
|
||||
|
||||
```markdown
|
||||
## Scenario Analysis
|
||||
|
||||
### Scenario 1: Rapid Growth (3x projections)
|
||||
- PostgreSQL: Need expensive scaling (Aurora + sharding), $150k/yr
|
||||
- DynamoDB: Handles easily, $120k/yr
|
||||
- **Winner**: DynamoDB
|
||||
|
||||
### Scenario 2: Moderate Growth (1.5x projections)
|
||||
- PostgreSQL: Read replicas sufficient, $60k/yr
|
||||
- DynamoDB: Overprovisioned, $90k/yr
|
||||
- **Winner**: PostgreSQL
|
||||
|
||||
### Scenario 3: Slow Growth (0.8x projections)
|
||||
- PostgreSQL: Single instance sufficient, $40k/yr
|
||||
- DynamoDB: Low usage still requires min provision, $70k/yr
|
||||
- **Winner**: PostgreSQL
|
||||
|
||||
**Assessment**: PostgreSQL wins in 2 of 3 scenarios. Given our conservative growth estimates, PostgreSQL is the safer bet.
|
||||
```
|
||||
|
||||
## Review and Update Process
|
||||
|
||||
### When to Review ADRs
|
||||
|
||||
**Scheduled reviews:**
|
||||
- High-stakes decisions: Review after 6 months
|
||||
- Medium-stakes: Review after 12 months
|
||||
- Check if consequences matched reality
|
||||
|
||||
**Triggered reviews:**
|
||||
- Major change in context (team size, scale, requirements)
|
||||
- Significant problems attributed to decision
|
||||
- New technology emerges that changes trade-offs
|
||||
|
||||
### How to Update ADRs
|
||||
|
||||
**Never edit the content of an accepted ADR; only its status line may change.** Instead:
|
||||
|
||||
1. Create new ADR that supersedes the old one
|
||||
2. Update old ADR status to "Superseded by ADR-XXX"
|
||||
3. New ADR should reference old one and explain what changed
|
||||
|
||||
**Example:**
|
||||
```markdown
|
||||
# ADR-099: Migrate from PostgreSQL to CockroachDB
|
||||
|
||||
**Status:** Accepted
|
||||
**Date:** 2026-03-15
|
||||
**Supersedes:** ADR-042 (PostgreSQL decision)
|
||||
|
||||
## Context
|
||||
ADR-042 chose PostgreSQL in 2024 when we had 5k users. We now have 500k users across 8 regions. PostgreSQL sharding has become operationally complex...
|
||||
|
||||
## What Changed
|
||||
- Scale increased 100x since ADR-042 (5k → 500k users), far beyond projections
|
||||
- Multi-region deployment now required for latency
|
||||
- Team size grew from 5 to 40 engineers (distributed systems expertise available)
|
||||
...
|
||||
```
|
||||
|
||||
## Summary
|
||||
|
||||
For complex decisions:
|
||||
- Break into multiple ADRs if needed (use cascading pattern)
|
||||
- Add detailed analysis sections for critical factors
|
||||
- Engage stakeholders early and document positions
|
||||
- Use quantitative analysis (matrices, scenarios) to support intuition
|
||||
- Plan for review and evolution over time
|
||||
|
||||
Remember: The best ADR is the one that helps future teammates understand "why" when reading it 2 years later.
|
||||
317
skills/adr-architecture/resources/template.md
Normal file
317
skills/adr-architecture/resources/template.md
Normal file
@@ -0,0 +1,317 @@
|
||||
# ADR Template - Standard Format
|
||||
|
||||
## Workflow
|
||||
|
||||
Copy this checklist and track your progress:
|
||||
|
||||
```
|
||||
ADR Creation Progress:
|
||||
- [ ] Step 1: Gather decision context and requirements
|
||||
- [ ] Step 2: Fill in template structure
|
||||
- [ ] Step 3: Document alternatives with pros/cons
|
||||
- [ ] Step 4: Analyze consequences honestly
|
||||
- [ ] Step 5: Validate with quality checklist
|
||||
```
|
||||
|
||||
**Step 1: Gather decision context and requirements**
|
||||
|
||||
Collect information on what decision needs to be made, why now, requirements (functional/non-functional), constraints (budget, timeline, skills, compliance), and scope. This becomes your Context section.
|
||||
|
||||
**Step 2: Fill in template structure**
|
||||
|
||||
Use the [Quick Template](#quick-template) below to create the ADR file with a title (ADR-{NUMBER}: Decision), metadata (status, date, deciders), and sections for context, decision, alternatives, and consequences.
|
||||
|
||||
**Step 3: Document alternatives with pros/cons**
|
||||
|
||||
List 2-3+ real alternatives that were seriously considered. For each: description, pros (2-4 benefits), cons (2-4 drawbacks), and specific reason not chosen. See [Alternatives Considered](#alternatives-considered) guidance.
|
||||
|
||||
**Step 4: Analyze consequences honestly**
|
||||
|
||||
Document benefits, drawbacks, risks, and trade-offs accepted. Every decision has downsides - be honest about them and note mitigation strategies. See [Consequences](#consequences) guidance for structure.
|
||||
|
||||
**Step 5: Validate with quality checklist**
|
||||
|
||||
Use the [Quality Checklist](#quality-checklist) to verify: context explains WHY, decision is specific/actionable, 2-3+ alternatives documented, consequences include benefits AND drawbacks, technical details accurate, future readers can understand the ADR without prior knowledge.
|
||||
|
||||
## Quick Template
|
||||
|
||||
Copy this structure to create your ADR:
|
||||
|
||||
```markdown
|
||||
# ADR-{NUMBER}: {Decision Title in Title Case}
|
||||
|
||||
**Status:** {Proposed | Accepted | Deprecated | Superseded}
|
||||
**Date:** {YYYY-MM-DD}
|
||||
**Deciders:** {List people/teams involved in decision}
|
||||
**Related ADRs:** {Links to related ADRs, if any}
|
||||
|
||||
## Context
|
||||
|
||||
{Describe the situation, problem, or opportunity that necessitates this decision}
|
||||
|
||||
**Background:**
|
||||
{What led to this decision being needed?}
|
||||
|
||||
**Requirements:**
|
||||
{What functional/non-functional requirements must be met?}
|
||||
|
||||
**Constraints:**
|
||||
{What limitations exist? Budget, timeline, skills, compliance, technical debt, etc.}
|
||||
|
||||
## Decision
|
||||
|
||||
{State clearly what you're choosing to do}
|
||||
|
||||
{Be specific and actionable. Include:}
|
||||
- {What technology/approach/standard is being adopted}
|
||||
- {What version or configuration, if relevant}
|
||||
- {What scope this applies to (one service, entire system, etc.)}
|
||||
|
||||
## Alternatives Considered
|
||||
|
||||
### Option A: {Name}
|
||||
**Description:** {Brief description}
|
||||
**Pros:**
|
||||
- {Benefit 1}
|
||||
- {Benefit 2}
|
||||
|
||||
**Cons:**
|
||||
- {Drawback 1}
|
||||
- {Drawback 2}
|
||||
|
||||
**Why not chosen:** {Specific reason}
|
||||
|
||||
### Option B: {Name}
|
||||
{Same structure}
|
||||
|
||||
### Option C: {Name}
|
||||
{Same structure}
|
||||
|
||||
*Note: Include at least 2 (ideally 3 or more) real alternatives that were seriously considered*
|
||||
|
||||
## Consequences
|
||||
|
||||
### Benefits
|
||||
- **{Benefit category}**: {Specific benefit and why it matters}
|
||||
- **{Benefit category}**: {Specific benefit and why it matters}
|
||||
|
||||
### Drawbacks
|
||||
- **{Drawback category}**: {Specific cost/limitation and mitigation if any}
|
||||
- **{Drawback category}**: {Specific cost/limitation and mitigation if any}
|
||||
|
||||
### Risks
|
||||
- **{Risk}**: {Likelihood and mitigation plan}
|
||||
|
||||
### Trade-offs Accepted
|
||||
{What are we explicitly trading off? E.g., "Trading development speed for operational simplicity"}
|
||||
|
||||
## Implementation
|
||||
|
||||
{Optional section - include if implementation details are important}
|
||||
|
||||
**Rollout Plan:**
|
||||
{How will this be deployed/adopted?}
|
||||
|
||||
**Migration Path:**
|
||||
{If replacing something, how do we transition?}
|
||||
|
||||
**Timeline:**
|
||||
{Key milestones and dates}
|
||||
|
||||
**Success Criteria:**
|
||||
{How will we know this decision was right?}
|
||||
|
||||
## References
|
||||
|
||||
{Links to:}
|
||||
- {Technical documentation}
|
||||
- {Benchmarks or research}
|
||||
- {Related discussions or RFCs}
|
||||
- {Vendor documentation}
|
||||
```
|
||||
|
||||
## Field-by-Field Guidance
|
||||
|
||||
### Title
|
||||
- Format: `ADR-{NUMBER}: {Short Decision Summary}`
|
||||
- Number: Sequential, usually 001, 002, etc.
|
||||
- Summary: One line, actionable (e.g., "Use PostgreSQL for Primary Database", not "Database Choice")
|
||||
|
||||
### Status
|
||||
- **Proposed**: Under discussion, not yet adopted
|
||||
- **Accepted**: Decision is final and being implemented
|
||||
- **Deprecated**: No longer recommended (but may still be in use)
|
||||
- **Superseded**: Replaced by another ADR (link to it)
|
||||
|
||||
### Context
|
||||
**Purpose**: Help future readers understand WHY this decision was necessary
|
||||
|
||||
**Include:**
|
||||
- What problem/opportunity triggered this?
|
||||
- What are the business/technical drivers?
|
||||
- What requirements must be met?
|
||||
- What constraints limit options?
|
||||
|
||||
**Don't include:**
|
||||
- Solutions (those go in Decision section)
|
||||
- Analysis of options (that goes in Alternatives)
|
||||
|
||||
**Length**: 2-4 paragraphs typically
|
||||
|
||||
**Example:**
|
||||
> Our current monolithic application is becoming difficult to scale and deploy. Deploys take 45 minutes and require full system downtime. Teams are blocked on each other's changes. We need to support 10x traffic growth in the next year.
|
||||
>
|
||||
> Requirements: Independent deployment, horizontal scaling, fault isolation, team autonomy.
|
||||
> Constraints: Team has limited Kubernetes experience, must complete migration in 6 months, budget allows 20% infrastructure cost increase.
|
||||
|
||||
### Decision
|
||||
**Purpose**: State clearly and specifically what you're doing
|
||||
|
||||
**Include:**
|
||||
- Specific technology/approach (with version if relevant)
|
||||
- Configuration or implementation approach
|
||||
- Scope of application
|
||||
|
||||
**Don't:**
|
||||
- Justify (that's in Consequences)
|
||||
- Compare (that's in Alternatives)
|
||||
- Be vague ("use the best tool")
|
||||
|
||||
**Length**: 1-3 paragraphs
|
||||
|
||||
**Example:**
|
||||
> We will adopt a microservices architecture using:
|
||||
> - Kubernetes (v1.28+) for orchestration
|
||||
> - gRPC for inter-service communication
|
||||
> - PostgreSQL databases (one per service where needed)
|
||||
> - Shared API gateway (Kong) for external traffic
|
||||
>
|
||||
> Scope: All new services and existing services as they require significant changes. No forced migration of stable services.
|
||||
|
||||
### Alternatives Considered
|
||||
**Purpose**: Show other options were evaluated seriously (prevents "we should have considered X")
|
||||
|
||||
**Include:**
|
||||
- 2-4 real alternatives that were discussed
|
||||
- Honest pros/cons for each
|
||||
- Specific reason not chosen
|
||||
|
||||
**Don't:**
|
||||
- Include straw man options you never seriously considered
|
||||
- Unfairly present alternatives (be honest about their merits)
|
||||
- Omit major alternatives
|
||||
|
||||
**Format for each alternative:**
|
||||
- Name/summary
|
||||
- Brief description
|
||||
- 2-4 key pros
|
||||
- 2-4 key cons
|
||||
- Why not chosen (specific, not "just worse")
|
||||
|
||||
**Example:**
|
||||
> ### Continue with Monolith + Optimization
|
||||
> **Pros:**
|
||||
> - No migration cost or risk
|
||||
> - Team expertise is high
|
||||
> - Simpler operations
|
||||
>
|
||||
> **Cons:**
|
||||
> - Doesn't solve team coupling problem
|
||||
> - Still requires full-system deploys
|
||||
> - Scaling is all-or-nothing
|
||||
>
|
||||
> **Why not chosen:** Doesn't address fundamental team velocity and deployment issues that are our primary pain points.
|
||||
|
||||
### Consequences
|
||||
**Purpose**: Honest assessment of what this decision means long-term
|
||||
|
||||
**Include:**
|
||||
- Benefits (what we gain)
|
||||
- Drawbacks (what we lose or costs we incur)
|
||||
- Risks (what could go wrong)
|
||||
- Trade-offs (what we explicitly chose to sacrifice)
|
||||
|
||||
**Critical**: Be honest about downsides. Every decision has cons.
|
||||
|
||||
**Format:**
|
||||
- Group by category (performance, cost, team, operations, etc.)
|
||||
- Be specific (not "better performance" but "50% faster writes, 2x slower reads")
|
||||
- Note mitigation strategies for drawbacks where applicable
|
||||
|
||||
**Example:**
|
||||
> **Benefits:**
|
||||
> - **Team velocity**: Teams can deploy independently, 10min deploys vs 45min
|
||||
> - **Scalability**: Can scale hot services independently, expect 50% infrastructure cost reduction
|
||||
> - **Resilience**: Service failures are isolated, no cascading failures
|
||||
>
|
||||
> **Drawbacks:**
|
||||
> - **Operational complexity**: Managing 15+ services vs 1, need monitoring/tracing
|
||||
> - **Development overhead**: Network calls vs function calls, serialization costs
|
||||
> - **Data consistency**: Eventual consistency across services, need compensating transactions
|
||||
>
|
||||
> **Risks:**
|
||||
> - **Migration risk**: If migration takes >6mo, could end up with worst of both worlds
|
||||
> - **Team skill gap**: Need to train team on Kubernetes, distributed systems concepts
|
||||
>
|
||||
> **Trade-offs:**
|
||||
> Trading development simplicity for deployment flexibility and team autonomy.
|
||||
|
||||
## Quality Checklist
|
||||
|
||||
Before finalizing, verify:
|
||||
|
||||
- [ ] Title is clear and specific (not generic)
|
||||
- [ ] Status is set and accurate
|
||||
- [ ] Context explains WHY without proposing solutions
|
||||
- [ ] Decision is specific and actionable
|
||||
- [ ] At least 2 (ideally 3 or more) real alternatives are documented
|
||||
- [ ] Each alternative has honest pros/cons
|
||||
- [ ] Consequences include both benefits AND drawbacks
|
||||
- [ ] Risks are identified with mitigation where applicable
|
||||
- [ ] Technical details are accurate and specific
|
||||
- [ ] Future readers will understand context without asking around
|
||||
- [ ] No jargon without explanation
|
||||
- [ ] Trade-offs are explicitly acknowledged
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### Technology Selection ADR
|
||||
Focus on: capabilities vs requirements, performance characteristics, team expertise, operational complexity, ecosystem maturity
|
||||
|
||||
### Process/Standard ADR
|
||||
Focus on: enforcement mechanisms, exceptions, onboarding/training, examples, tooling support
|
||||
|
||||
### Migration ADR
|
||||
Focus on: rollout strategy, backward compatibility, rollback plan, success metrics, timeline
|
||||
|
||||
### Deprecation ADR
|
||||
Set Status: Deprecated or Superseded
|
||||
Include: Sunset timeline, migration path, superseding ADR link (if applicable)
|
||||
|
||||
## Examples
|
||||
|
||||
See `examples/` directory for complete examples:
|
||||
- `database-selection.md` - Technology choice
|
||||
- `api-versioning.md` - Standard/process decision
|
||||
- `microservices-migration.md` - Large architectural change
|
||||
|
||||
## Anti-Patterns to Avoid
|
||||
|
||||
**Vague context:**
|
||||
- Bad: "We need a better database"
|
||||
- Good: "Current MySQL instance hitting 80% CPU during peak load (5k QPS), queries taking >500ms"
|
||||
|
||||
**Non-specific decision:**
|
||||
- Bad: "Use microservices"
|
||||
- Good: "Migrate to microservices using Kubernetes 1.28+ with gRPC, starting with user service"
|
||||
|
||||
**Unfair alternatives:**
|
||||
- Bad: "MongoDB: bad for our use case, slow, unreliable"
|
||||
- Good: "MongoDB: Excellent for flexible schemas and horizontal scaling, but lacks multi-document ACID transactions we need for payments"
|
||||
|
||||
**Hiding downsides:**
|
||||
- Bad: "PostgreSQL will solve all our problems"
|
||||
- Good: "PostgreSQL provides ACID guarantees we need, but will require read replicas at >50k QPS and is harder to shard than DynamoDB"
|
||||
|
||||
**Too long:**
|
||||
- If an ADR is longer than 3 pages, consider splitting it into multiple ADRs or moving the detail into a separate design doc that the ADR references
|
||||
Reference in New Issue
Block a user