Initial commit

Zhongwei Li
2025-11-30 08:38:26 +08:00
commit 41d9f6b189
304 changed files with 98322 additions and 0 deletions

@@ -0,0 +1,360 @@
{
"name": "Prioritization Effort-Impact Evaluator",
"description": "Evaluates prioritization artifacts (effort-impact matrices, roadmaps) for quality of scoring, stakeholder alignment, and decision clarity",
"criteria": [
{
"name": "Scoring Quality & Differentiation",
"weight": 1.4,
"scale": {
"1": "All items scored similarly (e.g., all 3s) with no differentiation, or scores appear random/unsupported",
"2": "Some differentiation but clustering around middle (2.5-3.5 range), limited use of full scale, weak rationale",
"3": "Moderate differentiation with scores using 1-5 range, basic rationale provided for most items, some bias evident",
"4": "Strong differentiation across full 1-5 scale, clear rationale for scores, stakeholder input documented, few items cluster at boundaries",
"5": "Exemplary differentiation with calibrated scoring (reference examples documented), transparent rationale for all items, bias mitigation techniques used (silent voting, forced ranking), no suspicious clustering"
},
"indicators": {
"excellent": [
"Scores use full 1-5 range with clear distribution (few 1s/5s, more 2s/4s)",
"Reference items documented for calibration (e.g., 'Effort=2 example: CSV export, 2 days')",
"Scoring rationale explicit for each item (why Effort=4, why Impact=3)",
"Stakeholder perspectives documented (eng estimated effort, sales estimated impact)",
"Bias mitigation used (silent voting, anonymous scoring before discussion)"
],
"poor": [
"All scores 2.5-3.5 (no differentiation)",
"No rationale for why scores assigned",
"Single person scored everything alone",
"Scores don't match descriptions (called 'critical' but scored Impact=2)",
"Obvious optimism bias (everything is low effort, high impact)"
]
}
},
{
"name": "Quadrant Classification Accuracy",
"weight": 1.3,
"scale": {
"1": "Items misclassified (e.g., Effort=5 Impact=2 called 'Quick Win'), or no quadrants identified at all",
"2": "Quadrants identified but boundaries unclear (what's 'high' vs 'low'?), some misclassifications",
"3": "Quadrants correctly identified with reasonable boundaries (e.g., >3.5 = high), minor edge cases unclear",
"4": "Clear quadrant boundaries documented, all items classified correctly, edge cases explicitly addressed",
"5": "Exemplary classification with explicit boundary definitions, items near boundaries re-evaluated, typical quadrant distribution validated (10-20% Quick Wins, not 50%)"
},
"indicators": {
"excellent": [
"Quadrant boundaries explicit (e.g., 'High Impact = ≥4, Low Effort = ≤2')",
"10-20% Quick Wins (realistic, not over-optimistic)",
"20-30% Big Bets (sufficient strategic work)",
"Time Sinks identified and explicitly cut/deferred",
"Items near boundaries (e.g., Effort=3, Impact=3) re-evaluated or called out as edge cases"
],
"poor": [
"50%+ Quick Wins (unrealistic, likely miscalibrated)",
"0 Quick Wins (likely miscalibrated, overestimating effort)",
"No Time Sinks identified (probably hiding low-value work)",
"Boundaries undefined (unclear what 'high impact' means)",
"Items clearly misclassified (Effort=5 Impact=1 in roadmap as priority)"
]
}
},
{
"name": "Stakeholder Alignment & Input Quality",
"weight": 1.2,
"scale": {
"1": "Single person created prioritization with no stakeholder input, or stakeholder disagreements unresolved",
"2": "Minimal stakeholder input (1-2 people), no documentation of how disagreements resolved",
"3": "Multiple stakeholders involved (eng, product, sales), basic consensus reached, some perspectives missing",
"4": "Diverse stakeholders (eng, product, sales, CS, design) contributed appropriately (eng on effort, sales on value), disagreements discussed and resolved, participants documented",
"5": "Exemplary stakeholder process with weighted input by expertise (eng estimates effort, sales estimates customer value), bias mitigation (silent voting, anonymous scoring), pre-mortem for controversial items, all participants and resolution process documented"
},
"indicators": {
"excellent": [
"Participants listed with roles (3 eng, 1 PM, 2 sales, 1 CS)",
"Expertise-based weighting (eng scores effort 100%, sales contributes to impact)",
"Bias mitigation documented (silent voting used, then discussion)",
"Disagreements surfaced and resolved (eng said Effort=5, product said 3, converged at 4 because...)",
"Pre-mortem or red-teaming for controversial/uncertain items"
],
"poor": [
"No participant list (unclear who contributed)",
"PM scored everything alone",
"HIPPO (highest paid person) scores overrode team input with no discussion",
"Stakeholders disagree but no resolution documented",
"One function (e.g., only eng) scored both effort and impact"
]
}
},
{
"name": "Roadmap Sequencing & Realism",
"weight": 1.3,
"scale": {
"1": "No roadmap created, or roadmap ignores quadrants (Time Sinks scheduled first), or plans 100%+ of capacity",
"2": "Roadmap exists but doesn't follow quadrant logic (Big Bets before Quick Wins), capacity planning missing or unrealistic",
"3": "Roadmap sequences Quick Wins → Big Bets, basic timeline, capacity roughly considered but not calculated",
"4": "Roadmap sequences correctly, timeline realistic with capacity calculated (team size × time), dependencies mapped, buffer included (70-80% utilization)",
"5": "Exemplary roadmap with Quick Wins first (momentum), Big Bets phased for incremental value, Fill-Ins opportunistic, Time Sinks explicitly cut with rationale, dependencies mapped with critical path identified, capacity buffer (20-30%), velocity-based forecasting"
},
"indicators": {
"excellent": [
"Phase 1: Quick Wins (Weeks 1-4) to build momentum",
"Phase 2: Big Bets (phased for incremental value, not monolithic)",
"Fill-Ins not scheduled explicitly (opportunistic during downtime)",
"Time Sinks explicitly rejected with rationale communicated",
"Dependencies mapped (item X depends on Y completing first)",
"Capacity buffer (planned 70-80% of capacity, not 100%)",
"Timeline realistic (effort scores × team size = weeks)"
],
"poor": [
"No sequencing (items listed randomly)",
"Big Bets scheduled before Quick Wins (no momentum)",
"Time Sinks included in roadmap (low ROI items)",
"Planned at 100%+ capacity (no buffer for unknowns)",
"No timeline or unrealistic timeline (20 effort points in 1 week)",
"Dependencies ignored (dependent items scheduled in parallel)"
]
}
},
{
"name": "Effort Scoring Rigor",
"weight": 1.1,
"scale": {
"1": "Effort scored on single dimension (time only) with no consideration of complexity, risk, dependencies, or scores are guesses with no rationale",
"2": "Effort considers time but inconsistently accounts for complexity/risk/dependencies, weak rationale",
"3": "Effort considers multiple dimensions (time, complexity, risk) with reasonable rationale, some dimensions missing (e.g., dependencies)",
"4": "Effort considers time, complexity, risk, dependencies with clear rationale, minor gaps (e.g., didn't account for QA/deployment)",
"5": "Effort comprehensively considers time, complexity, risk, dependencies, unknowns, cross-team coordination, QA, deployment, with transparent rationale and historical calibration (past estimates vs actuals reviewed)"
},
"indicators": {
"excellent": [
"Effort dimensions documented (time=3, complexity=4, risk=2, dependencies=3 → avg=3)",
"Rationale explains all factors (Effort=4 because: 6 weeks, requires 3 teams, new tech stack, integration with 2 external systems)",
"Historical calibration referenced (similar item took 8 weeks last time)",
"Accounts for full lifecycle (dev + design + QA + deployment + docs)",
"Risk/unknowns factored in (confidence intervals or buffers)"
],
"poor": [
"Effort = engineering time only (ignores design, QA, deployment)",
"No rationale (just 'Effort=3' with no explanation)",
"Optimism bias evident (everything is 1-2 effort)",
"Dependencies ignored (item requires prerequisite but scored standalone)",
"Doesn't match description (called 'major migration' but Effort=2)"
]
}
},
{
"name": "Impact Scoring Rigor",
"weight": 1.2,
"scale": {
"1": "Impact scored on single dimension (revenue only, or gut feel) with no consideration of users, strategy, pain, or scores appear arbitrary",
"2": "Impact considers one dimension (e.g., users) but ignores business value, strategic alignment, or pain severity, weak rationale",
"3": "Impact considers multiple dimensions (users, value, strategy) with reasonable rationale, some dimensions missing or speculative",
"4": "Impact considers users, business value, strategic alignment, user pain with clear rationale, minor gaps (e.g., no data validation)",
"5": "Impact comprehensively considers users, business value, strategic alignment, user pain, competitive positioning, with transparent rationale and data validation (user research, usage analytics, revenue models, NPS/CSAT drivers)"
},
"indicators": {
"excellent": [
"Impact dimensions documented (users=5, value=$500K, strategy=4, pain=3 → avg=4.25)",
"Rationale explains all factors (Impact=5 because: 90% users affected, $1M ARR at risk, critical to Q1 OKR, top NPS detractor)",
"Data-driven validation (50 customer survey, 80% rated 'very important')",
"Usage analytics support (10K support tickets, 500K page views/mo with 30% bounce)",
"Strategic alignment explicit (ties to company OKR, competitive differentiation)",
"User pain quantified (severity, frequency, workarounds)"
],
"poor": [
"Impact = revenue only (ignores users, strategy, pain)",
"No rationale (just 'Impact=4' with no explanation)",
"Speculation without validation ('probably' high impact, 'might' drive revenue)",
"Doesn't match description (called 'niche edge case' but Impact=5)",
"Strategic override without justification ('CEO wants it' → Impact=5)",
"Ignores user research (survey says low importance, scored high anyway)"
]
}
},
{
"name": "Communication & Decision Transparency",
"weight": 1.1,
"scale": {
"1": "No explanation of decisions, just list of prioritized items with no rationale, or decisions contradict scores without explanation",
"2": "Minimal explanation (prioritized X, Y, Z) with no rationale for why or why not others, trade-offs unclear",
"3": "Basic explanation of decisions (doing X because high impact, deferring Y because low impact), trade-offs mentioned but not detailed",
"4": "Clear explanation of decisions with rationale tied to scores, trade-offs explicit (doing X means not doing Y), stakeholder concerns addressed",
"5": "Exemplary transparency with full rationale for all decisions, trade-offs explicit and quantified, stakeholder concerns documented and addressed, communication plan for rejected items (what we're NOT doing and why), success metrics defined, review cadence set"
},
"indicators": {
"excellent": [
"Decision rationale clear (prioritized X because Impact=5 Effort=2, deferred Y because Impact=2 Effort=5)",
"Trade-offs explicit (doing X means not doing Y this quarter)",
"Stakeholder concerns addressed (Sales wanted Z but impact is low because only 2 customers requesting)",
"Rejected items communicated (explicitly closing 15 Time Sinks to focus resources)",
"Success metrics defined (how will we know this roadmap succeeded? Ship 3 Quick Wins by end of month, 50% user adoption of Big Bet)",
"Review cadence set (re-score quarterly, adjust roadmap monthly)"
],
"poor": [
"No rationale for decisions (just 'we're doing X, Y, Z')",
"Trade-offs hidden (doesn't mention what's NOT being done)",
"Stakeholder concerns ignored or dismissed without explanation",
"No communication plan for rejected items",
"No success metrics (unclear how to measure if prioritization worked)",
"One-time prioritization (no plan to revisit/adjust)"
]
}
},
{
"name": "Completeness & Structure",
"weight": 1.0,
"scale": {
"1": "Missing critical components (no matrix, no roadmap, or just a list of items), or completely unstructured",
"2": "Some components present (matrix OR roadmap) but incomplete, minimal structure",
"3": "Most components present (scoring table, matrix, roadmap) with basic structure, some sections missing detail",
"4": "All components present and well-structured (scoring table with rationale, matrix with quadrants, phased roadmap, capacity planning), minor gaps",
"5": "Comprehensive artifact with all components (scoring table with multi-dimensional rationale, visual matrix, phased roadmap with dependencies, capacity planning with buffer, quality checklist completed, stakeholder sign-off documented)"
},
"indicators": {
"excellent": [
"Scoring table with all items, effort/impact scores, quadrant classification, rationale",
"Visual matrix plotted (2x2 grid with items positioned)",
"Quadrant summary (lists Quick Wins, Big Bets, Fill-Ins, Time Sinks with counts)",
"Phased roadmap (Phase 1: Quick Wins weeks 1-4, Phase 2: Big Bets weeks 5-16, etc.)",
"Capacity planning (team size, utilization, buffer calculated)",
"Dependencies mapped (critical path identified)",
"Quality checklist completed (self-assessment documented)",
"Stakeholder participants and sign-off documented"
],
"poor": [
"Just a list of items with scores (no matrix, no roadmap)",
"No visual representation (hard to see quadrants at a glance)",
"No roadmap sequencing (unclear execution order)",
"No capacity planning (unclear if realistic)",
"Missing quadrant summaries (can't quickly see Quick Wins)",
"No documentation of process (unclear how decisions were made)"
]
}
}
],
"guidance": {
"by_context": {
"product_backlog": {
"focus": "Emphasize user reach, business value, and technical complexity. Quick wins should be UX improvements or small integrations. Big bets are new workflows or platform changes.",
"red_flags": [
"All features scored high impact (if everything is priority, nothing is)",
"Effort ignores design/QA time (only engineering hours)",
"No usage data to validate impact assumptions",
"Edge cases prioritized over core functionality"
]
},
"technical_debt": {
"focus": "Emphasize developer productivity impact, future velocity, and risk reduction. Quick wins are dependency upgrades or small refactors. Big bets are architecture overhauls.",
"red_flags": [
"Impact scored only on 'clean code' (not business value or velocity)",
"Premature optimizations (performance work with no bottleneck)",
"Refactoring for refactoring's sake (no measurable improvement)",
"Not tying technical debt to business outcomes"
]
},
"bug_triage": {
"focus": "Emphasize user pain severity, frequency, and business impact (revenue, support cost). Quick wins are high-frequency easy fixes. Big bets are complex architectural bugs.",
"red_flags": [
"Severity without frequency (rare edge case scored high priority)",
"Cosmetic bugs prioritized over functional bugs",
"Effort underestimated (bug fixes often have hidden complexity)",
"No workarounds considered (high-effort bug with easy workaround is lower priority)"
]
},
"strategic_initiatives": {
"focus": "Emphasize strategic alignment, competitive positioning, and revenue/cost impact. Quick wins are pilot programs or process tweaks. Big bets are market expansion or platform investments.",
"red_flags": [
"All initiatives scored 'strategic' (dilutes meaning)",
"No tie to company OKRs or goals",
"Ignoring opportunity cost (resources used here can't be used there)",
"Betting on too many big bets (spreading too thin)"
]
}
},
"by_team_size": {
"small_team_2_5": {
"advice": "Focus heavily on Quick Wins and Fill-Ins. Can only do 1 Big Bet at a time. Avoid Time Sinks completely (no capacity to waste). Expect 60-70% utilization (support/bugs take more time in small teams).",
"capacity_planning": "Assume 60% project capacity (40% goes to support, bugs, meetings, context switching)"
},
"medium_team_6_15": {
"advice": "Balance Quick Wins (70%) and Big Bets (30%). Can parallelize 2-3 Big Bets if low dependencies. Explicitly cut Time Sinks. Expect 70-80% utilization.",
"capacity_planning": "Assume 70% project capacity (30% support, bugs, meetings, code review)"
},
"large_team_16_plus": {
"advice": "Can run multiple Big Bets in parallel, but watch for coordination overhead. Need more strategic work (Big Bets 40%, Quick Wins 60%) to justify team size. Expect 75-85% utilization.",
"capacity_planning": "Assume 75% project capacity (25% meetings, cross-team coordination, support)"
}
},
"by_time_horizon": {
"sprint_2_weeks": {
"advice": "Only Quick Wins and Fill-Ins. No Big Bets (can't complete in 2 weeks). Focus on 1-3 Quick Wins max. Expect interruptions (support, bugs).",
"typical_velocity": "3-5 effort points per sprint for 3-person team"
},
"quarter_3_months": {
"advice": "2-3 Quick Wins in first month, 1-2 Big Bets over remaining 2 months. Don't overcommit (leave buffer for Q-end support/planning).",
"typical_velocity": "15-25 effort points per quarter for 3-person team"
},
"annual_12_months": {
"advice": "Mix of 8-12 Quick Wins and 3-5 Big Bets across year. Revisit quarterly (don't lock in for full year). Balance short-term momentum and long-term strategy.",
"typical_velocity": "60-100 effort points per year for 3-person team"
}
}
},
"common_failure_modes": {
"all_quick_wins": {
"symptom": "50%+ of items scored as Quick Wins (high impact, low effort)",
"root_cause": "Optimism bias (underestimating effort or overestimating impact), lack of calibration, wishful thinking",
"fix": "Run pre-mortem on 'Quick Wins': If this is so easy and valuable, why haven't we done it already? Re-calibrate effort scores with engineering input. Validate impact with user research."
},
"no_quick_wins": {
"symptom": "0 Quick Wins identified (everything is low impact or high effort)",
"root_cause": "Pessimism bias (overestimating effort or underestimating impact), lack of creativity, analysis paralysis",
"fix": "Force brainstorm: What's the smallest thing we could do to deliver value? What's the lowest-hanging fruit? Consider config changes, UX tweaks, integrations."
},
"all_3s": {
"symptom": "80%+ of items scored 2.5-3.5 (no differentiation)",
"root_cause": "Lack of calibration, avoiding hard choices, consensus-seeking without debate",
"fix": "Forced ranking (only one item can be #1), use wider scale (1-10), calibrate with reference items, silent voting to avoid groupthink."
},
"time_sinks_in_roadmap": {
"symptom": "Time Sinks (low impact, high effort) scheduled in roadmap",
"root_cause": "Sunk cost fallacy, HIPPO pressure, not saying 'no', ignoring opportunity cost",
"fix": "Explicitly cut Time Sinks. Challenge: Can we descope to make this lower effort? If not, reject. Communicate to stakeholders: 'We're not doing X because low ROI.'"
},
"capacity_overload": {
"symptom": "Roadmap plans 100%+ of team capacity",
"root_cause": "Ignoring support/bugs/meetings, optimism about execution, not accounting for unknowns",
"fix": "Reduce planned capacity to 70-80% (buffer for unknowns). Calculate realistic capacity: team size × hours × utilization. Cut lowest-priority items to fit."
},
"solo_prioritization": {
"symptom": "One person (usually PM) scored everything alone",
"root_cause": "Lack of process, time pressure, avoiding conflict",
"fix": "Multi-stakeholder scoring session (2-hour workshop with eng, product, sales, CS). Diverse input improves accuracy and builds buy-in."
}
},
"excellence_indicators": {
"overall": [
"Scores are differentiated (use full 1-5 range, not clustered at 3)",
"Scoring rationale is transparent and defensible for all items",
"Diverse stakeholders contributed (eng, product, sales, CS, design)",
"Quadrant distribution is realistic (10-20% Quick Wins, 20-30% Big Bets, not 50% Quick Wins)",
"Roadmap sequences Quick Wins → Big Bets → Fill-Ins, explicitly cuts Time Sinks",
"Capacity planning includes buffer (70-80% utilization, not 100%)",
"Dependencies mapped and accounted for in sequencing",
"Trade-offs explicit (doing X means not doing Y)",
"Success metrics defined and review cadence set"
],
"data_driven": [
"Impact scores validated with user research (surveys, interviews, usage analytics)",
"Effort scores calibrated with historical data (past estimates vs actuals)",
"Business value quantified (revenue impact, cost savings, NPS drivers)",
"User pain measured (support ticket frequency, NPS detractor feedback)",
"A/B test results inform prioritization (validate assumptions before big bets)"
],
"stakeholder_alignment": [
"Participants documented (names, roles, contributions)",
"Bias mitigation used (silent voting, anonymous scoring, forced ranking)",
"Disagreements surfaced and resolved (documented how consensus reached)",
"Pre-mortem for controversial items (surface hidden assumptions/risks)",
"Stakeholder sign-off documented (alignment on final roadmap)"
]
}
}
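
A minimal sketch, in Python, of how the criterion weights above could be rolled up into an overall score. The criterion names and weights are taken from the JSON; the weighted_score helper and the sample per-criterion scores are illustrative assumptions, not part of the committed evaluator.

# Sketch: aggregate per-criterion scores (1-5) into a weighted overall score.
# Weights mirror the rubric above; the sample scores are made up.

WEIGHTS = {
    "Scoring Quality & Differentiation": 1.4,
    "Quadrant Classification Accuracy": 1.3,
    "Stakeholder Alignment & Input Quality": 1.2,
    "Roadmap Sequencing & Realism": 1.3,
    "Effort Scoring Rigor": 1.1,
    "Impact Scoring Rigor": 1.2,
    "Communication & Decision Transparency": 1.1,
    "Completeness & Structure": 1.0,
}

def weighted_score(scores: dict) -> float:
    """Weighted average of criterion scores, normalized by the total weight used."""
    total_weight = sum(WEIGHTS[name] for name in scores)
    return sum(WEIGHTS[name] * s for name, s in scores.items()) / total_weight

sample = {
    "Scoring Quality & Differentiation": 4,
    "Quadrant Classification Accuracy": 3,
    "Stakeholder Alignment & Input Quality": 5,
    "Roadmap Sequencing & Realism": 4,
    "Effort Scoring Rigor": 3,
    "Impact Scoring Rigor": 4,
    "Communication & Decision Transparency": 4,
    "Completeness & Structure": 5,
}
print(f"Overall: {weighted_score(sample):.2f} / 5")  # ≈ 3.98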
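The quadrant checks in "Quadrant Classification Accuracy" can be made concrete the same way. This sketch assumes the example boundaries quoted in the rubric ("High Impact = ≥4", "Low Effort = ≤2"); the classify function and its defaults are hypothetical.

# Sketch: quadrant classification using the example boundaries from the rubric;
# the thresholds are adjustable assumptions, not fixed by the evaluator.

def classify(effort: float, impact: float,
             impact_cut: float = 4.0, effort_cut: float = 2.0) -> str:
    """Map an item to a quadrant. Items near the cuts (e.g. Effort=3, Impact=3)
    still get a label here but should be re-evaluated as edge cases."""
    if impact >= impact_cut:
        return "Quick Win" if effort <= effort_cut else "Big Bet"
    return "Fill-In" if effort <= effort_cut else "Time Sink"

print(classify(effort=2, impact=5))  # Quick Win
print(classify(effort=5, impact=2))  # Time Sink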
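Finally, a sketch of the capacity math behind "Roadmap Sequencing & Realism" and the capacity_overload failure mode (team size × weeks × utilization, planned at 70-80% with a buffer for unknowns). The utilization default follows the by_team_size guidance; the function names and sample numbers are assumptions for illustration.

# Sketch: capacity check for a roadmap; all figures are illustrative.

def project_capacity(team_size: int, weeks: int, utilization: float = 0.7) -> float:
    """Person-weeks realistically available for roadmap work (not 100% of calendar time)."""
    return team_size * weeks * utilization

def fits_roadmap(planned_person_weeks: float, team_size: int, weeks: int,
                 utilization: float = 0.7, buffer: float = 0.2) -> bool:
    """True if planned work fits within capacity minus a 20-30% buffer for unknowns."""
    usable = project_capacity(team_size, weeks, utilization) * (1 - buffer)
    return planned_person_weeks <= usable

# A 3-person team over a 12-week quarter at 70% utilization has 25.2 person-weeks;
# with a 20% buffer, roughly 20 person-weeks are plannable.
print(fits_roadmap(planned_person_weeks=18, team_size=3, weeks=12))  # True
print(fits_roadmap(planned_person_weeks=24, team_size=3, weeks=12))  # False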