Initial commit
@@ -0,0 +1,310 @@
{
  "name": "Metrics Tree Evaluator",
  "description": "Evaluate metrics trees for North Star selection, decomposition quality, causal clarity, and actionability. Assess whether the metrics tree will drive effective decision-making and experimentation.",
  "version": "1.0.0",
  "criteria": [
    {
      "name": "North Star Selection",
      "description": "Evaluates whether the chosen North Star metric appropriately captures value and business success",
      "weight": 1.3,
      "scale": {
        "1": {
          "label": "Poor North Star choice",
          "description": "Vanity metric (registered users, pageviews) that doesn't reflect value delivered or business health. Not actionable or measurable."
        },
        "2": {
          "label": "Weak North Star",
          "description": "Metric somewhat related to value but indirect or lagging. Example: revenue for an early-stage product (reflects pricing, not product-market fit)."
        },
        "3": {
          "label": "Acceptable North Star",
          "description": "Metric captures some value but misses key criteria. For example, it measures usage but not business model alignment, or is actionable but not predictive of revenue."
        },
        "4": {
          "label": "Good North Star",
          "description": "Metric captures value delivered to customers and is measurable and actionable, but the relationship to business success could be stronger or the rationale clearer."
        },
        "5": {
          "label": "Excellent North Star",
          "description": "Metric perfectly captures value delivered to customers, predicts business success (revenue/retention), and is measurable and actionable by teams. Clear rationale provided. Examples: Slack's 'teams sending 100+ messages/week', Airbnb's 'nights booked'."
        }
      }
    },
    {
      "name": "Decomposition Completeness",
      "description": "Evaluates whether the North Star is fully decomposed into mutually exclusive, collectively exhaustive drivers",
      "weight": 1.2,
      "scale": {
        "1": {
          "label": "No decomposition",
          "description": "North Star stated but not broken down into component drivers. No input metrics (L2)."
        },
        "2": {
          "label": "Incomplete decomposition",
          "description": "1-2 input metrics identified but major drivers missing. Components overlap (not mutually exclusive) or gaps exist (not collectively exhaustive)."
        },
        "3": {
          "label": "Basic decomposition",
          "description": "3-5 input metrics cover the major drivers but some gaps or overlaps exist. Mathematical relationship unclear (additive vs multiplicative)."
        },
        "4": {
          "label": "Complete decomposition",
          "description": "3-5 input metrics are mutually exclusive and collectively exhaustive. Clear mathematical relationship (e.g., sum or product). Minor gaps acceptable."
        },
        "5": {
          "label": "Rigorous decomposition",
          "description": "3-5 input metrics provably decompose the North Star with an explicit formula. MECE (mutually exclusive, collectively exhaustive). Each input can be owned by a team. Validated with data that components sum/multiply to the North Star."
        }
      }
    },
    {
      "name": "Causal Clarity",
      "description": "Evaluates whether causal relationships between metrics are clearly specified and validated",
      "weight": 1.2,
      "scale": {
        "1": {
          "label": "No causal reasoning",
          "description": "Metrics listed without explaining how they relate to each other or to the North Star."
        },
        "2": {
          "label": "Assumed causation",
          "description": "Relationships implied but not validated. Possible confusion between correlation and causation. Direction unclear (does A cause B, or B cause A?)."
        },
        "3": {
          "label": "Plausible causation",
          "description": "Causal relationships stated with reasoning but not validated with data. Direction clear. Lag times not specified."
        },
        "4": {
          "label": "Validated causation",
          "description": "Causal relationships supported by correlation data or past experiments. Direction and approximate lag times specified. Some relationships tested."
        },
        "5": {
          "label": "Proven causation",
          "description": "Causal relationships validated through experiments or strong observational data (cohort analysis, regression). Effect sizes quantified (e.g., 10% increase in X → 5% increase in Y). Lag times specified. Confounds controlled."
        }
      }
    },
    {
      "name": "Actionability",
      "description": "Evaluates whether metrics can actually be moved by teams through specific actions",
      "weight": 1.1,
      "scale": {
        "1": {
          "label": "Not actionable",
          "description": "Metrics are outcomes outside team control (market conditions, competitor actions) or too abstract to act on."
        },
        "2": {
          "label": "Weakly actionable",
          "description": "Metrics are high-level (e.g., 'engagement') without specific user behaviors identified. Teams are unsure what to do."
        },
        "3": {
          "label": "Moderately actionable",
          "description": "Some action metrics (L3) identified but not comprehensive. It is clear which metrics each team owns, but the specific actions to move them are vague."
        },
        "4": {
          "label": "Actionable",
          "description": "Action metrics (L3) specified as concrete user behaviors for each input metric. Teams know what actions to encourage. Current rates measured."
        },
        "5": {
          "label": "Highly actionable",
          "description": "Action metrics are specific, observable behaviors with clear measurement (events tracked). Each input metric has 3-5 actions identified. Teams have explicit experiments to test moving actions. Ownership clear."
        }
      }
    },
    {
      "name": "Leading Indicator Quality",
      "description": "Evaluates whether true leading indicators are identified that predict North Star movement",
      "weight": 1.0,
      "scale": {
        "1": {
          "label": "No leading indicators",
          "description": "Only lagging indicators provided (they move at the same time as, or after, the North Star)."
        },
        "2": {
          "label": "Weak leading indicators",
          "description": "Indicators proposed but timing unclear (do they actually predict?) or correlation weak/untested."
        },
        "3": {
          "label": "Plausible leading indicators",
          "description": "2-3 indicators identified that logically should predict the North Star. Timing estimates provided but not validated. Correlation not measured."
        },
        "4": {
          "label": "Validated leading indicators",
          "description": "2-3 leading indicators with timing specified (e.g., 'predicts 7-day retention') and correlation measured (r > 0.6). Tested on historical data."
        },
        "5": {
          "label": "High-quality leading indicators",
          "description": "2-4 leading indicators with proven predictive power (r > 0.7), clear timing (days/weeks ahead), and actionability (teams can move them). Includes propensity models or cohort analysis showing predictive strength."
        }
      }
    },
    {
      "name": "Prioritization Rigor",
      "description": "Evaluates whether experiments and opportunities are prioritized using sound reasoning",
      "weight": 1.0,
      "scale": {
        "1": {
          "label": "No prioritization",
          "description": "Metrics and experiments listed without ranking or rationale."
        },
        "2": {
          "label": "Subjective prioritization",
          "description": "Ranking provided but based on gut feel or opinion without a framework or data."
        },
        "3": {
          "label": "Framework-based prioritization",
          "description": "ICE or RICE framework applied but scores are estimates without data support. Top 3 experiments identified."
        },
        "4": {
          "label": "Data-informed prioritization",
          "description": "ICE/RICE scores based on historical data or analysis. Impact estimates grounded in past experiments or correlations. Top 1-3 experiments have clear hypotheses and success criteria."
        },
        "5": {
          "label": "Rigorous prioritization",
          "description": "ICE/RICE scores validated with data. Tradeoffs considered (e.g., impact vs effort, short-term vs long-term). Sensitivity analysis performed (\"what if impact is half?\"). Top experiments have quantified hypotheses, clear metrics, and decision criteria. Portfolio approach if multiple experiments."
        }
      }
    },
    {
      "name": "Guardrails & Counter-Metrics",
      "description": "Evaluates whether risks, tradeoffs, and negative externalities are considered",
      "weight": 0.9,
      "scale": {
        "1": {
          "label": "No risk consideration",
          "description": "Only positive metrics. No mention of potential downsides, gaming, or tradeoffs."
        },
        "2": {
          "label": "Risks mentioned",
          "description": "Potential issues noted but no concrete counter-metrics or guardrails defined."
        },
        "3": {
          "label": "Some guardrails",
          "description": "1-2 counter-metrics identified (e.g., quality, satisfaction) but no thresholds set. Tradeoffs acknowledged but not quantified."
        },
        "4": {
          "label": "Clear guardrails",
          "description": "2-4 counter-metrics with minimum acceptable thresholds (e.g., NPS must stay ≥40). Gaming risks identified. Monitoring plan included."
        },
        "5": {
          "label": "Comprehensive risk framework",
          "description": "Counter-metrics for each major risk (quality, trust, satisfaction, ecosystem health). Guardrail thresholds set based on data or policy. Gaming prevention mechanisms specified. Tradeoff analysis included (e.g., short-term growth vs long-term retention)."
        }
      }
    },
    {
      "name": "Overall Usefulness",
      "description": "Evaluates whether the metrics tree will effectively guide decision-making and experimentation",
      "weight": 1.0,
      "scale": {
        "1": {
          "label": "Not useful",
          "description": "Missing critical components or so flawed that teams cannot use it for decisions."
        },
        "2": {
          "label": "Limited usefulness",
          "description": "Provides some structure but has too many gaps, unclear relationships, or is impractical to implement."
        },
        "3": {
          "label": "Moderately useful",
          "description": "Covers the basics (North Star, input metrics, some actions) but lacks depth in actionability or prioritization. Teams can use it with significant additional work."
        },
        "4": {
          "label": "Useful",
          "description": "Complete metrics tree with clear structure. Teams can identify what to measure, understand relationships, and select experiments. Minor improvements needed."
        },
        "5": {
          "label": "Highly useful",
          "description": "Decision-ready artifact. Teams can immediately use it to align on goals, prioritize experiments, instrument dashboards, and make metric-driven decisions. Well-documented assumptions and data gaps. Review cadence specified."
        }
      }
    }
  ],
  "guidance": {
    "by_business_model": {
      "saas_subscription": {
        "north_star_options": "MRR, WAU/MAU for engaged users, Net Revenue Retention (NRR) for mature products",
        "key_inputs": "New users, retained users, expansion revenue, churn",
        "leading_indicators": "Activation rate, feature adoption, usage frequency, product qualified leads (PQLs)",
        "guardrails": "Customer satisfaction (NPS/CSAT), support ticket volume, technical reliability"
      },
      "marketplace": {
        "north_star_options": "GMV, successful transactions, nights booked (a supply × demand balanced metric)",
        "key_inputs": "Supply side (active suppliers), demand side (buyers/searches), match rate/liquidity",
        "leading_indicators": "New supplier activation, buyer intent signals, supply utilization rate",
        "guardrails": "Supply/demand balance ratio, trust/safety metrics, quality scores"
      },
      "ecommerce": {
        "north_star_options": "Revenue, orders per customer, customer LTV",
        "key_inputs": "Traffic, conversion rate, AOV, repeat purchase rate",
        "leading_indicators": "Add-to-cart rate, wishlist additions, email engagement, product page depth",
        "guardrails": "Return rate, customer satisfaction, shipping time, product quality ratings"
      },
      "social_content": {
        "north_star_options": "Engaged time, content created and consumed, network density (connections per user)",
        "key_inputs": "Content creation rate, content consumption, social interactions, retention",
        "leading_indicators": "Profile completion, first content post, first social interaction, 7-day activation",
        "guardrails": "Content quality, user wellbeing, toxicity/moderation metrics, creator retention"
      },
      "mobile_app": {
        "north_star_options": "DAU (for high-frequency) or WAU (for moderate-frequency), session frequency × duration",
        "key_inputs": "New installs, activated users, retained users, resurrected users",
        "leading_indicators": "Day 1 retention, tutorial completion, push notification opt-in, first core action",
        "guardrails": "App rating, uninstall rate, crash-free rate, user-reported satisfaction"
      }
    },
    "by_stage": {
      "pre_pmf": {
        "focus": "Finding product-market fit through retention and satisfaction signals",
        "north_star": "Week-over-week retention (>40% is a strong signal)",
        "key_metrics": "Retention curves, NPS, 'very disappointed' score (>40%), organic usage frequency",
        "experiments": "Rapid iteration on core value prop, onboarding, early activation"
      },
      "post_pmf_pre_scale": {
        "focus": "Validating unit economics and early growth loops",
        "north_star": "New activated users per week or month",
        "key_metrics": "LTV/CAC ratio (>3), payback period (<12 months), month-over-month growth (>10%)",
        "experiments": "Channel optimization, conversion funnel improvements, early retention tactics"
      },
      "growth": {
        "focus": "Efficient scaling of acquisition, activation, and retention",
        "north_star": "Revenue, ARR, or transaction volume",
        "key_metrics": "Net revenue retention (>100%), magic number (>0.75), efficient growth",
        "experiments": "Systematic A/B testing, multi-channel optimization, retention programs, expansion revenue"
      },
      "maturity": {
        "focus": "Profitability, market share, operational efficiency",
        "north_star": "Free cash flow, EBITDA, or market share",
        "key_metrics": "Operating margin (>20%), customer concentration, competitive position",
        "experiments": "Operational efficiency, new market expansion, product line extension, M&A"
      }
    }
  },
  "common_failure_modes": {
    "vanity_north_star": "Chose a metric that looks good but doesn't reflect value (total registered users, app downloads). Fix: select a metric tied to usage and the business model.",
    "incomplete_decomposition": "Input metrics don't fully explain the North Star; key drivers are missing. Fix: validate that inputs sum/multiply to the North Star mathematically.",
    "correlation_not_causation": "Assumed causation without validation; metrics move together but one doesn't cause the other. Fix: run experiments or use causal inference methods.",
    "not_actionable": "Metrics are outcomes without clear actions, so teams don't know what to do. Fix: add action metrics (L3) as specific user behaviors.",
    "no_leading_indicators": "Only lagging metrics that react slowly, so proactive decisions are impossible. Fix: find early signals through cohort analysis or propensity modeling.",
    "ignoring_tradeoffs": "Optimizing one metric hurts another and no guardrails are set. Fix: add counter-metrics with minimum thresholds.",
    "gaming_risk": "The metric can be easily gamed without delivering real value. Fix: add quality signals and combination metrics.",
    "no_prioritization": "Too many metrics to focus on and no clear experiments. Fix: use the ICE/RICE framework to rank the top 1-3 experiments."
  },
  "excellence_indicators": [
    "North Star clearly captures value delivered to customers and predicts business success, with explicit rationale",
    "Decomposition is provably MECE (mutually exclusive, collectively exhaustive) with a mathematical formula",
    "Causal relationships validated through experiments or strong observational data, with effect sizes quantified",
    "Each input metric has 3-5 specific action metrics (observable user behaviors) with measurement defined",
    "2-4 leading indicators identified with proven predictive power (r > 0.7) and clear timing",
    "Top 1-3 experiments prioritized using data-informed ICE/RICE scores with quantified hypotheses",
    "Counter-metrics and guardrails defined for major risks (quality, gaming, ecosystem health), with thresholds",
    "Assumptions documented, data gaps identified, review cadence specified",
    "Metrics tree diagram clearly shows relationships and hierarchy",
    "Decision-ready artifact that teams can immediately use for alignment and experimentation"
  ],
  "evaluation_notes": {
    "scoring": "Calculate the weighted average across all criteria. Minimum passing score: 3.0 (basic quality). Production-ready target: 3.5+. Excellence threshold: 4.2+.",
    "context": "Adjust expectations based on business stage, data availability, and complexity. Early-stage trees with limited data may score 3.0-3.5 and be acceptable. Growth-stage teams with resources should target 4.0+.",
    "iteration": "Low scores indicate specific improvement areas. Prioritize fixing North Star selection and causal clarity first (highest weights), then improve actionability and prioritization. Revalidate after changes."
  }
}
474 skills/metrics-tree/resources/methodology.md Normal file
@@ -0,0 +1,474 @@
# Metrics Tree Methodology

**When to use this methodology:** You've used [template.md](template.md) and need advanced techniques for:

- Multi-sided marketplaces or platforms
- Complex metric interdependencies and feedback loops
- Counter-metrics and guardrail systems
- Network effects and viral growth
- Preventing metric gaming
- Seasonal adjustment and cohort aging effects
- A portfolio approach for different business stages

**If your metrics tree is straightforward:** Use [template.md](template.md) directly. This methodology is for complex metric systems.

---

## Table of Contents

1. [Multi-Sided Marketplace Metrics](#1-multi-sided-marketplace-metrics)
2. [Counter-Metrics & Guardrails](#2-counter-metrics--guardrails)
3. [Network Effects & Viral Loops](#3-network-effects--viral-loops)
4. [Preventing Metric Gaming](#4-preventing-metric-gaming)
5. [Advanced Leading Indicators](#5-advanced-leading-indicators)
6. [Metric Interdependencies](#6-metric-interdependencies)
7. [Business Stage Metrics](#7-business-stage-metrics)

---

## 1. Multi-Sided Marketplace Metrics

### Challenge

Marketplaces have a supply side and a demand side that must stay balanced. Optimizing one side can hurt the other.

### Solution: Dual Tree Approach

**Step 1: Identify the constraint**

- **Supply-constrained**: More demand than supply → focus on supply-side metrics
- **Demand-constrained**: More supply than demand → focus on demand-side metrics
- **Balanced**: Need both → monitor ratio/balance metrics

**Step 2: Create separate trees**

**Supply-Side Tree:**
```
North Star: Active Suppliers (providing inventory)
├─ New supplier activation
├─ Retained suppliers (ongoing activity)
└─ Supplier quality/performance
```

**Demand-Side Tree:**
```
North Star: Successful Transactions
├─ New customer acquisition
├─ Repeat customer rate
└─ Customer satisfaction
```

**Step 3: Define balance metrics**

- **Liquidity ratio**: Supply utilization rate (% of inventory sold)
- **Match rate**: % of searches resulting in a transaction
- **Wait time**: Time from demand signal to fulfillment

**Example (Uber):**

- Supply NS: Active drivers with >10 hours/week
- Demand NS: Completed rides
- Balance metrics: Average wait time <5 minutes, driver utilization >60%

### Multi-Sided Decomposition Template

```
Marketplace GMV = Transactions × Average Transaction Value

Where the transaction count can be read from either side:
- Supply view:  Transactions = Supply × Utilization
- Demand view:  Transactions = Demand × Conversion

And:
- Supply: Available inventory/capacity
- Utilization: % of supply that gets used
- Demand: Potential buyers/searches
- Conversion: % of demand that transacts
- Average Transaction Value: $ per transaction
```
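
To make the template concrete, here is a minimal Python sketch (illustrative numbers and variable names, not a prescribed implementation) that reconciles the supply and demand views of GMV and computes the Step 3 balance metrics; a large gap between the two views usually signals an instrumentation or liquidity problem:

```
# Minimal sketch: GMV from both sides of a marketplace (illustrative numbers).

supply = 12_000          # available inventory units this month
utilization = 0.55       # share of supply that gets used
demand = 40_000          # searches / potential buyers this month
conversion = 0.16        # share of demand that transacts
avg_transaction = 85.0   # $ per transaction

tx_supply_view = supply * utilization   # transactions implied by the supply side
tx_demand_view = demand * conversion    # transactions implied by the demand side

# Both views describe the same transaction count; a large gap means the
# instrumentation or the liquidity assumptions are off.
gap = abs(tx_supply_view - tx_demand_view) / max(tx_supply_view, tx_demand_view)

gmv = tx_demand_view * avg_transaction
liquidity_ratio = tx_supply_view / supply   # = utilization
match_rate = tx_demand_view / demand        # = conversion

print(f"GMV ≈ ${gmv:,.0f}, view gap {gap:.1%}, "
      f"liquidity {liquidity_ratio:.0%}, match rate {match_rate:.0%}")
```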

---

## 2. Counter-Metrics & Guardrails

### Problem

Optimizing primary metrics can create negative externalities (quality drops, trust declines, user experience suffers).

### Solution: Balanced Scorecard with Guardrails

**Framework:**

1. **Primary metric** (North Star): What you're optimizing
2. **Counter-metrics**: What could be harmed
3. **Guardrail thresholds**: Minimum acceptable levels

**Example (Content Platform):**
```
Primary: Content Views (maximize)

Counter-metrics with guardrails:
- Content quality score: Must stay ≥7/10 (current: 7.8)
- User satisfaction (NPS): Must stay ≥40 (current: 52)
- Creator retention: Must stay ≥70% (current: 75%)
- Time to harmful content takedown: Must be ≤2 hours (current: 1.5h)

Rule: If any guardrail is breached, pause optimization of the primary metric
```

### Common Counter-Metric Patterns

| Primary Metric | Potential Harm | Counter-Metric |
|----------------|----------------|----------------|
| Pageviews | Clickbait, low quality | Time on page, bounce rate |
| Engagement time | Addictive dark patterns | User-reported wellbeing, voluntary sessions |
| Viral growth | Spam | Unsubscribe rate, report rate |
| Conversion rate | Aggressive upsells | Customer satisfaction, refund rate |
| Speed to market | Technical debt | Bug rate, system reliability |

### How to Set Guardrails

1. **Historical baseline**: Look at the metric over the past 6-12 months and set the floor at the 10th percentile (see the sketch below)
2. **Competitive benchmark**: Set the floor at the industry average
3. **User feedback**: Survey users on the acceptable minimum
4. **Regulatory**: Compliance thresholds
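
A minimal sketch of method 1, assuming roughly three months of weekly history; the metric, the numbers, and the breach-rule wiring are illustrative:

```
# Minimal sketch: guardrail floor = 10th percentile of the historical baseline.
import statistics

weekly_nps = [48, 52, 50, 47, 55, 51, 49, 53, 46, 54, 50, 52]  # ~3 months

# First decile cut point ≈ the 10th percentile of the baseline.
floor = statistics.quantiles(weekly_nps, n=10)[0]

def check_guardrail(current: float, floor: float) -> str:
    # Rule from the framework above: a breach pauses primary-metric optimization.
    if current >= floor:
        return "OK: keep optimizing the primary metric"
    return "BREACH: pause primary-metric optimization"

print(f"Guardrail floor (P10): {floor:.1f}")
print(check_guardrail(current=45, floor=floor))
```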

---

## 3. Network Effects & Viral Loops

### Measuring Network Effects

**Network effect**: Product value increases as more users join.

**Metrics to track:**

- **Network density**: Connections per user (higher = stronger network)
- **Cross-side interactions**: User A's action benefits User B
- **Viral coefficient (K)**: New users generated per existing user
  - K > 1: Exponential growth (viral)
  - K < 1: Sub-viral (need paid acquisition)

**Decomposition:**
```
New Users = Existing Users × Invitation Rate × Invitation Acceptance Rate

Example:
100,000 users × 2 invites/user × 50% accept = 100,000 new users (K = 1.0)
```

### Viral Loop Metrics Tree

**North Star:** Viral Coefficient (K)

**Decomposition:**
```
K = (Invitations Sent / User) × (Acceptance Rate) × (Activation Rate)

Input metrics:
├─ Invitations per user
│   ├─ % of users who send ≥1 invite
│   ├─ Average invites per sender
│   └─ Invitation prompts shown
├─ Invite acceptance rate
│   ├─ Invite message quality
│   ├─ Social proof (sender credibility)
│   └─ Landing page conversion
└─ New user activation rate
    ├─ Onboarding completion
    ├─ Value realization time
    └─ Early engagement actions
```
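
A minimal sketch, with illustrative numbers, of computing K from the three inputs above and projecting a few invite cycles; it shows why K < 1 converges rather than compounds:

```
# Minimal sketch: viral coefficient K and a few invite-cycle projections.

invites_per_user = 2.0    # invitations sent per user
acceptance_rate = 0.50    # share of invites accepted
activation_rate = 0.60    # share of accepted invitees who activate

k = invites_per_user * acceptance_rate * activation_rate   # 0.6 here

total, cohort = 100_000, 100_000
for cycle in range(1, 4):
    cohort = cohort * k    # each new cohort recruits the next
    total += cohort
    print(f"cycle {cycle}: +{cohort:,.0f} users, total {total:,.0f}")

# With K < 1 each cohort shrinks, so the total converges (here toward 250,000)
# instead of compounding: the sub-viral case that needs paid acquisition.
```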

### Network Density Metrics

**Measure connectedness:**

- Average connections per user
- % of users with ≥N connections
- Clustering coefficient (friends-of-friends)
- Active daily/weekly connections

**Threshold effects:**

- Users with 7+ friends have 10x retention (identify critical mass)
- Teams with 10+ members have 5x engagement (team size threshold)

---

## 4. Preventing Metric Gaming

### Problem

Teams optimize for the letter of the metric, not the spirit, creating perverse incentives.

### Gaming Detection Framework

**Step 1: Anticipate gaming**

For each metric, ask: "How could someone game this?"

**Example metric: Time on site**

- Gaming: Auto-play videos, infinite scroll, fake engagement
- Intent: User finds value and willingly spends time

**Step 2: Add quality signals**

Distinguish genuine value from gaming:

```
Time on site (primary)
+ Quality signals (guards against gaming):
  - Active engagement (clicks, scrolls, interactions) vs passive
  - Return visits (indicates genuine interest)
  - Completion rate (finished content vs bounced)
  - User satisfaction rating
  - Organic shares (not prompted)
```

**Step 3: Test for gaming**

- Spot check: Sample user sessions and review for patterns
- Anomaly detection: Flag outliers (10x normal behavior)
- User feedback: "Was this session valuable to you?"

### Gaming Prevention Patterns

**Pattern 1: Combination metrics**

Don't measure a single metric; require multiple signals:

```
❌ Single: Pageviews
✓ Combined: Pageviews + Time on page >30s + Low bounce rate
```
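
A minimal sketch of Pattern 1 with hypothetical event fields (`time_on_page_s`, `bounced`): only views passing the quality signals count toward the combined metric:

```
# Minimal sketch: quality-gated pageviews (field names are hypothetical).

def counts_as_quality_view(view: dict) -> bool:
    return (
        view["time_on_page_s"] > 30   # engaged, not a drive-by
        and not view["bounced"]       # reader continued into the site
    )

views = [
    {"time_on_page_s": 4,  "bounced": True},    # clickbait-style hit
    {"time_on_page_s": 95, "bounced": False},   # genuine read
    {"time_on_page_s": 41, "bounced": False},   # genuine read
]

raw_pageviews = len(views)
quality_pageviews = sum(counts_as_quality_view(v) for v in views)
print(f"raw: {raw_pageviews}, quality-gated: {quality_pageviews}")  # 3 vs 2
```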

**Pattern 2: User-reported value**

Add a subjective quality check:

```
Primary: Feature adoption rate
+ Counter: "Did this feature help you?" (must be >80% yes)
```

**Pattern 3: Long-term outcome binding**

Tie short-term to long-term:

```
Primary: New user signups
+ Bound to: 30-day retention (signups only count if the user is retained)
```

**Pattern 4: Peer comparison**

Normalize by cohort or segment:

```
Primary: Sales closed
+ Normalized: Sales closed / Sales qualified leads (prevents cherry-picking easy wins)
```

---

## 5. Advanced Leading Indicators

### Technique 1: Propensity Scoring

**Predict future behavior from early signals.**

**Method:**

1. Collect historical data: new users plus their 30-day outcomes
2. Identify features: day 1 behaviors (actions, time spent, features used)
3. Build a model: logistic regression or a decision tree predicting 30-day retention
4. Score new users: probability of retention based on day 1 behavior
5. Threshold: users with a >70% propensity score are "likely retained"

**Example (SaaS):**
```
30-day retention model (R² = 0.78):
Retention = 0.1 + 0.35×(invited teammate) + 0.25×(completed 3 workflows) + 0.20×(time in app >20min)

Leading indicator: % of users with propensity score >0.7
Current: 45% → Target: 60% (predicts a 15% retention increase)
```
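
A minimal sketch of the method on synthetic data, assuming scikit-learn is available; the binary features mirror the example model above, and the generating coefficients are arbitrary:

```
# Minimal propensity-scoring sketch (Technique 1) on synthetic data.
import numpy as np
from sklearn.linear_model import LogisticRegression

rng = np.random.default_rng(0)
n = 1_000

# Day-1 behaviors: invited a teammate, completed 3 workflows, >20 min in app.
X = rng.integers(0, 2, size=(n, 3)).astype(float)
# Synthetic "true" retention process, loosely echoing the weights above.
logit = -2.0 + 1.6 * X[:, 0] + 1.2 * X[:, 1] + 0.9 * X[:, 2]
y = (rng.random(n) < 1 / (1 + np.exp(-logit))).astype(int)  # 30-day retained?

model = LogisticRegression().fit(X, y)
scores = model.predict_proba(X)[:, 1]        # propensity to retain

likely_retained = (scores > 0.70).mean()     # the leading indicator itself
print(f"% of users with propensity > 0.7: {likely_retained:.0%}")
```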

### Technique 2: Cohort Behavior Clustering

**Find archetypes that predict outcomes.**

**Method:**

1. Segment users by first-week behavior patterns
2. Measure long-term outcomes per segment
3. Identify high-value archetypes

**Example:**
```
Archetypes (first week):
- "Power user": 5+ days active, 20+ actions → 85% retain
- "Social": Invites 2+ people, comments 3+ times → 75% retain
- "Explorer": Views 10+ pages, few actions → 40% retain
- "Passive": <3 days active, <5 actions → 15% retain

Leading indicator: % of new users becoming "Power" or "Social" archetypes
Target: Move 30% → 45% into high-value archetypes
```

### Technique 3: Inflection Point Analysis

**Find tipping points where behavior changes sharply.**

**Method:**

1. Plot the outcome (retention) against the candidate metric (actions taken)
2. Find where the curve steepens (the inflection point)
3. Set that as the leading indicator threshold

**Example:**
```
Retention by messages sent (first week):
- 0-2 messages: 20% retention (slow growth)
- 3-9 messages: 45% retention (moderate growth)
- 10+ messages: 80% retention (sharp jump)

Inflection point: 10 messages
Leading indicator: % of users hitting 10+ messages in their first week
```
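
A minimal sketch of the bucketing step on synthetic data; the bucket boundaries mirror the example above:

```
# Minimal sketch: bucket users by first-week messages and compare retention.

users = [  # (messages_sent_first_week, retained_day_30) -- synthetic data
    (0, False), (1, False), (2, True), (4, False), (5, True),
    (7, True), (9, False), (11, True), (14, True), (25, True),
]

buckets = {"0-2": [], "3-9": [], "10+": []}
for msgs, retained in users:
    key = "0-2" if msgs <= 2 else "3-9" if msgs <= 9 else "10+"
    buckets[key].append(retained)

for key, outcomes in buckets.items():
    rate = sum(outcomes) / len(outcomes)
    print(f"{key:>4} messages: {rate:.0%} retention ({len(outcomes)} users)")

# The bucket where retention jumps sharply (here 10+) is the threshold
# to adopt as the leading indicator.
```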

---

## 6. Metric Interdependencies

### Problem

Metrics aren't independent; changing one affects others in complex ways.

### Solution: Causal Diagram

**Step 1: Map relationships**

Draw arrows showing how metrics affect each other:

```
[Acquisition] → [Active Users] → [Engagement] → [Retention]
      ↓                                             ↑
[Activation] ---------------------------------------┘
```

**Step 2: Identify feedback loops**

- **Positive loop** (reinforcing): A → B → A (exponential)
  Example: More users → more network value → more users
- **Negative loop** (balancing): A → B → ¬A (equilibrium)
  Example: More supply → lower prices → less supply

**Step 3: Predict second-order effects**

If you increase metric X by 10%:

- Direct effect: Y increases 5%
- Indirect effect: Y affects Z, which feeds back to X
- Net effect: May be amplified or dampened

**Example (Marketplace):**
```
Increase driver supply +10%:
→ Wait time decreases -15%
→ Rider satisfaction increases +8%
→ Rider demand increases +5%
→ Driver earnings decrease -3% (more competition)
→ Driver churn increases +2%
→ Net driver supply increase: +10% - 2% = +8%
```

### Modeling Tradeoffs

**Technique: Regression or experiments**

```
Run an A/B test increasing X
Measure all related metrics
Calculate elasticities:
- If X increases 1%, Y changes by [elasticity]%
Build a tradeoff matrix
```

**Tradeoff Matrix Example:**

| If increased by 10% | Acquisition | Activation | Retention | Revenue |
|---------------------|-------------|------------|-----------|---------|
| **Acquisition** | +10% | -2% | -1% | +6% |
| **Activation** | 0% | +10% | +5% | +12% |
| **Retention** | 0% | +3% | +10% | +15% |

**Interpretation:** Investing in retention has the best ROI (15% revenue lift vs 6% from acquisition).
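
A minimal sketch that encodes the matrix above and linearly scales the +10% column; the linear scaling is an assumption that only holds for small changes near the current operating point:

```
# Minimal sketch: apply the tradeoff (elasticity) matrix above.
# elasticity[lever][affected] = % change in `affected` per +10% in `lever`.

elasticity = {
    "acquisition": {"activation": -2, "retention": -1, "revenue": 6},
    "activation":  {"activation": 10, "retention": 5,  "revenue": 12},
    "retention":   {"activation": 3,  "retention": 10, "revenue": 15},
}

def revenue_impact(lever: str, pct_increase: float) -> float:
    # Linearly scale the +10% column; valid only for small changes.
    return elasticity[lever]["revenue"] * (pct_increase / 10)

for lever in elasticity:
    print(f"+10% {lever}: {revenue_impact(lever, 10):+.0f}% revenue")
# Retention shows the largest revenue lift, matching the interpretation above.
```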

---

## 7. Business Stage Metrics

### Problem

Optimal metrics change as the business matures. Early-stage metrics differ from growth- or maturity-stage metrics.

### Stage-Specific North Stars

**Pre-Product/Market Fit (PMF):**

- **Focus**: Finding PMF, not scaling
- **North Star**: Retention (evidence of value)
- **Key metrics**:
  - Week 1 → Week 2 retention (>40% = promising)
  - NPS or "very disappointed" survey (>40% = good signal)
  - Organic usage frequency (weekly+ = habit-forming)

**Post-PMF, Pre-Scale:**

- **Focus**: Unit economics and growth
- **North Star**: New activated users per week (acquisition + activation)
- **Key metrics**:
  - LTV/CAC ratio (target >3:1)
  - Payback period (target <12 months)
  - Month-over-month growth rate (target >10%)

**Growth Stage:**

- **Focus**: Efficient scaling
- **North Star**: Revenue or gross profit
- **Key metrics**:
  - Net revenue retention (target >100%)
  - Magic number (ARR growth / S&M spend, target >0.75)
  - Burn multiple (cash burned / ARR added, target <1.5)

**Maturity Stage:**

- **Focus**: Profitability and market share
- **North Star**: Free cash flow or EBITDA
- **Key metrics**:
  - Operating margin (target >20%)
  - Market share / competitive position
  - Customer lifetime value

### Transition Triggers

**When to change the North Star:**

```
PMF → Growth: When retention >40%, NPS >40, organic growth observed
Growth → Maturity: When growth rate <20% for 2+ quarters, market share >30%
```
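
A minimal sketch of these triggers as explicit checks; the thresholds come from the rules above, and treating the listed conditions as jointly required is an assumption:

```
# Minimal sketch: the transition triggers above as explicit checks.

def ready_for_growth(w_retention: float, nps: float, organic_growth: bool) -> bool:
    return w_retention > 0.40 and nps > 40 and organic_growth

def ready_for_maturity(quarterly_growth: list[float], market_share: float) -> bool:
    slow_quarters = sum(1 for g in quarterly_growth[-2:] if g < 0.20)
    return slow_quarters >= 2 and market_share > 0.30

print(ready_for_growth(w_retention=0.45, nps=48, organic_growth=True))  # True
print(ready_for_maturity(quarterly_growth=[0.25, 0.18, 0.15],
                         market_share=0.32))                            # True
```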

**Migration approach:**

1. Track both the old and new North Star for 2 quarters
2. Align teams on the new metric
3. Deprecate the old metric
4. Update dashboards and incentives

---

## Quick Decision Trees

### "Should I use counter-metrics?"

```
Is the primary metric easy to game, or does it carry quality risk?
├─ YES → Add counter-metrics with guardrails
└─ NO → Is the metric clearly aligned with user value?
        ├─ YES → Primary metric sufficient, monitor only
        └─ NO → Redesign the metric to better capture value
```

### "Do I have network effects?"

```
Does value increase as more users join?
├─ YES → Track network density and K-factor, measure at different scales
└─ NO → Does one user's action benefit others?
        ├─ YES → Measure cross-user interactions, content creation/consumption
        └─ NO → Standard metrics tree (no network effects)
```

### "Should I segment my metrics tree?"

```
Do different user segments have different behavior patterns?
├─ YES → Do segments have different value to the business?
│        ├─ YES → Create separate trees per segment, track segment mix
│        └─ NO → Single tree, annotated with segment breakdowns
└─ NO → Are there supply/demand sides?
        ├─ YES → Dual trees (Section 1)
        └─ NO → Single unified tree
```

---

## Summary: Advanced Technique Selector

| Scenario | Use This Technique | Section |
|----------|-------------------|---------|
| **Multi-sided marketplace** | Dual tree + balance metrics | 1 |
| **Risk of negative externalities** | Counter-metrics + guardrails | 2 |
| **Viral or network product** | K-factor + network density | 3 |
| **Metric gaming risk** | Quality signals + combination metrics | 4 |
| **Need better prediction** | Propensity scoring + archetypes | 5 |
| **Complex interdependencies** | Causal diagram + elasticities | 6 |
| **Changing business stage** | Stage-appropriate North Star | 7 |
493 skills/metrics-tree/resources/template.md Normal file
@@ -0,0 +1,493 @@
# Metrics Tree Template

## How to Use This Template

Follow this structure to create a metrics tree for your product or business:

1. Start with the North Star metric definition
2. Apply the appropriate decomposition method
3. Map action metrics for each input
4. Identify leading indicators
5. Prioritize experiments using the ICE framework
6. Output to `metrics-tree.md`

---

## Part 1: North Star Metric

### Define Your North Star

**North Star Metric:** [Name of metric]

**Definition:** [Precise definition including time window]
Example: "Number of unique users who complete at least one transaction per week"

**Rationale:** [Why this metric?]

- ✓ Captures value delivered to customers: [how]
- ✓ Reflects the business model: [revenue connection]
- ✓ Measurable and trackable: [data source]
- ✓ Actionable by teams: [who can influence it]

**Current Value:** [Number] as of [Date]

**Target:** [Goal] by [Date]

### North Star Selection Checklist

- [ ] **Customer value**: Does it measure value delivered to customers?
- [ ] **Business correlation**: Does it predict revenue/business success?
- [ ] **Actionable**: Can teams influence it through their work?
- [ ] **Measurable**: Do we have reliable data?
- [ ] **Not vanity**: Does it reflect actual usage/value, not just interest?
- [ ] **Time-bounded**: Does it have a clear time window (daily/weekly/monthly)?

---

## Part 2: Metric Decomposition

Choose the decomposition method that best fits your North Star:

### Method 1: Additive Decomposition

**Use when:** The North Star is a sum of independent segments

**Formula:**
```
North Star = Component A + Component B + Component C + ...
```

**Template:**
```
[North Star] =
  + [New users/customers]
  + [Retained users/customers]
  + [Resurrected users/customers]
  + [Other segment]
```

**Example (SaaS WAU):**
```
Weekly Active Users =
  + New activated users this week (30%)
  + Retained from previous week (60%)
  + Resurrected (inactive → active) (10%)
```
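
A minimal sketch, using the numbers from this example, of the MECE validation check from Part 3: the components should sum to the North Star within a small tolerance, or a driver is missing or double-counted:

```
# Minimal sketch: check that an additive decomposition sums to the North Star.

north_star_wau = 10_000
components = {
    "new_activated": 3_000,
    "retained": 6_000,
    "resurrected": 1_000,
}

total = sum(components.values())
gap = (total - north_star_wau) / north_star_wau

assert abs(gap) < 0.02, f"decomposition leaks {gap:.1%}: overlap or missing driver"
for name, value in components.items():
    print(f"{name}: {value:,} ({value / north_star_wau:.0%} of WAU)")
```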

### Method 2: Multiplicative Decomposition

**Use when:** The North Star is a product of rates/factors

**Formula:**
```
North Star = Factor A × Factor B × Factor C × ...
```

**Template:**
```
[North Star] =
  [Total addressable users/visits]
  × [Conversion rate at step 1]
  × [Conversion rate at step 2]
  × [Value per conversion]
```

**Example (E-commerce Revenue):**
```
Monthly Revenue =
  Monthly site visitors
  × Purchase conversion rate (3%)
  × Average order value ($75)
```

### Method 3: Funnel Decomposition

**Use when:** The North Star is the end of a sequential conversion process

**Formula:**
```
North Star = Top of funnel → Step 1 → Step 2 → ... → Final conversion
```

**Template:**
```
[North Star] =
  [Total entries]
  × [Step 1 conversion %]
  × [Step 2 conversion %]
  × [Final conversion %]
```

**Example (Paid SaaS Customers):**
```
New paid customers/month =
  Free signups
  × Activation rate (complete onboarding) (40%)
  × Trial start rate (25%)
  × Trial→Paid conversion rate (20%)

Math: 1000 signups × 0.4 × 0.25 × 0.2 = 20 paid customers
```

### Method 4: Cohort Decomposition

**Use when:** Retention is the key driver and you need to separate acquisition from retention

**Formula:**
```
North Star = Σ (Cohort Size_t × Retention Rate_t,n) for all cohorts
```

**Template:**
```
[North Star today] =
  [Users from Month 0] × [Month 0 retention rate]
  + [Users from Month 1] × [Month 1 retention rate]
  + ...
  + [Users from Month N] × [Month N retention rate]
```

**Example (Subscription Service MAU):**
```
March Active Users =
  Jan signups (500) × Month 2 retention (50%) = 250
  + Feb signups (600) × Month 1 retention (70%) = 420
  + Mar signups (700) × Month 0 retention (100%) = 700
  = 1,370 MAU
```
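
A minimal sketch reproducing this worked example in code; the cohort table is the one above:

```
# Minimal sketch: MAU as the sum of each signup cohort times its
# age-specific retention (numbers mirror the worked example).

cohorts = [                # (month, signups, retention at current age)
    ("Jan", 500, 0.50),    # month-2 retention
    ("Feb", 600, 0.70),    # month-1 retention
    ("Mar", 700, 1.00),    # month-0 retention (just signed up)
]

mau = 0
for month, signups, retention in cohorts:
    active = signups * retention
    mau += active
    print(f"{month}: {signups} × {retention:.0%} = {active:.0f}")

print(f"March MAU = {mau:.0f}")   # 1370, matching the example
```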

---

## Part 3: Input Metrics (L2)

For each component in your decomposition, define an input metric:

### Input Metric Template

**Input Metric 1:** [Name]

- **Definition:** [Precise definition]
- **Current value:** [Number]
- **Target:** [Goal]
- **Owner:** [Team/person]
- **Relationship to North Star:** [How it affects the NS, with an estimated coefficient]
  Example: "Increasing activation rate by 10% → 5% increase in WAU"

**Input Metric 2:** [Name]
[Repeat for 3-5 input metrics]

### Validation Questions

- [ ] Are all input metrics **mutually exclusive**? (No double-counting)
- [ ] Do they **collectively exhaust** the North Star? (Nothing missing)
- [ ] Can each be **owned by a single team**?
- [ ] Is each **measurable** with existing/planned instrumentation?
- [ ] Are they all **at the same level of abstraction**?

---

## Part 4: Action Metrics (L3)

For each input metric, identify specific user behaviors that drive it:

### Action Metrics Template

**For Input Metric: [Name of L2 metric]**

**Action 1:** [Specific user behavior]

- **Measurement:** [How to track it]
- **Frequency:** [How often it happens]
- **Impact:** [Estimated effect on the input metric]
- **Current rate:** [% of users doing this]

**Action 2:** [Another behavior]
[Repeat for 3-5 actions per input]

**Example (for input metric "Retained Users"):**

**Action 1:** User completes core workflow

- Measurement: Track the "workflow_completed" event
- Frequency: 5x per week on average for active users
- Impact: Users with 3+ completions have 80% retention vs a 20% baseline
- Current rate: 45% of users complete the workflow at least once

**Action 2:** User invites teammate

- Measurement: "invite_sent" event paired with an "invite_accepted" event
- Frequency: 1.2 invites per user on average
- Impact: Users who invite have 90% retention vs a 40% baseline
- Current rate: 20% of users send at least one invite

---

## Part 5: Leading Indicators

Identify early signals that predict North Star movement:

### Leading Indicator Template

**Leading Indicator 1:** [Metric name]

- **Definition:** [What it measures]
- **Timing:** [How far in advance it predicts] Example: "Predicts week 4 retention"
- **Correlation:** [Strength of relationship] Example: "r = 0.75 with 30-day retention"
- **Actionability:** [How teams can move it]
- **Current value:** [Number]

**Example:**

**Leading Indicator: Day 1 Activation Rate**

- Definition: % of new users who complete 3 key actions on their first day
- Timing: Predicts 7-day and 30-day retention (measured on day 1, predicts weeks ahead)
- Correlation: r = 0.82 with 30-day retention. Users with Day 1 activation have 70% retention vs 15% without
- Actionability: Improve the onboarding flow, reduce time-to-value, send activation nudges
- Current value: 35%

### How to Find Leading Indicators

**Method 1: Cohort analysis**

- Segment users by early behavior (first day, first week)
- Measure long-term outcomes (retention, LTV)
- Find behaviors that predict positive outcomes

**Method 2: Correlation analysis**

- List all early-funnel metrics
- Calculate the correlation with the North Star or key inputs
- Select metrics with r > 0.6 that are also actionable (see the sketch below)
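
A minimal sketch of Method 2 on synthetic data, assuming numpy is available; the candidate metric names are hypothetical, and the data is constructed so that two of the three correlate with the North Star:

```
# Minimal sketch: rank candidate metrics by correlation with the North Star.
import numpy as np

rng = np.random.default_rng(1)
n = 500
north_star = rng.normal(size=n)

candidates = {
    "day1_activation": north_star * 0.8 + rng.normal(scale=0.5, size=n),
    "invites_sent":    north_star * 0.6 + rng.normal(scale=0.6, size=n),
    "pageviews":       rng.normal(size=n),   # deliberately uncorrelated
}

for name, series in candidates.items():
    r = np.corrcoef(series, north_star)[0, 1]
    verdict = "keep" if r > 0.6 else "drop"
    print(f"{name:>16}: r = {r:+.2f} → {verdict}")

# Metrics passing r > 0.6 still need the actionability check before adoption.
```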

**Method 3: High-performer analysis**

- Identify users in the top 20% for the North Star metric
- Look at their first week/month behavior
- Find patterns that distinguish them from average users

---

## Part 6: Experiment Prioritization

Use the ICE framework to prioritize which metrics to improve:

### ICE Scoring Template

**Impact (1-10):** How much will improving this metric affect the North Star?

- 10 = Direct, large effect (e.g., 10% improvement → 8% NS increase)
- 5 = Moderate effect (e.g., 10% improvement → 3% NS increase)
- 1 = Small effect (e.g., 10% improvement → 0.5% NS increase)

**Confidence (1-10):** How certain are we about the relationship?

- 10 = Proven causal relationship with data
- 5 = Correlated, plausible causation
- 1 = Hypothesis, no data yet

**Ease (1-10):** How easy is it to move this metric?

- 10 = Simple change, 1-2 weeks
- 5 = Moderate effort, 1-2 months
- 1 = Major project, 6+ months

**ICE Score = (Impact + Confidence + Ease) / 3**
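
A minimal sketch of the scoring and ranking; the experiment names and scores are placeholders:

```
# Minimal sketch: score and rank experiments with the ICE formula above.

experiments = [
    {"name": "Improve onboarding flow",    "impact": 8, "confidence": 7, "ease": 6},
    {"name": "Reactivation email series",  "impact": 5, "confidence": 6, "ease": 9},
    {"name": "Team-invite prompts",        "impact": 7, "confidence": 4, "ease": 5},
]

for e in experiments:
    e["ice"] = (e["impact"] + e["confidence"] + e["ease"]) / 3

ranked = sorted(experiments, key=lambda e: e["ice"], reverse=True)
for rank, e in enumerate(ranked, 1):
    print(f"{rank}. {e['name']}: ICE = {e['ice']:.1f}")
```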

### Prioritization Table

| Metric/Experiment | Impact | Confidence | Ease | ICE Score | Rank |
|-------------------|--------|------------|------|-----------|------|
| [Experiment 1] | [1-10] | [1-10] | [1-10] | [Avg] | [#] |
| [Experiment 2] | [1-10] | [1-10] | [1-10] | [Avg] | [#] |
| [Experiment 3] | [1-10] | [1-10] | [1-10] | [Avg] | [#] |

### Top 3 Experiments

**Experiment 1:** [Name - highest ICE score]

- **Hypothesis:** [What we believe will happen]
- **Metric to move:** [Target metric]
- **Expected impact:** [Quantified prediction]
- **Timeline:** [Duration]
- **Success criteria:** [How we'll know it worked]

**Experiment 2:** [Second highest]
[Repeat structure]

**Experiment 3:** [Third highest]
[Repeat structure]

---

## Part 7: Metric Relationships Diagram

Create a visual representation of your metrics tree:

### ASCII Tree Format

```
North Star: [Metric Name] = [Current Value]
│
├─ Input Metric 1: [Name] = [Value]
│   ├─ Action 1.1: [Behavior] = [Rate]
│   ├─ Action 1.2: [Behavior] = [Rate]
│   └─ Action 1.3: [Behavior] = [Rate]
│
├─ Input Metric 2: [Name] = [Value]
│   ├─ Action 2.1: [Behavior] = [Rate]
│   ├─ Action 2.2: [Behavior] = [Rate]
│   └─ Action 2.3: [Behavior] = [Rate]
│
└─ Input Metric 3: [Name] = [Value]
    ├─ Action 3.1: [Behavior] = [Rate]
    ├─ Action 3.2: [Behavior] = [Rate]
    └─ Action 3.3: [Behavior] = [Rate]

Leading Indicators:
→ [Indicator 1]: Predicts [what] by [timing]
→ [Indicator 2]: Predicts [what] by [timing]
```

### Example (Complete Tree)

```
North Star: Weekly Active Users = 10,000
│
├─ New Activated Users = 3,000/week (30%)
│   ├─ Complete onboarding: 40% of signups
│   ├─ Connect data source: 25% of signups
│   └─ Invite teammate: 20% of signups
│
├─ Retained Users = 6,000/week (60%)
│   ├─ Use core feature 3+ times: 45% of users
│   ├─ Create content: 30% of users
│   └─ Engage with team: 25% of users
│
└─ Resurrected Users = 1,000/week (10%)
    ├─ Receive reactivation email: 50% open rate
    ├─ See new feature announcement: 30% click rate
    └─ Get @mentioned by teammate: 40% return rate

Leading Indicators:
→ Day 1 activation rate (35%): Predicts 30-day retention
→ 3 key actions in first session (22%): Predicts weekly usage
```

---

## Output Format

Create `metrics-tree.md` with this structure:

```markdown
# Metrics Tree: [Product/Business Name]

**Date:** [YYYY-MM-DD]
**Owner:** [Team/Person]
**Review Frequency:** [Weekly/Monthly]

## North Star Metric

**Metric:** [Name]
**Current:** [Value] as of [Date]
**Target:** [Goal] by [Date]
**Rationale:** [Why this metric]

## Decomposition Method

[Additive/Multiplicative/Funnel/Cohort]

**Formula:**
[Mathematical relationship]

## Input Metrics (L2)

### 1. [Input Metric Name]
- **Current:** [Value]
- **Target:** [Goal]
- **Owner:** [Team]
- **Impact:** [Effect on NS]

#### Actions (L3):
1. [Action 1]: [Current rate]
2. [Action 2]: [Current rate]
3. [Action 3]: [Current rate]

[Repeat for all input metrics]

## Leading Indicators

1. **[Indicator 1]:** [Definition]
   - Timing: [When it predicts]
   - Correlation: [Strength]
   - Current: [Value]

2. **[Indicator 2]:** [Definition]
   [Repeat structure]

## Prioritized Experiments

### Experiment 1: [Name] (ICE: [Score])
- **Hypothesis:** [What we believe]
- **Metric:** [Target]
- **Expected Impact:** [Quantified]
- **Timeline:** [Duration]
- **Success Criteria:** [Threshold]

[Repeat for top 3 experiments]

## Metrics Tree Diagram

[Include ASCII or visual diagram]

## Notes

- [Assumptions made]
- [Data gaps or limitations]
- [Next review date]
```

---

## Quick Examples by Business Model

### SaaS Example (Slack-style)

**North Star:** Teams sending 100+ messages per week

**Decomposition (Additive):**
```
Active Teams = New Active Teams + Retained Active Teams + Resurrected Teams
```

**Input Metrics:**

- New active teams: Complete onboarding + hit 100 messages in week 1
- Retained active teams: Hit 100 messages this week and last week
- Resurrected teams: Hit 100 messages this week but not in the last 4 weeks

**Leading Indicators:**

- 10 members invited on the first day (predicts team activation)
- 50 messages sent in the first week (predicts long-term retention)

### E-commerce Example

**North Star:** Monthly Revenue

**Decomposition (Multiplicative):**
```
Revenue = Visitors × Purchase Rate × Average Order Value
```

**Input Metrics:**

- Monthly unique visitors (owned by Marketing)
- Purchase conversion rate (owned by Product)
- Average order value (owned by Merchandising)

**Leading Indicators:**

- Add-to-cart rate (predicts purchase)
- Product page views per session (predicts purchase intent)

### Marketplace Example (Airbnb-style)

**North Star:** Nights Booked

**Decomposition (Multi-sided, one tree per side as in the methodology):**
```
Supply view: Nights Booked = Active Listings × Availability Rate × Occupancy Rate
Demand view: Nights Booked = Searches × Booking Rate × Average Nights per Booking
```

**Input Metrics:**

- Active host supply: Listings with ≥1 available night
- Guest demand: Unique searches
- Match rate: Searches resulting in a booking

**Leading Indicators:**

- Host completes first listing (predicts long-term hosting)
- Guest saves listings (predicts future booking)