From 2e8d89fca39cdc7196b491f8e47c92fc8a1fff76 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 08:47:43 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 18 + README.md | 3 + agents/api/api-reviewer.md | 52 + agents/app/app-reviewer.md | 62 ++ commands/create-pr.md | 328 +++++++ commands/encode-policy.md | 303 ++++++ commands/fix-pr.md | 333 +++++++ commands/review-pr.md | 420 ++++++++ plugin.lock.json | 193 ++++ .../policyengine-analysis-skill/SKILL.md | 569 +++++++++++ .../examples/reform_template.py | 178 ++++ skills/data-science/l0-skill/SKILL.md | 277 ++++++ .../microcalibrate-skill/SKILL.md | 404 ++++++++ skills/data-science/microdf-skill/SKILL.md | 329 +++++++ .../data-science/microimpute-skill/SKILL.md | 415 ++++++++ .../policyengine-design-skill/SKILL.md | 880 +++++++++++++++++ .../policyengine-standards-skill/SKILL.md | 768 +++++++++++++++ .../policyengine-user-guide-skill/SKILL.md | 295 ++++++ .../policyengine-writing-skill/SKILL.md | 526 ++++++++++ .../policyengine-uk-skill/SKILL.md | 660 +++++++++++++ .../examples/couple.yaml | 29 + .../examples/family_with_children.yaml | 41 + .../examples/single_person.yaml | 21 + .../examples/universal_credit_sweep.yaml | 38 + .../scripts/situation_helpers.py | 339 +++++++ .../policyengine-us-skill/SKILL.md | 524 ++++++++++ .../examples/donation_sweep.yaml | 71 ++ .../examples/single_filer.yaml | 38 + .../scripts/situation_helpers.py | 257 +++++ .../policyengine-aggregation-skill/SKILL.md | 329 +++++++ .../policyengine-code-style-skill/SKILL.md | 382 ++++++++ .../SKILL.md | 739 ++++++++++++++ .../SKILL.md | 440 +++++++++ .../SKILL.md | 478 ++++++++++ .../SKILL.md | 376 ++++++++ .../SKILL.md | 412 ++++++++ .../policyengine-vectorization-skill/SKILL.md | 303 ++++++ .../policyengine-api-skill/SKILL.md | 478 ++++++++++ .../policyengine-app-skill/SKILL.md | 900 ++++++++++++++++++ .../policyengine-core-skill/SKILL.md | 487 ++++++++++ .../policyengine-python-client-skill/SKILL.md | 356 
+++++++ 41 files changed, 14051 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 agents/api/api-reviewer.md create mode 100644 agents/app/app-reviewer.md create mode 100644 commands/create-pr.md create mode 100644 commands/encode-policy.md create mode 100644 commands/fix-pr.md create mode 100644 commands/review-pr.md create mode 100644 plugin.lock.json create mode 100644 skills/analysis/policyengine-analysis-skill/SKILL.md create mode 100644 skills/analysis/policyengine-analysis-skill/examples/reform_template.py create mode 100644 skills/data-science/l0-skill/SKILL.md create mode 100644 skills/data-science/microcalibrate-skill/SKILL.md create mode 100644 skills/data-science/microdf-skill/SKILL.md create mode 100644 skills/data-science/microimpute-skill/SKILL.md create mode 100644 skills/documentation/policyengine-design-skill/SKILL.md create mode 100644 skills/documentation/policyengine-standards-skill/SKILL.md create mode 100644 skills/documentation/policyengine-user-guide-skill/SKILL.md create mode 100644 skills/documentation/policyengine-writing-skill/SKILL.md create mode 100644 skills/domain-knowledge/policyengine-uk-skill/SKILL.md create mode 100644 skills/domain-knowledge/policyengine-uk-skill/examples/couple.yaml create mode 100644 skills/domain-knowledge/policyengine-uk-skill/examples/family_with_children.yaml create mode 100644 skills/domain-knowledge/policyengine-uk-skill/examples/single_person.yaml create mode 100644 skills/domain-knowledge/policyengine-uk-skill/examples/universal_credit_sweep.yaml create mode 100644 skills/domain-knowledge/policyengine-uk-skill/scripts/situation_helpers.py create mode 100644 skills/domain-knowledge/policyengine-us-skill/SKILL.md create mode 100644 skills/domain-knowledge/policyengine-us-skill/examples/donation_sweep.yaml create mode 100644 skills/domain-knowledge/policyengine-us-skill/examples/single_filer.yaml create mode 100644 
skills/domain-knowledge/policyengine-us-skill/scripts/situation_helpers.py create mode 100644 skills/technical-patterns/policyengine-aggregation-skill/SKILL.md create mode 100644 skills/technical-patterns/policyengine-code-style-skill/SKILL.md create mode 100644 skills/technical-patterns/policyengine-implementation-patterns-skill/SKILL.md create mode 100644 skills/technical-patterns/policyengine-parameter-patterns-skill/SKILL.md create mode 100644 skills/technical-patterns/policyengine-period-patterns-skill/SKILL.md create mode 100644 skills/technical-patterns/policyengine-review-patterns-skill/SKILL.md create mode 100644 skills/technical-patterns/policyengine-testing-patterns-skill/SKILL.md create mode 100644 skills/technical-patterns/policyengine-vectorization-skill/SKILL.md create mode 100644 skills/tools-and-apis/policyengine-api-skill/SKILL.md create mode 100644 skills/tools-and-apis/policyengine-app-skill/SKILL.md create mode 100644 skills/tools-and-apis/policyengine-core-skill/SKILL.md create mode 100644 skills/tools-and-apis/policyengine-python-client-skill/SKILL.md diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..08b8d3c --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,18 @@ +{ + "name": "complete", + "description": "Complete PolicyEngine knowledge base - all agents, commands, and skills for users, analysts, and contributors", + "version": "0.0.0-2025.11.28", + "author": { + "name": "PolicyEngine", + "email": "hello@policyengine.org" + }, + "skills": [ + "./skills" + ], + "agents": [ + "./agents" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6c37315 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# complete + +Complete PolicyEngine knowledge base - all agents, commands, and skills for users, analysts, and contributors diff --git a/agents/api/api-reviewer.md b/agents/api/api-reviewer.md new file mode 
100644 index 0000000..4f52b58 --- /dev/null +++ b/agents/api/api-reviewer.md @@ -0,0 +1,52 @@ +# API Reviewer Agent + +## Role +You are the API Reviewer Agent responsible for ensuring PolicyEngine API implementations follow best practices, are performant, secure, and properly tested. + +## Core Responsibilities + +### 1. Code Review +- Verify Flask best practices +- Check proper error handling and status codes +- Ensure proper input validation and sanitization +- Review database query optimization +- Check for proper caching strategies with Redis +- Verify API versioning practices + +### 2. Security Review +- Check for SQL injection vulnerabilities +- Verify authentication/authorization where needed +- Review CORS configuration +- Check for sensitive data exposure +- Ensure proper rate limiting + +### 3. Performance Review +- Check for N+1 query problems +- Verify efficient database indexing +- Review Redis caching implementation +- Check for proper pagination +- Review async/background job handling + +### 4. Testing Review +- Verify API endpoint tests exist +- Check for edge case coverage +- Review mock usage for external dependencies +- Verify error condition testing + +### 5. Documentation Review +- Check that new endpoints are documented +- Verify request/response schemas are clear +- Ensure error responses are documented + +## Standards Reference +Refer to `/agents/shared/policyengine-standards.md` for general PolicyEngine standards. 
+ +## Review Checklist +- [ ] Endpoints follow RESTful conventions +- [ ] Proper HTTP status codes used +- [ ] Error messages are helpful and safe +- [ ] Database queries are optimized +- [ ] Caching is implemented where appropriate +- [ ] Tests cover happy and error paths +- [ ] No security vulnerabilities introduced +- [ ] API documentation updated \ No newline at end of file diff --git a/agents/app/app-reviewer.md b/agents/app/app-reviewer.md new file mode 100644 index 0000000..278907f --- /dev/null +++ b/agents/app/app-reviewer.md @@ -0,0 +1,62 @@ +# App Reviewer Agent + +## Role +You are the App Reviewer Agent responsible for ensuring PolicyEngine React application code follows best practices, is performant, accessible, and provides excellent user experience. + +## Core Responsibilities + +### 1. React Code Review +- Verify functional components only (no class components) +- Check proper hook usage and dependencies +- Ensure state management follows lifting state up pattern +- Review component composition and reusability +- Verify proper key usage in lists +- Check for unnecessary re-renders + +### 2. Code Quality +- Ensure ESLint rules pass (no warnings in CI) +- Verify Prettier formatting applied +- Check TypeScript usage where applicable +- Review proper error boundaries +- Ensure no console.logs in production code + +### 3. Performance Review +- Check for proper memoization (React.memo, useMemo, useCallback) +- Verify lazy loading for large components +- Review bundle size impact +- Check for proper image optimization +- Ensure efficient Plotly chart rendering + +### 4. Accessibility Review +- Verify semantic HTML usage +- Check ARIA labels for complex widgets +- Ensure keyboard navigation support +- Review color contrast compliance +- Check screen reader compatibility + +### 5. 
User Experience Review +- Verify loading states are shown +- Check error messages are helpful +- Ensure responsive design works +- Review form validation and feedback +- Check for proper URL parameter handling + +### 6. Testing Review +- Verify Jest tests exist for new components +- Check React Testing Library best practices +- Ensure user interactions are tested +- Review mock usage for API calls + +## Standards Reference +Refer to `/agents/shared/policyengine-standards.md` for general PolicyEngine standards. + +## Review Checklist +- [ ] No class components used +- [ ] ESLint passes with no warnings +- [ ] Prettier formatting applied +- [ ] Component under 150 lines +- [ ] Proper loading and error states +- [ ] Accessible to screen readers +- [ ] Responsive on mobile +- [ ] Tests cover user interactions +- [ ] No performance regressions \ No newline at end of file diff --git a/commands/create-pr.md b/commands/create-pr.md new file mode 100644 index 0000000..a3b5373 --- /dev/null +++ b/commands/create-pr.md @@ -0,0 +1,328 @@ +--- +description: Create PR as draft, wait for CI to pass, then mark ready for review (solves the "I'll check back later" problem) +--- + +# Creating PR with CI Verification + +**IMPORTANT:** This command solves the common problem where Claude creates a PR, says "I'll wait for CI", then gives up. This command ACTUALLY waits for CI completion. + +## Workflow + +This command will: +1. ✅ Create PR as draft +2. ✅ Wait for CI checks to complete (actually wait, not give up) +3. ✅ Mark as ready for review if CI passes +4. 
✅ Report failures with links if CI fails + +## Step 1: Determine Current Branch and Changes + +```bash +# Get current branch +CURRENT_BRANCH=$(git branch --show-current) + +# Get base branch (usually main or master) +BASE_BRANCH=$(git remote show origin | grep 'HEAD branch' | cut -d' ' -f5) + +# Verify we're not on main/master +if [[ "$CURRENT_BRANCH" == "main" || "$CURRENT_BRANCH" == "master" ]]; then + echo "ERROR: Cannot create PR from main/master branch" + echo "Create a feature branch first: git checkout -b feature-name" + exit 1 +fi + +# Check if branch is pushed +git rev-parse --verify origin/$CURRENT_BRANCH > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "Branch not pushed yet. Pushing now..." + git push -u origin $CURRENT_BRANCH +fi +``` + +## Step 2: Gather PR Context + +Run in parallel to gather information: + +```bash +# 1. Git status +git status + +# 2. Diff since divergence from base +git diff ${BASE_BRANCH}...HEAD + +# 3. Commit history for this branch +git log ${BASE_BRANCH}..HEAD --oneline + +# 4. 
Check if PR already exists +gh pr view --json number,state,isDraft 2>/dev/null +``` + +**If PR already exists:** +```bash +EXISTING_PR=$(gh pr view --json number --jq '.number') +echo "PR #$EXISTING_PR already exists for this branch" +echo "Use /review-pr $EXISTING_PR or /fix-pr $EXISTING_PR instead" +exit 0 +``` + +## Step 3: Create PR Title and Body + +Based on the changes (read from git diff and commit history): + +**Title:** Concise summary of changes (50 chars max) + +**Body format:** +```markdown +## Summary +- Bullet point 1 +- Bullet point 2 + +## Changes +- Specific change 1 +- Specific change 2 + +## Testing +- Test approach +- Verification steps + +## Checklist +- [ ] Tests pass locally +- [ ] Code formatted (make format) +- [ ] Changelog entry created (if applicable) + +--- + +🤖 Generated with [Claude Code](https://claude.com/claude-code) + +Co-Authored-By: Claude +``` + +## Step 4: Create PR as Draft + +**CRITICAL:** Always create as draft initially. + +```bash +# Create PR as draft +gh pr create \ + --title "PR Title Here" \ + --body "$(cat <<'EOF' +PR body here +EOF +)" \ + --draft + +# Get PR number +PR_NUMBER=$(gh pr view --json number --jq '.number') +echo "Created draft PR #$PR_NUMBER" +echo "URL: $(gh pr view --json url --jq '.url')" +``` + +## Step 5: Wait for CI Checks (DO NOT GIVE UP!) + +**This is where Claude usually fails. DO NOT say "I'll check back later".** + +**Instead, ACTUALLY wait using a polling loop with NO timeout:** + +```bash +echo "Waiting for CI checks to complete..." +echo "Typical CI times by repository:" +echo " - policyengine-app: 3-6 minutes" +echo " - policyengine-uk: 9-12 minutes" +echo " - policyengine-api: 30 minutes" +echo " - policyengine-us: 60-75 minutes (longest)" +echo "" +echo "I will poll every 15 seconds until all checks complete. No time limit." 
+ +POLL_INTERVAL=15 # Check every 15 seconds +ELAPSED=0 + +while true; do + # Get check status + CHECKS_JSON=$(gh pr checks $PR_NUMBER --json name,status,conclusion) + + # Count checks + TOTAL=$(echo "$CHECKS_JSON" | jq '. | length') + + if [ "$TOTAL" -eq 0 ]; then + echo "No CI checks found yet. Waiting for checks to start..." + sleep $POLL_INTERVAL + ELAPSED=$((ELAPSED + POLL_INTERVAL)) + continue + fi + + # Count by status + COMPLETED=$(echo "$CHECKS_JSON" | jq '[.[] | select(.status == "COMPLETED")] | length') + FAILED=$(echo "$CHECKS_JSON" | jq '[.[] | select(.conclusion == "FAILURE")] | length') + + echo "[$ELAPSED s] CI Checks: $COMPLETED/$TOTAL completed, $FAILED failed" + + # If all completed + if [ "$COMPLETED" -eq "$TOTAL" ]; then + if [ "$FAILED" -eq 0 ]; then + echo "✅ All CI checks passed after $ELAPSED seconds!" + break + else + echo "❌ Some CI checks failed after $ELAPSED seconds." + # Show failures + gh pr checks $PR_NUMBER + break + fi + fi + + # Continue waiting (no timeout!) + sleep $POLL_INTERVAL + ELAPSED=$((ELAPSED + POLL_INTERVAL)) +done +``` + +**Important:** No timeout means we actually wait as long as needed. Population simulations can take 30+ minutes. + +## Step 6: Mark Ready for Review (If CI Passed) + +```bash +if [ "$FAILED" -eq 0 ] && [ "$COMPLETED" -eq "$TOTAL" ]; then + echo "Marking PR as ready for review..." + gh pr ready $PR_NUMBER + + echo "" + echo "✅ PR #$PR_NUMBER is ready for review!" + echo "URL: $(gh pr view $PR_NUMBER --json url --jq '.url')" + echo "" + echo "All CI checks passed:" + gh pr checks $PR_NUMBER +else + echo "" + echo "⚠️ PR remains as draft due to CI issues." + echo "URL: $(gh pr view $PR_NUMBER --json url --jq '.url')" + echo "" + echo "CI status:" + gh pr checks $PR_NUMBER + echo "" + echo "To fix and retry:" + echo "1. Fix the failing checks" + echo "2. Push changes: git push" + echo "3. 
Run this command again to re-check CI" +fi +``` + + +## Key Differences from Default Behavior + +**Old way (Claude gives up):** +``` +Claude: "I've created the PR. CI will take a while, I'll check back later..." +[Chat ends, Claude never checks back] +User: Has to manually check CI and mark ready +``` + +**New way (this command actually waits):** +``` +Claude: "I've created the PR as draft. Now waiting for CI..." +[Actually polls CI status every 15 seconds] +Claude: "CI passed! Marking as ready for review." +[PR is ready, user doesn't have to do anything] +``` + +## Error Handling + +**If CI fails:** +```bash +echo "❌ CI checks failed. Here are the failures:" +gh pr checks $PR_NUMBER + +echo "" +echo "Common fixes:" +echo "- Linting errors: make format && git push" +echo "- Test failures: See logs at PR URL" +echo "- Use /fix-pr $PR_NUMBER to attempt automated fixes" +``` + +**If no CI configured:** +```bash +if [ "$TOTAL" -eq 0 ] && [ $ELAPSED -gt 60 ]; then + echo "⚠️ No CI checks detected after 60 seconds." + echo "Repository may not have CI configured." + echo "Marking PR as ready for manual review." + gh pr ready $PR_NUMBER +fi +``` + +## Usage Examples + +**Basic:** +```bash +# Make changes, commit +git add . +git commit -m "Add feature" + +# Create PR (command waits for CI) +/create-pr +``` + +**With custom title:** +```bash +# Command can accept optional arguments +/create-pr "Add California EITC implementation" +``` + +**Checking existing PR:** +```bash +# If PR exists, command tells you and suggests alternatives +/create-pr +# Output: "PR #123 exists. Use /review-pr 123 or /fix-pr 123" +``` + +## Integration with Other Commands + +**After /encode-policy or /fix-pr:** +```bash +# These commands might push code +# Use /create-pr to create PR and wait for CI +/create-pr +``` + +**After fixing CI issues:** +```bash +# Fix issues locally +make format +git add . 
+git commit -m "Fix linting" +git push + +# Command will detect existing PR and re-check CI +/create-pr # "PR #123 exists, checking CI status..." +``` + +## CI Duration Expectations + +**By repository (based on actual data):** +- **policyengine-app:** 3-6 minutes (fast) +- **policyengine-uk:** 9-12 minutes (medium) +- **policyengine-api:** ~30 minutes (slow) +- **policyengine-us:** 60-75 minutes (very slow - population simulations) + +**The command has no timeout** - it will wait as long as needed, even for policyengine-us's 75-minute CI runs. + +## Why This Works + +**Problem:** Claude's internal timeout or context leads it to give up + +**Solution:** +1. ✅ Explicit polling loop (Claude sees the code will wait) +2. ✅ Clear status updates (Claude knows it's still working) +3. ✅ No timeout, stated explicitly (Claude knows to keep waiting until every check completes) +4. ✅ Fallback handling (what to do if checks fail or no CI is configured) + +**Key insight:** By having the command contain the waiting logic in bash, Claude executes it and actually waits instead of just saying "I'll wait." + +## Notes + +**This command is essential for:** +- Automated PR workflows +- CI-dependent merges +- Team workflows requiring CI validation +- Reducing manual PR management + +**Use this instead of:** +- Manual `gh pr create` followed by manual CI checking +- Asking Claude to "wait for CI" (which it can't do reliably) +- Creating ready PRs before CI passes diff --git a/commands/encode-policy.md b/commands/encode-policy.md new file mode 100644 index 0000000..4e5aa9a --- /dev/null +++ b/commands/encode-policy.md @@ -0,0 +1,303 @@ +--- +description: Orchestrates multi-agent workflow to implement new government benefit programs +--- + +# Implementing $ARGUMENTS in PolicyEngine + +Coordinate the multi-agent workflow to implement $ARGUMENTS as a complete, production-ready government benefit program. 
+ +## Program Type Detection + +This workflow adapts based on the type of program being implemented: + +**TANF/Benefit Programs** (e.g., state TANF, SNAP, WIC): +- Phase 4: test-creator creates both unit and integration tests +- Phase 6: Uses specialized @complete:country-models:tanf-program-reviewer agent in parallel validation +- Optional phases: May be skipped for simplified implementations + +**Other Government Programs** (e.g., tax credits, deductions): +- Phase 4: test-creator creates both unit and integration tests +- Phase 6: Uses general @complete:country-models:implementation-validator agent in parallel validation +- Optional phases: Include based on production requirements + +## Phase 0: Implementation Approach (TANF Programs Only) + +**For TANF programs, detect implementation approach from $ARGUMENTS:** + +**Auto-detect from user's request:** +- If $ARGUMENTS contains "simple" or "simplified" → Use **Simplified** approach +- If $ARGUMENTS contains "full" or "complete" → Use **Full** approach +- If unclear → Default to **Simplified** approach + +**Simplified Implementation:** +- Use federal baseline for gross income, demographic eligibility, immigration eligibility +- Faster implementation +- Suitable for most states that follow federal definitions +- Only creates state-specific variables for: income limits, disregards, benefit amounts, final calculation + +**Full Implementation:** +- Create state-specific income definitions, eligibility criteria +- More detailed implementation +- Required when state has unique income definitions or eligibility rules +- Creates all state-specific variables + +**Record the detected approach and pass it to Phase 4 (rules-engineer).** + +## Phase 1: Issue and PR Setup + +Invoke @complete:country-models:issue-manager agent to: +- Search for existing issue or create new one for $ARGUMENTS +- Create draft PR immediately in PolicyEngine/policyengine-us repository (NOT personal fork) +- Return issue number and PR URL for 
tracking + +## Phase 2: Variable Naming Convention +Invoke @complete:country-models:naming-coordinator agent to: +- Analyze existing naming patterns in the codebase +- Establish variable naming convention for $ARGUMENTS +- Analyze existing folder structure patterns in the codebase +- Post naming decisions and folder structure to GitHub issue for all agents to reference + +**Quality Gate**: Naming convention and folder structure must be documented before proceeding to ensure consistency across parallel development. + +## Phase 3: Document Collection + +**Phase 3A: Initial Document Gathering** + +Invoke @complete:country-models:document-collector agent to gather official $ARGUMENTS documentation, save as `working_references.md` in the repository, and post to GitHub issue + +**After agent completes:** +1. Check the agent's report for "📄 PDFs Requiring Extraction" section +2. **Decision Point:** + - **If PDFs are CRITICAL** (State Plans with benefit formulas, calculation methodology): + - Ask the user: "Please send me these PDF URLs so I can extract their content:" + - List each PDF URL on a separate line + - Wait for user to send the URLs (they will auto-extract) + - Proceed to Phase 3B + - **If PDFs are SUPPLEMENTARY** (additional reference, not essential): + - Note them for future reference + - Proceed directly to Phase 4 with current documentation + +3. **If no PDFs listed:** + - Skip to Phase 4 (documentation complete) + +**Phase 3B: PDF Extraction & Complete Documentation** (Only if CRITICAL PDFs found) + +1. After receiving extracted PDF content from user: +2. Relaunch @complete:country-models:document-collector agent with: + - Original task description + - Extracted PDF content included in prompt + - Instruction: "You are in Phase 2 - integrate this PDF content with your HTML research" +3. 
Agent creates complete documentation + +**Quality Gate**: Documentation must include: +- Official program guidelines or state plan +- Income limits and benefit schedules +- Eligibility criteria and priority groups +- Seasonal/temporal rules if applicable +- ✅ All critical PDFs extracted and integrated (if applicable) + +## Phase 4: Development (Parallel on Same Branch) + +Run both agents IN PARALLEL - they work on different folders so no conflicts: + +**@complete:country-models:test-creator** → works in `tests/` folder: +- Create comprehensive INTEGRATION tests from documentation +- Create UNIT tests for each variable that will have a formula +- Both test types created in ONE invocation +- Use only existing PolicyEngine variables +- Test realistic calculations based on documentation + +**@complete:country-models:rules-engineer** → works in `variables/` + `parameters/` folders: +- **Implementation Approach:** [Pass the decision from Phase 0: "simplified" or "full"] + - **If Simplified TANF:** Do NOT create state-specific gross income variables - use federal baseline (`tanf_gross_earned_income`, `tanf_gross_unearned_income`) + - **If Full TANF:** Create complete state-specific income definitions as needed +- **Step 1:** Create all parameters first using embedded parameter-architect patterns + - Complete parameter structure with all thresholds, amounts, rates + - Include proper references from documentation +- **Step 2:** Implement variables using the parameters + - Zero hard-coded values + - Complete implementations only + - Follow simplified/full approach from Phase 0 + +**Quality Requirements**: +- rules-engineer: ZERO hard-coded values, parameters created before variables +- test-creator: All tests (unit + integration) created together, based purely on documentation + +## Phase 5: Pre-Push Validation +Invoke @complete:country-models:pr-pusher agent to: +- Ensure changelog entry exists +- Run formatters (black, isort) +- Fix any linting issues +- Run local 
tests for quick validation +- Push branch and report initial CI status + +**Quality Gate**: Branch must be properly formatted with changelog before continuing. + +## Optional Enhancement Phases + +**These phases are OPTIONAL and should be considered based on implementation type:** + +### For Production-Ready Implementations: +The following enhancements may be applied to ensure production quality: + +1. **Cross-Program Validation** + - @complete:country-models:cross-program-validator: Check interactions with other benefits + - Prevents benefit cliffs and unintended interactions + +2. **Documentation Enhancement** + - @complete:country-models:documentation-enricher: Add examples and regulatory citations + - Improves maintainability and compliance verification + +3. **Performance Optimization** + - @complete:country-models:performance-optimizer: Vectorize and optimize calculations + - Ensures scalability for large-scale simulations + +**Decision Criteria:** +- **Simplified/Experimental TANF**: Skip these optional phases +- **Production TANF**: Include based on specific requirements +- **Full Production Deployment**: Include all enhancements + +## Phase 6: Validation + +**Run validators to check implementation quality:** + +**@complete:country-models:implementation-validator:** +- Check for hard-coded values in variables +- Verify placeholder or incomplete implementations +- Check federal/state parameter organization +- Assess test quality and coverage +- Identify performance and vectorization issues + +**For TANF/Benefit Programs, also run @complete:country-models:tanf-program-reviewer:** +- Learn from PA TANF and OH OWF reference implementations first +- Validate code formulas against regulations +- Verify test coverage with manual calculations +- Check parameter structure and references +- Focus on: eligibility rules, income disregards, benefit formulas + +**Quality Gate:** Review validator reports before proceeding + +## Phase 7: Local Testing & Fixes +**CRITICAL: 
ALWAYS invoke @complete:country-models:ci-fixer agent - do NOT manually fix issues** + +Invoke @complete:country-models:ci-fixer agent to: +- Run all tests locally: `policyengine-core test policyengine_us/tests/policy/baseline/gov/states/[STATE]/[PROGRAM] -c policyengine_us -v` +- Identify ALL failing tests +- For each failing test: + - Read the test file to understand expected values + - Read the actual test output to see what was calculated + - Determine root cause: incorrect test expectations OR bug in implementation + - Fix the issue: + - If test expectations are wrong: update the test file with correct values + - If implementation is wrong: fix the variable/parameter code + - Re-run tests to verify fix +- Iterate until ALL tests pass locally +- **Reference Verification** + - Verify all parameters have reference metadata + - Verify all variables have reference fields + - Keep `sources/` folder files for future reference + - If references missing: Create todos for adding them +- Run `make format` before committing fixes +- Push final fixes to PR branch + +**Success Metrics**: +- All tests pass locally (green output) +- All references embedded in code metadata +- `sources/` folder kept for future reference +- Code properly formatted +- Implementation complete and working +- Clean commit history + +## Phase 8: Final Summary + +After all tests pass and references are embedded: +- Update PR description with final implementation status +- Add summary of what was implemented +- Report completion to user +- **Keep PR as draft** - user will mark ready when they choose +- **WORKFLOW COMPLETE** + +## Anti-Patterns This Workflow Prevents + +1. **Hard-coded values**: Rules-engineer enforces parameterization +2. **Incomplete implementations**: Validator catches before PR +3. **Federal/state mixing**: Proper parameter organization enforced +4. **Non-existent variables in tests**: Test creator uses only real variables +5. 
**Missing edge cases**: Edge-case-generator covers all boundaries +6. **Benefit cliffs**: Cross-program-validator identifies interactions +7. **Poor documentation**: Documentation-enricher adds examples +8. **Performance issues**: Performance-optimizer ensures vectorization +9. **Review delays**: Most issues caught and fixed automatically + +## Execution Instructions + +**YOUR ROLE**: You are an orchestrator ONLY. You must: +1. Invoke agents using the Task tool +2. Wait for their completion +3. Check quality gates +4. **PAUSE and wait for user confirmation before proceeding to next phase** + +**YOU MUST NOT**: +- Write any code yourself +- Fix any issues manually +- Run tests directly +- Edit files + +**Execution Flow (ONE PHASE AT A TIME)**: + +Execute each phase sequentially and **STOP after each phase** to wait for user instructions: + +0. **Phase 0**: Implementation Approach (TANF Programs Only) + - Auto-detect from $ARGUMENTS ("simple"/"simplified" vs. "full"/"complete") + - Default to Simplified if unclear + - Inform user of detected approach + - **STOP - Wait for user to say "continue" or provide adjustments** + +1. **Phase 1**: Issue and PR Setup + - Complete the phase + - Report results + - **STOP - Wait for user to say "continue" or provide adjustments** + +2. **Phase 2**: Variable Naming Convention + - Complete the phase + - Report results + - **STOP - Wait for user to say "continue" or provide adjustments** + +3. **Phase 3**: Document Collection + - Complete the phase + - Report results + - **STOP - Wait for user to say "continue" or provide adjustments** + +4. **Phase 4**: Development (Test + Implementation) + - Pass simplified/full decision to rules-engineer + - Run test-creator and rules-engineer in parallel (different folders) + - Report results + - **STOP - Wait for user to say "continue" or provide adjustments** + +5. 
**Phase 5**: Pre-Push Validation + - Complete the phase + - Report results + - **STOP - Wait for user to say "continue" or provide adjustments** + +6. **Phase 6**: Validation + - Run validators + - Report results + - **STOP - Wait for user to say "continue" or provide adjustments** + +7. **Phase 7**: Local Testing & Fixes (Including Reference Verification) + - Complete the phase + - Report results + - **STOP - Wait for user to say "continue" or provide adjustments** + +8. **Phase 8**: Final Summary + - Update PR description + - Report final results (keep PR as draft) + - **WORKFLOW COMPLETE** + +**CRITICAL RULES**: +- Do NOT proceed to the next phase until user explicitly says to continue +- After each phase, summarize what was accomplished +- If user provides adjustments, incorporate them before continuing +- All 8 phases are REQUIRED - pausing doesn't mean skipping + +If any agent fails, report the failure but DO NOT attempt to fix it yourself. Wait for user instructions. diff --git a/commands/fix-pr.md b/commands/fix-pr.md new file mode 100644 index 0000000..51a5d60 --- /dev/null +++ b/commands/fix-pr.md @@ -0,0 +1,333 @@ +--- +description: Apply fixes to an existing PR, addressing all review comments and validation issues +--- + +# Fixing PR: $ARGUMENTS + +## Determining Which PR to Review + +First, determine which PR to review based on the arguments: + +```bash +# If no arguments provided, use current branch's PR +if [ -z "$ARGUMENTS" ]; then + CURRENT_BRANCH=$(git branch --show-current) + PR_NUMBER=$(gh pr list --head "$CURRENT_BRANCH" --json number --jq '.[0].number') + if [ -z "$PR_NUMBER" ]; then + echo "No PR found for current branch $CURRENT_BRANCH" + exit 1 + fi +# If argument is a number, use it directly +elif [[ "$ARGUMENTS" =~ ^[0-9]+$ ]]; then + PR_NUMBER=$ARGUMENTS +# Otherwise, search for PR by description/title +else + PR_NUMBER=$(gh pr list --search "$ARGUMENTS" --json number,title --jq '.[0].number') + if [ -z "$PR_NUMBER" ]; then + echo "No PR 
found matching: $ARGUMENTS" + exit 1 + fi +fi + +echo "Reviewing PR #$PR_NUMBER" +``` + +Orchestrate agents to review, validate, and fix issues in PR #$PR_NUMBER, addressing all GitHub review comments. + +## ⚠️ CRITICAL: Agent Usage is MANDATORY + +**You are a coordinator, NOT an implementer. You MUST:** +1. **NEVER make direct code changes** - always use agents +2. **INVOKE agents with specific, detailed instructions** +3. **WAIT for each agent to complete** before proceeding +4. **VERIFY agent outputs** before moving to next phase + +**If you find yourself using Edit, Write, or MultiEdit directly, STOP and invoke the appropriate agent instead.** + +## Phase 1: PR Analysis +After determining PR_NUMBER above, gather context about the PR and review comments: + +```bash +gh pr view $PR_NUMBER --comments +gh pr checks $PR_NUMBER # Note: CI runs on draft PRs too! +gh pr diff $PR_NUMBER +``` + +Document findings: +- Current CI status (even draft PRs have CI) +- Review comments to address +- Files changed +- Type of implementation (new program, bug fix, enhancement) + +## Phase 2: Enhanced Domain Validation +Run comprehensive validation using specialized agents: + +### Step 1: Domain-Specific Validation +Invoke **implementation-validator** to check: +- Federal/state jurisdiction separation +- Variable naming conventions and duplicates +- Hard-coded value patterns +- Performance optimization opportunities +- Documentation placement +- PolicyEngine-specific patterns + +### Step 2: Reference Validation +Invoke **reference-validator** to verify: +- All parameters have references +- References actually corroborate values +- Federal sources for federal params +- State sources for state params +- Specific citations, not generic links + +### Step 3: Implementation Validation +Invoke **implementation-validator** to check: +- No hard-coded values in formulas +- Complete implementations (no TODOs) +- Proper entity usage +- Correct formula patterns + +Create a comprehensive 
checklist: +- [ ] Domain validation issues +- [ ] Reference validation issues +- [ ] Implementation issues +- [ ] Review comments to address +- [ ] CI failures to fix + +## Phase 3: Sequential Fix Application + +**CRITICAL: You MUST use agents for ALL fixes. DO NOT make direct edits yourself.** + +Based on issues found, invoke agents IN ORDER to avoid conflicts. + +**Why Sequential**: Unlike initial implementation where we need isolation, PR fixes must be applied sequentially because: +- Each fix builds on the previous one +- Avoids merge conflicts +- Tests need to see the fixed implementation +- Documentation needs to reflect the final code + +**MANDATORY AGENT USAGE - Apply fixes in this exact order:** + +### Step 1: Fix Domain & Parameter Issues +```markdown +YOU MUST INVOKE THESE AGENTS - DO NOT FIX DIRECTLY: + +1. First, invoke implementation-validator: + "Scan all files in this PR and create a comprehensive list of all domain violations and implementation issues" + +2. Then invoke parameter-architect (REQUIRED for ANY hard-coded values): + "Design parameter structure for these hard-coded values found: [list all values] + Create the YAML parameter files with proper federal/state separation" + +3. Then invoke rules-engineer (REQUIRED for code changes): + "Refactor all variables to use the new parameters created by parameter-architect. + Fix all hard-coded values in: [list files]" + +4. Then invoke reference-validator: + "Add proper references to all new parameters created" + +5. ONLY AFTER all agents complete: Commit changes +``` + +### Step 2: Add Missing Tests (MANDATORY) +```markdown +REQUIRED - Must generate tests even if none were failing: + +1. Invoke edge-case-generator: + "Generate boundary tests for all parameters created in Step 1. + Test edge cases for: [list all new parameters]" + +2. Invoke test-creator: + "Create integration tests for the refactored [program name] implementation. + Include tests for all new parameter files created." + +3. 
VERIFY tests pass before committing +4. Commit test additions +``` + +### Step 3: Enhance Documentation +1. **documentation-enricher**: Add examples and references to updated code +2. Commit documentation improvements + +### Step 4: Optimize Performance (if needed) +1. **performance-optimizer**: Vectorize and optimize calculations +2. Run tests to ensure no regressions +3. Commit optimizations + +### Step 5: Validate Integrations +1. **cross-program-validator**: Check benefit interactions +2. Fix any cliff effects or integration issues found +3. Commit integration fixes + +## Phase 4: Apply Fixes +For each issue identified: + +1. **Read current implementation** + ```bash + gh pr checkout $PR_NUMBER + ``` + +2. **Apply agent-generated fixes** + - Have the responsible agent make the targeted edits (the coordinator never uses Edit/MultiEdit directly) + - Preserve existing functionality + - Add only what's needed + +3. **Verify fixes locally** + ```bash + make test + make format + ``` + +## Phase 5: Address Review Comments +For each GitHub comment: + +1. **Parse comment intent** + - Is it requesting a change? + - Is it asking for clarification? + - Is it pointing out an issue? + +2. **Generate response** + - If change requested: Apply fix and confirm + - If clarification: Add documentation/comment + - If issue: Fix and explain approach + +3. **Post response on GitHub** + ```bash + gh pr comment $PR_NUMBER --body "Addressed: [explanation of fix]" + ``` + +## Phase 6: CI Validation +Invoke ci-fixer to ensure all checks pass: + +1. **Push fixes** + ```bash + git add -A + git commit -m "Address review comments + + - Fixed hard-coded values identified in review + - Added missing tests for edge cases + - Enhanced documentation with examples + - Optimized performance issues + + Addresses comments from @reviewer" + git push + ``` + +2. **Monitor CI** + ```bash + gh pr checks $PR_NUMBER --watch + ``` + +3. 
**Fix any CI failures** + - Format issues: `make format` + - Test failures: Fix with targeted agents + - Lint issues: Apply corrections + +## Phase 7: Final Review & Summary +Invoke implementation-validator for final validation: +- All comments addressed? +- All tests passing? +- No regressions introduced? + +Post summary comment: +```bash +gh pr comment $PR_NUMBER --body "## Summary of Changes + +### Issues Addressed +✅ Fixed hard-coded values in [files] +✅ Added parameterization for [values] +✅ Enhanced test coverage (+X tests) +✅ Improved documentation +✅ All CI checks passing + +### Review Comments Addressed +- @reviewer1: [Issue] → [Fix applied] +- @reviewer2: [Question] → [Clarification added] + +### Ready for Re-Review +All identified issues have been addressed. The implementation now: +- Uses parameters for all configurable values +- Has comprehensive test coverage +- Includes documentation with examples +- Passes all CI checks" +``` + +## Command Options + +### Usage Examples +- `/fix-pr` - Fix PR for current branch +- `/fix-pr 6444` - Fix PR #6444 +- `/fix-pr "Idaho LIHEAP"` - Search for and fix PR by title/description + +### Quick Fix Mode +`/fix-pr [PR] --quick` +- Only fix CI failures +- Skip comprehensive review +- Focus on getting checks green + +### Deep Review Mode +`/fix-pr [PR] --deep` +- Run all validators +- Generate comprehensive tests +- Full documentation enhancement +- Cross-program validation + +### Comment Only Mode +`/fix-pr [PR] --comments-only` +- Only address GitHub review comments +- Skip additional validation +- Faster turnaround + +## Success Metrics + +The PR is ready when: +- ✅ All CI checks passing +- ✅ All review comments addressed +- ✅ No hard-coded values +- ✅ Comprehensive test coverage +- ✅ Documentation complete +- ✅ No performance issues + +## Common Review Patterns + +### "Hard-coded value" Comment +1. Identify the value +2. 
Create parameter with parameter-architect +3. Update implementation with rules-engineer +4. Add test with test-creator + +### "Missing test" Comment +1. Identify the scenario +2. Use edge-case-generator for boundaries +3. Use test-creator for integration tests +4. Verify with implementation-validator + +### "Needs documentation" Comment +1. Use documentation-enricher +2. Add calculation examples +3. Add regulatory references +4. Explain edge cases + +### "Performance issue" Comment +1. Use performance-optimizer +2. Vectorize operations +3. Remove redundant calculations +4. Test with large datasets + +## Error Handling + +If agents produce conflicting fixes: +1. Prioritize fixes that address review comments +2. Ensure no regressions +3. Maintain backward compatibility +4. Document any tradeoffs + +## Pre-Flight Checklist + +Before starting, confirm: +- [ ] I will NOT make direct edits (no Edit/Write/MultiEdit by coordinator) +- [ ] I will invoke agents for ALL changes +- [ ] I will wait for each agent to complete +- [ ] I will generate tests even if current tests pass +- [ ] I will commit after each agent phase + +Start with determining which PR to review, then proceed to Phase 1: Analyze the PR and review comments. \ No newline at end of file diff --git a/commands/review-pr.md b/commands/review-pr.md new file mode 100644 index 0000000..1c3d51c --- /dev/null +++ b/commands/review-pr.md @@ -0,0 +1,420 @@ +--- +description: Review an existing PR and post findings to GitHub (does not make code changes) +--- + +# Reviewing PR: $ARGUMENTS + +**READ-ONLY MODE**: This command analyzes the PR and posts a comprehensive review to GitHub WITHOUT making any code changes. + +Use `/fix-pr` to apply fixes automatically. + +## Important: Avoiding Duplicate Reviews + +**Before posting**, check if you already have an active review on this PR. 
If you do AND there are no replies from others: +- Delete your old comments +- Post a single updated review +- DO NOT create multiple review comments + +Only post additional reviews if others have engaged with your previous review. + +## Determining Which PR to Review + +First, determine which PR to review based on the arguments: + +```bash +# If no arguments provided, use current branch's PR +if [ -z "$ARGUMENTS" ]; then + CURRENT_BRANCH=$(git branch --show-current) + PR_NUMBER=$(gh pr list --head "$CURRENT_BRANCH" --json number --jq '.[0].number') + if [ -z "$PR_NUMBER" ]; then + echo "No PR found for current branch $CURRENT_BRANCH" + exit 1 + fi +# If argument is a number, use it directly +elif [[ "$ARGUMENTS" =~ ^[0-9]+$ ]]; then + PR_NUMBER=$ARGUMENTS +# Otherwise, search for PR by description/title +else + PR_NUMBER=$(gh pr list --search "$ARGUMENTS" --json number,title --jq '.[0].number') + if [ -z "$PR_NUMBER" ]; then + echo "No PR found matching: $ARGUMENTS" + exit 1 + fi +fi + +echo "Reviewing PR #$PR_NUMBER" +``` + +## Phase 1: PR Analysis + +Gather context about the PR: + +```bash +gh pr view $PR_NUMBER --comments +gh pr checks $PR_NUMBER +gh pr diff $PR_NUMBER +``` + +Document findings: +- Current CI status +- Existing review comments +- Files changed +- Type of implementation (new program, bug fix, enhancement) + +## Phase 2: Comprehensive Validation (Read-Only) + +Run validators to COLLECT issues (do not fix): + +### Step 1: Domain Validation +Invoke **implementation-validator** to identify: +- Federal/state jurisdiction issues +- Variable naming violations +- Hard-coded values +- Performance optimization opportunities +- Documentation gaps +- PolicyEngine pattern violations + +**IMPORTANT**: Instruct agent to ONLY report issues, not fix them. 
+ +### Step 2: Reference Validation +Invoke **reference-validator** to check: +- Missing parameter references +- References that don't corroborate values +- Incorrect source types (federal vs state) +- Generic/vague citations + +### Step 3: Implementation Validation +Invoke **implementation-validator** to identify: +- Hard-coded values in formulas +- Incomplete implementations (TODOs, stubs) +- Entity usage errors +- Formula pattern violations + +### Step 4: Test Coverage Analysis +Invoke **edge-case-generator** to identify: +- Missing boundary tests +- Untested edge cases +- Parameter combinations not tested + +### Step 5: Documentation Review +Invoke **documentation-enricher** to find: +- Missing docstrings +- Lack of calculation examples +- Missing regulatory references +- Unclear variable descriptions + +## Phase 3: Collect and Organize Findings + +Aggregate all issues into structured format: + +```json +{ + "overall_summary": "Brief summary of PR quality and main concerns", + "severity": "APPROVE|COMMENT|REQUEST_CHANGES", + "line_comments": [ + { + "path": "policyengine_us/variables/gov/states/ma/dta/ccfa/income.py", + "line": 42, + "body": "๐Ÿ’ก **Hard-coded value detected**\n\nThis value should be parameterized:\n\n```suggestion\nmax_income = parameters(period).gov.states.ma.dta.ccfa.max_income\n```\n\n๐Ÿ“š See: [Parameter Guidelines](https://github.com/PolicyEngine/policyengine-us/blob/master/CLAUDE.md#parameter-structure-best-practices)" + }, + { + "path": "policyengine_us/tests/variables/gov/states/ma/dta/ccfa/test_eligibility.yaml", + "line": 15, + "body": "๐Ÿงช **Missing edge case test**\n\nConsider adding a test for income exactly at the threshold:\n\n```yaml\n- name: At threshold\n period: 2024-01\n input:\n household_income: 75_000\n output:\n ma_ccfa_eligible: true\n```" + } + ], + "general_comments": [ + "Consider adding integration tests for multi-child households", + "Documentation could benefit from real-world calculation examples" + ] +} +``` + 
+## Phase 4: Check for Existing Reviews + +**IMPORTANT**: Before posting a new review, check if you already have an active review: + +```bash +# Check for existing reviews from you (Claude) +EXISTING_REVIEWS=$(gh api "/repos/{owner}/{repo}/pulls/$PR_NUMBER/reviews" \ + --jq '[.[] | select(.user.login == "MaxGhenis") | {id: .id, state: .state, submitted_at: .submitted_at}]') + +# Check if there are any comments/replies from others since your last review +LATEST_REVIEW_TIME=$(echo "$EXISTING_REVIEWS" | jq -r '.[0].submitted_at // empty') + +if [ -n "$LATEST_REVIEW_TIME" ]; then + # Check for comments after your review + COMMENTS_AFTER=$(gh api "/repos/{owner}/{repo}/issues/$PR_NUMBER/comments" \ + --jq "[.[] | select(.created_at > \"$LATEST_REVIEW_TIME\" and .user.login != \"MaxGhenis\")]") + + if [ -z "$COMMENTS_AFTER" ] || [ "$COMMENTS_AFTER" == "[]" ]; then + echo "Existing review found with no replies from others" + echo "STRATEGY: Delete old comments and post single updated review" + + # Delete old comment-only reviews/comments + gh api "/repos/{owner}/{repo}/issues/$PR_NUMBER/comments" \ + --jq '.[] | select(.user.login == "MaxGhenis") | .id' | \ + while read comment_id; do + gh api --method DELETE "/repos/{owner}/{repo}/issues/comments/$comment_id" + done + else + echo "Others have replied to your review - will add new review" + fi +fi +``` + +## Phase 5: Post GitHub Review + +Post the review using GitHub CLI: + +```bash +# Create review comments JSON +cat > /tmp/review_comments.json <<'EOF' +[FORMATTED_COMMENTS_ARRAY] +EOF + +# Post the review with inline comments +gh api \ + --method POST \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/repos/{owner}/{repo}/pulls/$PR_NUMBER/reviews" \ + -f body="$OVERALL_SUMMARY" \ + -f event="$SEVERITY" \ + -F comments=@/tmp/review_comments.json +``` + +### Severity Guidelines + +- **APPROVE**: Minor suggestions only, no critical issues +- **COMMENT**: Has issues but not blocking 
(educational feedback) +- **REQUEST_CHANGES**: Has critical issues that must be fixed: + - Hard-coded values + - Missing tests for core functionality + - Incorrect implementations + - Missing required documentation + +## Phase 6: Post Summary Comment + +Add a comprehensive summary as a regular comment: + +```bash +gh pr comment $PR_NUMBER --body "## 📋 Review Summary + +### ✅ Strengths +- [List positive aspects] +- Well-structured variable organization +- Clear parameter names + +### 🔍 Issues Found + +#### Critical (Must Fix) +- [ ] 3 hard-coded values in income calculations +- [ ] Missing edge case tests for boundary conditions +- [ ] Parameter references not corroborated by sources + +#### Suggestions (Optional) +- [ ] Consider vectorization in \`calculate_benefit()\` +- [ ] Add calculation walkthrough to documentation +- [ ] Extract shared logic into helper variable + +### 📊 Validation Results +- **Domain Validation**: X issues +- **Reference Validation**: X issues +- **Implementation Validation**: X issues +- **Test Coverage**: X gaps identified +- **Documentation**: X improvements suggested + +### 🚀 Next Steps + +To apply these fixes automatically, run: +\`\`\`bash +/fix-pr $PR_NUMBER +\`\`\` + +Or address manually and re-request review when ready. + +--- +💡 **Tip**: Use \`/fix-pr\` to automatically apply all suggested fixes." +``` + +## Phase 7: CI Status Check + +If CI is failing, include CI failure summary: + +```bash +gh pr checks $PR_NUMBER --json name,status,conclusion \ + --jq '.[] | select(.conclusion == "failure") | "❌ " + .name' +``` + +Add to review: +``` +### ⚠️ CI Failures +- ❌ lint: Black formatting issues +- ❌ test: 3 test failures in test_income.py + +Run `/fix-pr` to automatically fix these issues. 
+``` + +## Command Options + +### Usage Examples +- `/review-pr` - Review PR for current branch +- `/review-pr 6390` - Review PR #6390 +- `/review-pr "Massachusetts CCFA"` - Search for and review PR by title + +### Validation Modes + +#### Standard Review (Default) +- All validators run +- Comprehensive analysis +- Balanced severity + +#### Quick Review +`/review-pr [PR] --quick` +- Skip deep validation +- Focus on CI failures and critical issues +- Faster turnaround + +#### Strict Review +`/review-pr [PR] --strict` +- Maximum scrutiny +- Flag even minor style issues +- Request changes for any violations + +## Output Format + +The review will include: + +1. **Overall Summary**: High-level assessment +2. **Inline Comments**: Specific issues at exact lines +3. **Code Suggestions**: GitHub suggestion blocks where applicable +4. **Summary Comment**: Checklist of all issues +5. **Next Steps**: How to address findings + +## Review Categories + +### 🔴 Critical Issues (Always flag) +- Hard-coded values in formulas +- Missing parameter references +- Incorrect formula implementations +- Missing tests for core functionality +- CI failures + +### 🟡 Suggestions (Optional improvements) +- Performance optimizations +- Documentation enhancements +- Code style improvements +- Additional test coverage +- Refactoring opportunities + +### 🟢 Positive Feedback +- Well-implemented patterns +- Good test coverage +- Clear documentation +- Proper parameterization + +## Success Criteria + +A good review should: +- ✅ Identify all hard-coded values +- ✅ Flag missing/incorrect references +- ✅ Suggest specific improvements with examples +- ✅ Provide actionable next steps +- ✅ Be respectful and constructive +- ✅ Include code suggestion blocks for easy application + +## Common Review Patterns + +### Hard-coded Value +```markdown +💡 **Hard-coded value detected** + +This value should be parameterized. 
Create a parameter: + +\`\`\`yaml +# policyengine_us/parameters/gov/states/ma/program/threshold.yaml +values: + 2024-01-01: 75000 +metadata: + unit: currency-USD + period: year + reference: [Link to source] +\`\`\` + +Then use in formula: +\`\`\`suggestion +threshold = parameters(period).gov.states.ma.program.threshold +\`\`\` +``` + +### Missing Test +```markdown +๐Ÿงช **Missing edge case test** + +Add test for boundary condition: + +\`\`\`yaml +- name: At exact threshold + period: 2024-01 + input: + income: 75_000 + output: + eligible: false # Just above threshold +\`\`\` +``` + +### Missing Documentation +```markdown +๐Ÿ“š **Documentation enhancement** + +Add docstring with example: + +\`\`\`suggestion +def formula(person, period, parameters): + """ + Calculate benefit amount. + + Example: + Income $50,000 โ†’ Benefit $2,400 + Income $75,000 โ†’ Benefit $1,200 + + Reference: MA DTA Regulation 106 CMR 702.310 + """ +\`\`\` +``` + +### Performance Issue +```markdown +โšก **Vectorization opportunity** + +Replace loop with vectorized operation: + +\`\`\`suggestion +return where( + household_size == 1, + base_amount, + base_amount * household_size * 0.7 +) +\`\`\` +``` + +## Error Handling + +If validation agents fail: +1. Run basic validation manually +2. Document what couldn't be checked +3. Note limitations in review +4. Still post whatever findings were collected + +## Pre-Flight Checklist + +Before starting, confirm: +- [ ] I will NOT make any code changes +- [ ] I will ONLY analyze and post reviews +- [ ] I will collect findings from all validators +- [ ] I will format findings as GitHub review comments +- [ ] I will provide actionable suggestions +- [ ] I will be constructive and respectful + +Start by determining which PR to review, then proceed to Phase 1: Analyze the PR. 
diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..ffccf35 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,193 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:PolicyEngine/policyengine-claude:", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "13c5035179bc27935c58858f6e97708d440b6e3c", + "treeHash": "8fc6cc7ff1f163d96ca4369b313f98b29e583aadc5c5efe5252d7d2f13b61891", + "generatedAt": "2025-11-28T10:12:38.453446Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "complete", + "description": "Complete PolicyEngine knowledge base - all agents, commands, and skills for users, analysts, and contributors", + "version": null + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "908a6f4171272fbbe297e04ce039a3b0d02289f721887aac91112082bf368e8f" + }, + { + "path": "agents/app/app-reviewer.md", + "sha256": "95c67da648c7fc27d4cebec101e5f4c6a5976271cc4f525419014085dd4ab6e2" + }, + { + "path": "agents/api/api-reviewer.md", + "sha256": "9dc83ac9cfd286b86d5d2f9129d5d45a3c93a4b003c1ee7c82e1b254b03e68f7" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "ee60b86e3d20c06b6ad14f3eb2af53c022876fc0a95aada7164c29274529c5f7" + }, + { + "path": "commands/review-pr.md", + "sha256": "dc9274ee5df309da54d80998352781dd0581b5991a1d27e03aba6484133fdcea" + }, + { + "path": "commands/encode-policy.md", + "sha256": "c3f212f20f0300dad41ebe6ba9ae07261e1157e67cc7f1b8e62bd4e160dfb9d4" + }, + { + "path": "commands/fix-pr.md", + "sha256": "93f75c3ce650cc05c155a79e08d1b92768240642beb04db167f849c674ab130f" + }, + { + "path": "commands/create-pr.md", + "sha256": "1433d38a7a12ca82d5b23190d168e68b62e0a63b96779a8a959fa484d477be0e" + }, + { + 
"path": "skills/documentation/policyengine-writing-skill/SKILL.md", + "sha256": "7b1b6dfecb7db0cfde2cc87548e5292c4851c1d9062b549907cbcd9959d7fb19" + }, + { + "path": "skills/documentation/policyengine-user-guide-skill/SKILL.md", + "sha256": "f5f30d5af0d986de0350b16ab8220d3536040ec7863f179e792bde03e6aabbf1" + }, + { + "path": "skills/documentation/policyengine-standards-skill/SKILL.md", + "sha256": "8d59f51c19d85bfd2d3cba44e51e33a2061e34bc7523a10a0811a0919e76aa5b" + }, + { + "path": "skills/documentation/policyengine-design-skill/SKILL.md", + "sha256": "ca0653af9eda135b5f98c64446b35474ba7bd641dae111c666e6f6995e39c70c" + }, + { + "path": "skills/technical-patterns/policyengine-testing-patterns-skill/SKILL.md", + "sha256": "2d8b20214f3b26c55baa62d0782053c88dec97a8b29ed34c9614069c406ee517" + }, + { + "path": "skills/technical-patterns/policyengine-implementation-patterns-skill/SKILL.md", + "sha256": "12443716f602e8e47fac766d578b712bd9e0e749670a756e7c75ef6aebc5ff09" + }, + { + "path": "skills/technical-patterns/policyengine-review-patterns-skill/SKILL.md", + "sha256": "f7796a1c01d3dcc45bcdd65a27faffd7d0c59c19342fefdbfab90f333d05a86d" + }, + { + "path": "skills/technical-patterns/policyengine-period-patterns-skill/SKILL.md", + "sha256": "efaf84453f5a7465343091964bae4cf0f5431fef9ad887c3eaeeaa6b95b9976b" + }, + { + "path": "skills/technical-patterns/policyengine-parameter-patterns-skill/SKILL.md", + "sha256": "61c27c8aeb6983245aec9f84194460538645b8ca53af2909953e0fe1b673ceae" + }, + { + "path": "skills/technical-patterns/policyengine-vectorization-skill/SKILL.md", + "sha256": "278f84ac51b35a1ef94e9b2beaf15003d8a233cbc48476dd3d08ecde1bf9d105" + }, + { + "path": "skills/technical-patterns/policyengine-aggregation-skill/SKILL.md", + "sha256": "ec8e8cba63ad25e08d615ff8f0e518eb1310361ba73a55753e6d07b95fd40cc9" + }, + { + "path": "skills/technical-patterns/policyengine-code-style-skill/SKILL.md", + "sha256": "14e358797b62f7af451b21d0a396d7746e53b1a95f61955928c76bce9a030394" + }, 
+ { + "path": "skills/analysis/policyengine-analysis-skill/SKILL.md", + "sha256": "a1079e6448eb0e9cb31b6fcbd3e87f3ed04dd04d99dde95353b948b8c4c5dc06" + }, + { + "path": "skills/analysis/policyengine-analysis-skill/examples/reform_template.py", + "sha256": "6fd0bef010e9555326ac71eae67a4cbd2dec62eddb863f51676f01a925d58c1e" + }, + { + "path": "skills/data-science/l0-skill/SKILL.md", + "sha256": "9b6373e3c70d52bba1db31166eec358e87bf67642964ba5c78c27d3b2ff71030" + }, + { + "path": "skills/data-science/microdf-skill/SKILL.md", + "sha256": "dd450afeb898e26a8936d2fde0cf28b4a5024e1e38cc27ad2674104cf72b43fd" + }, + { + "path": "skills/data-science/microcalibrate-skill/SKILL.md", + "sha256": "a49648cb20c0762d8c9a04ccb19170acb506fbeef93509ccef3316e41b53e912" + }, + { + "path": "skills/data-science/microimpute-skill/SKILL.md", + "sha256": "0a2816348b1af9263c2b0574ca6e696d53a69a0afb90caeb5c5a4525f282021c" + }, + { + "path": "skills/domain-knowledge/policyengine-us-skill/SKILL.md", + "sha256": "434d93548e3c792320c2ac4e736ec3327218829df30e3b7f06336bac833b2833" + }, + { + "path": "skills/domain-knowledge/policyengine-us-skill/examples/donation_sweep.yaml", + "sha256": "2d9294d931daa667c66c8a8a011524d15adbafb068f764b3d261086e0774ff7e" + }, + { + "path": "skills/domain-knowledge/policyengine-us-skill/examples/single_filer.yaml", + "sha256": "d14503e102c796e2141d56d85db4405f1681756d58158469f97ebc0a6c0f022f" + }, + { + "path": "skills/domain-knowledge/policyengine-us-skill/scripts/situation_helpers.py", + "sha256": "0fa2858e702ff64bbece06e1b383d9a07a034abb72f5467323aca8c8db40a97d" + }, + { + "path": "skills/domain-knowledge/policyengine-uk-skill/SKILL.md", + "sha256": "02cfc0e284c7d57be4cc265a6b57145f5485022a9b4b29a300e43c575a476890" + }, + { + "path": "skills/domain-knowledge/policyengine-uk-skill/examples/universal_credit_sweep.yaml", + "sha256": "6d8577eded1f047b7c064b4344de3e1cdb85e410f6d3240986fe44f2efef4669" + }, + { + "path": 
"skills/domain-knowledge/policyengine-uk-skill/examples/couple.yaml", + "sha256": "e1d115fae8f72cea29c00fb2aeb712bff8e2f6ed29ddd6515692695a1a79ffc1" + }, + { + "path": "skills/domain-knowledge/policyengine-uk-skill/examples/family_with_children.yaml", + "sha256": "b6f69b29ae9827b20a2d01c8fc5daf614d1fe875d4d680b7a21273f707671c16" + }, + { + "path": "skills/domain-knowledge/policyengine-uk-skill/examples/single_person.yaml", + "sha256": "5f96d30e99f1e0f834cdcaf575bc260d681314d614f27d975f60d05202606995" + }, + { + "path": "skills/domain-knowledge/policyengine-uk-skill/scripts/situation_helpers.py", + "sha256": "8281fad497f12c98b82a10a5474e502a2e48dd4ece0bdc2ff6da846b9fc12049" + }, + { + "path": "skills/tools-and-apis/policyengine-python-client-skill/SKILL.md", + "sha256": "dff10e9c433461089944a26e3cfac39e0a61754391df41748a97a65dd5c49d79" + }, + { + "path": "skills/tools-and-apis/policyengine-app-skill/SKILL.md", + "sha256": "8e0cf1a5fc6ed8cf22b8017720f0011005e1c4ad3dd19afb8828e3969e6c7ebe" + }, + { + "path": "skills/tools-and-apis/policyengine-api-skill/SKILL.md", + "sha256": "017c98bf207b8374ab6bb7d3ab2eaff3fc8e6358b61461d24e7a68093f8cf2b9" + }, + { + "path": "skills/tools-and-apis/policyengine-core-skill/SKILL.md", + "sha256": "c4b7c24d87a787c018a9595925f5dfa1eed52540ca804cff6d4706dfdca96b33" + } + ], + "dirSha256": "8fc6cc7ff1f163d96ca4369b313f98b29e583aadc5c5efe5252d7d2f13b61891" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/analysis/policyengine-analysis-skill/SKILL.md b/skills/analysis/policyengine-analysis-skill/SKILL.md new file mode 100644 index 0000000..231072f --- /dev/null +++ b/skills/analysis/policyengine-analysis-skill/SKILL.md @@ -0,0 +1,569 @@ +--- +name: policyengine-analysis +description: Common analysis patterns for PolicyEngine research repositories (CRFB, newsletters, dashboards, impact studies) +--- + +# PolicyEngine Analysis + +Patterns for creating 
policy impact analyses, dashboards, and research using PolicyEngine. + +## For Users ๐Ÿ‘ฅ + +### What are Analysis Repositories? + +Analysis repositories produce the research you see on PolicyEngine: + +**Blog posts:** +- "How Montana's tax cuts affect poverty" +- "Harris EITC proposal costs and impacts" +- "UK Budget 2024 analysis" + +**Dashboards:** +- State tax comparisons +- Policy proposal scorecards +- Interactive calculators (GiveCalc, SALT calculator) + +**Research reports:** +- Distributional analyses for organizations +- Policy briefs for legislators +- Impact assessments + +### How Analysis Works + +1. **Define policy reform** using PolicyEngine parameters +2. **Create household examples** showing specific impacts +3. **Run population simulations** for aggregate effects +4. **Calculate distributional impacts** (who wins, who loses) +5. **Create visualizations** (charts, tables) +6. **Write report** following policyengine-writing-skill style +7. **Publish** to blog or share with stakeholders + +### Reading PolicyEngine Analysis + +**Key sections in typical analysis:** + +**The proposal:** +- What policy changes +- Specific parameter values + +**Household impacts:** +- 3-5 example households +- Dollar amounts for each +- Charts showing impact across income range + +**Statewide/national impacts:** +- Total cost or revenue +- Winners and losers by income decile +- Poverty and inequality effects + +**See policyengine-writing-skill for writing conventions.** + +## For Analysts ๐Ÿ“Š + +### When to Use This Skill + +- Creating policy impact analyses +- Building interactive dashboards with Streamlit/Plotly +- Writing analysis notebooks +- Calculating distributional impacts +- Comparing policy proposals +- Creating visualizations for research +- Publishing policy research + +### Example Analysis Repositories + +- `crfb-tob-impacts` - Policy impact analyses +- `newsletters` - Data-driven newsletters +- `2024-election-dashboard` - Policy comparison dashboards +- 
`marginal-child` - Specialized policy analyses +- `givecalc` - Charitable giving calculator + +## Repository Structure + +Standard analysis repository structure: + +``` +analysis-repo/ +├── analysis.ipynb # Main Jupyter notebook +├── app.py # Streamlit app (if applicable) +├── requirements.txt # Python dependencies +├── README.md # Documentation +├── data/ # Data files (if needed) +├── outputs/ # Generated charts, tables +└── .streamlit/ # Streamlit config + └── config.toml +``` + +## Common Analysis Patterns + +### Pattern 1: Impact Analysis Across Income Distribution + +```python +import pandas as pd +import numpy as np +from policyengine_us import Simulation + +# Define reform +reform = { + "gov.irs.credits.ctc.amount.base_amount": { + "2024-01-01.2100-12-31": 5000 + } +} + +# Analyze across income distribution +incomes = np.linspace(0, 200000, 101) +results = [] + +for income in incomes: + # Baseline + situation = create_situation(income=income) + sim_baseline = Simulation(situation=situation) + tax_baseline = sim_baseline.calculate("income_tax", 2024)[0] + + # Reform + sim_reform = Simulation(situation=situation, reform=reform) + tax_reform = sim_reform.calculate("income_tax", 2024)[0] + + results.append({ + "income": income, + "tax_baseline": tax_baseline, + "tax_reform": tax_reform, + "tax_change": tax_reform - tax_baseline + }) + +df = pd.DataFrame(results) +``` + +### Pattern 2: Household-Level Case Studies + +```python +# Define representative households +households = { + "Single, No Children": { + "income": 40000, + "num_children": 0, + "married": False + }, + "Single Parent, 2 Children": { + "income": 50000, + "num_children": 2, + "married": False + }, + "Married, 2 Children": { + "income": 100000, + "num_children": 2, + "married": True + } +} + +# Calculate impacts for each +case_studies = {} +for name, params in households.items(): + situation = create_family(**params) + + sim_baseline = 
Simulation(situation=situation) + sim_reform = Simulation(situation=situation, reform=reform) + + case_studies[name] = { + "baseline_tax": sim_baseline.calculate("income_tax", 2024)[0], + "reform_tax": sim_reform.calculate("income_tax", 2024)[0], + "ctc_baseline": sim_baseline.calculate("ctc", 2024)[0], + "ctc_reform": sim_reform.calculate("ctc", 2024)[0] + } + +case_df = pd.DataFrame(case_studies).T +``` + +### Pattern 3: State-by-State Comparison + +```python +states = ["CA", "NY", "TX", "FL", "PA", "OH", "IL", "MI"] + +state_results = [] +for state in states: + situation = create_situation(income=75000, state=state) + + sim_baseline = Simulation(situation=situation) + sim_reform = Simulation(situation=situation, reform=reform) + + state_results.append({ + "state": state, + "baseline_net_income": sim_baseline.calculate("household_net_income", 2024)[0], + "reform_net_income": sim_reform.calculate("household_net_income", 2024)[0], + "change": (sim_reform.calculate("household_net_income", 2024)[0] - + sim_baseline.calculate("household_net_income", 2024)[0]) + }) + +state_df = pd.DataFrame(state_results) +``` + +### Pattern 4: Marginal Analysis (Winners/Losers) + +```python +import plotly.graph_objects as go + +# Calculate across income range +situation_with_axes = { + # ... setup ... 
+ "axes": [[{ + "name": "employment_income", + "count": 1001, + "min": 0, + "max": 200000, + "period": 2024 + }]] +} + +sim_baseline = Simulation(situation=situation_with_axes) +sim_reform = Simulation(situation=situation_with_axes, reform=reform) + +incomes = sim_baseline.calculate("employment_income", 2024) +baseline_net = sim_baseline.calculate("household_net_income", 2024) +reform_net = sim_reform.calculate("household_net_income", 2024) + +gains = reform_net - baseline_net + +# Identify winners and losers +winners = gains > 0 +losers = gains < 0 +neutral = gains == 0 + +print(f"Winners: {winners.sum() / len(gains) * 100:.1f}%") +print(f"Losers: {losers.sum() / len(gains) * 100:.1f}%") +print(f"Neutral: {neutral.sum() / len(gains) * 100:.1f}%") +``` + +## Visualization Patterns + +### Standard Plotly Configuration + +```python +import plotly.graph_objects as go + +# PolicyEngine brand colors +TEAL = "#39C6C0" +BLUE = "#2C6496" +DARK_GRAY = "#616161" + +def create_pe_layout(title, xaxis_title, yaxis_title): + """Create standard PolicyEngine chart layout.""" + return go.Layout( + title=title, + xaxis_title=xaxis_title, + yaxis_title=yaxis_title, + font=dict(family="Roboto Serif", size=14), + plot_bgcolor="white", + hovermode="x unified", + xaxis=dict( + showgrid=True, + gridcolor="lightgray", + zeroline=True + ), + yaxis=dict( + showgrid=True, + gridcolor="lightgray", + zeroline=True + ) + ) + +# Use in charts +fig = go.Figure(layout=create_pe_layout( + "Tax Impact by Income", + "Income", + "Tax Change" +)) +fig.add_trace(go.Scatter(x=incomes, y=tax_change, line=dict(color=TEAL))) +``` + +### Common Chart Types + +**1. 
Line Chart (Impact by Income)** +```python +fig = go.Figure() +fig.add_trace(go.Scatter( + x=df.income, + y=df.tax_change, + mode='lines', + name='Tax Change', + line=dict(color=TEAL, width=3) +)) +fig.update_layout( + title="Tax Impact by Income Level", + xaxis_title="Income", + yaxis_title="Tax Change ($)", + xaxis_tickformat="$,.0f", + yaxis_tickformat="$,.0f" +) +``` + +**2. Bar Chart (State Comparison)** +```python +fig = go.Figure() +fig.add_trace(go.Bar( + x=state_df.state, + y=state_df.change, + marker_color=TEAL +)) +fig.update_layout( + title="Net Income Change by State", + xaxis_title="State", + yaxis_title="Change ($)", + yaxis_tickformat="$,.0f" +) +``` + +**3. Waterfall Chart (Budget Impact)** +```python +fig = go.Figure(go.Waterfall( + x=["Baseline", "Tax Credit", "Phase-out", "Reform"], + y=[baseline_revenue, credit_cost, phaseout_revenue, 0], + measure=["absolute", "relative", "relative", "total"], + connector={"line": {"color": "gray"}} +)) +``` + +## Streamlit Dashboard Patterns + +### Basic Streamlit Setup + +```python +import streamlit as st +from policyengine_us import Simulation + +st.set_page_config(page_title="Policy Analysis", layout="wide") + +st.title("Policy Impact Calculator") + +# User inputs +col1, col2, col3 = st.columns(3) +with col1: + income = st.number_input("Income", value=60000, step=5000) +with col2: + state = st.selectbox("State", ["CA", "NY", "TX", "FL"]) +with col3: + num_children = st.number_input("Children", value=0, min_value=0, max_value=10) + +# Calculate +if st.button("Calculate"): + situation = create_family( + parent_income=income, + num_children=num_children, + state=state + ) + + sim_baseline = Simulation(situation=situation) + sim_reform = Simulation(situation=situation, reform=reform) + + # Display results + col1, col2, col3 = st.columns(3) + with col1: + st.metric( + "Baseline Tax", + f"${sim_baseline.calculate('income_tax', 2024)[0]:,.0f}" + ) + with col2: + st.metric( + "Reform Tax", + 
f"${sim_reform.calculate('income_tax', 2024)[0]:,.0f}" + ) + with col3: + change = (sim_reform.calculate('income_tax', 2024)[0] - + sim_baseline.calculate('income_tax', 2024)[0]) + st.metric("Change", f"${change:,.0f}", delta=f"${-change:,.0f}") +``` + +### Interactive Chart with Streamlit + +```python +# Create chart based on user inputs +incomes = np.linspace(0, income_max, 1001) +results = [] + +for income in incomes: + situation = create_situation(income=income, state=selected_state) + sim = Simulation(situation=situation, reform=reform) + results.append(sim.calculate("household_net_income", 2024)[0]) + +fig = go.Figure() +fig.add_trace(go.Scatter(x=incomes, y=results, line=dict(color=TEAL))) +st.plotly_chart(fig, use_container_width=True) +``` + +## Jupyter Notebook Best Practices + +### Notebook Structure + +```python +# Cell 1: Title and Description +""" +# Policy Analysis: [Policy Name] + +**Date:** [Date] +**Author:** [Your Name] + +## Summary +Brief description of the analysis and key findings. 
+""" + +# Cell 2: Imports +import pandas as pd +import numpy as np +import plotly.graph_objects as go +from policyengine_us import Simulation + +# Cell 3: Configuration +YEAR = 2024 +STATES = ["CA", "NY", "TX", "FL"] + +# Cell 4+: Analysis sections with markdown headers +``` + +### Export Results + +```python +# Save DataFrame +df.to_csv("outputs/impact_analysis.csv", index=False) + +# Save Plotly chart +fig.write_html("outputs/chart.html") +fig.write_image("outputs/chart.png", width=1200, height=600) + +# Save summary statistics +summary = { + "total_winners": winners.sum(), + "total_losers": losers.sum(), + "avg_gain": gains[winners].mean(), + "avg_loss": gains[losers].mean() +} +pd.DataFrame([summary]).to_csv("outputs/summary.csv", index=False) +``` + +## Repository-Specific Examples + +This skill includes example templates in the `examples/` directory: + +- `impact_analysis_template.ipynb` - Standard impact analysis +- `dashboard_template.py` - Streamlit dashboard +- `state_comparison.py` - State-by-state analysis +- `case_studies.py` - Household case studies +- `reform_definitions.py` - Common reform patterns + +## Common Pitfalls + +### Pitfall 1: Not Using Consistent Year +**Problem:** Mixing 2024 and 2025 calculations + +**Solution:** Define year constant at top: +```python +CURRENT_YEAR = 2024 +# Use everywhere +simulation.calculate("income_tax", CURRENT_YEAR) +``` + +### Pitfall 2: Inefficient Simulations +**Problem:** Creating new simulation for each income level + +**Solution:** Use axes for efficiency: +```python +# SLOW +for income in incomes: + situation = create_situation(income=income) + sim = Simulation(situation=situation) + results.append(sim.calculate("income_tax", 2024)[0]) + +# FAST +situation_with_axes = create_situation_with_axes(incomes) +sim = Simulation(situation=situation_with_axes) +results = sim.calculate("income_tax", 2024) # Array of all results +``` + +### Pitfall 3: Forgetting to Compare Baseline and Reform +**Problem:** Only 
showing reform results + +**Solution:** Always show both: +```python +results = { + "baseline": sim_baseline.calculate("income_tax", 2024), + "reform": sim_reform.calculate("income_tax", 2024), + "change": reform - baseline +} +``` + +## PolicyEngine API Usage + +For larger-scale analyses, use the PolicyEngine API: + +```python +import requests + +def calculate_via_api(situation, reform=None): + """Calculate using PolicyEngine API.""" + url = "https://api.policyengine.org/us/calculate" + + payload = { + "household": situation, + "policy_id": reform_id if reform else baseline_policy_id + } + + response = requests.post(url, json=payload) + return response.json() +``` + +## Testing Analysis Code + +```python +import pytest + +def test_reform_increases_ctc(): + """Test that reform increases CTC as expected.""" + situation = create_family(income=50000, num_children=2) + + sim_baseline = Simulation(situation=situation) + sim_reform = Simulation(situation=situation, reform=reform) + + ctc_baseline = sim_baseline.calculate("ctc", 2024)[0] + ctc_reform = sim_reform.calculate("ctc", 2024)[0] + + assert ctc_reform > ctc_baseline, "Reform should increase CTC" + assert ctc_reform == 5000 * 2, "CTC should be $5000 per child" +``` + +## Documentation Standards + +### README Template + +```markdown +# [Analysis Name] + +## Overview +Brief description of the analysis. + +## Key Findings +- Finding 1 +- Finding 2 +- Finding 3 + +## Methodology +Explanation of approach and data sources. 
+ +## How to Run + +\```bash +pip install -r requirements.txt +python app.py # or jupyter notebook analysis.ipynb +\``` + +## Outputs +- `outputs/chart1.png` - Description +- `outputs/results.csv` - Description + +## Contact +PolicyEngine Team - hello@policyengine.org +``` + +## Additional Resources + +- **PolicyEngine API Docs:** https://policyengine.org/us/api +- **Analysis Examples:** https://github.com/PolicyEngine/analysis-notebooks +- **Streamlit Docs:** https://docs.streamlit.io +- **Plotly Docs:** https://plotly.com/python/ diff --git a/skills/analysis/policyengine-analysis-skill/examples/reform_template.py b/skills/analysis/policyengine-analysis-skill/examples/reform_template.py new file mode 100644 index 0000000..5bd541f --- /dev/null +++ b/skills/analysis/policyengine-analysis-skill/examples/reform_template.py @@ -0,0 +1,178 @@ +""" +Template for PolicyEngine reform impact analysis. + +This template provides a starting point for analyzing the impact +of a policy reform across the income distribution. 
+""" + +import pandas as pd +import numpy as np +import plotly.graph_objects as go +from policyengine_us import Simulation + +# Configuration +CURRENT_YEAR = 2024 +INCOME_MIN = 0 +INCOME_MAX = 200000 +INCOME_STEPS = 101 + +# Define your reform here +REFORM = { + "gov.irs.credits.ctc.amount.base_amount": { + "2024-01-01.2100-12-31": 5000 # Example: Increase CTC to $5,000 + } +} + + +def create_situation(income, num_children=0, state="CA"): + """Create a basic household situation.""" + people = { + "parent": { + "age": {CURRENT_YEAR: 35}, + "employment_income": {CURRENT_YEAR: income} + } + } + + members = ["parent"] + + # Add children + for i in range(num_children): + child_id = f"child_{i+1}" + people[child_id] = {"age": {CURRENT_YEAR: 8}} + members.append(child_id) + + return { + "people": people, + "families": {"family": {"members": members}}, + "marital_units": {"marital_unit": {"members": ["parent"]}}, + "tax_units": {"tax_unit": {"members": members}}, + "spm_units": {"spm_unit": {"members": members}}, + "households": { + "household": { + "members": members, + "state_name": {CURRENT_YEAR: state} + } + } + } + + +def analyze_reform(num_children=2, state="CA"): + """Analyze reform impact across income distribution.""" + incomes = np.linspace(INCOME_MIN, INCOME_MAX, INCOME_STEPS) + results = [] + + for income in incomes: + situation = create_situation( + income=income, + num_children=num_children, + state=state + ) + + # Baseline + sim_baseline = Simulation(situation=situation) + income_tax_baseline = sim_baseline.calculate("income_tax", CURRENT_YEAR)[0] + ctc_baseline = sim_baseline.calculate("ctc", CURRENT_YEAR)[0] + net_income_baseline = sim_baseline.calculate("household_net_income", CURRENT_YEAR)[0] + + # Reform + sim_reform = Simulation(situation=situation, reform=REFORM) + income_tax_reform = sim_reform.calculate("income_tax", CURRENT_YEAR)[0] + ctc_reform = sim_reform.calculate("ctc", CURRENT_YEAR)[0] + net_income_reform = 
sim_reform.calculate("household_net_income", CURRENT_YEAR)[0] + + results.append({ + "income": income, + "income_tax_baseline": income_tax_baseline, + "income_tax_reform": income_tax_reform, + "ctc_baseline": ctc_baseline, + "ctc_reform": ctc_reform, + "net_income_baseline": net_income_baseline, + "net_income_reform": net_income_reform, + "tax_change": income_tax_reform - income_tax_baseline, + "ctc_change": ctc_reform - ctc_baseline, + "net_income_change": net_income_reform - net_income_baseline + }) + + return pd.DataFrame(results) + + +def create_chart(df, title="Reform Impact Analysis"): + """Create Plotly chart of reform impacts.""" + TEAL = "#39C6C0" + + fig = go.Figure() + + fig.add_trace(go.Scatter( + x=df.income, + y=df.net_income_change, + mode='lines', + name='Net Income Change', + line=dict(color=TEAL, width=3) + )) + + fig.update_layout( + title=title, + xaxis_title="Income", + yaxis_title="Net Income Change ($)", + font=dict(family="Roboto Serif", size=14), + plot_bgcolor="white", + hovermode="x unified", + xaxis=dict( + tickformat="$,.0f", + showgrid=True, + gridcolor="lightgray" + ), + yaxis=dict( + tickformat="$,.0f", + showgrid=True, + gridcolor="lightgray", + zeroline=True, + zerolinecolor="black", + zerolinewidth=1 + ) + ) + + return fig + + +def print_summary(df): + """Print summary statistics.""" + print("\n=== Reform Impact Summary ===\n") + + winners = df[df.net_income_change > 0] + losers = df[df.net_income_change < 0] + + print(f"Winners: {len(winners) / len(df) * 100:.1f}%") + print(f"Losers: {len(losers) / len(df) * 100:.1f}%") + + if len(winners) > 0: + print(f"\nAverage gain (winners): ${winners.net_income_change.mean():,.2f}") + print(f"Max gain: ${df.net_income_change.max():,.2f}") + + if len(losers) > 0: + print(f"\nAverage loss (losers): ${losers.net_income_change.mean():,.2f}") + print(f"Max loss: ${df.net_income_change.min():,.2f}") + + print(f"\nAverage CTC change: ${df.ctc_change.mean():,.2f}") + print(f"Average tax change: 
${df.tax_change.mean():,.2f}") + + +if __name__ == "__main__": + # Run analysis + print("Running reform analysis...") + df = analyze_reform(num_children=2, state="CA") + + # Print summary + print_summary(df) + + # Save results + df.to_csv("reform_impact_results.csv", index=False) + print("\nResults saved to reform_impact_results.csv") + + # Create and save chart + fig = create_chart(df) + fig.write_html("reform_impact_chart.html") + print("Chart saved to reform_impact_chart.html") + + # Display chart (if running interactively) + fig.show() diff --git a/skills/data-science/l0-skill/SKILL.md b/skills/data-science/l0-skill/SKILL.md new file mode 100644 index 0000000..ec7f575 --- /dev/null +++ b/skills/data-science/l0-skill/SKILL.md @@ -0,0 +1,277 @@ +--- +name: l0 +description: L0 regularization for neural network sparsification and intelligent sampling - used in survey calibration +--- + +# L0 Regularization + +L0 is a PyTorch implementation of L0 regularization for neural network sparsification and intelligent sampling, used in PolicyEngine's survey calibration pipeline. + +## For Users 👥 + +### What is L0? + +L0 regularization helps PolicyEngine create more efficient survey datasets by intelligently selecting which households to include in calculations. + +**Impact you see:** +- Faster population impact calculations +- Smaller dataset sizes +- Maintained accuracy with fewer samples + +**Behind the scenes:** +When PolicyEngine shows population-wide impacts, L0 helps select representative households from the full survey, reducing computation time while maintaining accuracy.
+ +## For Analysts 📊 + +### What L0 Does + +L0 provides intelligent sampling gates for: +- **Household selection** - Choose representative samples from CPS +- **Feature selection** - Identify important variables +- **Sparse weighting** - Create compact, efficient datasets + +**Used in PolicyEngine for:** +- Survey calibration (via microcalibrate) +- Dataset sparsification in policyengine-us-data +- Efficient microsimulation + +### Installation + +```bash +pip install l0-python +``` + +### Quick Example: Sample Selection + +```python +from l0 import SampleGate + +# Select 1,000 households from 10,000 +gate = SampleGate(n_samples=10000, target_samples=1000) +selected_data, indices = gate.select_samples(data) + +# Gates learn which samples are most informative +``` + +### Integration with microcalibrate + +```python +from l0 import HardConcrete +from microcalibrate import Calibration + +# L0 gates for household selection +gates = HardConcrete( + len(household_weights), + temperature=0.25, + init_mean=0.999 # Start with most households +) + +# Use in calibration +# microcalibrate applies gates during weight optimization +``` + +## For Contributors 💻 + +### Repository + +**Location:** PolicyEngine/L0 + +**Clone:** +```bash +git clone https://github.com/PolicyEngine/L0 +cd L0 +``` + +### Current Implementation + +**To see structure:** +```bash +tree l0/ + +# Key modules: +ls l0/ +# - hard_concrete.py - Core L0 distribution +# - layers.py - L0Linear, L0Conv2d +# - gates.py - Sample/feature gates +# - penalties.py - L0/L2 penalty computation +# - temperature.py - Temperature scheduling +``` + +**To see specific implementations:** +```bash +# Hard Concrete distribution (core algorithm) +cat l0/hard_concrete.py + +# Sample gates (used in calibration) +cat l0/gates.py + +# Neural network layers +cat l0/layers.py +``` + +### Key Concepts + +**Hard Concrete Distribution:** +- Differentiable approximation of L0 norm +- Allows gradient-based optimization +- Temperature
controls sparsity level + +**To see implementation:** +```bash +cat l0/hard_concrete.py +``` + +**Sample Gates:** +- Binary gates for sample selection +- Learn which samples are most informative +- Used in microcalibrate for household selection + +**Feature Gates:** +- Select important features/variables +- Reduce dimensionality +- Maintain prediction accuracy + +### Usage in PolicyEngine + +**In microcalibrate (survey calibration):** +```python +from l0 import HardConcrete + +# Create gates for household selection +gates = HardConcrete( + n_items=len(households), + temperature=0.25, + init_mean=0.999 # Start with almost all households +) + +# Gates produce probabilities (0 to 1) +probs = gates() + +# Apply to weights during calibration +masked_weights = weights * probs +``` + +**In policyengine-us-data:** +```bash +# See usage in data pipeline +grep -r "from l0 import" ../policyengine-us-data/ +``` + +### Temperature Scheduling + +**Controls sparsity over training:** +```python +from l0 import TemperatureScheduler, update_temperatures + +scheduler = TemperatureScheduler( + initial_temp=1.0, # Start relaxed + final_temp=0.1, # End sparse + total_epochs=100 +) + +for epoch in range(100): + temp = scheduler.get_temperature(epoch) + update_temperatures(model, temp) + # ... training ... 
+``` + +**To see implementation:** +```bash +cat l0/temperature.py +``` + +### L0L2 Combined Penalty + +**Prevents overfitting:** +```python +from l0 import compute_l0l2_penalty + +# Combine L0 (sparsity) with L2 (regularization) +penalty = compute_l0l2_penalty( + model, + l0_lambda=1e-3, # Sparsity strength + l2_lambda=1e-4 # Weight regularization +) + +loss = task_loss + penalty +``` + +### Testing + +**Run tests:** +```bash +make test + +# Or +pytest tests/ -v --cov=l0 +``` + +**To see test patterns:** +```bash +cat tests/test_hard_concrete.py +cat tests/test_gates.py +``` + +## Advanced Usage + +### Hybrid Gates (L0 + Random) + +```python +from l0 import HybridGate + +# Combine L0 selection with random sampling +hybrid = HybridGate( + n_items=10000, + l0_fraction=0.25, # 25% from L0 + random_fraction=0.75, # 75% random + target_items=1000 +) + +selected, indices, types = hybrid.select(data) +``` + +### Feature Selection + +```python +from l0 import FeatureGate + +# Select top features +gate = FeatureGate(n_features=1000, max_features=50) +selected_data, feature_indices = gate.select_features(data) + +# Get feature importance +importance = gate.get_feature_importance() +``` + +## Mathematical Background + +**L0 norm:** +- Counts non-zero elements +- Non-differentiable (discontinuous) +- Hard to optimize directly + +**Hard Concrete relaxation:** +- Continuous, differentiable approximation +- Enables gradient descent +- "Stretches" binary distribution to allow gradients + +**Paper:** +Louizos, Welling, & Kingma (2017): "Learning Sparse Neural Networks through L0 Regularization" +https://arxiv.org/abs/1712.01312 + +## Related Packages + +**Uses L0:** +- microcalibrate (survey weight calibration) +- policyengine-us-data (household selection) + +**See also:** +- **microcalibrate-skill** - Survey calibration using L0 +- **policyengine-us-data-skill** - Data pipeline integration + +## Resources + +**Repository:** https://github.com/PolicyEngine/L0 +**Documentation:** 
https://policyengine.github.io/L0/ +**Paper:** https://arxiv.org/abs/1712.01312 +**PyPI:** https://pypi.org/project/l0-python/ diff --git a/skills/data-science/microcalibrate-skill/SKILL.md b/skills/data-science/microcalibrate-skill/SKILL.md new file mode 100644 index 0000000..b0ce878 --- /dev/null +++ b/skills/data-science/microcalibrate-skill/SKILL.md @@ -0,0 +1,404 @@ +--- +name: microcalibrate +description: Survey weight calibration to match population targets - used in policyengine-us-data for enhanced microdata +--- + +# MicroCalibrate + +MicroCalibrate calibrates survey weights to match population targets, with L0 regularization for sparsity and automatic hyperparameter tuning. + +## For Users 👥 + +### What is MicroCalibrate? + +When you see PolicyEngine population impacts, the underlying data has been "calibrated" using MicroCalibrate to match official population statistics. + +**What calibration does:** +- Adjusts survey weights to match known totals (population, income, employment) +- Creates representative datasets +- Reduces dataset size while maintaining accuracy +- Ensures PolicyEngine estimates match administrative data + +**Example:** +- Census says US has 331 million people +- Survey has 100,000 households representing the population +- MicroCalibrate adjusts weights so survey totals match census totals +- Result: More accurate PolicyEngine calculations + +## For Analysts 📊 + +### Installation + +```bash +pip install microcalibrate +``` + +### What MicroCalibrate Does + +**Calibration problem:** +You have survey data with initial weights, and you know certain population totals (benchmarks). Calibration adjusts weights so weighted survey totals match benchmarks.
+ +**Example:** +```python +from microcalibrate import Calibration +import numpy as np +import pandas as pd + +# Survey data (1,000 households) +weights = np.ones(1000) # Initial weights + +# Estimates (how much each household contributes to targets) +estimate_matrix = pd.DataFrame({ + 'total_income': household_incomes, # Each household's income + 'total_employed': household_employment # 1 if employed, 0 if not +}) + +# Known population targets (benchmarks) +targets = np.array([ + 50_000_000, # Total income in population + 600, # Total employed people +]) + +# Calibrate +cal = Calibration( + weights=weights, + targets=targets, + estimate_matrix=estimate_matrix, + l0_lambda=0.01 # Sparsity penalty +) + +# Optimize weights +new_weights = cal.calibrate(max_iter=1000) + +# Check results +achieved = (estimate_matrix.values.T @ new_weights) +print(f"Target: {targets}") +print(f"Achieved: {achieved}") +print(f"Non-zero weights: {(new_weights > 0).sum()} / {len(weights)}") +``` + +### L0 Regularization for Sparsity + +**Why sparsity matters:** +- Reduces dataset size (fewer households to simulate) +- Faster PolicyEngine calculations +- Easier to validate and understand + +**L0 penalty:** +```python +# L0 encourages many weights to be exactly zero +cal = Calibration( + weights=weights, + targets=targets, + estimate_matrix=estimate_matrix, + l0_lambda=0.01 # Higher = more sparse +) +``` + +**To see impact:** +```python +# Without L0 +cal_dense = Calibration(..., l0_lambda=0.0) +weights_dense = cal_dense.calibrate() + +# With L0 +cal_sparse = Calibration(..., l0_lambda=0.01) +weights_sparse = cal_sparse.calibrate() + +print(f"Dense: {(weights_dense > 0).sum()} households") +print(f"Sparse: {(weights_sparse > 0).sum()} households") +# Sparse might use 60% fewer households while matching same targets +``` + +### Automatic Hyperparameter Tuning + +**Find optimal l0_lambda:** +```python +from microcalibrate import tune_hyperparameters + +# Find best l0_lambda using 
cross-validation +best_lambda, results = tune_hyperparameters( + weights=weights, + targets=targets, + estimate_matrix=estimate_matrix, + lambda_min=1e-4, + lambda_max=1e-1, + n_trials=50 +) + +print(f"Best lambda: {best_lambda}") +``` + +### Robustness Evaluation + +**Test calibration stability:** +```python +from microcalibrate import evaluate_robustness + +# Holdout validation +robustness = evaluate_robustness( + weights=weights, + targets=targets, + estimate_matrix=estimate_matrix, + l0_lambda=0.01, + n_folds=5 +) + +print(f"Mean error: {robustness['mean_error']}") +print(f"Std error: {robustness['std_error']}") +``` + +### Interactive Dashboard + +**Visualize calibration:** +https://microcalibrate.vercel.app/ + +Features: +- Upload survey data +- Set targets +- Tune hyperparameters +- View results +- Download calibrated weights + +## For Contributors 💻 + +### Repository + +**Location:** PolicyEngine/microcalibrate + +**Clone:** +```bash +git clone https://github.com/PolicyEngine/microcalibrate +cd microcalibrate +``` + +### Current Implementation + +**To see structure:** +```bash +tree microcalibrate/ + +# Key modules: +ls microcalibrate/ +# - calibration.py - Main Calibration class +# - hyperparameter_tuning.py - Optuna integration +# - evaluation.py - Robustness testing +# - target_analysis.py - Target diagnostics +``` + +**To see specific implementations:** +```bash +# Main calibration algorithm +cat microcalibrate/calibration.py + +# Hyperparameter tuning +cat microcalibrate/hyperparameter_tuning.py + +# Robustness evaluation +cat microcalibrate/evaluation.py +``` + +### Dependencies + +**Required:** +- torch (PyTorch for optimization) +- l0-python (L0 regularization) +- optuna (hyperparameter tuning) +- numpy, pandas, tqdm + +**To see all dependencies:** +```bash +cat pyproject.toml +``` + +### How MicroCalibrate Uses L0 + +```python +# Internal to microcalibrate +from l0 import HardConcrete + +# Create gates for sample selection +gates = HardConcrete(
+ n_items=len(weights), + temperature=temperature, + init_mean=0.999 +) + +# Apply gates during optimization +effective_weights = weights * gates() + +# L0 penalty encourages gates → 0 or 1 +# Result: Many households get weight = 0 (sparse) +``` + +**To see L0 integration:** +```bash +grep -n "HardConcrete\|l0" microcalibrate/calibration.py +``` + +### Optimization Algorithm + +**Iterative reweighting:** +1. Start with initial weights +2. Apply L0 gates (select samples) +3. Optimize to match targets +4. Apply penalty for sparsity +5. Iterate until convergence + +**Loss function:** +```python +# Target matching loss +target_loss = sum((achieved_targets - desired_targets)^2) + +# L0 penalty (number of non-zero weights) +l0_penalty = l0_lambda * count_nonzero(weights) + +# Total loss +total_loss = target_loss + l0_penalty +``` + +### Testing + +**Run tests:** +```bash +make test + +# Or +pytest tests/ -v +``` + +**To see test patterns:** +```bash +cat tests/test_calibration.py +cat tests/test_hyperparameter_tuning.py +``` + +### Usage in policyengine-us-data + +**To see how data pipeline uses microcalibrate:** +```bash +cd ../policyengine-us-data + +# Find usage +grep -r "microcalibrate" policyengine_us_data/ +grep -r "Calibration" policyengine_us_data/ +``` + +## Common Patterns + +### Pattern 1: Basic Calibration + +```python +from microcalibrate import Calibration + +cal = Calibration( + weights=initial_weights, + targets=benchmark_values, + estimate_matrix=contributions, + l0_lambda=0.01 +) + +calibrated_weights = cal.calibrate(max_iter=1000) +``` + +### Pattern 2: With Hyperparameter Tuning + +```python +from microcalibrate import tune_hyperparameters, Calibration + +# Find best lambda +best_lambda, results = tune_hyperparameters( + weights=weights, + targets=targets, + estimate_matrix=estimate_matrix +) + +# Use best lambda +cal = Calibration(..., l0_lambda=best_lambda) +calibrated_weights = cal.calibrate() +``` + +### Pattern 3: Multi-Target Calibration +
+```python +# Multiple population targets +estimate_matrix = pd.DataFrame({ + 'total_population': population_counts, + 'total_income': incomes, + 'total_employed': employment_indicators, + 'total_children': child_counts +}) + +targets = np.array([ + 331_000_000, # US population + 15_000_000_000_000, # Total income + 160_000_000, # Employed people + 73_000_000 # Children +]) + +cal = Calibration(weights, targets, estimate_matrix, l0_lambda=0.01) +``` + +## Performance Considerations + +**Calibration speed:** +- 1,000 households, 5 targets: ~1 second +- 100,000 households, 10 targets: ~30 seconds +- Depends on: dataset size, number of targets, l0_lambda + +**Memory usage:** +- PyTorch tensors for optimization +- Scales linearly with dataset size + +**To profile:** +```python +import time + +start = time.time() +weights = cal.calibrate() +print(f"Calibration took {time.time() - start:.1f}s") +``` + +## Troubleshooting + +**Common issues:** + +**1. Calibration not converging:** +```python +# Try: +# - More iterations +# - Lower l0_lambda +# - Better initialization + +cal = Calibration(..., l0_lambda=0.001) # Lower sparsity penalty +weights = cal.calibrate(max_iter=5000) # More iterations +``` + +**2. Targets not matching:** +```python +# Check achieved vs desired +achieved = (estimate_matrix.values.T @ weights) +error = np.abs(achieved - targets) / targets +print(f"Relative errors: {error}") + +# If large errors, l0_lambda may be too high +``` + +**3. Too sparse (all weights zero):** +```python +# Lower l0_lambda +cal = Calibration(..., l0_lambda=0.0001) +``` + +## Related Skills + +- **l0-skill** - Understanding L0 regularization +- **policyengine-us-data-skill** - How calibration fits in data pipeline +- **microdf-skill** - Working with calibrated survey data + +## Resources + +**Repository:** https://github.com/PolicyEngine/microcalibrate +**Dashboard:** https://microcalibrate.vercel.app/ +**PyPI:** https://pypi.org/project/microcalibrate/ +**Paper:** Louizos et al. 
(2017) on L0 regularization diff --git a/skills/data-science/microdf-skill/SKILL.md b/skills/data-science/microdf-skill/SKILL.md new file mode 100644 index 0000000..40a0d9c --- /dev/null +++ b/skills/data-science/microdf-skill/SKILL.md @@ -0,0 +1,329 @@ +--- +name: microdf +description: Weighted pandas DataFrames for survey microdata analysis - inequality, poverty, and distributional calculations +--- + +# MicroDF + +MicroDF provides weighted pandas DataFrames and Series for analyzing survey microdata, with built-in support for inequality and poverty calculations. + +## For Users 👥 + +### What is MicroDF? + +When you see poverty rates, Gini coefficients, or distributional charts in PolicyEngine, those are calculated using MicroDF. + +**MicroDF powers:** +- Poverty rate calculations (SPM) +- Inequality metrics (Gini coefficient) +- Income distribution analysis +- Weighted statistics from survey data + +### Understanding the Metrics + +**Gini coefficient:** +- Calculated using MicroDF from weighted income data +- Ranges from 0 (perfect equality) to 1 (perfect inequality) +- US typically around 0.48 + +**Poverty rates:** +- Calculated using MicroDF with weighted household data +- Compares income to poverty thresholds +- Accounts for household composition + +**Percentiles:** +- MicroDF calculates weighted percentiles +- Shows income distribution (10th, 50th, 90th percentile) + +## For Analysts 📊 + +### Installation + +```bash +pip install microdf-python +``` + +### Quick Start + +```python +import microdf as mdf +import pandas as pd + +# Create sample data +df = pd.DataFrame({ + 'income': [10000, 20000, 30000, 40000, 50000], + 'weights': [1, 2, 3, 2, 1] +}) + +# Create MicroDataFrame +mdf_df = mdf.MicroDataFrame(df, weights='weights') + +# All operations are weight-aware +print(f"Weighted mean: ${mdf_df.income.mean():,.0f}") +print(f"Gini coefficient: {mdf_df.income.gini():.3f}") +``` + +### Common Operations + +**Weighted statistics:** +```python
+mdf_df.income.mean() # Weighted mean +mdf_df.income.median() # Weighted median +mdf_df.income.sum() # Weighted sum +mdf_df.income.std() # Weighted standard deviation +``` + +**Inequality metrics:** +```python +mdf_df.income.gini() # Gini coefficient +mdf_df.income.top_x_pct_share(10) # Top 10% share +mdf_df.income.top_x_pct_share(1) # Top 1% share +``` + +**Poverty analysis:** +```python +# Poverty rate (income < threshold) +poverty_rate = mdf_df.poverty_rate( + income_measure='income', + threshold=poverty_line +) + +# Poverty gap (how far below threshold) +poverty_gap = mdf_df.poverty_gap( + income_measure='income', + threshold=poverty_line +) + +# Deep poverty (income < 50% of threshold) +deep_poverty_rate = mdf_df.deep_poverty_rate( + income_measure='income', + threshold=poverty_line, + deep_poverty_line=0.5 +) +``` + +**Quantiles:** +```python +# Deciles +mdf_df.income.decile_values() + +# Quintiles +mdf_df.income.quintile_values() + +# Custom quantiles +mdf_df.income.quantile(0.25) # 25th percentile +``` + +### MicroSeries + +```python +# Extract a Series with weights +income_series = mdf_df.income # This is a MicroSeries + +# MicroSeries operations +income_series.mean() +income_series.gini() +income_series.percentile(50) +``` + +### Working with PolicyEngine Results + +```python +import microdf as mdf +from policyengine_us import Simulation + +# Run simulation with axes (multiple households) +situation_with_axes = {...} # See policyengine-us-skill +sim = Simulation(situation=situation_with_axes) + +# Get results as arrays +incomes = sim.calculate("household_net_income", 2024) +weights = sim.calculate("household_weight", 2024) + +# Create MicroDataFrame +df = pd.DataFrame({'income': incomes, 'weight': weights}) +mdf_df = mdf.MicroDataFrame(df, weights='weight') + +# Calculate metrics +gini = mdf_df.income.gini() +poverty_rate = mdf_df.poverty_rate('income', threshold=15000) + +print(f"Gini: {gini:.3f}") +print(f"Poverty rate: {poverty_rate:.1%}") +``` + +## 
For Contributors 💻 + +### Repository + +**Location:** PolicyEngine/microdf + +**Clone:** +```bash +git clone https://github.com/PolicyEngine/microdf +cd microdf +``` + +### Current Implementation + +**To see current API:** +```bash +# Main classes +cat microdf/microframe.py # MicroDataFrame +cat microdf/microseries.py # MicroSeries + +# Key modules +cat microdf/generic.py # Generic weighted operations +cat microdf/inequality.py # Gini, top shares +cat microdf/poverty.py # Poverty metrics +``` + +**To see all methods:** +```bash +# MicroDataFrame methods +grep "def " microdf/microframe.py + +# MicroSeries methods +grep "def " microdf/microseries.py +``` + +### Testing + +**To see test patterns:** +```bash +ls tests/ +cat tests/test_microframe.py +``` + +**Run tests:** +```bash +make test + +# Or +pytest tests/ -v +``` + +### Contributing + +**Before contributing:** +1. Check if method already exists +2. Ensure it's weighted correctly +3. Add tests +4. Follow policyengine-standards-skill + +**Common contributions:** +- New inequality metrics +- New poverty measures +- Performance optimizations +- Bug fixes + +## Advanced Patterns + +### Custom Aggregations + +```python +# Define custom weighted aggregation +def weighted_operation(series, weights): + return (series * weights).sum() / weights.sum() + +# Apply to MicroSeries +result = weighted_operation(mdf_df.income, mdf_df.weights) +``` + +### Groupby Operations + +```python +# Group by with weights +grouped = mdf_df.groupby('state') +state_means = grouped.income.mean() # Weighted means by state +``` + +### Inequality Decomposition + +**To see decomposition methods:** +```bash +grep -r -A 20 "def.*decomp" microdf/ +``` + +## Integration Examples + +### Example 1: PolicyEngine Blog Post Analysis + +```python +# Pattern from PolicyEngine blog posts +import microdf as mdf + +# Get simulation results +baseline_income = baseline_sim.calculate("household_net_income", 2024) +reform_income =
reform_sim.calculate("household_net_income", 2024) +weights = baseline_sim.calculate("household_weight", 2024) + +# Create MicroDataFrame +df = pd.DataFrame({ + 'baseline_income': baseline_income, + 'reform_income': reform_income, + 'weight': weights +}) +mdf_df = mdf.MicroDataFrame(df, weights='weight') + +# Calculate impacts +baseline_gini = mdf_df.baseline_income.gini() +reform_gini = mdf_df.reform_income.gini() + +print(f"Gini change: {reform_gini - baseline_gini:+.4f}") +``` + +### Example 2: Poverty Analysis + +```python +# Calculate poverty under baseline and reform +from policyengine_us import Simulation + +baseline_sim = Simulation(situation=situation) +reform_sim = Simulation(situation=situation, reform=reform) + +# Get incomes +baseline_income = baseline_sim.calculate("spm_unit_net_income", 2024) +reform_income = reform_sim.calculate("spm_unit_net_income", 2024) +spm_threshold = baseline_sim.calculate("spm_unit_poverty_threshold", 2024) +weights = baseline_sim.calculate("spm_unit_weight", 2024) + +# Calculate poverty rates +df_baseline = mdf.MicroDataFrame( + pd.DataFrame({'income': baseline_income, 'threshold': spm_threshold, 'weight': weights}), + weights='weight' +) + +poverty_baseline = (df_baseline.income < df_baseline.threshold).mean() # Weighted + +# Similar for reform +print(f"Poverty reduction: {(poverty_baseline - poverty_reform):.1%}") +``` + +## Package Status + +**Maturity:** Stable, production-ready +**API stability:** Stable (rarely breaking changes) +**Performance:** Optimized for large datasets + +**To see version:** +```bash +pip show microdf-python +``` + +**To see changelog:** +```bash +cat CHANGELOG.md # In microdf repo +``` + +## Related Skills + +- **policyengine-us-skill** - Generating data for microdf analysis +- **policyengine-analysis-skill** - Using microdf in policy analysis +- **policyengine-us-data-skill** - Data sources for microdf + +## Resources + +**Repository:** https://github.com/PolicyEngine/microdf +**PyPI:** 
https://pypi.org/project/microdf-python/ +**Issues:** https://github.com/PolicyEngine/microdf/issues diff --git a/skills/data-science/microimpute-skill/SKILL.md b/skills/data-science/microimpute-skill/SKILL.md new file mode 100644 index 0000000..6a18b9d --- /dev/null +++ b/skills/data-science/microimpute-skill/SKILL.md @@ -0,0 +1,415 @@ +--- +name: microimpute +description: ML-based variable imputation for survey data - used in policyengine-us-data to fill missing values +--- + +# MicroImpute + +MicroImpute enables ML-based variable imputation through different statistical methods, with comparison and benchmarking capabilities. + +## For Users 👥 + +### What is MicroImpute? + +When PolicyEngine calculates population impacts, the underlying survey data has missing information. MicroImpute uses machine learning to fill in those gaps intelligently. + +**What imputation does:** +- Fills missing data in surveys +- Uses machine learning to predict missing values +- Maintains statistical relationships +- Improves PolicyEngine accuracy + +**Example:** +- Survey asks about income but not capital gains breakdown +- MicroImpute predicts short-term vs long-term capital gains +- Based on patterns from IRS data +- Result: More accurate tax calculations + +**You benefit from imputation when:** +- PolicyEngine calculates capital gains tax accurately +- Benefits eligibility uses complete household information +- State-specific calculations have all needed data + +## For Analysts 📊 + +### Installation + +```bash +pip install microimpute + +# With image export (for plots) +pip install "microimpute[images]" +``` + +### What MicroImpute Does + +**Imputation problem:** +- Donor dataset has complete information (e.g., IRS tax records) +- Recipient dataset has missing variables (e.g., CPS survey) +- Imputation predicts missing values in recipient using donor patterns + +**Methods available:** +- Linear regression +- Random forest +- Quantile forest (preserves full distribution) +-
XGBoost +- Hot deck (traditional matching) + +### Quick Example + +```python +from microimpute import Imputer +import pandas as pd + +# Donor data (complete) +donor = pd.DataFrame({ + 'income': [50000, 60000, 70000], + 'age': [30, 40, 50], + 'capital_gains': [5000, 8000, 12000] # Variable to impute +}) + +# Recipient data (missing capital_gains) +recipient = pd.DataFrame({ + 'income': [55000, 65000], + 'age': [35, 45], + # capital_gains is missing +}) + +# Impute using quantile forest +imputer = Imputer(method='quantile_forest') +imputer.fit( + donor=donor, + donor_target='capital_gains', + common_vars=['income', 'age'] +) + +recipient_imputed = imputer.predict(recipient) +# Now recipient has predicted capital_gains +``` + +### Method Comparison + +```python +from microimpute import compare_methods + +# Compare different imputation methods +results = compare_methods( + donor=donor, + recipient=recipient, + target_var='capital_gains', + common_vars=['income', 'age'], + methods=['linear', 'random_forest', 'quantile_forest'] +) + +# Shows quantile loss for each method +print(results) +``` + +### Quantile Loss (Quality Metric) + +**Why quantile loss:** +- Measures how well imputation preserves the distribution +- Not just mean accuracy, but full distribution shape +- Lower is better + +**Interpretation:** +```python +# Quantile loss around 0.1 = good +# Quantile loss around 0.5 = poor +# Compare across methods to choose best +``` + +## For Contributors 💻 + +### Repository + +**Location:** PolicyEngine/microimpute + +**Clone:** +```bash +git clone https://github.com/PolicyEngine/microimpute +cd microimpute +``` + +### Current Implementation + +**To see structure:** +```bash +tree microimpute/ + +# Key modules: +ls microimpute/ +# - imputer.py - Main Imputer class +# - methods/ - Different imputation methods +# - comparison.py - Method benchmarking +# - utils/ - Utilities +``` + +**To see specific methods:** +```bash +# Quantile forest implementation +cat
microimpute/methods/quantile_forest.py + +# Random forest +cat microimpute/methods/random_forest.py + +# Linear regression +cat microimpute/methods/linear.py +``` + +### Dependencies + +**Required:** +- numpy, pandas (data handling) +- scikit-learn (ML models) +- quantile-forest (distributional imputation) +- optuna (hyperparameter tuning) +- statsmodels (statistical methods) +- scipy (statistical functions) + +**To see all dependencies:** +```bash +cat pyproject.toml +``` + +### Adding New Imputation Methods + +**Pattern:** +```python +# microimpute/methods/my_method.py + +class MyMethodImputer: + def fit(self, X_train, y_train): + """Train on donor data.""" + # Fit your model + pass + + def predict(self, X_test): + """Impute on recipient data.""" + # Return predictions + pass + + def get_quantile_loss(self, X_val, y_val): + """Compute validation loss.""" + # Evaluate quality + pass +``` + +### Usage in policyengine-us-data + +**To see how data pipeline uses microimpute:** +```bash +cd ../policyengine-us-data + +# Find usage +grep -r "microimpute" policyengine_us_data/ +grep -r "Imputer" policyengine_us_data/ +``` + +**Typical workflow:** +1. Load CPS (has demographics, missing capital gains details) +2. Load IRS PUF (has complete tax data) +3. Use microimpute to predict missing CPS variables from PUF patterns +4. Validate imputation quality +5. 
Save enhanced dataset + +### Testing + +**Run tests:** +```bash +make test + +# Or +pytest tests/ -v --cov=microimpute +``` + +**To see test patterns:** +```bash +cat tests/test_imputer.py +cat tests/test_methods.py +``` + +## Common Patterns + +### Pattern 1: Basic Imputation + +```python +from microimpute import Imputer + +# Create imputer +imputer = Imputer(method='quantile_forest') + +# Fit on donor (complete data) +imputer.fit( + donor=donor_df, + donor_target='target_variable', + common_vars=['age', 'income', 'state'] +) + +# Predict on recipient (missing target_variable) +recipient_imputed = imputer.predict(recipient_df) +``` + +### Pattern 2: Choosing Best Method + +```python +from microimpute import compare_methods + +# Test multiple methods +methods = ['linear', 'random_forest', 'quantile_forest', 'xgboost'] + +results = compare_methods( + donor=donor, + recipient=recipient, + target_var='target', + common_vars=common_vars, + methods=methods +) + +# Use method with lowest quantile loss +best_method = results.sort_values('quantile_loss').iloc[0]['method'] +``` + +### Pattern 3: Multiple Variable Imputation + +```python +# Impute several variables +variables_to_impute = [ + 'short_term_capital_gains', + 'long_term_capital_gains', + 'qualified_dividends' +] + +for var in variables_to_impute: + imputer = Imputer(method='quantile_forest') + imputer.fit(donor=irs_puf, donor_target=var, common_vars=common_vars) + cps[var] = imputer.predict(cps) +``` + +## Advanced Features + +### Hyperparameter Tuning + +**Built-in Optuna integration:** +```python +from microimpute import tune_hyperparameters + +# Automatically find best hyperparameters +best_params, study = tune_hyperparameters( + donor=donor, + target_var='target', + common_vars=common_vars, + method='quantile_forest', + n_trials=100 +) + +# Use tuned parameters +imputer = Imputer(method='quantile_forest', **best_params) +``` + +### Cross-Validation + +**Validate imputation quality:** +```python +from 
sklearn.model_selection import cross_val_score + +# Split donor for validation +# Impute on validation set +# Measure accuracy +``` + +### Visualization + +**Plot imputation results:** +```python +import plotly.express as px + +# Compare imputed vs actual (on donor validation set) +fig = px.scatter( + x=actual_values, + y=imputed_values, + labels={'x': 'Actual', 'y': 'Imputed'} +) +fig.add_shape(type="line", x0=min(actual_values), y0=min(actual_values), x1=max(actual_values), y1=max(actual_values)) # 45-degree line +``` + +## Statistical Background + +**Imputation preserves:** +- Marginal distributions (imputed variable distribution matches donor) +- Conditional relationships (imputation depends on common variables) +- Uncertainty (quantile methods preserve full distribution) + +**Trade-offs:** +- **Linear:** Fast, but assumes linear relationships +- **Random forest:** Handles non-linearity, may overfit +- **Quantile forest:** Preserves full distribution, slower +- **XGBoost:** High accuracy, requires tuning + +## Integration with PolicyEngine + +**Full pipeline (policyengine-us-data):** +``` +1. Load CPS survey data + ↓ +2. microimpute: Fill missing variables from IRS PUF + ↓ +3. microcalibrate: Adjust weights to match benchmarks + ↓ +4. Validation: Check against administrative totals + ↓ +5. Package: Distribute enhanced dataset + ↓ +6.
PolicyEngine: Use for population simulations +``` + +## Comparison to Other Methods + +**MicroImpute vs traditional imputation:** + +**Traditional (mean imputation):** +- Fast but destroys distribution +- All missing values get same value +- Underestimates variance + +**MicroImpute (ML methods):** +- Preserves relationships +- Different predictions per record +- Maintains distribution shape + +**Quantile forest advantage:** +- Predicts full conditional distribution +- Not just point estimates +- Can sample from predicted distribution + +## Performance Tips + +**For large datasets:** +```python +# Use random forest (faster than quantile forest) +imputer = Imputer(method='random_forest') + +# Or subsample donor +donor_sample = donor.sample(n=10000, random_state=42) +imputer.fit(donor=donor_sample, ...) +``` + +**For high accuracy:** +```python +# Use quantile forest with tuning +best_params, _ = tune_hyperparameters(...) +imputer = Imputer(method='quantile_forest', **best_params) +``` + +## Related Skills + +- **l0-skill** - Regularization techniques +- **microcalibrate-skill** - Survey calibration (next step after imputation) +- **policyengine-us-data-skill** - Complete data pipeline +- **microdf-skill** - Working with imputed/calibrated data + +## Resources + +**Repository:** https://github.com/PolicyEngine/microimpute +**PyPI:** https://pypi.org/project/microimpute/ +**Documentation:** See README and docstrings in source diff --git a/skills/documentation/policyengine-design-skill/SKILL.md b/skills/documentation/policyengine-design-skill/SKILL.md new file mode 100644 index 0000000..d5526ca --- /dev/null +++ b/skills/documentation/policyengine-design-skill/SKILL.md @@ -0,0 +1,880 @@ +--- +name: policyengine-design +description: PolicyEngine visual identity - colors, fonts, logos, and branding for web apps, calculators, charts, and research +--- + +# PolicyEngine Design System + +PolicyEngine's visual identity and branding guidelines for creating consistent user 
experiences across web apps, calculators, charts, and research outputs. + +## For Users 👥 + +### PolicyEngine Visual Identity + +**Brand colors:** +- **Teal** (#39C6C0) - Primary accent color (buttons, highlights, interactive elements) +- **Blue** (#2C6496) - Secondary color (links, charts, headers) + +**Typography:** +- **Charts:** Roboto Serif +- **Web app:** System fonts (sans-serif) +- **Streamlit apps:** Default sans-serif + +**Logo:** +- Used in charts (bottom right) +- Blue version for light backgrounds +- White version for dark backgrounds + +### Recognizing PolicyEngine Content + +**You can identify PolicyEngine content by:** +- Teal accent color (#39C6C0) on buttons and interactive elements +- Blue (#2C6496) in charts and links +- Roboto Serif font in charts +- PolicyEngine logo in chart footer +- Clean, minimal white backgrounds +- Data-focused, quantitative presentation + +## For Analysts 📊 + +### Chart Branding + +When creating charts for PolicyEngine analysis, follow these guidelines: + +#### Color Palette + +**Primary colors:** +```python +TEAL_ACCENT = "#39C6C0" # Primary color (teal) +BLUE_PRIMARY = "#2C6496" # Secondary color (blue) +DARK_GRAY = "#616161" # Text color +``` + +**Extended palette:** +```python +# Blues +BLUE = "#2C6496" +BLUE_LIGHT = "#D8E6F3" +BLUE_PRESSED = "#17354F" +BLUE_98 = "#F7FAFD" +DARK_BLUE_HOVER = "#1d3e5e" +DARKEST_BLUE = "#0C1A27" + +# Teals +TEAL_ACCENT = "#39C6C0" +TEAL_LIGHT = "#F7FDFC" +TEAL_PRESSED = "#227773" + +# Grays +DARK_GRAY = "#616161" +GRAY = "#808080" +MEDIUM_LIGHT_GRAY = "#BDBDBD" +MEDIUM_DARK_GRAY = "#D2D2D2" +LIGHT_GRAY = "#F2F2F2" + +# Accents +WHITE = "#FFFFFF" +BLACK = "#000000" +DARK_RED = "#b50d0d" # For negative values +``` + +**See current colors:** +```bash +cat policyengine-app/src/style/colors.js +``` + +#### Plotly Chart Formatting + +**Standard PolicyEngine chart:** + +```python +import plotly.graph_objects as go + +def format_fig(fig): + """Format chart with PolicyEngine branding."""
+ fig.update_layout( + # Typography + font=dict( + family="Roboto Serif", + color="black" + ), + + # Background + plot_bgcolor="white", + template="plotly_white", + + # Margins (leave room for logo) + margin=dict( + l=50, + r=100, + t=50, + b=120, + pad=4 + ), + + # Chart size + height=600, + width=800, + ) + + # Add PolicyEngine logo (bottom right) + fig.add_layout_image( + dict( + source="https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/blue.png", + xref="paper", + yref="paper", + x=1.0, + y=-0.10, + sizex=0.10, + sizey=0.10, + xanchor="right", + yanchor="bottom" + ) + ) + + # Clean modebar + fig.update_layout( + modebar=dict( + bgcolor="rgba(0,0,0,0)", + color="rgba(0,0,0,0)" + ) + ) + + return fig + +# Usage +fig = go.Figure() +fig.add_trace(go.Scatter(x=x_data, y=y_data, line=dict(color=TEAL_ACCENT))) +fig = format_fig(fig) +``` + +**Current implementation:** +```bash +# See format_fig in action +cat givecalc/ui/visualization.py +cat policyengine-app/src/pages/policy/output/... 
+``` + +#### Chart Colors + +**For line charts:** +- Primary line: Teal (#39C6C0) or Blue (#2C6496) +- Background lines: Light gray (rgb(180, 180, 180)) +- Markers: Teal with 70% opacity + +**For bar charts:** +- Positive values: Teal (#39C6C0) +- Negative values: Dark red (#b50d0d) +- Neutral: Gray + +**For multiple series:** +Use variations of blue and teal, or discrete color scale: +```python +colors = ["#2C6496", "#39C6C0", "#17354F", "#227773"] +``` + +#### Typography + +**Charts:** +```python +font=dict(family="Roboto Serif", size=14, color="black") +``` + +**Axis labels:** +```python +xaxis=dict( + title=dict(text="Label", font=dict(size=14)), + tickfont=dict(size=12) +) +``` + +**Load Roboto Serif font:** +```python +# In Streamlit apps +st.markdown(""" +<link rel="preconnect" href="https://fonts.googleapis.com"> +<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> +<link href="https://fonts.googleapis.com/css2?family=Roboto+Serif:wght@300;400;500;700&display=swap" rel="stylesheet"> +""", unsafe_allow_html=True) +``` + +### Streamlit App Branding + +**Streamlit configuration (.streamlit/config.toml):** + +```toml +[theme] +base = "light" +primaryColor = "#39C6C0" # Teal accent +backgroundColor = "#FFFFFF" # White background +secondaryBackgroundColor = "#F7FDFC" # Teal light +textColor = "#616161" # Dark gray + +[client] +toolbarMode = "minimal" +``` + +**Current implementation:** +```bash +cat givecalc/.streamlit/config.toml +cat salt-amt-calculator/.streamlit/config.toml # Other calculators +``` + +### Logo Usage + +**Logo URLs:** + +```python +# Blue logo (for light backgrounds) +LOGO_BLUE = "https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/blue.png" + +# White logo (for dark backgrounds) +LOGO_WHITE = "https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/white.png" + +# SVG versions (scalable) +LOGO_BLUE_SVG = "https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/blue.svg" +LOGO_WHITE_SVG = "https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/white.svg" +``` + +**Logo placement in charts:**
+- Bottom right corner +- 10% of chart width +- Slightly below bottom edge (y=-0.10) + +**Current logos:** +```bash +ls policyengine-app/src/images/logos/policyengine/ +``` + +### Complete Example: Branded Chart + +```python +import plotly.graph_objects as go + +# PolicyEngine colors +TEAL_ACCENT = "#39C6C0" +BLUE_PRIMARY = "#2C6496" + +# Create chart +fig = go.Figure() + +# Add data +fig.add_trace(go.Scatter( + x=incomes, + y=taxes, + mode='lines', + name='Tax liability', + line=dict(color=TEAL_ACCENT, width=3) +)) + +# Apply PolicyEngine branding +fig.update_layout( + # Typography + font=dict(family="Roboto Serif", size=14, color="black"), + + # Title and labels + title="Tax liability by income", + xaxis_title="Income", + yaxis_title="Tax ($)", + + # Formatting + xaxis_tickformat="$,.0f", + yaxis_tickformat="$,.0f", + + # Appearance + plot_bgcolor="white", + template="plotly_white", + + # Size and margins + height=600, + width=800, + margin=dict(l=50, r=100, t=50, b=120, pad=4) +) + +# Add logo +fig.add_layout_image( + dict( + source="https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/blue.png", + xref="paper", + yref="paper", + x=1.0, + y=-0.10, + sizex=0.10, + sizey=0.10, + xanchor="right", + yanchor="bottom" + ) +) + +# Show +fig.show() +``` + +## For Contributors 💻 + +### Brand Assets + +**Repository:** PolicyEngine/policyengine-app-v2 (current), PolicyEngine/policyengine-app (legacy) + +**Logo files:** +```bash +# Logos in app (both v1 and v2 use same logos) +ls policyengine-app/src/images/logos/policyengine/ +# - blue.png - For light backgrounds +# - white.png - For dark backgrounds +# - blue.svg - Scalable blue logo +# - white.svg - Scalable white logo +# - banners/ - Banner variations +# - profile/ - Profile/avatar versions +``` + +**Access logos:** +```bash +# View logo files (v1 repo has the assets) +cd policyengine-app/src/images/logos/policyengine/ +ls -la +``` + +### Color Definitions + +**⚠️
IMPORTANT: App V2 Transition** + +PolicyEngine is transitioning to policyengine-app-v2 with updated design tokens. Use app-v2 colors for new projects. + +**Current colors (policyengine-app-v2):** + +```typescript +// policyengine-app-v2/app/src/designTokens/colors.ts + +// Primary (teal) - 50 to 900 scale +primary[500]: "#319795" // Main teal +primary[400]: "#38B2AC" // Lighter teal +primary[600]: "#2C7A7B" // Darker teal + +// Blue scale +blue[700]: "#026AA2" // Primary blue +blue[500]: "#0EA5E9" // Lighter blue + +// Gray scale +gray[700]: "#344054" // Dark text +gray[100]: "#F2F4F7" // Light backgrounds + +// Semantic +success: "#22C55E" +warning: "#FEC601" +error: "#EF4444" + +// Background +background.primary: "#FFFFFF" +background.secondary: "#F5F9FF" + +// Text +text.primary: "#000000" +text.secondary: "#5A5A5A" +``` + +**To see current design tokens:** +```bash +cat policyengine-app-v2/app/src/designTokens/colors.ts +cat policyengine-app-v2/app/src/styles/colors.ts # Mantine integration +``` + +**Legacy colors (policyengine-app - still used in some projects):** + +```javascript +// policyengine-app/src/style/colors.js +TEAL_ACCENT = "#39C6C0" // Old teal (slightly different from v2) +BLUE = "#2C6496" // Old blue +DARK_GRAY = "#616161" // Old dark gray +``` + +**To see legacy colors:** +```bash +cat policyengine-app/src/style/colors.js +``` + +**Usage in React (app-v2):** +```typescript +import { colors } from 'designTokens'; + + + +// Links +Learn more + +// Text +

Description

+``` + +**Current colors:** +```bash +cat policyengine-app-v2/app/src/designTokens/colors.ts +``` + +## Visual Guidelines + +### Chart Design Principles + +1. **Minimal decoration** - Let data speak +2. **White backgrounds** - Clean, print-friendly +3. **Clear axis labels** - Always include units +4. **Formatted numbers** - Currency ($), percentages (%), etc. +5. **Logo inclusion** - Bottom right, never intrusive +6. **Consistent sizing** - 800x600 standard +7. **Roboto Serif** - Professional, readable font + +### Color Usage Rules + +**Primary actions:** +- Use TEAL_ACCENT (#39C6C0) +- Buttons, highlights, current selection + +**Chart lines:** +- Primary data: TEAL_ACCENT or BLUE_PRIMARY +- Secondary data: BLUE_LIGHT or GRAY +- Negative values: DARK_RED (#b50d0d) + +**Backgrounds:** +- Main: WHITE (#FFFFFF) +- Secondary: TEAL_LIGHT (#F7FDFC) or BLUE_98 (#F7FAFD) +- Plot area: WHITE + +**Text:** +- Primary: BLACK (#000000) +- Secondary: DARK_GRAY (#616161) +- Muted: GRAY (#808080) + +### Accessibility + +**Color contrast requirements:** +- Text on background: 4.5:1 minimum (WCAG AA) +- DARK_GRAY on WHITE: ✅ Passes (about 6.2:1) +- TEAL_ACCENT on WHITE: ❌ About 2.1:1 - fails even the 3:1 large-text minimum; use teal for fills and accents, not text +- Use sufficient line weights for visibility + +**Don't rely on color alone:** +- Use patterns or labels for different data series +- Ensure charts work in grayscale + +## Common Branding Tasks + +### Task 1: Create Branded Plotly Chart + +1. **Define colors:** + ```python + TEAL_ACCENT = "#39C6C0" + BLUE_PRIMARY = "#2C6496" + ``` + +2. **Create chart:** + ```python + fig = go.Figure() + fig.add_trace(go.Scatter(x=x, y=y, line=dict(color=TEAL_ACCENT))) + ``` + +3. **Apply branding:** + ```python + fig = format_fig(fig) # See implementation above + ``` + +### Task 2: Setup Streamlit Branding + +1. **Create config directory:** + ```bash + mkdir .streamlit + ``` + +2. **Copy theme config:** + ```bash + cat givecalc/.streamlit/config.toml > .streamlit/config.toml + ``` + +3.
**Verify in app:** + ```python + import streamlit as st + + st.button("Test", type="primary") # Should be teal + ``` + +### Task 3: Brand Consistency Check + +**Checklist:** +- [ ] Charts use Roboto Serif font +- [ ] Primary color is TEAL_ACCENT (#39C6C0) +- [ ] Secondary color is BLUE_PRIMARY (#2C6496) +- [ ] White backgrounds +- [ ] Logo in charts (bottom right) +- [ ] Currency formatted with $ and commas +- [ ] Percentages formatted with % +- [ ] Streamlit config.toml uses PolicyEngine theme + +## Reference Implementations + +### Excellent Examples + +**Streamlit calculators:** +```bash +# GiveCalc - Complete example +cat givecalc/ui/visualization.py +cat givecalc/.streamlit/config.toml + +# Other calculators +ls salt-amt-calculator/ +ls ctc-calculator/ +``` + +**Blog post charts:** +```bash +# Analysis with branded charts +cat policyengine-app/src/posts/articles/harris-eitc.md +cat policyengine-app/src/posts/articles/montana-tax-cuts-2026.md +``` + +**React app components:** +```bash +# Charts in app +cat policyengine-app/src/pages/policy/output/DistributionalImpact.jsx +``` + +### Don't Use These + +**❌ Wrong colors:** +```python +# Don't use random colors +color = "#FF5733" +color = "red" +color = "green" +``` + +**❌ Wrong fonts:** +```python +# Don't use other fonts for charts +font = dict(family="Arial") +font = dict(family="Times New Roman") +``` + +**❌ Missing logo:** +```python +# Don't skip the logo in charts for publication +# All published charts should include PolicyEngine logo +``` + +## Assets and Resources + +### Logo Files + +**In policyengine-app repository:** +```bash +policyengine-app/src/images/logos/policyengine/ +├── blue.png # Primary logo (light backgrounds) +├── white.png # Logo for dark backgrounds +├── blue.svg # Scalable blue logo +├── white.svg # Scalable white logo +├── banners/ # Banner variations +└── profile/ # Profile/avatar versions +``` + +**Raw URLs for direct use:** +```python +# Use
these URLs in code +LOGO_URL = "https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/blue.png" +``` + +### Font Files + +**Roboto Serif (charts):** +- Google Fonts: https://fonts.google.com/specimen/Roboto+Serif +- Family: Roboto Serif +- Weights: 300 (light), 400 (regular), 500 (medium), 700 (bold) + +**Loading:** +```html +<link href="https://fonts.googleapis.com/css2?family=Roboto+Serif:wght@300;400;500;700&display=swap" rel="stylesheet"> +``` + +### Color Reference Files + +**JavaScript (React app):** +```bash +cat policyengine-app/src/style/colors.js +``` + +**Python (calculators, analysis):** +```python +# Define in constants.py or at top of file +TEAL_ACCENT = "#39C6C0" +BLUE_PRIMARY = "#2C6496" +DARK_GRAY = "#616161" +WHITE = "#FFFFFF" +``` + +## Brand Evolution + +**Current identity (2025):** +- Teal primary (#39C6C0) +- Blue secondary (#2C6496) +- Roboto Serif for charts +- Minimal, data-focused design + +**If brand evolves:** +- Colors defined in policyengine-app/src/style/colors.js are source of truth +- Update this skill to point to current definitions +- Never hardcode - always reference colors.js + +## Quick Reference + +### Color Codes + +| Color | Hex | Usage | +|-------|-----|-------| +| Teal Accent | #39C6C0 | Primary interactive elements | +| Blue Primary | #2C6496 | Secondary, links, charts | +| Dark Gray | #616161 | Body text | +| White | #FFFFFF | Backgrounds | +| Teal Light | #F7FDFC | Secondary backgrounds | +| Dark Red | #b50d0d | Negative values, errors | + +### Font Families + +| Context | Font | +|---------|------| +| Charts | Roboto Serif | +| Web app | System sans-serif | +| Streamlit | Default sans-serif | +| Code blocks | Monospace | + +### Logo URLs + +| Background | Format | URL | +|------------|--------|-----| +| Light | PNG | https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/blue.png | +| Light | SVG | https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/blue.svg | +| Dark | PNG |
https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/white.png | +| Dark | SVG | https://raw.githubusercontent.com/PolicyEngine/policyengine-app/master/src/images/logos/policyengine/white.svg | + +## Related Skills + +- **policyengine-app-skill** - React component styling +- **policyengine-analysis-skill** - Chart creation patterns +- **policyengine-writing-skill** - Content style (complements visual style) + +## Resources + +**Brand assets:** PolicyEngine/policyengine-app/src/images/ +**Color definitions:** PolicyEngine/policyengine-app/src/style/colors.js +**Examples:** givecalc, salt-amt-calculator, crfb-tob-impacts diff --git a/skills/documentation/policyengine-standards-skill/SKILL.md b/skills/documentation/policyengine-standards-skill/SKILL.md new file mode 100644 index 0000000..2266849 --- /dev/null +++ b/skills/documentation/policyengine-standards-skill/SKILL.md @@ -0,0 +1,768 @@ +--- +name: policyengine-standards +description: PolicyEngine coding standards, formatters, CI requirements, and development best practices +--- + +# PolicyEngine Standards Skill + +Use this skill to ensure code meets PolicyEngine's development standards and passes CI checks. 
+ +## When to Use This Skill + +- Before committing code to any PolicyEngine repository +- When CI checks fail with linting/formatting errors +- Setting up a new PolicyEngine repository +- Reviewing PRs for standard compliance +- When AI tools generate code that needs standardization + +## Critical Requirements + +### Python Version +⚠️ **MUST USE Python 3.13** - Do NOT downgrade to older versions +- Check version: `uv run python --version` +- Use `pyproject.toml` to specify version requirements + +### Command Execution +⚠️ **ALWAYS use `uv run` for Python commands** - Never use bare `python` or `pytest` +- ✅ Correct: `uv run python script.py`, `uv run pytest tests/` +- ❌ Wrong: `python script.py`, `pytest tests/` +- This ensures correct virtual environment and dependencies + +### Documentation (Python Projects) +⚠️ **MUST USE Jupyter Book 2.0 (MyST-MD)** - NOT Jupyter Book 1.x +- Build docs: `myst build docs` (NOT `jb build`) +- Use MyST markdown syntax + +## Before Committing - Checklist + +1. **Write tests first** (TDD - see below) +2. **Format code**: `make format` or language-specific formatter +3. **Run tests**: `make test` to ensure all tests pass +4. **Check linting**: Ensure no linting errors +5. **Use config files**: Prefer config files over environment variables +6. **Reference issues**: Include "Fixes #123" in commit message + +## Creating Pull Requests + +### The CI Waiting Problem + +**Common failure pattern:** +``` +User: "Create a PR and mark it ready when CI passes" +Claude: "I've created the PR as draft. CI will take a while, I'll check back later..." 
+[Chat ends - Claude never checks back] +Result: PR stays in draft, user has to manually check CI and mark ready +``` + +### Solution: Use /create-pr Command + +**When creating PRs, use the /create-pr command:** + +```bash +/create-pr +``` + +**This command:** +- โœ… Creates PR as draft +- โœ… Actually waits for CI (polls every 15 seconds) +- โœ… Marks ready when CI passes +- โœ… Reports failures with details +- โœ… Handles timeouts gracefully + +**Why this works:** +The command contains explicit polling logic that Claude executes, so it actually waits instead of giving up. + +### If /create-pr is Not Available + +**If the command isn't installed, implement the pattern directly:** + +```bash +# 1. Create PR as draft +gh pr create --draft --title "Title" --body "Body" +PR_NUMBER=$(gh pr view --json number --jq '.number') + +# 2. Wait for CI (ACTUALLY WAIT - don't give up!) +POLL_INTERVAL=15 +ELAPSED=0 + +while true; do # No timeout - wait as long as needed + CHECKS=$(gh pr checks $PR_NUMBER --json status,conclusion) + TOTAL=$(echo "$CHECKS" | jq '. | length') + COMPLETED=$(echo "$CHECKS" | jq '[.[] | select(.status == "COMPLETED")] | length') + + echo "[$ELAPSED s] CI: $COMPLETED/$TOTAL completed" + + if [ "$COMPLETED" -eq "$TOTAL" ] && [ "$TOTAL" -gt 0 ]; then + FAILED=$(echo "$CHECKS" | jq '[.[] | select(.conclusion == "FAILURE")] | length') + if [ "$FAILED" -eq 0 ]; then + echo "โœ… All CI passed! Marking ready..." + gh pr ready $PR_NUMBER + break + else + echo "โŒ CI failed. PR remains draft." + gh pr checks $PR_NUMBER + break + fi + fi + + sleep $POLL_INTERVAL + ELAPSED=$((ELAPSED + POLL_INTERVAL)) +done + +# Important: No timeout! Population simulations can take 30+ minutes. +``` + +### DO NOT Say "I'll Check Back Later" + +**โŒ WRONG:** +``` +"I've created the PR as draft. CI checks will take a few minutes. +I'll check back later once they complete." +``` + +**Why wrong:** You cannot check back later. The chat session ends. 
+ +**โœ… CORRECT:** +``` +"I've created the PR as draft. Now polling CI status every 15 seconds..." +[Actually polls using while loop] +"CI checks completed. All passed! Marking PR as ready for review." +``` + +### When to Create Draft vs Ready + +**Always create as draft when:** +- CI checks are configured +- User asks to wait for CI +- Making automated changes +- Unsure if CI will pass + +**Create as ready only when:** +- User explicitly requests ready PR +- No CI configured +- CI already verified locally + +### PR Workflow Standards + +**Standard flow:** +```bash +# 1. Ensure branch is pushed +git push -u origin feature-branch + +# 2. Create PR as draft +gh pr create --draft --title "..." --body "..." + +# 3. Wait for CI (use polling loop - see pattern above) + +# 4. If CI passes: +gh pr ready $PR_NUMBER + +# 5. If CI fails: +echo "CI failed. PR remains draft. Fix issues and push again." +``` + +## Test-Driven Development (TDD) + +PolicyEngine follows Test-Driven Development practices across all repositories. + +### TDD Workflow + +**1. Write test first (RED):** +```python +# tests/test_new_feature.py +def test_california_eitc_calculation(): + """Test California EITC for family with 2 children earning $30,000.""" + situation = create_family(income=30000, num_children=2, state="CA") + sim = Simulation(situation=situation) + ca_eitc = sim.calculate("ca_eitc", 2024)[0] + + # Test fails initially (feature not implemented yet) + assert ca_eitc == 3000, "CA EITC should be $3,000 for this household" +``` + +**2. Implement feature (GREEN):** +```python +# policyengine_us/variables/gov/states/ca/tax/income/credits/ca_eitc.py +class ca_eitc(Variable): + value_type = float + entity = TaxUnit + definition_period = YEAR + + def formula(tax_unit, period, parameters): + # Implementation to make test pass + federal_eitc = tax_unit("eitc", period) + return federal_eitc * parameters(period).gov.states.ca.tax.eitc.match +``` + +**3. 
Refactor (REFACTOR):** +```python +# Clean up, optimize, add documentation +# All while tests continue to pass +``` + +### TDD Benefits + +**Why PolicyEngine uses TDD:** +- ✅ **Accuracy** - Tests verify implementation matches regulations +- ✅ **Documentation** - Tests show expected behavior +- ✅ **Regression prevention** - Changes don't break existing features +- ✅ **Confidence** - Safe to refactor +- ✅ **Isolation** - Multi-agent workflow (test-creator and rules-engineer work separately) + +### TDD in Multi-Agent Workflow + +**Country model development:** +1. **@document-collector** gathers regulations +2. **@test-creator** writes tests from regulations (isolated, no implementation access) +3. **@rules-engineer** implements from regulations (isolated, no test access) +4. Both work from same source → tests verify implementation accuracy + +**See policyengine-core-skill and country-models agents for details.** + +### Test Examples + +**Python (pytest):** +```python +def test_ctc_for_two_children(): + """Test CTC calculation for married couple with 2 children.""" + situation = create_married_couple( + income_1=75000, + income_2=50000, + num_children=2, + child_ages=[5, 8] + ) + + sim = Simulation(situation=situation) + ctc = sim.calculate("ctc", 2024)[0] + + assert ctc == 4000, "CTC should be $2,000 per child" +``` + +**React (Jest + RTL):** +```javascript +import { render, screen } from '@testing-library/react'; +import TaxCalculator from './TaxCalculator'; + +test('displays calculated tax', () => { + render(<TaxCalculator />); + + // Test what user sees, not implementation + expect(screen.getByText(/\$5,000/)).toBeInTheDocument(); +}); +``` + +### Test Organization + +**Python:** +``` +tests/ +├── test_variables/ +│ ├── test_income.py +│ ├── test_deductions.py +│ └── test_credits.py +├── test_parameters/ +└── test_simulations/ +``` + +**React:** +``` +src/ +├── components/ +│ └── TaxCalculator/ +│ ├── 
TaxCalculator.jsx +โ”‚ โ””โ”€โ”€ TaxCalculator.test.jsx +``` + +### Running Tests + +**Python:** +```bash +# All tests +make test + +# With uv +uv run pytest tests/ -v + +# Specific test +uv run pytest tests/test_credits.py::test_ctc_for_two_children -v + +# With coverage +uv run pytest tests/ --cov=policyengine_us --cov-report=html +``` + +**React:** +```bash +# All tests +make test + +# Watch mode +npm test -- --watch + +# Specific test +npm test -- TaxCalculator.test.jsx + +# Coverage +npm test -- --coverage +``` + +### Test Quality Standards + +**Good tests:** +- โœ… Test behavior, not implementation +- โœ… Clear, descriptive names +- โœ… Single assertion per test (when possible) +- โœ… Include documentation (docstrings) +- โœ… Based on official regulations with citations + +**Bad tests:** +- โŒ Testing private methods +- โŒ Mocking everything +- โŒ No assertion messages +- โŒ Magic numbers without explanation + +### Example: TDD for New Feature + +```python +# Step 1: Write test (RED) +def test_new_york_empire_state_child_credit(): + """Test NY Empire State Child Credit for family with 1 child. + + Based on NY Tax Law Section 606(c-1). + Family earning $50,000 with 1 child under 4 should receive $330. + """ + situation = create_family( + income=50000, + num_children=1, + child_ages=[2], + state="NY" + ) + + sim = Simulation(situation=situation) + credit = sim.calculate("ny_empire_state_child_credit", 2024)[0] + + assert credit == 330, "Should receive $330 for child under 4" + +# Test fails - feature doesn't exist yet + +# Step 2: Implement (GREEN) +# Create variable in policyengine_us/variables/gov/states/ny/... +# Test passes + +# Step 3: Refactor +# Optimize, add documentation, maintain passing tests +``` + +## Python Standards + +### Formatting +- **Formatter**: Black with 79-character line length +- **Command**: `make format` or `black . -l 79` +- **Check without changes**: `black . 
-l 79 --check` + +```bash +# Format all Python files +make format + +# Check if formatting is needed (CI-style) +black . -l 79 --check +``` + +### Code Style +```python +# Imports: Grouped and alphabetized +import os +import sys +from pathlib import Path # stdlib + +import numpy as np +import pandas as pd # third-party + +from policyengine_us import Simulation # local + +# Naming conventions +class TaxCalculator: # CamelCase for classes + pass + +def calculate_income_tax(income): # snake_case for functions + annual_income = income * 12 # snake_case for variables + return annual_income + +# Type hints (recommended) +def calculate_tax(income: float, state: str) -> float: + """Calculate state income tax. + + Args: + income: Annual income in dollars + state: Two-letter state code + + Returns: + Tax liability in dollars + """ + pass + +# Error handling - catch specific exceptions +try: + result = simulation.calculate("income_tax", 2024) +except KeyError as e: + raise ValueError(f"Invalid variable name: {e}") +``` + +### Testing +```python +import pytest + +def test_ctc_calculation(): + """Test Child Tax Credit calculation for family with 2 children.""" + situation = create_family(income=50000, num_children=2) + sim = Simulation(situation=situation) + ctc = sim.calculate("ctc", 2024)[0] + + assert ctc == 4000, "CTC should be $2000 per child" +``` + +**Run tests:** +```bash +# All tests +make test + +# Or with uv +uv run pytest tests/ -v + +# Specific test +uv run pytest tests/test_tax.py::test_ctc_calculation -v + +# With coverage +uv run pytest tests/ --cov=policyengine_us --cov-report=html +``` + +## JavaScript/React Standards + +### Formatting +- **Formatters**: Prettier + ESLint +- **Command**: `npm run lint -- --fix && npx prettier --write .` +- **CI Check**: `npm run lint -- --max-warnings=0` + +```bash +# Format all files +make format + +# Or manually +npm run lint -- --fix +npx prettier --write . 
+ +# Check if formatting is needed (CI-style) +npm run lint -- --max-warnings=0 +``` + +### Code Style +```javascript +// Use functional components only (no class components) +import { useState, useEffect } from "react"; + +function TaxCalculator({ income, state }) { + const [tax, setTax] = useState(0); + + useEffect(() => { + // Calculate tax when inputs change + calculateTax(income, state).then(setTax); + }, [income, state]); + + return ( +
<div> + <p>Tax: ${tax.toLocaleString()}</p> + </div>
+ ); +} + +// File naming +// - Components: PascalCase.jsx (TaxCalculator.jsx) +// - Utilities: camelCase.js (formatCurrency.js) + +// Environment config - use config file pattern +// src/config/environment.js +const config = { + API_URL: process.env.NODE_ENV === 'production' + ? 'https://api.policyengine.org' + : 'http://localhost:5000' +}; +export default config; +``` + +### React Component Size +- Keep components under 150 lines after formatting +- Extract complex logic into custom hooks +- Split large components into smaller ones + +## Version Control Standards + +### Changelog Management + +**CRITICAL**: For PRs, ONLY modify `changelog_entry.yaml`. NEVER manually update `CHANGELOG.md` or `changelog.yaml`. + +**Correct Workflow:** +1. Create `changelog_entry.yaml` at repository root: + ```yaml + - bump: patch # or minor, major + changes: + added: + - Description of new feature + fixed: + - Description of bug fix + changed: + - Description of change + ``` + +2. Commit ONLY `changelog_entry.yaml` with your code changes + +3. GitHub Actions automatically updates `CHANGELOG.md` and `changelog.yaml` on merge + +**DO NOT:** +- โŒ Run `make changelog` manually during PR creation +- โŒ Commit `CHANGELOG.md` or `changelog.yaml` in your PR +- โŒ Modify main changelog files directly + +### Git Workflow + +1. **Create branches on PolicyEngine repos, NOT forks** + - Forks cause CI failures due to missing secrets + - Request write access if needed + +2. **Branch naming**: `feature-name` or `fix-issue-123` + +3. **Commit messages**: + ``` + Add CTC reform analysis for CRFB report + + - Implement household-level calculations + - Add state-by-state comparison + - Create visualizations + + Fixes #123 + ``` + +4. 
**PR description**: Include "Fixes #123" to auto-close issues + +### Common Git Pitfalls + +**Never do these:** +- โŒ Force push to main/master +- โŒ Commit secrets or `.env` files +- โŒ Skip hooks with `--no-verify` +- โŒ Create versioned files (app_v2.py, component_new.jsx) + +**Always do:** +- โœ… Fix original files in place +- โœ… Run formatters before pushing +- โœ… Reference issue numbers in commits +- โœ… Watch CI after filing PR + +## Common AI Pitfalls + +Since many PRs are AI-generated, watch for these common mistakes: + +### 1. File Versioning +**โŒ Wrong:** +```bash +# Creating new versions instead of fixing originals +app_new.py +app_v2.py +component_refactored.jsx +``` + +**โœ… Correct:** +```bash +# Always modify the original file +app.py # Fixed in place +``` + +### 2. Formatter Not Run +**โŒ Wrong:** Committing without formatting (main cause of CI failures) + +**โœ… Correct:** +```bash +# Python +make format +black . -l 79 + +# React +npm run lint -- --fix +npx prettier --write . +``` + +### 3. Environment Variables +**โŒ Wrong:** +```javascript +// React env vars without REACT_APP_ prefix +const API_URL = process.env.API_URL; // Won't work! +``` + +**โœ… Correct:** +```javascript +// Use config file pattern instead +import config from './config/environment'; +const API_URL = config.API_URL; +``` + +### 4. Using Wrong Python Version +**โŒ Wrong:** Downgrading to Python 3.10 or older + +**โœ… Correct:** Use Python 3.13 as specified in project requirements + +### 5. 
Manual Changelog Updates +**โŒ Wrong:** Running `make changelog` and committing `CHANGELOG.md` + +**โœ… Correct:** Only create `changelog_entry.yaml` in PR + +## Repository Setup Patterns + +### Python Package Structure +``` +policyengine-package/ +โ”œโ”€โ”€ policyengine_package/ +โ”‚ โ”œโ”€โ”€ __init__.py +โ”‚ โ”œโ”€โ”€ core/ +โ”‚ โ”œโ”€โ”€ calculations/ +โ”‚ โ””โ”€โ”€ utils/ +โ”œโ”€โ”€ tests/ +โ”‚ โ”œโ”€โ”€ test_calculations.py +โ”‚ โ””โ”€โ”€ test_core.py +โ”œโ”€โ”€ pyproject.toml +โ”œโ”€โ”€ Makefile +โ”œโ”€โ”€ CLAUDE.md +โ”œโ”€โ”€ CHANGELOG.md +โ””โ”€โ”€ README.md +``` + +### React App Structure +``` +policyengine-app/ +โ”œโ”€โ”€ src/ +โ”‚ โ”œโ”€โ”€ components/ +โ”‚ โ”œโ”€โ”€ pages/ +โ”‚ โ”œโ”€โ”€ config/ +โ”‚ โ”‚ โ””โ”€โ”€ environment.js +โ”‚ โ””โ”€โ”€ App.jsx +โ”œโ”€โ”€ public/ +โ”œโ”€โ”€ package.json +โ”œโ”€โ”€ .eslintrc.json +โ”œโ”€โ”€ .prettierrc +โ””โ”€โ”€ README.md +``` + +## Makefile Commands + +Standard commands across PolicyEngine repos: + +```bash +make install # Install dependencies +make test # Run tests +make format # Format code +make changelog # Update changelog (automation only, not manual) +make debug # Start dev server (apps) +make build # Production build (apps) +``` + +## CI Stability + +### Common CI Issues + +**1. Fork PRs Fail** +- **Problem**: PRs from forks don't have access to repository secrets +- **Solution**: Create branches directly on PolicyEngine repos + +**2. GitHub API Rate Limits** +- **Problem**: Smoke tests fail with 403 errors +- **Solution**: Re-run failed jobs (different runners have different limits) + +**3. Linting Failures** +- **Problem**: Code not formatted before commit +- **Solution**: Always run `make format` before committing + +**4. 
Test Failures in CI but Pass Locally** +- **Problem**: Missing `uv run` prefix +- **Solution**: Use `uv run pytest` instead of `pytest` + +## Best Practices Checklist + +### Code Quality +- [ ] Code formatted with Black (Python) or Prettier (JS) +- [ ] No linting errors +- [ ] All tests pass +- [ ] Type hints added (Python, where applicable) +- [ ] Docstrings for public functions/classes +- [ ] Error handling with specific exceptions + +### Version Control +- [ ] Only `changelog_entry.yaml` created (not CHANGELOG.md) +- [ ] Commit message references issue number +- [ ] Branch created on PolicyEngine repo (not fork) +- [ ] No secrets or .env files committed +- [ ] Original files modified (no _v2 or _new files) + +### Testing +- [ ] Tests written for new functionality +- [ ] Tests pass locally with `make test` +- [ ] Coverage maintained or improved +- [ ] Edge cases handled + +### Documentation +- [ ] README updated if needed +- [ ] Code comments for complex logic +- [ ] API documentation updated if needed +- [ ] Examples provided for new features + +## Quick Reference + +### Format Commands by Language + +**Python:** +```bash +make format # Format code +black . -l 79 --check # Check formatting +uv run pytest tests/ -v # Run tests +``` + +**React:** +```bash +make format # Format code +npm run lint -- --max-warnings=0 # Check linting +npm test # Run tests +``` + +### Pre-Commit Checklist +```bash +# 1. Format +make format + +# 2. Test +make test + +# 3. Check linting +# Python: black . -l 79 --check +# React: npm run lint -- --max-warnings=0 + +# 4. Stage and commit +git add . +git commit -m "Description + +Fixes #123" + +# 5. 
Push and watch CI +git push +``` + +## Resources + +- **Main CLAUDE.md**: `/PolicyEngine/CLAUDE.md` +- **Python Style**: PEP 8, Black documentation +- **React Style**: Airbnb React/JSX Style Guide +- **Testing**: pytest documentation, Jest/RTL documentation +- **Writing Style**: See policyengine-writing-skill for blog posts, PR descriptions, and documentation + +## Examples + +See PolicyEngine repositories for examples of standard-compliant code: +- **policyengine-us**: Python package standards +- **policyengine-app**: React app standards +- **givecalc**: Streamlit app standards +- **crfb-tob-impacts**: Analysis repository standards diff --git a/skills/documentation/policyengine-user-guide-skill/SKILL.md b/skills/documentation/policyengine-user-guide-skill/SKILL.md new file mode 100644 index 0000000..1e7354e --- /dev/null +++ b/skills/documentation/policyengine-user-guide-skill/SKILL.md @@ -0,0 +1,295 @@ +--- +name: policyengine-user-guide +description: Using PolicyEngine web apps to analyze tax and benefit policy impacts - for users of policyengine.org +--- + +# PolicyEngine User Guide + +This skill helps you use PolicyEngine to analyze how tax and benefit policies affect households and populations. + +## For Users: Getting Started + +### What is PolicyEngine? + +PolicyEngine computes the impact of public policy on households and society. You can: +- Calculate how policies affect your household +- Analyze population-wide impacts of reforms +- Create and share custom policy proposals +- Compare different policy options + +### Web App: policyengine.org + +**Main features:** +1. **Your household** - Calculate your taxes and benefits +2. **Policy** - Design custom reforms and see impacts +3. 
**Research** - Read policy analysis and blog posts + +### Available Countries + +- **United States** - policyengine.org/us +- **United Kingdom** - policyengine.org/uk +- **Canada** - policyengine.org/ca (beta) + +## Using the Household Calculator + +### Step 1: Navigate to Household Page + +**US:** https://policyengine.org/us/household +**UK:** https://policyengine.org/uk/household + +### Step 2: Enter Your Information + +**Income:** +- Employment income (W-2 wages) +- Self-employment income +- Capital gains and dividends +- Social Security, pensions, etc. + +**Household composition:** +- Adults and dependents +- Ages +- Marital status + +**Location:** +- State (US) or region (UK) +- NYC checkbox for New York City residents + +**Deductions (US):** +- Charitable donations +- Mortgage interest +- State and local taxes (SALT) +- Medical expenses + +### Step 3: View Results + +**Net income** - Your income after taxes and benefits + +**Breakdown:** +- Total taxes (federal + state + local) +- Total benefits (EITC, CTC, SNAP, etc.) +- Effective tax rate +- Marginal tax rate + +**Charts:** +- Net income by earnings +- Marginal tax rate by earnings + +## Creating a Policy Reform + +### Step 1: Navigate to Policy Page + +**US:** https://policyengine.org/us/policy +**UK:** https://policyengine.org/uk/policy + +### Step 2: Select Parameters to Change + +**Browse parameters by:** +- Government department (IRS, SSA, etc.) +- Program (EITC, CTC, SNAP) +- Type (tax rates, benefit amounts, thresholds) + +**Example: Increase Child Tax Credit** +1. Navigate to gov.irs.credits.ctc.amount.base_amount +2. Change from $2,000 to $5,000 +3. 
Click "Calculate economic impact" + +### Step 3: View Population Impacts + +**Budgetary impact:** +- Total cost or revenue raised +- Breakdown by program + +**Poverty impact:** +- Change in poverty rates +- By age group (children, adults, seniors) +- Deep poverty (income < 50% of threshold) + +**Distributional impact:** +- Average impact by income decile +- Winners and losers by decile +- Relative vs absolute changes + +**Inequality impact:** +- Gini index change +- Top 10% and top 1% income share + +### Step 4: Share Your Reform + +**Share URL:** +Every reform has a unique URL you can share: +``` +policyengine.org/us/policy?reform=12345&region=enhanced_us&timePeriod=2025 +``` + +**Parameters in URL:** +- `reform=12345` - Your custom reform ID +- `region=enhanced_us` - Geography (US, state, or congressional district) +- `timePeriod=2025` - Year of analysis + +## Understanding Results + +### Metrics Explained + +**Supplemental Poverty Measure (SPM):** +- Accounts for taxes, benefits, and living costs +- US Census Bureau's official alternative poverty measure +- More comprehensive than Official Poverty Measure + +**Gini coefficient:** +- Measures income inequality (0 = perfect equality, 1 = perfect inequality) +- US Gini is typically around 0.48 +- Lower values = more equal income distribution + +**Income deciles:** +- Population divided into 10 equal groups by income +- Decile 1 = bottom 10% of earners +- Decile 10 = top 10% of earners + +**Winners and losers:** +- Winners: Net income increases by 5% or more +- Losers: Net income decreases by 5% or more +- Neutral: Net income change less than 5% + +### Reading Charts + +**Household impact charts:** +- X-axis: Usually income or earnings +- Y-axis: Net income, taxes, or benefits +- Hover to see exact values + +**Population impact charts:** +- Bar charts: Compare across groups (deciles, states) +- Line charts: Show relationships (income vs impact) +- Waterfall charts: Show components of budgetary impact + +## Common Use 
Cases + +### Use Case 1: How Does Policy X Affect My Household? + +1. Go to household calculator +2. Enter your information +3. Select "Reform" and choose the policy +4. Compare baseline vs reform results + +### Use Case 2: How Much Would Policy X Cost? + +1. Go to policy page +2. Create or select the reform +3. View "Budgetary impact" section +4. See total cost and breakdown + +### Use Case 3: Would Policy X Reduce Poverty? + +1. Go to policy page +2. Create or select the reform +3. View "Poverty impact" section +4. See change in poverty rate by age group + +### Use Case 4: Who Benefits from Policy X? + +1. Go to policy page +2. Create or select the reform +3. View "Distributional impact" section +4. See winners and losers by income decile + +### Use Case 5: Compare Two Policy Proposals + +1. Create Reform A (e.g., expand EITC) +2. Note the URL or reform ID +3. Create Reform B (e.g., expand CTC) +4. Compare budgetary, poverty, and distributional impacts + +## For Analysts: Moving Beyond the Web App + +Once you understand the web app, you can: + +**Use the Python client:** +- See `policyengine-python-client-skill` for programmatic access +- See `policyengine-us-skill` for detailed simulation patterns + +**Create custom analyses:** +- See `policyengine-analysis-skill` for analysis patterns +- See `microdf-skill` for data analysis utilities + +**Access the API directly:** +- See `policyengine-api-skill` for API documentation +- REST endpoints for integration + +## For Contributors: Building PolicyEngine + +To contribute to PolicyEngine development: + +**Understanding the stack:** +- See `policyengine-core-skill` for engine architecture +- See `policyengine-us-skill` for country model patterns +- See `policyengine-api-skill` for API development +- See `policyengine-app-skill` for app development + +**Development standards:** +- See `policyengine-standards-skill` for code quality requirements +- See `policyengine-writing-skill` for documentation style + +## Frequently 
Asked Questions + +### How accurate is PolicyEngine? + +PolicyEngine uses official tax and benefit rules from legislation and regulations. Calculations match official calculators (IRS, SSA, etc.) for individual households. + +Population-level estimates use microsimulation with survey data (Current Population Survey for US, Family Resources Survey for UK). + +### Can I use PolicyEngine for my taxes? + +PolicyEngine is for policy analysis, not tax filing. Results are estimates based on the information you provide. For filing taxes, use IRS.gov or professional tax software. + +### How is PolicyEngine funded? + +PolicyEngine is a nonprofit funded by grants and donations. The platform is free to use. + +### Can I export results? + +Yes! Charts can be downloaded as PNG or HTML. You can also share reform URLs with others. + +### What programs does PolicyEngine model? + +**US (federal):** +- Income tax, payroll tax, capital gains tax +- EITC, CTC, ACTC +- SNAP, WIC, ACA premium tax credits +- Social Security, SSI, TANF +- State income taxes (varies by state) + +**UK:** +- Income tax, National Insurance +- Universal Credit, Child Benefit +- State Pension, Pension Credit +- Council Tax, Council Tax Support + +For complete lists, see: +- US: https://policyengine.org/us/parameters +- UK: https://policyengine.org/uk/parameters + +### How do I report a bug? + +**If you find incorrect calculations:** +1. Go to the household calculator +2. Note your inputs and the incorrect result +3. File an issue: https://github.com/PolicyEngine/policyengine-us/issues (or appropriate country repo) +4. Include the household URL + +**If you find app bugs:** +1. Note what you were doing +2. 
File an issue: https://github.com/PolicyEngine/policyengine-app/issues + +## Resources + +- **Website:** https://policyengine.org +- **Documentation:** https://policyengine.org/us/docs +- **Blog:** https://policyengine.org/us/research +- **GitHub:** https://github.com/PolicyEngine +- **Contact:** hello@policyengine.org + +## Related Skills + +- **policyengine-python-client-skill** - Using PolicyEngine programmatically +- **policyengine-us-skill** - Understanding US tax/benefit calculations +- **policyengine-analysis-skill** - Creating custom policy analyses diff --git a/skills/documentation/policyengine-writing-skill/SKILL.md b/skills/documentation/policyengine-writing-skill/SKILL.md new file mode 100644 index 0000000..9f3ed06 --- /dev/null +++ b/skills/documentation/policyengine-writing-skill/SKILL.md @@ -0,0 +1,526 @@ +--- +name: policyengine-writing +description: PolicyEngine writing style for blog posts, documentation, PR descriptions, and research reports - emphasizing active voice, quantitative precision, and neutral tone +--- + +# PolicyEngine Writing Skill + +Use this skill when writing blog posts, documentation, PR descriptions, research reports, or any public-facing PolicyEngine content. + +## When to Use This Skill + +- Writing blog posts about policy analysis +- Creating PR descriptions +- Drafting documentation +- Writing research reports +- Composing social media posts +- Creating newsletters +- Writing README files + +## Core Principles + +PolicyEngine's writing emphasizes clarity, precision, and objectivity. + +1. **Active voice** - Prefer active constructions over passive +2. **Direct and quantitative** - Use specific numbers, avoid vague adjectives/adverbs +3. **Sentence case** - Use sentence case for headings, not title case +4. **Neutral tone** - Describe what policies do, not whether they're good or bad +5. **Precise language** - Choose exact verbs over vague modifiers + +## Active Voice + +Active voice makes writing clearer and more direct. 
+ +**โœ… Correct (Active):** +``` +Harris proposes expanding the Earned Income Tax Credit +The reform reduces poverty by 3.2% +PolicyEngine projects higher costs than other organizations +We estimate the ten-year costs +The bill lowers the state's top income tax rate +Montana raises the EITC from 10% to 20% +``` + +**โŒ Wrong (Passive):** +``` +The Earned Income Tax Credit is proposed to be expanded by Harris +Poverty is reduced by 3.2% by the reform +Higher costs are projected by PolicyEngine +The ten-year costs are estimated +The state's top income tax rate is lowered by the bill +The EITC is raised from 10% to 20% by Montana +``` + +## Quantitative and Precise + +Replace vague modifiers with specific numbers and measurements. + +**โœ… Correct (Quantitative):** +``` +Costs the state $245 million +Benefits 77% of Montana residents +Lowers the Supplemental Poverty Measure by 0.8% +Raises net income by $252 in 2026 +The reform affects 14.3 million households +Hours worked falls by 0.27%, or 411,000 full-time equivalent jobs +The top decile receives an average benefit of $1,033 +PolicyEngine projects costs 40% higher than the Tax Foundation +``` + +**โŒ Wrong (Vague adjectives/adverbs):** +``` +Significantly costs the state +Benefits most Montana residents +Greatly lowers poverty +Substantially raises net income +The reform affects many households +Hours worked falls considerably +High earners receive large benefits +PolicyEngine projects much higher costs +``` + +## Sentence Case for Headings + +Use sentence case (capitalize only the first word and proper nouns) for all headings. 
+ +**โœ… Correct (Sentence case):** +``` +## The proposal +## Nationwide impacts +## Household impacts +## Statewide impacts 2026 +## Case study: the End Child Poverty Act +## Key findings +``` + +**โŒ Wrong (Title case):** +``` +## The Proposal +## Nationwide Impacts +## Household Impacts +## Statewide Impacts 2026 +## Case Study: The End Child Poverty Act +## Key Findings +``` + +## Neutral, Objective Tone + +Describe what policies do without value judgments. Let readers draw their own conclusions from the data. + +**โœ… Correct (Neutral):** +``` +The reform reduces poverty by 3.2% and raises inequality by 0.16% +Single filers with earnings between $8,000 and $37,000 see their net incomes increase +The tax changes raise the net income of 75.9% of residents +PolicyEngine projects higher costs than other organizations +The top income decile receives 42% of total benefits +``` + +**โŒ Wrong (Value judgments):** +``` +The reform successfully reduces poverty by 3.2% but unfortunately raises inequality +Low-income workers finally see their net incomes increase +The tax changes benefit most residents +PolicyEngine provides more accurate cost estimates +The wealthiest households receive a disproportionate share of benefits +``` + +## Precise Verbs Over Adverbs + +Choose specific verbs instead of generic verbs modified by adverbs. 
+ +**✅ Correct (Precise verbs):** +``` +The bill lowers the top rate from 5.9% to 5.4% +The policy raises the maximum credit from $632 to $1,774 +The reform increases the phase-in rate from 7.65% to 15.3% +This doubles Montana's EITC from 10% to 20% +The change eliminates the age cap +``` + +**❌ Wrong (Vague verbs + adverbs):** +``` +The bill significantly changes the top rate +The policy substantially increases the maximum credit +The reform greatly boosts the phase-in rate +This dramatically expands Montana's EITC +The change completely removes the age cap +``` + +## Concrete Examples + +Always include specific household examples with precise numbers. + +**✅ Correct:** +``` +For a single adult with no children and $10,000 of earnings, the tax provisions +increase their net income by $69 in 2026 and $68 in 2027, solely from the +doubled EITC match. + +A single parent of two kids with an annual income of $50,000 will see a $252 +increase to their net income: $179 from the expanded EITC, and $73 from the +lower bracket threshold. + +A married couple with no dependents and $200,000 of earnings will see their +liability drop by $1,306 in 2027. +``` + +**❌ Wrong:** +``` +Low-income workers see modest increases to their net income from the +expanded EITC. + +Families with children benefit substantially from the tax changes. + +High earners also see significant reductions in their tax liability. +``` + +## Tables and Data + +Use tables liberally to present data clearly. Always include units and context. 
+ +**Example 1: Tax parameters over time** + +| Year | Phase-in rate | Max credit | Phase-out start | Phase-out rate | +| ---- | ------------- | ---------- | --------------- | -------------- | +| 2025 | 15.3% | $1,774 | $13,706 | 15.3% | +| 2026 | 15.3% | $1,815 | $14,022 | 15.3% | +| 2027 | 15.3% | $1,852 | $14,306 | 15.3% | + +**Example 2: Household impacts** + +| Household composition | 2026 net income change | 2027 net income change | +| ------------------------------ | ---------------------- | ---------------------- | +| Single, no children, $10,000 | $66 | $68 | +| Single, two children, $50,000 | $252 | $266 | +| Married, no children, $200,000 | $853 | $1,306 | + +**Example 3: Ten-year costs** + +| Year | Federal cost ($ billions) | +| ------- | ------------------------- | +| 2025 | 14.3 | +| 2026 | 14.4 | +| 2027 | 14.7 | +| 2025-34 | 143.7 | + +## Avoid Superlatives + +Replace superlative claims with specific comparisons. + +**✅ Correct:** +``` +PolicyEngine projects costs 40% higher than the Tax Foundation +The top decile receives an average benefit of $1,033 +The reform reduces child poverty by 3.2 percentage points +This represents Montana's largest income tax cut since 2021 +``` + +**❌ Wrong:** +``` +PolicyEngine provides the most accurate cost projections +The wealthiest households receive massive benefits +The reform dramatically slashes child poverty +This is Montana's largest income tax cut in history +``` + +## Structure and Organization + +Follow a clear hierarchical structure with key findings up front. + +**Standard blog post structure:** + +```markdown +# Title (H1) + +Opening paragraph states what happened and when, with a link to PolicyEngine. + +Key results in [year]: +- Cost: $245 million +- Benefits: 77% of residents +- Poverty impact: Reduces SPM by 0.8% +- Inequality impact: Raises Gini by 0.16% + +## The proposal (H2) + +Detailed description of the policy changes, often with a table showing +the specific parameter values. 
+ +## Household impacts (H2) + +Specific examples for representative household types. + +### Example 1: Single filer (H3) +Detailed calculation... + +### Example 2: Family with children (H3) +Detailed calculation... + +## Statewide impacts (H2) + +Population-level analysis with charts and tables. + +### Budgetary impact (H3) +Cost/revenue estimates... + +### Distributional impact (H3) +Winners/losers by income decile... + +### Poverty and inequality (H3) +Impact on poverty rates and inequality measures... + +## Methodology (H2) + +Explanation of data sources, modeling approach, and caveats. +``` + +## Common Patterns + +### Opening Paragraphs + +State the facts directly with dates, actors, and actions: + +``` +[On April 28, 2025], Governor Greg Gianforte (R-MT) signed House Bill 337, +a bill that amends Montana's individual income tax code. + +Vice President Harris proposes expanding the Earned Income Tax Credit (EITC) +for filers without qualifying dependents. + +In her economic plan, Harris proposes to restore the expanded Earned Income +Tax Credit for workers without children to its level under the American +Rescue Plan Act in 2021. +``` + +### Key Findings Format + +Lead with bullet points of quantitative results: + +``` +Key results in 2027: +- Costs the state $245 million +- Benefits 77% of Montana residents +- Lowers the Supplemental Poverty Measure by 0.8% +- Raises the Gini index by 0.16% +``` + +### Methodological Transparency + +Always specify the model, version, and assumptions: + +``` +Based on static microsimulation modeling with PolicyEngine US (version 1.103.0), +we project the following economic impacts for 2025. + +Assuming no behavioral responses, we project that the EITC expansion will cost +the federal government $14.3 billion in 2025. + +Incorporating elasticities of labor supply used by the Congressional Budget Office +increases the reform's cost. + +Over the ten-year budget window, this amounts to $143.7 billion. 
+``` + +### Household Examples + +Always include the household composition, income, and specific dollar impacts: + +``` +For a single adult with no children and $10,000 of earnings, the tax provisions +increase their net income by $69 in 2026 and $68 in 2027. + +A single parent of two kids with an annual income of $50,000 will see a $252 +increase to their net income due to House Bill 337: $179 from the expanded EITC, +and $73 from the lower bracket threshold. +``` + +## Examples in Context + +### Blog Post Opening + +**✅ Correct:** +``` +On April 28, 2025, Governor Gianforte signed House Bill 337, which lowers +Montana's top income tax rate from 5.9% to 5.4% and doubles the state EITC +from 10% to 20% of the federal credit. + +Key results in 2027: +- Costs the state $245 million +- Benefits 77% of Montana residents +- Lowers the Supplemental Poverty Measure by 0.8% +- Raises the Gini index by 0.16% + +Use PolicyEngine to view the full results or calculate the effect on your +household. +``` + +**❌ Wrong:** +``` +On April 28, 2025, Governor Gianforte made history by signing an amazing new +tax cut bill that will dramatically help Montana families. House Bill 337 +significantly reduces tax rates and greatly expands the EITC. + +This groundbreaking reform will: +- Cost the state money +- Help most residents +- Reduce poverty substantially +- Impact inequality + +Check out PolicyEngine to see how much you could save! +``` + +### PR Description + +**✅ Correct:** +``` +## Summary + +This PR adds Claude Code plugin configuration to enable automated installation +of agents and skills for PolicyEngine development. 
+ +## Changes + +- Add plugin auto-install configuration in .claude/settings.json +- Configure auto-install of country-models plugin from PolicyEngine/policyengine-claude + +## Benefits + +- Access to 15 specialized agents +- 3 slash commands (/encode-policy, /review-pr, /fix-pr) +- 2 skills (policyengine-us-skill, policyengine-standards-skill) + +## Testing + +After merging, team members trust the repo and the plugin auto-installs. +``` + +**❌ Wrong:** +``` +## Summary + +This amazing PR adds incredible new Claude Code plugin support that will +revolutionize PolicyEngine development! + +## Changes + +- Adds some configuration files +- Sets up plugins and stuff + +## Benefits + +- Gets you lots of cool new features +- Makes development much easier +- Provides great new tools + +## Testing + +Should work great once merged! +``` + +### Documentation + +**✅ Correct:** +``` +## Installation + +Install PolicyEngine-US from PyPI: + +```bash +pip install policyengine-us +``` + +This installs version 1.103.0 or later, which includes support for 2025 +tax parameters. +``` + +**❌ Wrong:** +``` +## Installation + +Simply install PolicyEngine-US: + +```bash +pip install policyengine-us +``` + +This will install the latest version with all the newest features! +``` + +## Special Cases + +### Comparisons to Other Organizations + +State facts neutrally without claiming superiority: + +**✅ Correct:** +``` +PolicyEngine projects higher costs than other organizations when considering +behavioral responses. + +| Organization | Cost, 2025-2034 ($ billions) | +| ------------------------- | ---------------------------- | +| PolicyEngine (static) | 144 | +| PolicyEngine (dynamic) | 201 | +| Tax Foundation | 157 | +| Penn Wharton Budget Model | 135 | +``` + +**❌ Wrong:** +``` +PolicyEngine provides more accurate estimates than other organizations. 
+ +Unlike other models that underestimate costs, PolicyEngine correctly accounts +for behavioral responses to project a more realistic $201 billion cost. +``` + +### Discussing Limitations + +Acknowledge limitations directly without hedging: + +**✅ Correct:** +``` +## Caveats + +The Current Population Survey has several limitations for tax microsimulation: + +- Truncates high incomes for privacy, underestimating tax impacts on high earners +- Underestimates benefit receipt compared to administrative totals +- Reflects 2020 data with 2025 policy parameters +- Lacks detail for specific income types (assumes all capital gains are long-term) +``` + +**❌ Wrong:** +``` +## Caveats + +While our model is highly sophisticated, like all models it has some potential +limitations that users should be aware of: + +- The data might not perfectly capture high incomes +- Benefits may be slightly underestimated +- We do our best to extrapolate older data to current years +``` + +## Writing Checklist + +Before publishing, verify: + +- [ ] Use active voice throughout +- [ ] Include specific numbers for all claims +- [ ] Use sentence case for all headings +- [ ] Maintain neutral, objective tone +- [ ] Choose precise verbs over vague adverbs +- [ ] Include concrete household examples +- [ ] Present data in tables +- [ ] Avoid all superlatives +- [ ] Structure with clear hierarchy +- [ ] Open with key quantitative findings +- [ ] Specify model version and assumptions +- [ ] Link to PolicyEngine when relevant +- [ ] Acknowledge limitations directly + +## Resources + +- **Example posts**: See `policyengine-app/src/posts/articles/` for reference implementations +- **PolicyEngine app**: https://policyengine.org for linking to analyses +- **Microsimulation docs**: https://policyengine.org/us/docs for methodology details diff --git a/skills/domain-knowledge/policyengine-uk-skill/SKILL.md b/skills/domain-knowledge/policyengine-uk-skill/SKILL.md new file mode 100644 index 0000000..1ffd600 
--- /dev/null +++ b/skills/domain-knowledge/policyengine-uk-skill/SKILL.md @@ -0,0 +1,660 @@ +--- +name: policyengine-uk +description: PolicyEngine-UK tax and benefit microsimulation patterns, situation creation, and common workflows +--- + +# PolicyEngine-UK + +PolicyEngine-UK models the UK tax and benefit system, including devolved variations for Scotland and Wales. + +## For Users 👥 + +### What is PolicyEngine-UK? + +PolicyEngine-UK is the "calculator" for UK taxes and benefits. When you use policyengine.org/uk, PolicyEngine-UK runs behind the scenes. + +**What it models:** + +**Direct taxes:** +- Income tax (UK-wide, Scottish, and Welsh variations) +- National Insurance (Classes 1, 2, 4) +- Capital gains tax +- Dividend tax + +**Property and transaction taxes:** +- Council Tax +- Stamp Duty Land Tax (England/NI) +- Land and Buildings Transaction Tax (Scotland) +- Land Transaction Tax (Wales) + +**Universal Credit:** +- Standard allowance +- Child elements +- Housing cost element +- Childcare costs element +- Carer element +- Work capability elements + +**Legacy benefits (being phased out):** +- Working Tax Credit +- Child Tax Credit +- Income Support +- Income-based JSA/ESA +- Housing Benefit + +**Other benefits:** +- Child Benefit +- Pension Credit +- Personal Independence Payment (PIP) +- Disability Living Allowance (DLA) +- Attendance Allowance +- State Pension + +**See full list:** https://policyengine.org/uk/parameters + +### Understanding Variables + +When you see results in PolicyEngine, these are variables: + +**Income variables:** +- `employment_income` - Gross employment earnings/salary +- `self_employment_income` - Self-employment profits +- `pension_income` - Private pension income +- `property_income` - Rental income +- `savings_interest_income` - Interest from savings +- `dividend_income` - Dividend income + +**Tax variables:** +- `income_tax` - Total income tax liability +- `national_insurance` - Total NI contributions +- `council_tax` - 
Council tax liability + +**Benefit variables:** +- `universal_credit` - Universal Credit amount +- `child_benefit` - Child Benefit amount +- `pension_credit` - Pension Credit amount +- `working_tax_credit` - Working Tax Credit (legacy) +- `child_tax_credit` - Child Tax Credit (legacy) + +**Summary variables:** +- `household_net_income` - Income after taxes and benefits +- `disposable_income` - Income after taxes +- `equivalised_household_net_income` - Adjusted for household size + +## For Analysts 📊 + +### Installation and Setup + +```bash +# Install PolicyEngine-UK +pip install policyengine-uk + +# Or with uv (recommended) +uv pip install policyengine-uk +``` + +### Quick Start + +```python +from policyengine_uk import Simulation + +# Create a household +situation = { + "people": { + "person": { + "age": {2025: 30}, + "employment_income": {2025: 30000} + } + }, + "benunits": { + "benunit": { + "members": ["person"] + } + }, + "households": { + "household": { + "members": ["person"], + "region": {2025: "LONDON"} + } + } +} + +# Calculate taxes and benefits +sim = Simulation(situation=situation) +income_tax = sim.calculate("income_tax", 2025)[0] +universal_credit = sim.calculate("universal_credit", 2025)[0] + +print(f"Income tax: £{income_tax:,.0f}") +print(f"Universal Credit: £{universal_credit:,.0f}") +``` + +### Web App to Python + +**Web app URL:** +``` +policyengine.org/uk/household?household=12345 +``` + +**Equivalent Python (conceptually):** +The household ID represents a situation dictionary. To replicate in Python, you'd create a similar situation. 
+ +### When to Use This Skill + +- Creating household situations for tax/benefit calculations +- Running microsimulations with PolicyEngine-UK +- Analyzing policy reforms and their impacts +- Building tools that use PolicyEngine-UK (calculators, analysis notebooks) +- Debugging PolicyEngine-UK calculations + +## For Contributors 💻 + +### Repository + +**Location:** PolicyEngine/policyengine-uk + +**To see current implementation:** +```bash +git clone https://github.com/PolicyEngine/policyengine-uk +cd policyengine-uk + +# Explore structure +tree policyengine_uk/ +``` + +**Key directories:** +```bash +ls policyengine_uk/ +# - variables/ - Tax and benefit calculations +# - parameters/ - Policy rules (YAML) +# - reforms/ - Pre-defined reforms +# - tests/ - Test cases +``` + +## Core Concepts + +### 1. Situation Dictionary Structure + +PolicyEngine UK requires a nested dictionary defining household composition: + +```python +situation = { + "people": { + "person_id": { + "age": {2025: 35}, + "employment_income": {2025: 30000}, + # ... other person attributes + } + }, + "benunits": { + "benunit_id": { + "members": ["person_id", ...] + } + }, + "households": { + "household_id": { + "members": ["person_id", ...], + "region": {2025: "SOUTH_EAST"} + } + } +} +``` + +**Key Rules:** +- All entities must have consistent member lists +- Use year keys for all values: `{2025: value}` +- Region must be one of the ITL 1 regions (see below) +- All monetary values in pounds (not pence) +- UK tax year runs April 6 to April 5 (but use calendar year in code) + +**Important Entity Difference:** +- UK uses **benunits** (benefit units): a single adult OR couple + dependent children +- This is the assessment unit for most means-tested benefits +- Unlike US which uses families/marital_units/tax_units/spm_units + +### 2. 
Creating Simulations + +```python +from policyengine_uk import Simulation + +# Create simulation from situation +simulation = Simulation(situation=situation) + +# Calculate variables +income_tax = simulation.calculate("income_tax", 2025) +universal_credit = simulation.calculate("universal_credit", 2025) +household_net_income = simulation.calculate("household_net_income", 2025) +``` + +**Common Variables:** + +**Income:** +- `employment_income` - Gross employment earnings +- `self_employment_income` - Self-employment profits +- `pension_income` - Private pension income +- `property_income` - Rental income +- `savings_interest_income` - Interest income +- `dividend_income` - Dividend income +- `miscellaneous_income` - Other income sources + +**Tax Outputs:** +- `income_tax` - Total income tax liability +- `national_insurance` - Total NI contributions +- `council_tax` - Council tax liability +- `VAT` - Value Added Tax paid + +**Benefits:** +- `universal_credit` - Universal Credit +- `child_benefit` - Child Benefit +- `pension_credit` - Pension Credit +- `working_tax_credit` - Working Tax Credit (legacy) +- `child_tax_credit` - Child Tax Credit (legacy) +- `personal_independence_payment` - PIP +- `attendance_allowance` - Attendance Allowance +- `state_pension` - State Pension + +**Summary:** +- `household_net_income` - Income after taxes and benefits +- `disposable_income` - Income after taxes +- `equivalised_household_net_income` - Adjusted for household size + +### 3. Using Axes for Parameter Sweeps + +To vary a parameter across multiple values: + +```python +situation = { + # ... normal situation setup ... 
+ "axes": [[{ + "name": "employment_income", + "count": 1001, + "min": 0, + "max": 100000, + "period": 2025 + }]] +} + +simulation = Simulation(situation=situation) +# Now calculate() returns arrays of 1001 values +incomes = simulation.calculate("employment_income", 2025) # Array of 1001 values +taxes = simulation.calculate("income_tax", 2025) # Array of 1001 values +``` + +**Important:** Remove axes before creating single-point simulations: +```python +situation_single = situation.copy() +situation_single.pop("axes", None) +simulation = Simulation(situation=situation_single) +``` + +### 4. Policy Reforms + +```python +from policyengine_uk import Simulation + +# Define a reform (modifies parameters) +reform = { + "gov.hmrc.income_tax.rates.uk.brackets[0].rate": { + "2025-01-01.2100-12-31": 0.25 # Increase basic rate to 25% + } +} + +# Create simulation with reform +simulation = Simulation(situation=situation, reform=reform) +``` + +## Common Patterns + +### Pattern 1: Single Person Household Calculation + +```python +from policyengine_uk import Simulation + +situation = { + "people": { + "person": { + "age": {2025: 30}, + "employment_income": {2025: 30000} + } + }, + "benunits": { + "benunit": { + "members": ["person"] + } + }, + "households": { + "household": { + "members": ["person"], + "region": {2025: "LONDON"} + } + } +} + +sim = Simulation(situation=situation) +income_tax = sim.calculate("income_tax", 2025)[0] +national_insurance = sim.calculate("national_insurance", 2025)[0] +universal_credit = sim.calculate("universal_credit", 2025)[0] +``` + +### Pattern 2: Couple with Children + +```python +situation = { + "people": { + "parent_1": { + "age": {2025: 35}, + "employment_income": {2025: 35000} + }, + "parent_2": { + "age": {2025: 33}, + "employment_income": {2025: 25000} + }, + "child_1": { + "age": {2025: 8} + }, + "child_2": { + "age": {2025: 5} + } + }, + "benunits": { + "benunit": { + "members": ["parent_1", "parent_2", "child_1", "child_2"] + } + }, + 
"households": { + "household": { + "members": ["parent_1", "parent_2", "child_1", "child_2"], + "region": {2025: "NORTH_WEST"} + } + } +} + +sim = Simulation(situation=situation) +child_benefit = sim.calculate("child_benefit", 2025)[0] +universal_credit = sim.calculate("universal_credit", 2025)[0] +``` + +### Pattern 3: Marginal Tax Rate Analysis + +```python +# Create baseline with axes varying income +situation_with_axes = { + "people": { + "person": { + "age": {2025: 30} + } + }, + "benunits": {"benunit": {"members": ["person"]}}, + "households": { + "household": { + "members": ["person"], + "region": {2025: "LONDON"} + } + }, + "axes": [[{ + "name": "employment_income", + "count": 1001, + "min": 0, + "max": 100000, + "period": 2025 + }]] +} + +sim = Simulation(situation=situation_with_axes) +incomes = sim.calculate("employment_income", 2025) +net_incomes = sim.calculate("household_net_income", 2025) + +# Calculate marginal tax rate +import numpy as np +mtr = 1 - (np.gradient(net_incomes) / np.gradient(incomes)) +``` + +### Pattern 4: Regional Comparison + +```python +regions = ["LONDON", "SCOTLAND", "WALES", "NORTH_EAST"] +results = {} + +for region in regions: + situation = create_situation(region=region, income=30000) + sim = Simulation(situation=situation) + results[region] = { + "income_tax": sim.calculate("income_tax", 2025)[0], + "national_insurance": sim.calculate("national_insurance", 2025)[0], + "total_tax": sim.calculate("income_tax", 2025)[0] + + sim.calculate("national_insurance", 2025)[0] + } +``` + +### Pattern 5: Policy Reform Impact + +```python +from policyengine_uk import Microsimulation, Reform + +# Define reform: Increase basic rate to 25% +class IncreaseBasicRate(Reform): + def apply(self): + def modify_parameters(parameters): + parameters.gov.hmrc.income_tax.rates.uk.brackets[0].rate.update( + period="year:2025:10", value=0.25 + ) + return parameters + self.modify_parameters(modify_parameters) + +# Run microsimulation +baseline = 
Microsimulation() +reformed = Microsimulation(reform=IncreaseBasicRate) + +# Calculate revenue impact +baseline_revenue = baseline.calc("income_tax", 2025).sum() +reformed_revenue = reformed.calc("income_tax", 2025).sum() +revenue_change = (reformed_revenue - baseline_revenue) / 1e9 # in billions + +# Calculate household impact +baseline_net_income = baseline.calc("household_net_income", 2025) +reformed_net_income = reformed.calc("household_net_income", 2025) +``` + +## Helper Scripts + +This skill includes helper scripts in the `scripts/` directory: + +```python +from policyengine_uk_skills.situation_helpers import ( + create_single_person, + create_couple, + create_family_with_children, + add_region +) + +# Quick situation creation +situation = create_single_person( + income=30000, + region="LONDON", + age=30 +) + +# Create couple +situation = create_couple( + income_1=35000, + income_2=25000, + region="SCOTLAND" +) +``` + +## Common Pitfalls and Solutions + +### Pitfall 1: Member Lists Out of Sync + +**Problem:** Different entities have different members +```python +# WRONG +"benunits": {"benunit": {"members": ["parent"]}}, +"households": {"household": {"members": ["parent", "child"]}} +``` + +**Solution:** Keep all entity member lists consistent: +```python +# CORRECT +all_members = ["parent", "child"] +"benunits": {"benunit": {"members": all_members}}, +"households": {"household": {"members": all_members}} +``` + +### Pitfall 2: Forgetting Year Keys + +**Problem:** `"age": 35` instead of `"age": {2025: 35}` + +**Solution:** Always use year dictionary: +```python +"age": {2025: 35}, +"employment_income": {2025: 30000} +``` + +### Pitfall 3: Wrong Region Format + +**Problem:** Using lowercase or incorrect region names + +**Solution:** Use uppercase ITL 1 region codes: +```python +# CORRECT regions: +"region": {2025: "LONDON"} +"region": {2025: "SCOTLAND"} +"region": {2025: "WALES"} +"region": {2025: "NORTH_EAST"} +"region": {2025: "SOUTH_EAST"} +``` + +### 
Pitfall 4: Axes Persistence + +**Problem:** Axes remain in situation when creating single-point simulation + +**Solution:** Remove axes before single-point simulation: +```python +situation_single = situation.copy() +situation_single.pop("axes", None) +``` + +### Pitfall 5: Missing Benunits + +**Problem:** Forgetting to include benunits (benefit units) + +**Solution:** Always include benunits in UK simulations: +```python +# UK requires benunits +situation = { + "people": {...}, + "benunits": {"benunit": {"members": [...]}}, # Required! + "households": {...} +} +``` + +## Regions in PolicyEngine UK + +UK uses ITL 1 (International Territorial Level 1, formerly NUTS 1) regions: + +**Regions:** +- `NORTH_EAST` - North East England +- `NORTH_WEST` - North West England +- `YORKSHIRE` - Yorkshire and the Humber +- `EAST_MIDLANDS` - East Midlands +- `WEST_MIDLANDS` - West Midlands +- `EAST_OF_ENGLAND` - East of England +- `LONDON` - London +- `SOUTH_EAST` - South East England +- `SOUTH_WEST` - South West England +- `WALES` - Wales +- `SCOTLAND` - Scotland +- `NORTHERN_IRELAND` - Northern Ireland + +**Regional Tax Variations:** + +**Scotland:** +- Has devolved income tax with 6 bands (starter 19%, basic 20%, intermediate 21%, higher 42%, advanced 45%, top 47%) +- Scottish residents automatically calculated with Scottish rates + +**Wales:** +- Has Welsh Rate of Income Tax (WRIT) +- Currently maintains parity with England/NI rates + +**England/Northern Ireland:** +- Standard UK rates: basic 20%, higher 40%, additional 45% + +## Key Parameters and Values (2025/26) + +### Income Tax +- **Personal Allowance:** £12,570 +- **Basic rate threshold:** £50,270 +- **Higher rate threshold:** £125,140 +- **Rates:** 20% (basic), 40% (higher), 45% (additional) +- **Personal allowance tapering:** £1 reduction for every £2 over £100,000 + +### National Insurance (Class 1) +- **Lower Earnings Limit:** £6,396/year +- **Primary Threshold:** £12,570/year +- **Upper Earnings Limit:** 
£50,270/year +- **Rates:** 12% (between primary and upper), 2% (above upper) + +### Universal Credit +- **Standard allowance:** Varies by single/couple and age +- **Taper rate:** 55% (rate at which UC is reduced as income increases) +- **Work allowance:** Amount you can earn before UC is reduced + +### Child Benefit +- **First child:** Higher rate +- **Subsequent children:** Lower rate +- **High Income Charge:** Tapered withdrawal starting at £60,000 + +## Version Compatibility + +- Use `policyengine-uk>=1.0.0` for 2025 calculations +- Check version: `import policyengine_uk; print(policyengine_uk.__version__)` +- Different years may require different package versions + +## Debugging Tips + +1. **Enable tracing:** + ```python + simulation.trace = True + result = simulation.calculate("variable_name", 2025) + ``` + +2. **Check intermediate calculations:** + ```python + gross_income = simulation.calculate("gross_income", 2025) + disposable_income = simulation.calculate("disposable_income", 2025) + ``` + +3. **Verify situation structure:** + ```python + import json + print(json.dumps(situation, indent=2)) + ``` + +4. **Test with PolicyEngine web app:** + - Go to policyengine.org/uk/household + - Enter same inputs + - Compare results + +## Additional Resources + +- **Documentation:** https://policyengine.org/uk/docs +- **API Reference:** https://github.com/PolicyEngine/policyengine-uk +- **Variable Explorer:** https://policyengine.org/uk/variables +- **Parameter Explorer:** https://policyengine.org/uk/parameters + +## Examples Directory + +See `examples/` for complete working examples: +- `single_person.yaml` - Single person household +- `couple.yaml` - Couple without children +- `family_with_children.yaml` - Family with dependents +- `universal_credit_sweep.yaml` - Analyzing UC with axes + +## Key Differences from US System + +1. **Benefit Units:** UK uses `benunits` (single/couple + children) instead of US multiple entity types +2. 
**Universal Credit:** Consolidated means-tested benefit (vs separate SNAP, TANF, etc. in US) +3. **National Insurance:** Separate from income tax with own thresholds (vs US Social Security tax) +4. **Devolved Taxes:** Scotland and Wales have different income tax rates +5. **Tax Year:** April 6 to April 5 (vs calendar year in US) +6. **No State Variation:** Council Tax is local, but most taxes/benefits are national (vs 50 US states) diff --git a/skills/domain-knowledge/policyengine-uk-skill/examples/couple.yaml b/skills/domain-knowledge/policyengine-uk-skill/examples/couple.yaml new file mode 100644 index 0000000..d2767d4 --- /dev/null +++ b/skills/domain-knowledge/policyengine-uk-skill/examples/couple.yaml @@ -0,0 +1,29 @@ +# Example: Couple without children in Scotland +# Person 1: £35,000, Age: 35 +# Person 2: £25,000, Age: 33 + +people: + person_1: + age: + 2025: 35 + employment_income: + 2025: 35000 + person_2: + age: + 2025: 33 + employment_income: + 2025: 25000 + +benunits: + benunit: + members: + - person_1 + - person_2 + +households: + household: + members: + - person_1 + - person_2 + region: + 2025: SCOTLAND diff --git a/skills/domain-knowledge/policyengine-uk-skill/examples/family_with_children.yaml b/skills/domain-knowledge/policyengine-uk-skill/examples/family_with_children.yaml new file mode 100644 index 0000000..be02ac8 --- /dev/null +++ b/skills/domain-knowledge/policyengine-uk-skill/examples/family_with_children.yaml @@ -0,0 +1,41 @@ +# Example: Family with children in Wales +# Parent 1: £35,000, Age: 35 +# Parent 2: £25,000, Age: 33 +# Child 1: Age 8 +# Child 2: Age 5 + +people: + parent_1: + age: + 2025: 35 + employment_income: + 2025: 35000 + parent_2: + age: + 2025: 33 + employment_income: + 2025: 25000 + child_1: + age: + 2025: 8 + child_2: + age: + 2025: 5 + +benunits: + benunit: + members: + - parent_1 + - parent_2 + - child_1 + - child_2 + +households: + household: + members: + - parent_1 + - parent_2 + - child_1 + - child_2 + region: + 
2025: WALES diff --git a/skills/domain-knowledge/policyengine-uk-skill/examples/single_person.yaml b/skills/domain-knowledge/policyengine-uk-skill/examples/single_person.yaml new file mode 100644 index 0000000..33cd1e0 --- /dev/null +++ b/skills/domain-knowledge/policyengine-uk-skill/examples/single_person.yaml @@ -0,0 +1,21 @@ +# Example: Single person household in London +# Income: £30,000, Age: 30 + +people: + person: + age: + 2025: 30 + employment_income: + 2025: 30000 + +benunits: + benunit: + members: + - person + +households: + household: + members: + - person + region: + 2025: LONDON diff --git a/skills/domain-knowledge/policyengine-uk-skill/examples/universal_credit_sweep.yaml b/skills/domain-knowledge/policyengine-uk-skill/examples/universal_credit_sweep.yaml new file mode 100644 index 0000000..b57492d --- /dev/null +++ b/skills/domain-knowledge/policyengine-uk-skill/examples/universal_credit_sweep.yaml @@ -0,0 +1,38 @@ +# Example: Analyzing Universal Credit with income variation +# Single parent with 2 children in North West +# Sweeps employment income from £0 to £50,000 + +people: + parent: + age: + 2025: 30 + child_1: + age: + 2025: 8 + child_2: + age: + 2025: 5 + +benunits: + benunit: + members: + - parent + - child_1 + - child_2 + +households: + household: + members: + - parent + - child_1 + - child_2 + region: + 2025: NORTH_WEST + +# Axes: Vary employment income from £0 to £50,000 +axes: + - - name: employment_income + count: 1001 + min: 0 + max: 50000 + period: 2025 diff --git a/skills/domain-knowledge/policyengine-uk-skill/scripts/situation_helpers.py b/skills/domain-knowledge/policyengine-uk-skill/scripts/situation_helpers.py new file mode 100644 index 0000000..4c1aadc --- /dev/null +++ b/skills/domain-knowledge/policyengine-uk-skill/scripts/situation_helpers.py @@ -0,0 +1,339 @@ +""" +Helper functions for creating PolicyEngine-UK situations. 
# Year key used for every period-indexed value in the generated situations.
CURRENT_YEAR = 2025

# UK ITL 1 regions
VALID_REGIONS = [
    "NORTH_EAST",
    "NORTH_WEST",
    "YORKSHIRE",
    "EAST_MIDLANDS",
    "WEST_MIDLANDS",
    "EAST_OF_ENGLAND",
    "LONDON",
    "SOUTH_EAST",
    "SOUTH_WEST",
    "WALES",
    "SCOTLAND",
    "NORTHERN_IRELAND"
]


def _validate_region(region):
    """Raise ValueError unless ``region`` is listed in VALID_REGIONS."""
    if region not in VALID_REGIONS:
        raise ValueError(f"Invalid region. Must be one of: {', '.join(VALID_REGIONS)}")


def _for_current_year(attrs):
    """Wrap every value of ``attrs`` as a ``{CURRENT_YEAR: value}`` mapping."""
    return {name: {CURRENT_YEAR: value} for name, value in attrs.items()}


def create_single_person(income, region="LONDON", age=30, **kwargs):
    """
    Create a situation for a single person household.

    Args:
        income (float): Employment income
        region (str): ITL 1 region (e.g., "LONDON", "SCOTLAND")
        age (int): Person's age
        **kwargs: Additional person attributes (e.g., self_employment_income);
            each value is keyed by CURRENT_YEAR

    Returns:
        dict: PolicyEngine situation dictionary

    Raises:
        ValueError: If region is not in VALID_REGIONS
    """
    _validate_region(region)

    person_attrs = _for_current_year({"age": age, "employment_income": income})
    person_attrs.update(_for_current_year(kwargs))

    return {
        "people": {"person": person_attrs},
        "benunits": {"benunit": {"members": ["person"]}},
        "households": {
            "household": {
                "members": ["person"],
                "region": {CURRENT_YEAR: region},
            }
        },
    }


def create_couple(
    income_1, income_2=0, region="LONDON", age_1=35, age_2=35, **kwargs
):
    """
    Create a situation for a couple without children.

    Args:
        income_1 (float): First person's employment income
        income_2 (float): Second person's employment income
        region (str): ITL 1 region
        age_1 (int): First person's age
        age_2 (int): Second person's age
        **kwargs: Additional household attributes; each value is keyed by
            CURRENT_YEAR

    Returns:
        dict: PolicyEngine situation dictionary

    Raises:
        ValueError: If region is not in VALID_REGIONS
    """
    _validate_region(region)

    members = ["person_1", "person_2"]

    # Each entity gets its own copy of the member list so that editing one
    # entity's members later cannot silently change the other's.
    household_attrs = {
        "members": list(members),
        "region": {CURRENT_YEAR: region},
    }
    household_attrs.update(_for_current_year(kwargs))

    return {
        "people": {
            "person_1": _for_current_year(
                {"age": age_1, "employment_income": income_1}
            ),
            "person_2": _for_current_year(
                {"age": age_2, "employment_income": income_2}
            ),
        },
        "benunits": {"benunit": {"members": list(members)}},
        "households": {"household": household_attrs},
    }


def create_family_with_children(
    parent_income,
    num_children=1,
    child_ages=None,
    region="LONDON",
    parent_age=35,
    couple=False,
    partner_income=0,
    **kwargs
):
    """
    Create a situation for a family with children.

    Args:
        parent_income (float): Primary parent's employment income
        num_children (int): Number of children
        child_ages (list): List of child ages (defaults to [5, 8, 11, ...],
            i.e. starting at 5 and increasing by 3 per child)
        region (str): ITL 1 region
        parent_age (int): Parent's age (also used as the partner's age)
        couple (bool): Whether this is a couple household
        partner_income (float): Partner's income if couple
        **kwargs: Additional household attributes; each value is keyed by
            CURRENT_YEAR

    Returns:
        dict: PolicyEngine situation dictionary

    Raises:
        ValueError: If region is not in VALID_REGIONS, or if child_ages is
            given and its length differs from num_children
    """
    _validate_region(region)

    if child_ages is None:
        child_ages = [5 + i * 3 for i in range(num_children)]
    elif len(child_ages) != num_children:
        raise ValueError("Length of child_ages must match num_children")

    people = {
        "parent": _for_current_year(
            {"age": parent_age, "employment_income": parent_income}
        )
    }
    members = ["parent"]

    if couple:
        people["partner"] = _for_current_year(
            {"age": parent_age, "employment_income": partner_income}
        )
        members.append("partner")

    for index, age in enumerate(child_ages, start=1):
        child_id = f"child_{index}"
        people[child_id] = {"age": {CURRENT_YEAR: age}}
        members.append(child_id)

    # Separate list copies per entity: see create_couple for the rationale.
    household_attrs = {
        "members": list(members),
        "region": {CURRENT_YEAR: region},
    }
    household_attrs.update(_for_current_year(kwargs))

    return {
        "people": people,
        "benunits": {"benunit": {"members": list(members)}},
        "households": {"household": household_attrs},
    }
+ + Args: + situation (dict): Existing PolicyEngine situation + person_id (str): Person ID to add income to (defaults to first person) + self_employment_income (float): Self-employment income + pension_income (float): Private pension income + property_income (float): Rental income + savings_interest_income (float): Interest income + dividend_income (float): Dividend income + miscellaneous_income (float): Other income + + Returns: + dict: Updated situation with additional income + """ + # Get person ID + if person_id is None: + person_id = list(situation["people"].keys())[0] + + # Add income sources + if self_employment_income > 0: + situation["people"][person_id]["self_employment_income"] = { + CURRENT_YEAR: self_employment_income + } + + if pension_income > 0: + situation["people"][person_id]["pension_income"] = { + CURRENT_YEAR: pension_income + } + + if property_income > 0: + situation["people"][person_id]["property_income"] = { + CURRENT_YEAR: property_income + } + + if savings_interest_income > 0: + situation["people"][person_id]["savings_interest_income"] = { + CURRENT_YEAR: savings_interest_income + } + + if dividend_income > 0: + situation["people"][person_id]["dividend_income"] = { + CURRENT_YEAR: dividend_income + } + + if miscellaneous_income > 0: + situation["people"][person_id]["miscellaneous_income"] = { + CURRENT_YEAR: miscellaneous_income + } + + return situation + + +def add_axes(situation, variable_name, min_val, max_val, count=1001): + """ + Add axes to a situation for parameter sweeps. 
+ + Args: + situation (dict): Existing PolicyEngine situation + variable_name (str): Variable to vary (e.g., "employment_income") + min_val (float): Minimum value + max_val (float): Maximum value + count (int): Number of points (default: 1001) + + Returns: + dict: Updated situation with axes + """ + situation["axes"] = [[{ + "name": variable_name, + "count": count, + "min": min_val, + "max": max_val, + "period": CURRENT_YEAR + }]] + + return situation + + +def set_region(situation, region): + """ + Set or change the region for a household. + + Args: + situation (dict): Existing PolicyEngine situation + region (str): ITL 1 region (e.g., "LONDON", "SCOTLAND") + + Returns: + dict: Updated situation + """ + if region not in VALID_REGIONS: + raise ValueError(f"Invalid region. Must be one of: {', '.join(VALID_REGIONS)}") + + household_id = list(situation["households"].keys())[0] + situation["households"][household_id]["region"] = {CURRENT_YEAR: region} + + return situation + + +def create_pensioner_household( + pension_income, + state_pension_income=0, + region="LONDON", + age=70, + couple=False, + partner_pension_income=0, + partner_age=68, + **kwargs +): + """ + Create a situation for a pensioner household. + + Args: + pension_income (float): Private pension income + state_pension_income (float): State pension income + region (str): ITL 1 region + age (int): Pensioner's age + couple (bool): Whether this is a couple household + partner_pension_income (float): Partner's pension income if couple + partner_age (int): Partner's age if couple + **kwargs: Additional household attributes + + Returns: + dict: PolicyEngine situation dictionary + """ + if region not in VALID_REGIONS: + raise ValueError(f"Invalid region. 
Must be one of: {', '.join(VALID_REGIONS)}") + + people = { + "pensioner": { + "age": {CURRENT_YEAR: age}, + "pension_income": {CURRENT_YEAR: pension_income}, + "state_pension": {CURRENT_YEAR: state_pension_income} + } + } + + members = ["pensioner"] + + if couple: + people["partner"] = { + "age": {CURRENT_YEAR: partner_age}, + "pension_income": {CURRENT_YEAR: partner_pension_income}, + "state_pension": {CURRENT_YEAR: 0} + } + members.append("partner") + + household_attrs = { + "members": members, + "region": {CURRENT_YEAR: region} + } + household_attrs.update({k: {CURRENT_YEAR: v} for k, v in kwargs.items()}) + + return { + "people": people, + "benunits": {"benunit": {"members": members}}, + "households": {"household": household_attrs} + } diff --git a/skills/domain-knowledge/policyengine-us-skill/SKILL.md b/skills/domain-knowledge/policyengine-us-skill/SKILL.md new file mode 100644 index 0000000..c8f9359 --- /dev/null +++ b/skills/domain-knowledge/policyengine-us-skill/SKILL.md @@ -0,0 +1,524 @@ +--- +name: policyengine-us +description: PolicyEngine-US tax and benefit microsimulation patterns, situation creation, and common workflows +--- + +# PolicyEngine-US + +PolicyEngine-US models the US federal and state tax and benefit system. + +## For Users 👥 + +### What is PolicyEngine-US? + +PolicyEngine-US is the "calculator" for US taxes and benefits. When you use policyengine.org/us, PolicyEngine-US runs behind the scenes. 
+ +**What it models:** + +**Federal taxes:** +- Income tax (with standard/itemized deductions) +- Payroll tax (Social Security, Medicare) +- Capital gains tax + +**Federal benefits:** +- Earned Income Tax Credit (EITC) +- Child Tax Credit (CTC) +- SNAP (food stamps) +- WIC, ACA premium tax credits +- Social Security, SSI, TANF + +**State programs (varies by state):** +- State income tax (all 50 states + DC) +- State EITC, CTC +- State-specific benefits + +**See full list:** https://policyengine.org/us/parameters + +### Understanding Variables + +When you see results in PolicyEngine, these are variables: + +**Income variables:** +- `employment_income` - W-2 wages +- `self_employment_income` - 1099 income +- `qualified_dividend_income` - Dividends +- `capital_gains` - Capital gains + +**Tax variables:** +- `income_tax` - Federal income tax +- `state_income_tax` - State income tax +- `payroll_tax` - FICA taxes + +**Benefit variables:** +- `eitc` - Earned Income Tax Credit +- `ctc` - Child Tax Credit +- `snap` - SNAP benefits + +**Summary variables:** +- `household_net_income` - Income after taxes and benefits +- `household_tax` - Total taxes +- `household_benefits` - Total benefits + +## For Analysts 📊 + +### Installation and Setup + +```bash +# Install PolicyEngine-US +pip install policyengine-us + +# Or with uv (recommended) +uv pip install policyengine-us +``` + +### Quick Start + +```python +from policyengine_us import Simulation + +# Create a household +situation = { + "people": { + "you": { + "age": {2024: 30}, + "employment_income": {2024: 50000} + } + }, + "families": {"family": {"members": ["you"]}}, + "marital_units": {"marital_unit": {"members": ["you"]}}, + "tax_units": {"tax_unit": {"members": ["you"]}}, + "spm_units": {"spm_unit": {"members": ["you"]}}, + "households": { + "household": { + "members": ["you"], + "state_name": {2024: "CA"} + } + } +} + +# Calculate taxes and benefits +sim = Simulation(situation=situation) +income_tax = 
sim.calculate("income_tax", 2024)[0] +eitc = sim.calculate("eitc", 2024)[0] + +print(f"Income tax: ${income_tax:,.0f}") +print(f"EITC: ${eitc:,.0f}") +``` + +### Web App to Python + +**Web app URL:** +``` +policyengine.org/us/household?household=12345 +``` + +**Equivalent Python (conceptually):** +The household ID represents a situation dictionary. To replicate in Python, you'd create a similar situation. + +### When to Use This Skill + +- Creating household situations for tax/benefit calculations +- Running microsimulations with PolicyEngine-US +- Analyzing policy reforms and their impacts +- Building tools that use PolicyEngine-US (calculators, analysis notebooks) +- Debugging PolicyEngine-US calculations + +## For Contributors 💻 + +### Repository + +**Location:** PolicyEngine/policyengine-us + +**To see current implementation:** +```bash +git clone https://github.com/PolicyEngine/policyengine-us +cd policyengine-us + +# Explore structure +tree policyengine_us/ +``` + +**Key directories:** +```bash +ls policyengine_us/ +# - variables/ - Tax and benefit calculations +# - parameters/ - Policy rules (YAML) +# - reforms/ - Pre-defined reforms +# - tests/ - Test cases +``` + +## Core Concepts + +### 1. Situation Dictionary Structure + +PolicyEngine requires a nested dictionary defining household composition and characteristics: + +```python +situation = { + "people": { + "person_id": { + "age": {2024: 35}, + "employment_income": {2024: 50000}, + # ... 
other person attributes + } + }, + "families": { + "family_id": {"members": ["person_id", ...]} + }, + "marital_units": { + "marital_unit_id": {"members": ["person_id", ...]} + }, + "tax_units": { + "tax_unit_id": {"members": ["person_id", ...]} + }, + "spm_units": { + "spm_unit_id": {"members": ["person_id", ...]} + }, + "households": { + "household_id": { + "members": ["person_id", ...], + "state_name": {2024: "CA"} + } + } +} +``` + +**Key Rules:** +- All entities must have consistent member lists +- Use year keys for all values: `{2024: value}` +- State must be two-letter code (e.g., "CA", "NY", "TX") +- All monetary values in dollars (not cents) + +### 2. Creating Simulations + +```python +from policyengine_us import Simulation + +# Create simulation from situation +simulation = Simulation(situation=situation) + +# Calculate variables +income_tax = simulation.calculate("income_tax", 2024) +eitc = simulation.calculate("eitc", 2024) +household_net_income = simulation.calculate("household_net_income", 2024) +``` + +**Common Variables:** + +**Income:** +- `employment_income` - W-2 wages +- `self_employment_income` - 1099/business income +- `qualified_dividend_income` - Qualified dividends +- `capital_gains` - Capital gains +- `interest_income` - Interest income +- `social_security` - Social Security benefits +- `pension_income` - Pension/retirement income + +**Deductions:** +- `charitable_cash_donations` - Cash charitable giving +- `real_estate_taxes` - State and local property taxes +- `mortgage_interest` - Mortgage interest deduction +- `medical_expense` - Medical and dental expenses +- `casualty_loss` - Casualty and theft losses + +**Tax Outputs:** +- `income_tax` - Total federal income tax +- `payroll_tax` - FICA taxes +- `state_income_tax` - State income tax +- `household_tax` - Total taxes (federal + state + local) + +**Benefits:** +- `eitc` - Earned Income Tax Credit +- `ctc` - Child Tax Credit +- `snap` - SNAP benefits +- `household_benefits` - Total 
benefits + +**Summary:** +- `household_net_income` - Income minus taxes plus benefits + +### 3. Using Axes for Parameter Sweeps + +To vary a parameter across multiple values: + +```python +situation = { + # ... normal situation setup ... + "axes": [[{ + "name": "employment_income", + "count": 1001, + "min": 0, + "max": 200000, + "period": 2024 + }]] +} + +simulation = Simulation(situation=situation) +# Now calculate() returns arrays of 1001 values +incomes = simulation.calculate("employment_income", 2024) # Array of 1001 values +taxes = simulation.calculate("income_tax", 2024) # Array of 1001 values +``` + +**Important:** Remove axes before creating single-point simulations: +```python +situation_single = situation.copy() +situation_single.pop("axes", None) +simulation = Simulation(situation=situation_single) +``` + +### 4. Policy Reforms + +```python +from policyengine_us import Simulation + +# Define a reform (modifies parameters) +reform = { + "gov.irs.credits.ctc.amount.base_amount": { + "2024-01-01.2100-12-31": 5000 # Increase CTC to $5000 + } +} + +# Create simulation with reform +simulation = Simulation(situation=situation, reform=reform) +``` + +## Common Patterns + +### Pattern 1: Single Household Calculation + +```python +from policyengine_us import Simulation + +situation = { + "people": { + "parent": { + "age": {2024: 35}, + "employment_income": {2024: 60000} + }, + "child": { + "age": {2024: 5} + } + }, + "families": {"family": {"members": ["parent", "child"]}}, + "marital_units": {"marital_unit": {"members": ["parent"]}}, + "tax_units": {"tax_unit": {"members": ["parent", "child"]}}, + "spm_units": {"spm_unit": {"members": ["parent", "child"]}}, + "households": { + "household": { + "members": ["parent", "child"], + "state_name": {2024: "NY"} + } + } +} + +sim = Simulation(situation=situation) +income_tax = sim.calculate("income_tax", 2024)[0] +ctc = sim.calculate("ctc", 2024)[0] +``` + +### Pattern 2: Marginal Tax Rate Analysis + +```python +# Create 
baseline with axes varying income +situation_with_axes = { + # ... situation setup ... + "axes": [[{ + "name": "employment_income", + "count": 1001, + "min": 0, + "max": 200000, + "period": 2024 + }]] +} + +sim = Simulation(situation=situation_with_axes) +incomes = sim.calculate("employment_income", 2024) +taxes = sim.calculate("income_tax", 2024) + +# Calculate marginal tax rate +import numpy as np +mtr = np.gradient(taxes) / np.gradient(incomes) +``` + +### Pattern 3: Charitable Donation Impact + +```python +# Baseline (no donation) +situation_baseline = create_situation(income=100000, donation=0) +sim_baseline = Simulation(situation=situation_baseline) +tax_baseline = sim_baseline.calculate("income_tax", 2024)[0] + +# With donation +situation_donation = create_situation(income=100000, donation=5000) +sim_donation = Simulation(situation=situation_donation) +tax_donation = sim_donation.calculate("income_tax", 2024)[0] + +# Tax savings from donation +tax_savings = tax_baseline - tax_donation +effective_discount = tax_savings / 5000 # e.g., 0.24 = 24% discount +``` + +### Pattern 4: State Comparison + +```python +states = ["CA", "NY", "TX", "FL"] +results = {} + +for state in states: + situation = create_situation(state=state, income=75000) + sim = Simulation(situation=situation) + results[state] = { + "state_income_tax": sim.calculate("state_income_tax", 2024)[0], + "total_tax": sim.calculate("household_tax", 2024)[0] + } +``` + +## Helper Scripts + +This skill includes helper scripts in the `scripts/` directory: + +```python +from policyengine_skills.situation_helpers import ( + create_single_filer, + create_married_couple, + create_family_with_children, + add_itemized_deductions +) + +# Quick situation creation +situation = create_single_filer( + income=50000, + state="CA", + age=30 +) + +# Add deductions +situation = add_itemized_deductions( + situation, + charitable_donations=5000, + mortgage_interest=10000, + real_estate_taxes=8000 +) +``` + +## Common 
Pitfalls and Solutions + +### Pitfall 1: Member Lists Out of Sync +**Problem:** Different entities have different members +```python +# WRONG +"tax_units": {"tax_unit": {"members": ["parent"]}}, +"households": {"household": {"members": ["parent", "child"]}} +``` + +**Solution:** Keep all entity member lists consistent: +```python +# CORRECT +all_members = ["parent", "child"] +"families": {"family": {"members": all_members}}, +"tax_units": {"tax_unit": {"members": all_members}}, +"households": {"household": {"members": all_members}} +``` + +### Pitfall 2: Forgetting Year Keys +**Problem:** `"age": 35` instead of `"age": {2024: 35}` + +**Solution:** Always use year dictionary: +```python +"age": {2024: 35}, +"employment_income": {2024: 50000} +``` + +### Pitfall 3: Net Taxes vs Gross Taxes +**Problem:** Forgetting to subtract benefits from taxes + +**Solution:** Use proper calculation: +```python +# Net taxes (what household actually pays) +net_tax = sim.calculate("household_tax", 2024) - \ + sim.calculate("household_benefits", 2024) +``` + +### Pitfall 4: Axes Persistence +**Problem:** Axes remain in situation when creating single-point simulation + +**Solution:** Remove axes before single-point simulation: +```python +situation_single = situation.copy() +situation_single.pop("axes", None) +``` + +### Pitfall 5: State-Specific Variables +**Problem:** Using NYC-specific variables without `in_nyc: True` + +**Solution:** Set NYC flag for NY residents in NYC: +```python +"households": { + "household": { + "state_name": {2024: "NY"}, + "in_nyc": {2024: True} # Required for NYC taxes + } +} +``` + +## NYC Handling + +For New York City residents: +```python +situation = { + # ... people setup ... 
+ "households": { + "household": { + "members": ["person"], + "state_name": {2024: "NY"}, + "in_nyc": {2024: True} # Enable NYC tax calculations + } + } +} +``` + +## Version Compatibility + +- Always use `policyengine-us>=1.155.0` for 2024 calculations +- Check version: `import policyengine_us; print(policyengine_us.__version__)` +- Different years may require different package versions + +## Debugging Tips + +1. **Enable tracing:** + ```python + simulation.trace = True + result = simulation.calculate("variable_name", 2024) + ``` + +2. **Check intermediate calculations:** + ```python + agi = simulation.calculate("adjusted_gross_income", 2024) + taxable_income = simulation.calculate("taxable_income", 2024) + ``` + +3. **Verify situation structure:** + ```python + import json + print(json.dumps(situation, indent=2)) + ``` + +4. **Test with PolicyEngine web app:** + - Go to policyengine.org/us/household + - Enter same inputs + - Compare results + +## Additional Resources + +- **Documentation:** https://policyengine.org/us/docs +- **API Reference:** https://github.com/PolicyEngine/policyengine-us +- **Example Notebooks:** https://github.com/PolicyEngine/analysis-notebooks +- **Variable Explorer:** https://policyengine.org/us/variables + +## Examples Directory + +See `examples/` for complete working examples: +- `single_filer.yaml` - Single person household +- `married_couple.yaml` - Married filing jointly +- `family_with_children.yaml` - Family with dependents +- `itemized_deductions.yaml` - Using itemized deductions +- `donation_sweep.yaml` - Analyzing donation impacts with axes diff --git a/skills/domain-knowledge/policyengine-us-skill/examples/donation_sweep.yaml b/skills/domain-knowledge/policyengine-us-skill/examples/donation_sweep.yaml new file mode 100644 index 0000000..6829690 --- /dev/null +++ b/skills/domain-knowledge/policyengine-us-skill/examples/donation_sweep.yaml @@ -0,0 +1,71 @@ +# Example: Analyzing charitable donation impacts using axes +# Married 
couple with 2 children in New York +# Sweeps charitable donations from $0 to $50,000 + +people: + parent_1: + age: + 2024: 35 + employment_income: + 2024: 100000 + parent_2: + age: + 2024: 35 + employment_income: + 2024: 50000 + child_1: + age: + 2024: 8 + child_2: + age: + 2024: 5 + +families: + family: + members: + - parent_1 + - parent_2 + - child_1 + - child_2 + +marital_units: + marital_unit: + members: + - parent_1 + - parent_2 + - child_1 + - child_2 + +tax_units: + tax_unit: + members: + - parent_1 + - parent_2 + - child_1 + - child_2 + +spm_units: + spm_unit: + members: + - parent_1 + - parent_2 + - child_1 + - child_2 + +households: + household: + members: + - parent_1 + - parent_2 + - child_1 + - child_2 + state_name: + 2024: NY + +# Axes: Vary charitable donations from $0 to $50,000 +axes: + - - name: charitable_cash_donations + count: 1001 + min: 0 + max: 50000 + period: 2024 diff --git a/skills/domain-knowledge/policyengine-us-skill/examples/single_filer.yaml b/skills/domain-knowledge/policyengine-us-skill/examples/single_filer.yaml new file mode 100644 index 0000000..c51e04c --- /dev/null +++ b/skills/domain-knowledge/policyengine-us-skill/examples/single_filer.yaml @@ -0,0 +1,38 @@ +# Example: Single tax filer in California +# Income: $60,000, Age: 30, with charitable donations + +people: + person: + age: + 2024: 30 + employment_income: + 2024: 60000 + charitable_cash_donations: + 2024: 5000 + +families: + family: + members: + - person + +marital_units: + marital_unit: + members: + - person + +tax_units: + tax_unit: + members: + - person + +spm_units: + spm_unit: + members: + - person + +households: + household: + members: + - person + state_name: + 2024: CA diff --git a/skills/domain-knowledge/policyengine-us-skill/scripts/situation_helpers.py b/skills/domain-knowledge/policyengine-us-skill/scripts/situation_helpers.py new file mode 100644 index 0000000..1bac138 --- /dev/null +++ 
b/skills/domain-knowledge/policyengine-us-skill/scripts/situation_helpers.py @@ -0,0 +1,257 @@ +""" +Helper functions for creating PolicyEngine-US situations. + +These utilities simplify the creation of situation dictionaries +for common household configurations. +""" + +CURRENT_YEAR = 2024 + + +def create_single_filer(income, state="CA", age=35, **kwargs): + """ + Create a situation for a single tax filer. + + Args: + income (float): Employment income + state (str): Two-letter state code (e.g., "CA", "NY") + age (int): Person's age + **kwargs: Additional person attributes (e.g., self_employment_income) + + Returns: + dict: PolicyEngine situation dictionary + """ + person_attrs = { + "age": {CURRENT_YEAR: age}, + "employment_income": {CURRENT_YEAR: income}, + } + person_attrs.update({k: {CURRENT_YEAR: v} for k, v in kwargs.items()}) + + return { + "people": {"person": person_attrs}, + "families": {"family": {"members": ["person"]}}, + "marital_units": {"marital_unit": {"members": ["person"]}}, + "tax_units": {"tax_unit": {"members": ["person"]}}, + "spm_units": {"spm_unit": {"members": ["person"]}}, + "households": { + "household": { + "members": ["person"], + "state_name": {CURRENT_YEAR: state} + } + } + } + + +def create_married_couple( + income_1, income_2=0, state="CA", age_1=35, age_2=35, **kwargs +): + """ + Create a situation for a married couple filing jointly. 
+ + Args: + income_1 (float): First spouse's employment income + income_2 (float): Second spouse's employment income + state (str): Two-letter state code + age_1 (int): First spouse's age + age_2 (int): Second spouse's age + **kwargs: Additional household attributes + + Returns: + dict: PolicyEngine situation dictionary + """ + members = ["spouse_1", "spouse_2"] + + household_attrs = { + "members": members, + "state_name": {CURRENT_YEAR: state} + } + household_attrs.update({k: {CURRENT_YEAR: v} for k, v in kwargs.items()}) + + return { + "people": { + "spouse_1": { + "age": {CURRENT_YEAR: age_1}, + "employment_income": {CURRENT_YEAR: income_1} + }, + "spouse_2": { + "age": {CURRENT_YEAR: age_2}, + "employment_income": {CURRENT_YEAR: income_2} + } + }, + "families": {"family": {"members": members}}, + "marital_units": {"marital_unit": {"members": members}}, + "tax_units": {"tax_unit": {"members": members}}, + "spm_units": {"spm_unit": {"members": members}}, + "households": {"household": household_attrs} + } + + +def create_family_with_children( + parent_income, + num_children=1, + child_ages=None, + state="CA", + parent_age=35, + married=False, + spouse_income=0, + **kwargs +): + """ + Create a situation for a family with children. 
+ + Args: + parent_income (float): Primary parent's employment income + num_children (int): Number of children + child_ages (list): List of child ages (defaults to [5, 8, 12, ...]) + state (str): Two-letter state code + parent_age (int): Parent's age + married (bool): Whether parents are married + spouse_income (float): Spouse's income if married + **kwargs: Additional household attributes + + Returns: + dict: PolicyEngine situation dictionary + """ + if child_ages is None: + child_ages = [5 + i * 3 for i in range(num_children)] + elif len(child_ages) != num_children: + raise ValueError("Length of child_ages must match num_children") + + people = { + "parent": { + "age": {CURRENT_YEAR: parent_age}, + "employment_income": {CURRENT_YEAR: parent_income} + } + } + + members = ["parent"] + + if married: + people["spouse"] = { + "age": {CURRENT_YEAR: parent_age}, + "employment_income": {CURRENT_YEAR: spouse_income} + } + members.append("spouse") + + for i, age in enumerate(child_ages): + child_id = f"child_{i+1}" + people[child_id] = {"age": {CURRENT_YEAR: age}} + members.append(child_id) + + household_attrs = { + "members": members, + "state_name": {CURRENT_YEAR: state} + } + household_attrs.update({k: {CURRENT_YEAR: v} for k, v in kwargs.items()}) + + return { + "people": people, + "families": {"family": {"members": members}}, + "marital_units": { + "marital_unit": { + "members": members if married else ["parent"] + } + }, + "tax_units": {"tax_unit": {"members": members}}, + "spm_units": {"spm_unit": {"members": members}}, + "households": {"household": household_attrs} + } + + +def add_itemized_deductions( + situation, + charitable_donations=0, + mortgage_interest=0, + real_estate_taxes=0, + medical_expenses=0, + casualty_losses=0 +): + """ + Add itemized deductions to an existing situation. + + Adds deductions to the first person in the situation. 
+ + Args: + situation (dict): Existing PolicyEngine situation + charitable_donations (float): Cash charitable contributions + mortgage_interest (float): Mortgage interest paid + real_estate_taxes (float): State and local property taxes + medical_expenses (float): Medical and dental expenses + casualty_losses (float): Casualty and theft losses + + Returns: + dict: Updated situation with deductions + """ + # Get first person ID + first_person = list(situation["people"].keys())[0] + + # Add deductions + if charitable_donations > 0: + situation["people"][first_person]["charitable_cash_donations"] = { + CURRENT_YEAR: charitable_donations + } + + if mortgage_interest > 0: + situation["people"][first_person]["mortgage_interest"] = { + CURRENT_YEAR: mortgage_interest + } + + if real_estate_taxes > 0: + situation["people"][first_person]["real_estate_taxes"] = { + CURRENT_YEAR: real_estate_taxes + } + + if medical_expenses > 0: + situation["people"][first_person]["medical_expense"] = { + CURRENT_YEAR: medical_expenses + } + + if casualty_losses > 0: + situation["people"][first_person]["casualty_loss"] = { + CURRENT_YEAR: casualty_losses + } + + return situation + + +def add_axes(situation, variable_name, min_val, max_val, count=1001): + """ + Add axes to a situation for parameter sweeps. + + Args: + situation (dict): Existing PolicyEngine situation + variable_name (str): Variable to vary (e.g., "employment_income") + min_val (float): Minimum value + max_val (float): Maximum value + count (int): Number of points (default: 1001) + + Returns: + dict: Updated situation with axes + """ + situation["axes"] = [[{ + "name": variable_name, + "count": count, + "min": min_val, + "max": max_val, + "period": CURRENT_YEAR + }]] + + return situation + + +def set_state_nyc(situation, in_nyc=True): + """ + Set state to NY and configure NYC residence. 
+ + Args: + situation (dict): Existing PolicyEngine situation + in_nyc (bool): Whether household is in NYC + + Returns: + dict: Updated situation + """ + household_id = list(situation["households"].keys())[0] + situation["households"][household_id]["state_name"] = {CURRENT_YEAR: "NY"} + situation["households"][household_id]["in_nyc"] = {CURRENT_YEAR: in_nyc} + + return situation diff --git a/skills/technical-patterns/policyengine-aggregation-skill/SKILL.md b/skills/technical-patterns/policyengine-aggregation-skill/SKILL.md new file mode 100644 index 0000000..000c922 --- /dev/null +++ b/skills/technical-patterns/policyengine-aggregation-skill/SKILL.md @@ -0,0 +1,329 @@ +--- +name: policyengine-aggregation +description: PolicyEngine aggregation patterns - using adds attribute and add() function for summing variables across entities +--- + +# PolicyEngine Aggregation Patterns + +Essential patterns for summing variables across entities in PolicyEngine. + +## Quick Decision Guide + +``` +Is the variable ONLY a sum of other variables? +│ +├─ YES → Use `adds` attribute (NO formula needed!) +│ adds = ["var1", "var2"] +│ +└─ NO → Use `add()` function in formula + (when you need max_, where, conditions, etc.) +``` + +## Quick Reference + +| Need | Use | Example | +|------|-----|---------| +| Simple sum | `adds` | `adds = ["var1", "var2"]` | +| Sum from parameters | `adds` | `adds = "gov.path.to.list"` | +| Sum + max_() | `add()` | `max_(0, add(...))` | +| Sum + where() | `add()` | `where(cond, add(...), 0)` | +| Sum + conditions | `add()` | `if cond: add(...)` | +| Count booleans | `adds` | `adds = ["is_eligible"]` | + +--- + +## 1. `adds` Class Attribute (Preferred When Possible) + +### When to Use +Use `adds` when a variable is **ONLY** the sum of other variables with **NO additional logic**. 
+ +### Syntax +```python +class variable_name(Variable): + value_type = float + entity = Entity + definition_period = PERIOD + + # Option 1: List of variables + adds = ["variable1", "variable2", "variable3"] + + # Option 2: Parameter tree path + adds = "gov.path.to.parameter.list" +``` + +### Key Points +- ✅ No `formula()` method needed +- ✅ Automatically handles entity aggregation (person → household/tax_unit/spm_unit) +- ✅ Clean and declarative + +### Example: Simple Income Sum +```python +class tanf_gross_earned_income(Variable): + value_type = float + entity = SPMUnit + label = "TANF gross earned income" + unit = USD + definition_period = MONTH + + adds = ["employment_income", "self_employment_income"] + # NO formula needed! Automatically: + # 1. Gets each person's employment_income + # 2. Gets each person's self_employment_income + # 3. Sums all values across SPM unit members +``` + +### Example: Using Parameter List +```python +class income_tax_refundable_credits(Variable): + value_type = float + entity = TaxUnit + definition_period = YEAR + + adds = "gov.irs.credits.refundable" + # Parameter file contains list like: + # - earned_income_tax_credit + # - child_tax_credit + # - additional_child_tax_credit +``` + +### Example: Counting Boolean Values +```python +class count_eligible_people(Variable): + value_type = int + entity = SPMUnit + definition_period = YEAR + + adds = ["is_eligible_person"] + # Automatically sums True (1) and False (0) across members +``` + +--- + +## 2. 
`add()` Function (When Logic Needed) + +### When to Use +Use `add()` inside a `formula()` when you need: +- To apply `max_()`, `where()`, or conditions +- To combine with other operations +- To modify values before/after summing + +### Syntax +```python +from policyengine_us.model_api import * + +def formula(entity, period, parameters): + result = add(entity, period, variable_list) +``` + +**Parameters:** +- `entity`: The entity to operate on +- `period`: The time period for calculation +- `variable_list`: List of variable names or parameter path + +### Example: With max_() to Prevent Negatives +```python +class adjusted_earned_income(Variable): + value_type = float + entity = SPMUnit + definition_period = MONTH + + def formula(spm_unit, period, parameters): + # Need max_() to clip negative values + gross = add(spm_unit, period, ["employment_income", "self_employment_income"]) + return max_(0, gross) # Prevent negative income +``` + +### Example: With Additional Logic +```python +class household_benefits(Variable): + value_type = float + entity = Household + definition_period = YEAR + + def formula(household, period, parameters): + # Sum existing benefits + BENEFITS = ["snap", "tanf", "ssi", "social_security"] + existing = add(household, period, BENEFITS) + + # Add new benefit conditionally + new_benefit = household("special_benefit", period) + p = parameters(period).gov.special_benefit + + if p.include_in_total: + return existing + new_benefit + return existing +``` + +### Example: Building on Previous Variables +```python +class total_deductions(Variable): + value_type = float + entity = TaxUnit + definition_period = YEAR + + def formula(tax_unit, period, parameters): + p = parameters(period).gov.irs.deductions + + # Get standard deductions using parameter list + standard = add(tax_unit, period, p.standard_items) + + # Apply phase-out logic + income = tax_unit("adjusted_gross_income", period) + phase_out_rate = p.phase_out_rate + phase_out_start = 
p.phase_out_start + + reduction = max_(0, (income - phase_out_start) * phase_out_rate) + return max_(0, standard - reduction) +``` + +--- + +## 3. Common Anti-Patterns to Avoid + +### โŒ NEVER: Manual Summing +```python +# WRONG - Never do this! +def formula(spm_unit, period, parameters): + person = spm_unit.members + employment = person("employment_income", period) + self_emp = person("self_employment_income", period) + return spm_unit.sum(employment + self_emp) # โŒ BAD +``` + +### โœ… CORRECT: Use adds +```python +# RIGHT - Clean and simple +adds = ["employment_income", "self_employment_income"] # โœ… GOOD +``` + +### โŒ WRONG: Using add() When adds Suffices +```python +# WRONG - Unnecessary complexity +def formula(spm_unit, period, parameters): + return add(spm_unit, period, ["income1", "income2"]) # โŒ Overkill +``` + +### โœ… CORRECT: Use adds +```python +# RIGHT - Simpler +adds = ["income1", "income2"] # โœ… GOOD +``` + +--- + +## 4. Entity Aggregation Explained + +When using `adds` or `add()`, PolicyEngine automatically handles entity aggregation: + +```python +class household_total_income(Variable): + entity = Household # Higher-level entity + definition_period = YEAR + + adds = ["employment_income", "self_employment_income"] + # employment_income is defined for Person (lower-level) + # PolicyEngine automatically: + # 1. Gets employment_income for each person in household + # 2. Gets self_employment_income for each person + # 3. Sums all values to household level +``` + +This works across all entity hierarchies: +- Person โ†’ Tax Unit +- Person โ†’ SPM Unit +- Person โ†’ Household +- Tax Unit โ†’ Household +- SPM Unit โ†’ Household + +--- + +## 5. 
Parameter Lists + +Parameters can define lists of variables to sum: + +**Parameter file** (`gov/irs/credits/refundable.yaml`): +```yaml +description: List of refundable tax credits +values: + 2024-01-01: + - earned_income_tax_credit + - child_tax_credit + - additional_child_tax_credit +``` + +**Usage in variable**: +```python +adds = "gov.irs.credits.refundable" +# Automatically sums all credits in the list +``` + +--- + +## 6. Decision Matrix + +| Scenario | Solution | Code | +|----------|----------|------| +| Sum 2-3 variables | `adds` attribute | `adds = ["var1", "var2"]` | +| Sum many variables | Parameter list | `adds = "gov.path.list"` | +| Sum + prevent negatives | `add()` with `max_()` | `max_(0, add(...))` | +| Sum + conditional | `add()` with `where()` | `where(eligible, add(...), 0)` | +| Sum + phase-out | `add()` with calculation | `add(...) - reduction` | +| Count people/entities | `adds` with boolean | `adds = ["is_child"]` | + +--- + +## 7. Key Principles + +1. **Default to `adds` attribute** when variable is only a sum +2. **Use `add()` function** only when additional logic is needed +3. **Never manually sum** with `entity.sum(person(...) + person(...))` +4. **Let PolicyEngine handle** entity aggregation automatically +5. **Use parameter lists** for maintainable, configurable sums + +--- + +## Related Skills + +- **policyengine-period-patterns-skill**: For period conversion when summing across different time periods +- **policyengine-core-skill**: For understanding entity hierarchies and relationships + +--- + +## For Agents + +When implementing or reviewing code: + +1. **Check if `adds` can be used** before writing a formula +2. **Prefer declarative over imperative** when possible +3. **Follow existing patterns** in the codebase +4. **Test entity aggregation** carefully in YAML tests +5. 
**Document parameter lists** clearly for `adds` references + +--- + +## Common Use Cases + +### Earned Income +```python +adds = ["employment_income", "self_employment_income"] +``` + +### Unearned Income +```python +adds = ["interest_income", "dividend_income", "rental_income"] +``` + +### Total Benefits +```python +adds = ["snap", "tanf", "wic", "ssi", "social_security"] +``` + +### Tax Credits +```python +adds = "gov.irs.credits.refundable" +``` + +### Counting Children +```python +adds = ["is_child"] # Returns count of children +``` \ No newline at end of file diff --git a/skills/technical-patterns/policyengine-code-style-skill/SKILL.md b/skills/technical-patterns/policyengine-code-style-skill/SKILL.md new file mode 100644 index 0000000..f952a0c --- /dev/null +++ b/skills/technical-patterns/policyengine-code-style-skill/SKILL.md @@ -0,0 +1,382 @@ +--- +name: policyengine-code-style +description: PolicyEngine code writing style guide - formula optimization, direct returns, eliminating unnecessary variables +--- + +# PolicyEngine Code Writing Style Guide + +Essential patterns for writing clean, efficient PolicyEngine formulas. + +## Core Principles + +1. **Eliminate unnecessary intermediate variables** +2. **Use direct parameter/variable access** +3. **Return directly when possible** +4. **Combine boolean logic** +5. **Use correct period access** (period vs period.this_year) +6. 
**NO hardcoded values** - use parameters or constants + +--- + +## Pattern 1: Direct Parameter Access + +### โŒ Bad - Unnecessary intermediate variable + +```python +def formula(spm_unit, period, parameters): + countable = spm_unit("tn_tanf_countable_resources", period) + p = parameters(period).gov.states.tn.dhs.tanf.resource_limit + resource_limit = p.amount # โŒ Unnecessary + return countable <= resource_limit +``` + +### โœ… Good - Direct access + +```python +def formula(spm_unit, period, parameters): + countable = spm_unit("tn_tanf_countable_resources", period) + p = parameters(period).gov.states.tn.dhs.tanf.resource_limit + return countable <= p.amount +``` + +--- + +## Pattern 2: Direct Return + +### โŒ Bad - Unnecessary result variable + +```python +def formula(spm_unit, period, parameters): + assets = spm_unit("spm_unit_assets", period.this_year) + p = parameters(period).gov.states.tn.dhs.tanf.resource_limit + vehicle_exemption = p.vehicle_exemption # โŒ Unnecessary + countable = max_(assets - vehicle_exemption, 0) # โŒ Unnecessary + return countable +``` + +### โœ… Good - Direct return + +```python +def formula(spm_unit, period, parameters): + assets = spm_unit("spm_unit_assets", period.this_year) + p = parameters(period).gov.states.tn.dhs.tanf.resource_limit + return max_(assets - p.vehicle_exemption, 0) +``` + +--- + +## Pattern 3: Combined Boolean Logic + +### โŒ Bad - Too many intermediate booleans + +```python +def formula(spm_unit, period, parameters): + person = spm_unit.members + age = person("age", period.this_year) + is_disabled = person("is_disabled", period.this_year) + + caretaker_is_60_or_older = spm_unit.any(age >= 60) # โŒ Unnecessary + caretaker_is_disabled = spm_unit.any(is_disabled) # โŒ Unnecessary + eligible = caretaker_is_60_or_older | caretaker_is_disabled # โŒ Unnecessary + + return eligible +``` + +### โœ… Good - Combined logic + +```python +def formula(spm_unit, period, parameters): + person = spm_unit.members + age = 
person("age", period.this_year) + is_disabled = person("is_disabled", period.this_year) + + return spm_unit.any((age >= 60) | is_disabled) +``` + +--- + +## Pattern 4: Period Access - period vs period.this_year + +### ❌ Bad - Wrong period access + +```python +def formula(person, period, parameters): + # MONTH formula accessing YEAR variables + age = person("age", period) # ❌ Gives age/12 = 2.5 "monthly age" + assets = person("assets", period) # ❌ Gives assets/12 + monthly_income = person("employment_income", period.this_year) / MONTHS_IN_YEAR # ❌ Redundant + + return (age >= 18) & (assets < 10000) & (monthly_income < 2000) +``` + +### ✅ Good - Correct period access + +```python +def formula(person, period, parameters): + # MONTH formula accessing YEAR variables + age = person("age", period.this_year) # ✅ Gets actual age (30) + assets = person("assets", period.this_year) # ✅ Gets actual assets ($10,000) + monthly_income = person("employment_income", period) # ✅ Auto-converts to monthly + + p = parameters(period).gov.program.eligibility + return ((age >= p.age_min) & (age <= p.age_max) & + (assets < p.asset_limit) & (monthly_income < p.income_threshold)) +``` + +**Rule:** +- Income/flows → Use `period` (want monthly from annual) +- Age/assets/counts/booleans → Use `period.this_year` (don't divide by 12) + +--- + +## Pattern 5: No Hardcoded Values + +### ❌ Bad - Hardcoded numbers + +```python +def formula(spm_unit, period, parameters): + size = spm_unit.nb_persons() + capped_size = min_(size, 10) # ❌ Hardcoded + + age = person("age", period.this_year) + income = person("income", period) / 12 # ❌ Use MONTHS_IN_YEAR + + # ❌ Hardcoded thresholds + if age >= 18 and age <= 65 and income < 2000: + return True +``` + +### ✅ Good - Parameterized + +```python +def formula(spm_unit, period, parameters): + p = parameters(period).gov.program + capped_size = min_(spm_unit.nb_persons(), p.max_unit_size) # ✅ + + age = person("age", period.this_year) + 
monthly_income = person("income", period) # โœ… Auto-converts (no manual /12) + + age_eligible = (age >= p.age_min) & (age <= p.age_max) # โœ… + income_eligible = monthly_income < p.income_threshold # โœ… + + return age_eligible & income_eligible +``` + +--- + +## Pattern 6: Streamline Variable Access + +### โŒ Bad - Redundant steps + +```python +def formula(spm_unit, period, parameters): + unit_size = spm_unit.nb_persons() # โŒ Unnecessary + max_size = 10 # โŒ Hardcoded + capped_size = min_(unit_size, max_size) + + p = parameters(period).gov.states.tn.dhs.tanf.benefit + spa = p.standard_payment_amount[capped_size] # โŒ Unnecessary + dgpa = p.differential_grant_payment_amount[capped_size] # โŒ Unnecessary + + eligible = spm_unit("eligible_for_dgpa", period) + return where(eligible, dgpa, spa) +``` + +### โœ… Good - Streamlined + +```python +def formula(spm_unit, period, parameters): + p = parameters(period).gov.states.tn.dhs.tanf.benefit + capped_size = min_(spm_unit.nb_persons(), p.max_unit_size) + eligible = spm_unit("eligible_for_dgpa", period) + + return where( + eligible, + p.differential_grant_payment_amount[capped_size], + p.standard_payment_amount[capped_size] + ) +``` + +--- + +## When to Keep Intermediate Variables + +### โœ… Keep when value is used multiple times + +```python +def formula(tax_unit, period, parameters): + p = parameters(period).gov.irs.credits + filing_status = tax_unit("filing_status", period) + + # โœ… Used multiple times - keep as variable + threshold = p.phase_out.start[filing_status] + + income = tax_unit("adjusted_gross_income", period) + excess = max_(0, income - threshold) + reduction = (excess / p.phase_out.width) * threshold + + return max_(0, threshold - reduction) +``` + +### โœ… Keep when calculation is complex + +```python +def formula(spm_unit, period, parameters): + p = parameters(period).gov.program + gross_earned = spm_unit("gross_earned_income", period) + + # โœ… Complex multi-step calculation - break it down + 
work_expense_deduction = min_(gross_earned * p.work_expense_rate, p.work_expense_max) + after_work_expense = gross_earned - work_expense_deduction + + earned_disregard = after_work_expense * p.earned_disregard_rate + countable_earned = after_work_expense - earned_disregard + + dependent_care = spm_unit("dependent_care_expenses", period) + + return max_(0, countable_earned - dependent_care) +``` + +--- + +## Complete Example: Before vs After + +### โŒ Before - Multiple Issues + +```python +def formula(person, period, parameters): + # Wrong period access + age = person("age", period) # โŒ age/12 + assets = person("assets", period) # โŒ assets/12 + annual_income = person("employment_income", period.this_year) + monthly_income = annual_income / 12 # โŒ Use MONTHS_IN_YEAR + + # Hardcoded values + min_age = 18 # โŒ + max_age = 64 # โŒ + asset_limit = 10000 # โŒ + income_limit = 2000 # โŒ + + # Unnecessary intermediate variables + age_check = (age >= min_age) & (age <= max_age) + asset_check = assets <= asset_limit + income_check = monthly_income <= income_limit + eligible = age_check & asset_check & income_check + + return eligible +``` + +### โœ… After - Clean and Correct + +```python +def formula(person, period, parameters): + p = parameters(period).gov.program.eligibility + + # Correct period access + age = person("age", period.this_year) + assets = person("assets", period.this_year) + monthly_income = person("employment_income", period) + + # Direct return with combined logic + return ( + (age >= p.age_min) & (age <= p.age_max) & + (assets <= p.asset_limit) & + (monthly_income <= p.income_threshold) + ) +``` + +--- + +## Pattern 7: Minimal Comments + +### Code Should Be Self-Documenting + +**Variable names and structure should explain the code - not comments.** + +### โŒ Bad - Verbose explanatory comments + +```python +def formula(spm_unit, period, parameters): + # Wisconsin disregards all earned income of dependent children (< 18) + # Calculate earned 
income for adults only + is_adult = spm_unit.members("age", period.this_year) >= 18 # Hard-coded! + adult_earned = spm_unit.sum( + spm_unit.members("tanf_gross_earned_income", period) * is_adult + ) + + # All unearned income is counted (including children's) + gross_unearned = add(spm_unit, period, ["tanf_gross_unearned_income"]) + + # NOTE: Wisconsin disregards many additional income sources that + # are not separately tracked in PolicyEngine (educational aid, etc.) + return max_(total_income - disregards, 0) +``` + +### โœ… Good - Clean self-documenting code + +```python +def formula(spm_unit, period, parameters): + p = parameters(period).gov.states.wi.dcf.tanf.income + + is_adult = spm_unit.members("age", period.this_year) >= p.adult_age_threshold + adult_earned = spm_unit.sum( + spm_unit.members("tanf_gross_earned_income", period) * is_adult + ) + gross_unearned = add(spm_unit, period, ["tanf_gross_unearned_income"]) + child_support = add(spm_unit, period, ["child_support_received"]) + + return max_(adult_earned + gross_unearned - child_support, 0) +``` + +### Comment Rules + +1. **NO comments explaining what code does** - variable names should be clear +2. **OK: Brief NOTE about PolicyEngine limitations** (one line): + ```python + # NOTE: Time limit cannot be tracked in PolicyEngine + ``` +3. **NO multi-line explanations** of what the code calculates + +--- + +## Quick Checklist + +Before finalizing code: +- [ ] No hardcoded numbers (use parameters or constants like MONTHS_IN_YEAR) +- [ ] Correct period access: + - Income/flows use `period` + - Age/assets/counts/booleans use `period.this_year` +- [ ] No single-use intermediate variables +- [ ] Direct parameter access (`p.amount` not `amount = p.amount`) +- [ ] Direct returns when possible +- [ ] Combined boolean logic when possible +- [ ] Minimal comments (code should be self-documenting) + +--- + +## Key Takeaways + +1. **Less is more** - Eliminate unnecessary variables +2. 
**Direct is better** - Access parameters and return directly +3. **Combine when logical** - Group related boolean conditions +4. **Keep when needed** - Complex calculations and reused values deserve variables +5. **Period matters** - Use correct period access to avoid auto-conversion bugs + +--- + +## Related Skills + +- **policyengine-period-patterns-skill** - Deep dive on period handling +- **policyengine-implementation-patterns-skill** - Variable structure and patterns +- **policyengine-vectorization-skill** - NumPy operations and vectorization + +--- + +## For Agents + +When writing or reviewing formulas: +1. **Scan for single-use variables** - eliminate them +2. **Check period access** - ensure correct for variable type +3. **Look for hardcoded values** - parameterize them +4. **Identify redundant steps** - streamline them +5. **Consider readability** - keep complex calculations clear \ No newline at end of file diff --git a/skills/technical-patterns/policyengine-implementation-patterns-skill/SKILL.md b/skills/technical-patterns/policyengine-implementation-patterns-skill/SKILL.md new file mode 100644 index 0000000..359a9c7 --- /dev/null +++ b/skills/technical-patterns/policyengine-implementation-patterns-skill/SKILL.md @@ -0,0 +1,739 @@ +--- +name: policyengine-implementation-patterns +description: PolicyEngine implementation patterns - variable creation, no hard-coding principle, federal/state separation, metadata standards +--- + +# PolicyEngine Implementation Patterns + +Essential patterns for implementing government benefit program rules in PolicyEngine. + +## PolicyEngine Architecture Constraints + +### What CANNOT Be Simulated (Single-Period Limitation) + +**CRITICAL: PolicyEngine uses single-period simulation architecture** + +The following CANNOT be implemented and should be SKIPPED when found in documentation: + +#### 1. 
Time Limits and Lifetime Counters +**Cannot simulate:** +- ANY lifetime benefit limits (X months total) +- ANY time windows (X months within Y period) +- Benefit clocks and countable months +- Cumulative time tracking + +**Why:** Requires tracking benefit history across multiple periods. PolicyEngine simulates one period at a time with no state persistence. + +**What to do:** Document in comments but DON'T parameterize or implement: +```python +# NOTE: [State] has [X]-month lifetime limit on [Program] benefits +# This cannot be simulated in PolicyEngine's single-period architecture +``` + +#### 2. Work History Requirements +**Cannot simulate:** +- "Must have worked 6 of last 12 months" +- "Averaged 30 hours/week over past quarter" +- Prior employment verification +- Work participation rate tracking + +**Why:** Requires historical data from previous periods. + +#### 3. Waiting Periods and Benefit Delays +**Cannot simulate:** +- "3-month waiting period for new residents" +- "Benefits start month after application" +- Retroactive eligibility +- Benefit recertification cycles + +**Why:** Requires tracking application dates and eligibility history. + +#### 4. Progressive Sanctions and Penalties +**Cannot simulate:** +- "First violation: 1-month sanction, Second: 3-month, Third: permanent" +- Graduated penalties +- Strike systems + +**Why:** Requires tracking violation history. + +#### 5. Asset Spend-Down Over Time +**Cannot simulate:** +- Medical spend-down across months +- Resource depletion tracking +- Accumulated medical expenses + +**Why:** Requires tracking expenses and resources across periods. 
+ +### What CAN Be Simulated (With Caveats) + +PolicyEngine CAN simulate point-in-time eligibility and benefits: +- โœ… Current month income limits +- โœ… Current month resource limits +- โœ… Current benefit calculations +- โœ… Current household composition +- โœ… Current deductions and disregards + +### Time-Limited Benefits That Affect Current Calculations + +**Special Case: Time-limited deductions/disregards** + +When a deduction or disregard is only available for X months: +- **DO implement the deduction** (assume it applies) +- **DO add a comment** explaining the time limitation +- **DON'T try to track or enforce the time limit** + +Example: +```python +class state_tanf_countable_earned_income(Variable): + def formula(spm_unit, period, parameters): + p = parameters(period).gov.states.xx.tanf.income + earned = spm_unit("tanf_gross_earned_income", period) + + # NOTE: In reality, this 75% disregard only applies for first 4 months + # of employment. PolicyEngine cannot track employment duration, so we + # apply the disregard assuming the household qualifies. + # Actual rule: [State Code Citation] + disregard_rate = p.earned_income_disregard_rate # 0.75 + + return earned * (1 - disregard_rate) +``` + +**Rule: If it requires history or future tracking, it CANNOT be fully simulated - but implement what we can and document limitations** + +--- + +## Critical Principles + +### 1. ZERO Hard-Coded Values +**Every numeric value MUST be parameterized** + +```python +โŒ FORBIDDEN: +return where(eligible, 1000, 0) # Hard-coded 1000 +age < 15 # Hard-coded 15 +benefit = income * 0.33 # Hard-coded 0.33 +month >= 10 and month <= 3 # Hard-coded months + +โœ… REQUIRED: +return where(eligible, p.maximum_benefit, 0) +age < p.age_threshold.minor_child +benefit = income * p.benefit_rate +month >= p.season.start_month +``` + +**Acceptable literals:** +- `0`, `1`, `-1` for basic math +- `12` for month conversion (`/ 12`, `* 12`) +- Array indices when structure is known + +### 2. 
No Placeholder Implementations +**Delete the file rather than leave placeholders** + +```python +โŒ NEVER: +def formula(entity, period, parameters): + # TODO: Implement + return 75 # Placeholder + +โœ… ALWAYS: +# Complete implementation or no file at all +``` + +--- + +## Variable Implementation Standards + +### Variable Metadata Format + +Follow established patterns: +```python +class il_tanf_countable_earned_income(Variable): + value_type = float + entity = SPMUnit + definition_period = MONTH + label = "Illinois TANF countable earned income" + unit = USD + reference = "https://www.law.cornell.edu/regulations/illinois/..." + defined_for = StateCode.IL + + # Use adds for simple sums + adds = ["il_tanf_earned_income_after_disregard"] +``` + +**Key rules:** +- โœ… Use full URL in `reference` (clickable) +- โŒ Don't use `documentation` field +- โŒ Don't use statute citations without URLs + +### When to Use `adds` vs `formula` + +**Use `adds` when:** +- Just summing variables +- Passing through a single variable +- No transformations needed + +```python +โœ… BEST - Simple sum: +class tanf_gross_income(Variable): + adds = ["employment_income", "self_employment_income"] +``` + +**Use `formula` when:** +- Applying transformations +- Conditional logic +- Calculations needed + +```python +โœ… CORRECT - Need logic: +def formula(entity, period, parameters): + income = add(entity, period, ["income1", "income2"]) + return max_(0, income) # Need max_ +``` + +--- + +## TANF Countable Income Pattern + +### Critical: Verify Calculation Order from Legal Code + +**MOST IMPORTANT:** Always check the state's legal code or policy manual for the exact calculation order. The pattern below is typical but not universal. + +**The Typical Pattern:** +1. Apply deductions/disregards to **earned income only** +2. Use `max_()` to prevent negative earned income +3. 
Add unearned income (which typically has no deductions) + +**This pattern is based on how MOST TANF programs work, but you MUST verify with the specific state's legal code.** + +### โŒ WRONG - Applying deductions to total income + +```python +def formula(spm_unit, period, parameters): + gross_earned = spm_unit("tanf_gross_earned_income", period) + unearned = spm_unit("tanf_gross_unearned_income", period) + deductions = spm_unit("tanf_earned_income_deductions", period) + + # โŒ WRONG: Deductions applied to total income + total_income = gross_earned + unearned + countable = total_income - deductions + + return max_(countable, 0) +``` + +**Why this is wrong:** +- Deductions should ONLY reduce earned income +- Unearned income (SSI, child support, etc.) is not subject to work expense deductions +- This incorrectly reduces unearned income when earned income is low + +**Example error:** +- Earned: $100, Unearned: $500, Deductions: $200 +- Wrong result: `max_($100 + $500 - $200, 0) = $400` (reduces unearned!) 
+- Correct result: `max_($100 - $200, 0) + $500 = $500` + +### โœ… CORRECT - Apply deductions to earned only, then add unearned + +```python +def formula(spm_unit, period, parameters): + gross_earned = spm_unit("tanf_gross_earned_income", period) + unearned = spm_unit("tanf_gross_unearned_income", period) + deductions = spm_unit("tanf_earned_income_deductions", period) + + # โœ… CORRECT: Deductions applied to earned only, then add unearned + return max_(gross_earned - deductions, 0) + unearned +``` + +### Pattern Variations + +**With multiple deduction steps:** +```python +def formula(spm_unit, period, parameters): + p = parameters(period).gov.states.xx.tanf.income + gross_earned = spm_unit("tanf_gross_earned_income", period) + unearned = spm_unit("tanf_gross_unearned_income", period) + + # Step 1: Apply work expense deduction + work_expense = min_(gross_earned * p.work_expense_rate, p.work_expense_max) + after_work_expense = max_(gross_earned - work_expense, 0) + + # Step 2: Apply earnings disregard + earnings_disregard = after_work_expense * p.disregard_rate + countable_earned = max_(after_work_expense - earnings_disregard, 0) + + # Step 3: Add unearned (no deductions applied) + return countable_earned + unearned +``` + +**With disregard percentage (simplified):** +```python +def formula(spm_unit, period, parameters): + p = parameters(period).gov.states.xx.tanf.income + gross_earned = spm_unit("tanf_gross_earned_income", period) + unearned = spm_unit("tanf_gross_unearned_income", period) + + # Apply disregard to earned (keep 33% = disregard 67%) + countable_earned = gross_earned * (1 - p.earned_disregard_rate) + + return max_(countable_earned, 0) + unearned +``` + +### When Unearned Income HAS Deductions + +Some states DO have unearned income deductions (rare). 
Handle separately: + +```python +def formula(spm_unit, period, parameters): + gross_earned = spm_unit("tanf_gross_earned_income", period) + gross_unearned = spm_unit("tanf_gross_unearned_income", period) + earned_deductions = spm_unit("tanf_earned_income_deductions", period) + unearned_deductions = spm_unit("tanf_unearned_income_deductions", period) + + # Apply each type of deduction to its respective income type + countable_earned = max_(gross_earned - earned_deductions, 0) + countable_unearned = max_(gross_unearned - unearned_deductions, 0) + + return countable_earned + countable_unearned +``` + +### Quick Reference + +**Standard TANF pattern:** +``` +Countable Income = max_(Earned - Earned Deductions, 0) + Unearned +``` + +**NOT:** +``` +❌ max_(Earned + Unearned - Deductions, 0) +❌ max_(Earned - Deductions + Unearned, 0) # Same error: excess deductions still reduce unearned +``` + +--- + +## Federal/State Separation + +### Federal Parameters +Location: `/parameters/gov/{agency}/` +- Base formulas and methodologies +- National standards +- Required elements + +### State Parameters +Location: `/parameters/gov/states/{state}/` +- State-specific thresholds +- Implementation choices +- Scale factors + +```yaml +# Federal: parameters/gov/hhs/fpg/base.yaml +first_person: 14_580 + +# State: parameters/gov/states/ca/scale_factor.yaml +fpg_multiplier: 2.0 # 200% of FPG +``` + +--- + +## Code Reuse Patterns + +### Avoid Duplication - Create Intermediate Variables + +**❌ ANTI-PATTERN: Copy-pasting calculations** +```python +# File 1: calculates income after deduction +def formula(household, period, parameters): + gross = add(household, period, ["income"]) + deduction = p.deduction * household.nb_persons() + return max_(gross - deduction, 0) + +# File 2: DUPLICATES same calculation +def formula(household, period, parameters): + gross = add(household, period, ["income"]) # Copy-pasted + deduction = p.deduction * household.nb_persons() # Copy-pasted + after_deduction = max_(gross - deduction, 0) # 
Copy-pasted + return after_deduction < p.threshold +``` + +**โœ… CORRECT: Reuse existing variables** +```python +# File 2: reuses calculation +def formula(household, period, parameters): + countable_income = household("program_countable_income", period) + return countable_income < p.threshold +``` + +**When to create intermediate variables:** +- Same calculation in 2+ places +- Logic exceeds 5 lines +- Reference implementations have similar variable + +--- + +## TANF-Specific Patterns + +### Study Reference Implementations First + +**MANDATORY before implementing any TANF:** +- DC TANF: `/variables/gov/states/dc/dhs/tanf/` +- IL TANF: `/variables/gov/states/il/dhs/tanf/` +- TX TANF: `/variables/gov/states/tx/hhs/tanf/` + +**Learn from them:** +1. Variable organization +2. Naming conventions +3. Code reuse patterns +4. When to use `adds` vs `formula` + +### Standard TANF Structure +``` +tanf/ +โ”œโ”€โ”€ eligibility/ +โ”‚ โ”œโ”€โ”€ demographic_eligible.py +โ”‚ โ”œโ”€โ”€ income_eligible.py +โ”‚ โ””โ”€โ”€ eligible.py +โ”œโ”€โ”€ income/ +โ”‚ โ”œโ”€โ”€ earned/ +โ”‚ โ”œโ”€โ”€ unearned/ +โ”‚ โ””โ”€โ”€ countable_income.py +โ””โ”€โ”€ [state]_tanf.py +``` + +### Simplified TANF Rules + +For simplified implementations: + +**DON'T create state-specific versions of:** +- Demographic eligibility (use federal) +- Immigration eligibility (use federal) +- Income sources (use federal baseline) + +```python +โŒ DON'T CREATE: +ca_tanf_demographic_eligible_person.py +ca_tanf_gross_earned_income.py +parameters/.../income/sources/earned.yaml + +โœ… DO USE: +# Federal demographic eligibility +is_demographic_tanf_eligible +# Federal income aggregation +tanf_gross_earned_income +``` + +### Avoiding Unnecessary Wrapper Variables (CRITICAL) + +**Golden Rule: Only create a state variable if you're adding state-specific logic to it!** + +#### Understand WHY Variables Exist, Not Just WHAT + +When studying reference implementations: +1. **Note which variables they have** +2. 
**READ THE CODE inside each variable** +3. **Ask: "Does this variable have state-specific logic?"** +4. **If it just returns federal baseline โ†’ DON'T copy it** + +#### Variable Creation Decision Tree + +Before creating ANY state-specific variable, ask: +1. Does federal baseline already calculate this? +2. Does my state do it DIFFERENTLY than federal? +3. Can I write the difference in 1+ lines of state-specific logic? +4. **Will this calculation be used in 2+ other variables?** (Code reuse exception) + +**Decision:** +- If YES/NO/NO/NO โ†’ **DON'T create the variable**, use federal directly +- If YES/YES/YES/NO โ†’ **CREATE the variable** with state logic +- If YES/NO/NO/YES โ†’ **CREATE as intermediate variable** for code reuse (see exception below) + +#### EXCEPTION: Code Reuse Justifies Intermediate Variables + +**Even without state-specific logic, create a variable if the SAME calculation is used in multiple places.** + +โŒ **Bad - Duplicating calculation across variables:** +```python +# Variable 1 - Income eligibility +class mo_tanf_income_eligible(Variable): + def formula(spm_unit, period, parameters): + # Duplicated calculation + gross = add(spm_unit, period, ["tanf_gross_earned_income", "tanf_gross_unearned_income"]) + return gross <= p.income_limit + +# Variable 2 - Countable income +class mo_tanf_countable_income(Variable): + def formula(spm_unit, period, parameters): + # SAME calculation repeated! + gross = add(spm_unit, period, ["tanf_gross_earned_income", "tanf_gross_unearned_income"]) + deductions = spm_unit("mo_tanf_deductions", period) + return max_(gross - deductions, 0) + +# Variable 3 - Need standard +class mo_tanf_need_standard(Variable): + def formula(spm_unit, period, parameters): + # SAME calculation AGAIN! 
+ gross = add(spm_unit, period, ["tanf_gross_earned_income", "tanf_gross_unearned_income"]) + return where(gross < p.threshold, p.high, p.low) +``` + +โœ… **Good - Extract into reusable intermediate variable:** +```python +# Intermediate variable - used in multiple places +class mo_tanf_gross_income(Variable): + adds = ["tanf_gross_earned_income", "tanf_gross_unearned_income"] + +# Variable 1 - Reuses intermediate +class mo_tanf_income_eligible(Variable): + def formula(spm_unit, period, parameters): + gross = spm_unit("mo_tanf_gross_income", period) # Reuse + return gross <= p.income_limit + +# Variable 2 - Reuses intermediate +class mo_tanf_countable_income(Variable): + def formula(spm_unit, period, parameters): + gross = spm_unit("mo_tanf_gross_income", period) # Reuse + deductions = spm_unit("mo_tanf_deductions", period) + return max_(gross - deductions, 0) + +# Variable 3 - Reuses intermediate +class mo_tanf_need_standard(Variable): + def formula(spm_unit, period, parameters): + gross = spm_unit("mo_tanf_gross_income", period) # Reuse + return where(gross < p.threshold, p.high, p.low) +``` + +**When to create intermediate variables for reuse:** +- โœ… Same calculation appears in 2+ variables +- โœ… Represents a meaningful concept (e.g., "gross income", "net resources") +- โœ… Simplifies maintenance (change once vs many places) +- โœ… Follows DRY (Don't Repeat Yourself) principle + +**When NOT to create (still a wrapper):** +- โŒ Only used in ONE place +- โŒ Just passes through another variable unchanged +- โŒ Adds indirection without code reuse benefit + +#### Red Flags for Unnecessary Wrapper Variables + +```python +โŒ INVALID - Pure wrapper, no state logic: +class in_tanf_assistance_unit_size(Variable): + def formula(spm_unit, period): + return spm_unit("spm_unit_size", period) # Just returns federal + +โŒ INVALID - Aggregation without transformation: +class in_tanf_countable_unearned_income(Variable): + def formula(tax_unit, period): + return 
tax_unit.sum(tax_unit.members("tanf_gross_unearned_income", period)) + +❌ INVALID - Pass-through with no modification: +class in_tanf_gross_income(Variable): + def formula(entity, period): + return entity("tanf_gross_income", period) +``` + +#### Examples of VALID State Variables + +```python +✅ VALID - Has state-specific disregard: +class in_tanf_countable_earned_income(Variable): + def formula(spm_unit, period, parameters): + p = parameters(period).gov.states["in"].tanf.income + earned = spm_unit("tanf_gross_earned_income", period) + return earned * (1 - p.earned_income_disregard_rate) # STATE LOGIC + +✅ VALID - Uses state-specific limits: +class in_tanf_income_eligible(Variable): + def formula(spm_unit, period, parameters): + p = parameters(period).gov.states["in"].tanf + income = spm_unit("tanf_countable_income", period) + size = spm_unit("spm_unit_size", period.this_year) + limit = p.income_limit[min_(size, p.max_household_size)] # STATE PARAMS + return income <= limit + +✅ VALID - IL has different counting rules: +class il_tanf_assistance_unit_size(Variable): + adds = [ + "il_tanf_payment_eligible_child", # STATE-SPECIFIC + "il_tanf_payment_eligible_parent", # STATE-SPECIFIC + ] +``` + +#### State Variables to AVOID Creating + +For TANF implementations: + +**❌ DON'T create these (use federal directly):** +- `state_tanf_assistance_unit_size` (unless different counting rules like IL) +- `state_tanf_countable_unearned_income` (unless state has disregards) +- `state_tanf_gross_income` (just use federal baseline) +- Any variable that's just `return entity("federal_variable", period)` + +**✅ DO create these (when state has unique rules):** +- `state_tanf_countable_earned_income` (if unique disregard %) +- `state_tanf_income_eligible` (state income limits) +- `state_tanf_maximum_benefit` (state payment standards) +- `state_tanf` (final benefit calculation) + +### Demographic Eligibility Pattern + +**Option 1: Use Federal (Simplified)** +```python +class 
ca_tanf_eligible(Variable): + def formula(spm_unit, period, parameters): + # Use federal variable + has_eligible = spm_unit.any( + spm_unit.members("is_demographic_tanf_eligible", period) + ) + return has_eligible & income_eligible +``` + +**Option 2: State-Specific (Different thresholds)** +```python +class ca_tanf_demographic_eligible_person(Variable): + def formula(person, period, parameters): + p = parameters(period).gov.states.ca.tanf + age = person("age", period.this_year) # NOT monthly_age + + age_limit = where( + person("is_full_time_student", period), + p.age_threshold.student, + p.age_threshold.minor_child + ) + return age < age_limit +``` + +--- + +## Common Implementation Patterns + +### Income Eligibility +```python +class program_income_eligible(Variable): + value_type = bool + entity = SPMUnit + definition_period = MONTH + + def formula(spm_unit, period, parameters): + p = parameters(period).gov.states.xx.program + income = spm_unit("program_countable_income", period) + size = spm_unit("spm_unit_size", period.this_year) + + # Get threshold from parameters + threshold = p.income_limit[min_(size, p.max_household_size)] + return income <= threshold +``` + +### Benefit Calculation +```python +class program_benefit(Variable): + value_type = float + entity = SPMUnit + definition_period = MONTH + unit = USD + + def formula(spm_unit, period, parameters): + p = parameters(period).gov.states.xx.program + eligible = spm_unit("program_eligible", period) + + # Calculate benefit amount + base = p.benefit_schedule.base_amount + adjustment = p.benefit_schedule.adjustment_rate + size = spm_unit("spm_unit_size", period.this_year) + + amount = base + (size - 1) * adjustment + return where(eligible, amount, 0) +``` + +### Using Scale Parameters +```python +def formula(entity, period, parameters): + p = parameters(period).gov.states.az.program + federal_p = parameters(period).gov.hhs.fpg + + # Federal base with state scale + size = entity("household_size", 
period.this_year) + fpg = federal_p.first_person + federal_p.additional * (size - 1) + state_scale = p.income_limit_scale # Often exists + income_limit = fpg * state_scale +``` + +--- + +## Variable Creation Checklist + +Before creating any variable: +- [ ] Check if it already exists +- [ ] Use standard demographic variables (age, is_disabled) +- [ ] Reuse federal calculations where applicable +- [ ] Check for household_income before creating new +- [ ] Look for existing intermediate variables +- [ ] Study reference implementations + +--- + +## Quality Standards + +### Complete Implementation Requirements +- All values from parameters (no hard-coding) +- Complete formula logic +- Proper entity aggregation +- Correct period handling +- Meaningful variable names +- Proper metadata + +### Anti-Patterns to Avoid +- Copy-pasting logic between files +- Hard-coding any numeric values +- Creating duplicate income variables +- State-specific versions of federal rules +- Placeholder TODOs in production code + +--- + +## Parameter-to-Variable Mapping Requirements + +### Every Parameter Must Have a Variable + +**CRITICAL: Complete implementation means every parameter is used!** + +When you create parameters, you MUST create corresponding variables: + +| Parameter Type | Required Variable(s) | +|---------------|---------------------| +| resources/limit | `state_program_resource_eligible` | +| income/limit | `state_program_income_eligible` | +| payment_standard | `state_program_maximum_benefit` | +| income/disregard | `state_program_countable_earned_income` | +| categorical/requirements | `state_program_categorically_eligible` | + +### Complete Eligibility Formula + +The main eligibility variable MUST combine ALL checks: + +```python +class state_program_eligible(Variable): + def formula(spm_unit, period, parameters): + income_eligible = spm_unit("state_program_income_eligible", period) + resource_eligible = spm_unit("state_program_resource_eligible", period) # DON'T FORGET! 
+ categorical = spm_unit("state_program_categorically_eligible", period) + + return income_eligible & resource_eligible & categorical +``` + +**Common Implementation Failures:** +- โŒ Created resource limit parameter but no resource_eligible variable +- โŒ Main eligible variable only checks income, ignores resources +- โŒ Parameters created but never referenced in any formula + +--- + +## For Agents + +When implementing variables: +1. **Study reference implementations** (DC, IL, TX TANF) +2. **Never hard-code values** - use parameters +3. **Map every parameter to a variable** - no orphaned parameters +4. **Complete ALL eligibility checks** - income AND resources AND categorical +5. **Reuse existing variables** - avoid duplication +6. **Use `adds` when possible** - cleaner than formula +7. **Create intermediate variables** for complex logic +8. **Follow metadata standards** exactly +9. **Complete implementation** or delete the file \ No newline at end of file diff --git a/skills/technical-patterns/policyengine-parameter-patterns-skill/SKILL.md b/skills/technical-patterns/policyengine-parameter-patterns-skill/SKILL.md new file mode 100644 index 0000000..60a44c7 --- /dev/null +++ b/skills/technical-patterns/policyengine-parameter-patterns-skill/SKILL.md @@ -0,0 +1,440 @@ +--- +name: policyengine-parameter-patterns +description: PolicyEngine parameter patterns - YAML structure, naming conventions, metadata requirements, federal/state separation +--- + +# PolicyEngine Parameter Patterns + +Comprehensive patterns for creating PolicyEngine parameter files. + +## Critical: Required Structure + +Every parameter MUST have this exact structure: +```yaml +description: [One sentence description]. +values: + YYYY-MM-DD: value + +metadata: + unit: [type] # REQUIRED + period: [period] # REQUIRED + label: [name] # REQUIRED + reference: # REQUIRED + - title: [source] + href: [url] +``` + +**Missing ANY metadata field = validation error** + +--- + +## 1. 
File Naming Conventions + +### Study Reference Implementations First +Before naming, examine: +- DC TANF: `/parameters/gov/states/dc/dhs/tanf/` +- IL TANF: `/parameters/gov/states/il/dhs/tanf/` +- TX TANF: `/parameters/gov/states/tx/hhs/tanf/` + +### Naming Patterns + +**Dollar amounts โ†’ `/amount.yaml`** +``` +income/deductions/work_expense/amount.yaml # $120 +resources/limit/amount.yaml # $6,000 +payment_standard/amount.yaml # $320 +``` + +**Percentages/rates โ†’ `/rate.yaml` or `/percentage.yaml`** +``` +income_limit/rate.yaml # 1.85 (185% FPL) +benefit_reduction/rate.yaml # 0.2 (20%) +income/disregard/percentage.yaml # 0.67 (67%) +``` + +**Thresholds โ†’ `/threshold.yaml`** +``` +age_threshold/minor_child.yaml # 18 +age_threshold/elderly.yaml # 60 +income/threshold.yaml # 30_000 +``` + +--- + +## 2. Description Field + +### The ONLY Acceptable Formula + +```yaml +description: [State] [verb] [category] to [this X] under the [Full Program Name] program. +``` + +**Components:** +1. **[State]**: Full state name (Indiana, Texas, California) +2. **[verb]**: ONLY use: limits, provides, sets, excludes, deducts, uses +3. **[category]**: What's being limited/provided (gross income, resources, payment standard) +4. **[this X]**: ALWAYS use generic placeholder + - `this amount` (for currency-USD) + - `this share` or `this percentage` (for rates/percentages) + - `this threshold` (for age/counts) +5. **[Full Program Name]**: ALWAYS spell out (Temporary Assistance for Needy Families, NOT TANF) + +### Copy These Exact Templates + +**For income limits:** +```yaml +description: [State] limits gross income to this amount under the Temporary Assistance for Needy Families program. +``` + +**For resource limits:** +```yaml +description: [State] limits resources to this amount under the Temporary Assistance for Needy Families program. 
+``` + +**For payment standards:** +```yaml +description: [State] provides this amount as the payment standard under the Temporary Assistance for Needy Families program. +``` + +**For disregards:** +```yaml +description: [State] excludes this share of earnings from countable income under the Temporary Assistance for Needy Families program. +``` + +### Description Validation Checklist + +Run this check on EVERY description: +```python +# Pseudo-code validation +def validate_description(desc): + checks = [ + desc.count('.') == 1, # Exactly one sentence + 'TANF' not in desc, # No acronyms + 'SNAP' not in desc, # No acronyms + 'this amount' in desc or 'this share' in desc or 'this percentage' in desc or 'this threshold' in desc, + 'under the' in desc and 'program' in desc, + 'by household size' not in desc, # No explanatory text + 'based on' not in desc, # No explanatory text + 'for eligibility' not in desc, # Redundant + ] + return all(checks) +``` + +**CRITICAL: Always spell out full program names in descriptions!** + +--- + +## 3. Values Section + +### Format Rules +```yaml +values: + 2024-01-01: 3_000 # Use underscores + # NOT: 3000 + + 2024-01-01: 0.2 # Remove trailing zeros + # NOT: 0.20 or 0.200 + + 2024-01-01: 2 # No decimals for integers + # NOT: 2.0 or 2.00 +``` + +### Effective Dates + +**Use exact dates from sources:** +```yaml +# If source says "effective July 1, 2023" +2023-07-01: value + +# If source says "as of October 1" +2024-10-01: value + +# NOT arbitrary dates: +2000-01-01: value # Shows no research +``` + +**Date format:** `YYYY-MM-01` (always use 01 for day) + +--- + +## 4. 
Metadata Fields (ALL REQUIRED) + +### unit +Common units: +- `currency-USD` - Dollar amounts +- `/1` - Rates, percentages (as decimals) +- `month` - Number of months +- `year` - Age in years +- `bool` - True/false +- `person` - Count of people + +### period +- `year` - Annual values +- `month` - Monthly values +- `day` - Daily values +- `eternity` - Never changes + +### label +Pattern: `[State] [PROGRAM] [description]` +```yaml +label: Montana TANF minor child age threshold +label: Illinois TANF earned income disregard rate +label: California SNAP resource limit +``` +**Rules:** +- Spell out state name +- Abbreviate program (TANF, SNAP) +- No period at end + +### reference +**Requirements:** +1. At least one source (prefer two) +2. Must contain the actual value +3. Legal codes need subsections +4. PDFs need page anchors + +```yaml +โœ… GOOD: +reference: + - title: Idaho Admin Code 16.05.03.205(3) + href: https://adminrules.idaho.gov/rules/current/16/160503.pdf#page=14 + - title: Idaho LIHEAP Guidelines, Section 3, page 8 + href: https://healthandwelfare.idaho.gov/guidelines.pdf#page=8 + +โŒ BAD: +reference: + - title: Federal LIHEAP regulations # Too generic + href: https://www.acf.hhs.gov/ocs # No specific section +``` + +--- + +## 5. Federal/State Separation + +### Federal Parameters +Location: `/parameters/gov/{agency}/{program}/` +```yaml +# parameters/gov/hhs/fpg/first_person.yaml +description: HHS sets this amount as the federal poverty guideline for one person. +``` + +### State Parameters +Location: `/parameters/gov/states/{state}/{agency}/{program}/` +```yaml +# parameters/gov/states/ca/dss/tanf/income_limit/rate.yaml +description: California uses this multiplier of the federal poverty guideline for TANF income eligibility. +``` + +--- + +## 6. Common Parameter Patterns + +### Income Limits (as FPL multiplier) +```yaml +# income_limit/rate.yaml +description: State uses this multiplier of the federal poverty guideline for program income limits. 
+values: + 2024-01-01: 1.85 # 185% FPL + +metadata: + unit: /1 + period: year + label: State PROGRAM income limit multiplier +``` + +### Benefit Amounts +```yaml +# payment_standard/amount.yaml +description: State provides this amount as the monthly program benefit. +values: + 2024-01-01: 500 + +metadata: + unit: currency-USD + period: month + label: State PROGRAM payment standard amount +``` + +### Age Thresholds +```yaml +# age_threshold/minor_child.yaml +description: State defines minor children as under this age for program eligibility. +values: + 2024-01-01: 18 + +metadata: + unit: year + period: eternity + label: State PROGRAM minor child age threshold +``` + +### Disregard Percentages +```yaml +# income/disregard/percentage.yaml +description: State excludes this share of earned income from program calculations. +values: + 2024-01-01: 0.67 # 67% + +metadata: + unit: /1 + period: eternity + label: State PROGRAM earned income disregard percentage +``` + +--- + +## 7. Validation Checklist + +Before creating parameters: +- [ ] Studied reference implementations (DC, IL, TX) +- [ ] All four metadata fields present +- [ ] Description is one complete sentence +- [ ] Values use underscore separators +- [ ] Trailing zeros removed from decimals +- [ ] References include subsections and page numbers +- [ ] Label follows naming pattern +- [ ] Effective date matches source document + +--- + +## 8. 
Common Mistakes to Avoid + +### Missing Metadata +```yaml +โŒ WRONG - Missing required fields: +metadata: + unit: currency-USD + label: Benefit amount + # Missing: period, reference +``` + +### Generic References +```yaml +โŒ WRONG: +reference: + - title: State TANF Manual + href: https://state.gov/tanf + +โœ… CORRECT: +reference: + - title: State TANF Manual Section 5.2, page 15 + href: https://state.gov/tanf-manual.pdf#page=15 +``` + +### Arbitrary Dates +```yaml +โŒ WRONG: +values: + 2000-01-01: 500 # Lazy default + +โœ… CORRECT: +values: + 2023-07-01: 500 # From source: "effective July 1, 2023" +``` + +--- + +## Real-World Examples from Production Code + +**CRITICAL: Study actual parameter files, not just examples!** + +Before writing ANY parameter: +1. Open and READ 3+ similar parameter files from TX/IL/DC +2. COPY their exact description pattern +3. Replace state name and specific details only + +### Payment Standards +```yaml +# Texas (actual production) +description: Texas provides this amount as the payment standard under the Temporary Assistance for Needy Families program. + +# Pennsylvania (actual production) +description: Pennsylvania limits TANF benefits to households with resources at or below this amount. +``` + +### Income Limits +```yaml +# Indiana (should be) +description: Indiana limits gross income to this amount under the Temporary Assistance for Needy Families program. + +# Texas (actual production) +description: Texas limits countable resources to this amount under the Temporary Assistance for Needy Families program. +``` + +### Disregards +```yaml +# Indiana (should be) +description: Indiana excludes this share of earnings from countable income under the Temporary Assistance for Needy Families program. + +# Texas (actual production) +description: Texas deducts this standard work expense amount from gross earned income for Temporary Assistance for Needy Families program calculations. 
+``` + +### Pattern Analysis +- **ALWAYS** spell out full program name +- Use "under the [Program] program" or "for [Program] program calculations" +- One simple verb (limits, provides, excludes, deducts) +- One "this X" placeholder +- NO extra explanation ("based on X", "This is Y") + +### Common Description Mistakes to AVOID + +**โŒ WRONG - Using acronyms:** +```yaml +description: Indiana sets this gross income limit for TANF eligibility by household size. +# Problems: "TANF" not spelled out, unnecessary "by household size" +``` + +**โœ… CORRECT:** +```yaml +description: Indiana limits gross income to this amount under the Temporary Assistance for Needy Families program. +``` + +**โŒ WRONG - Adding explanatory text:** +```yaml +description: Indiana provides this payment standard amount based on household size. +# Problem: "based on household size" is unnecessary (evident from breakdown) +``` + +**โœ… CORRECT:** +```yaml +description: Indiana provides this amount as the payment standard under the Temporary Assistance for Needy Families program. +``` + +**โŒ WRONG - Missing program context:** +```yaml +description: Indiana sets the gross income limit. +# Problem: No program name, no "this amount" +``` + +**โœ… CORRECT:** +```yaml +description: Indiana limits gross income to this amount under the Temporary Assistance for Needy Families program. +``` + +### Authoritative Source Requirements + +**ONLY use official government sources:** +- โœ… State codes and administrative regulations +- โœ… Official state agency websites (.gov domains) +- โœ… Federal regulations (CFR, USC) +- โœ… State plans and official manuals (.gov PDFs) + +**NEVER use:** +- โŒ Third-party guides (singlemotherguide.com, benefits.gov descriptions) +- โŒ Wikipedia +- โŒ Nonprofit summaries (unless no official source exists) +- โŒ News articles + +--- + +## For Agents + +When creating parameters: +1. **READ ACTUAL FILES** - Study TX/IL/DC parameter files, not just skill examples +2. 
**Include ALL metadata fields** - missing any causes errors +3. **Use exact effective dates** from sources +4. **Follow naming conventions** (amount/rate/threshold) +5. **Write simple descriptions** with "this" placeholders and full program names +6. **Include ONLY official government references** with subsections and pages +7. **Format values properly** (underscores, no trailing zeros) \ No newline at end of file diff --git a/skills/technical-patterns/policyengine-period-patterns-skill/SKILL.md b/skills/technical-patterns/policyengine-period-patterns-skill/SKILL.md new file mode 100644 index 0000000..9296480 --- /dev/null +++ b/skills/technical-patterns/policyengine-period-patterns-skill/SKILL.md @@ -0,0 +1,478 @@ +--- +name: policyengine-period-patterns +description: PolicyEngine period handling - converting between YEAR, MONTH definition periods and testing patterns +--- + +# PolicyEngine Period Patterns + +Essential patterns for handling different definition periods (YEAR, MONTH) in PolicyEngine. + +## Quick Reference + +| From | To | Method | Example | +|------|-----|--------|---------| +| MONTH formula | YEAR variable | `period.this_year` | `age = person("age", period.this_year)` | +| YEAR formula | MONTH variable | `period.first_month` | `person("monthly_rent", period.first_month)` | +| Any | Year integer | `period.start.year` | `year = period.start.year` | +| Any | Month integer | `period.start.month` | `month = period.start.month` | +| Annual → Monthly | Divide by 12 | `/ MONTHS_IN_YEAR` | `monthly = annual / MONTHS_IN_YEAR` | +| Monthly → Annual | Multiply by 12 | `* MONTHS_IN_YEAR` | `annual = monthly * MONTHS_IN_YEAR` | + +--- + +## 1. 
Definition Periods in PolicyEngine US + +### Available Periods +- **YEAR**: Annual values (most common - 2,883 variables) +- **MONTH**: Monthly values (395 variables) +- **ETERNITY**: Never changes (1 variable - structural relationships) + +**Note:** QUARTER is NOT used in PolicyEngine US + +### Examples +```python +from policyengine_us.model_api import * + +class annual_income(Variable): + definition_period = YEAR # Annual amount + +class monthly_benefit(Variable): + definition_period = MONTH # Monthly amount + +class is_head(Variable): + definition_period = ETERNITY # Never changes +``` + +--- + +## 2. The Golden Rule + +**When accessing a variable with a different definition period than your formula, you must specify the target period explicitly.** + +```python +# ✅ CORRECT - MONTH formula accessing YEAR variable +def formula(person, period, parameters): + age = person("age", period.this_year) # Gets actual age + +# ❌ WRONG - Would get age/12 +def formula(person, period, parameters): + age = person("age", period) # BAD: gives age divided by 12! +``` + +--- + +## 3. 
Common Patterns + +### Pattern 1: MONTH Formula Accessing YEAR Variable + +**Use Case**: Monthly benefits need annual demographic data + +```python +class monthly_benefit_eligible(Variable): + value_type = bool + entity = Person + definition_period = MONTH # Monthly eligibility + + def formula(person, period, parameters): + # Age is YEAR-defined, use period.this_year + age = person("age", period.this_year) # โœ… Gets full age + + # is_pregnant is MONTH-defined, just use period + is_pregnant = person("is_pregnant", period) # โœ… Same period + + return (age < 18) | is_pregnant +``` + +### Pattern 2: Accessing Stock Variables (Assets) + +**Stock variables** (point-in-time values like assets) are typically YEAR-defined + +```python +class tanf_countable_resources(Variable): + value_type = float + entity = SPMUnit + definition_period = MONTH # Monthly check + + def formula(spm_unit, period, parameters): + # Assets are stocks (YEAR-defined) + cash = spm_unit("cash_assets", period.this_year) # โœ… + vehicles = spm_unit("vehicles_value", period.this_year) # โœ… + + p = parameters(period).gov.tanf.resources + return cash + max_(0, vehicles - p.vehicle_exemption) +``` + +--- + +## 4. 
Understanding Auto-Conversion: When to Use `period` vs `period.this_year` + +### The Key Question + +**When accessing a YEAR variable from a MONTH formula, should the value be divided by 12?** + +- **If YES** โ†’ Use `period` (let auto-conversion happen) +- **If NO** โ†’ Use `period.this_year` (prevent auto-conversion) + +### When Auto-Conversion Makes Sense (Use `period`) + +**Flow variables** where you want the monthly portion: + +```python +class monthly_benefit(Variable): + definition_period = MONTH + + def formula(person, period, parameters): + # โœ… Use period - want $2,000/month from $24,000/year + monthly_income = person("employment_income", period) + + # Compare to monthly threshold + p = parameters(period).gov.program + return monthly_income < p.monthly_threshold +``` + +Why: If annual income is $24,000, you want $2,000/month for monthly eligibility checks. + +### When Auto-Conversion Breaks Things (Use `period.this_year`) + +**Stock variables and counts** where division by 12 is nonsensical: + +**1. Age** +```python +# โŒ WRONG - gives age/12 +age = person("age", period) # 30 years โ†’ 2.5 "monthly age" ??? + +# โœ… CORRECT - gives actual age +age = person("age", period.this_year) # 30 years +``` + +**2. Assets/Resources (Stocks)** +```python +# โŒ WRONG - gives assets/12 +assets = spm_unit("spm_unit_assets", period) # $12,000 โ†’ $1,000 ??? + +# โœ… CORRECT - gives point-in-time value +assets = spm_unit("spm_unit_assets", period.this_year) # $12,000 +``` + +**3. Counts (Household Size, Number of Children)** +```python +# โŒ WRONG - gives count/12 +size = spm_unit("household_size", period) # 4 people โ†’ 0.33 people ??? + +# โœ… CORRECT - gives actual count +size = spm_unit("household_size", period.this_year) # 4 people +``` + +**4. 
Boolean/Enum Variables** +```python +# โŒ WRONG - weird fractional conversion +status = person("is_disabled", period) + +# โœ… CORRECT - actual status +status = person("is_disabled", period.this_year) +``` + +### Decision Tree + +``` +Accessing YEAR variable from MONTH formula? +โ”‚ +โ”œโ”€ Is it an INCOME or FLOW variable? +โ”‚ โ””โ”€ YES โ†’ Use period (auto-convert to monthly) โœ… +โ”‚ Example: employment_income, self_employment_income +โ”‚ +โ””โ”€ Is it AGE, ASSET, COUNT, or BOOLEAN? + โ””โ”€ YES โ†’ Use period.this_year (prevent conversion) โœ… + Examples: age, assets, household_size, is_disabled +``` + +### Complete Example + +```python +class monthly_tanf_eligible(Variable): + value_type = bool + entity = Person + definition_period = MONTH + + def formula(person, period, parameters): + # Age: Use period.this_year (don't want age/12) + age = person("age", period.this_year) # โœ… + + # Assets: Use period.this_year (don't want assets/12) + assets = person("assets", period.this_year) # โœ… + + # Income: Use period (DO want monthly income from annual) + monthly_income = person("employment_income", period) # โœ… + + p = parameters(period).gov.tanf.eligibility + + age_eligible = (age >= 18) & (age <= 64) + asset_eligible = assets <= p.asset_limit + income_eligible = monthly_income <= p.monthly_income_limit + + return age_eligible & asset_eligible & income_eligible +``` + +### Quick Reference for Auto-Conversion + +| Variable Type | Use `period` | Use `period.this_year` | Why | +|--------------|-------------|----------------------|-----| +| Income (flow) | โœ… | โŒ | Want monthly portion | +| Age | โŒ | โœ… | Age/12 is meaningless | +| Assets/Resources (stock) | โŒ | โœ… | Point-in-time value | +| Household size/counts | โŒ | โœ… | Can't divide people | +| Boolean/status flags | โŒ | โœ… | True/12 is nonsense | +| Demographic attributes | โŒ | โœ… | Properties don't divide | + +**Rule of thumb:** If dividing by 12 makes the value meaningless โ†’ use 
`period.this_year` + +### Pattern 3: Converting Annual to Monthly + +```python +class monthly_income_limit(Variable): + definition_period = MONTH + + def formula(household, period, parameters): + # Get annual parameter + annual_limit = parameters(period).gov.program.annual_limit + + # Convert to monthly + monthly_limit = annual_limit / MONTHS_IN_YEAR # โœ… + + return monthly_limit +``` + +### Pattern 4: Getting Period Components + +```python +class federal_poverty_guideline(Variable): + definition_period = MONTH + + def formula(entity, period, parameters): + # Get year and month as integers + year = period.start.year # e.g., 2024 + month = period.start.month # e.g., 1-12 + + # FPG updates October 1st + if month >= 10: + instant_str = f"{year}-10-01" + else: + instant_str = f"{year - 1}-10-01" + + # Access parameters at specific date + p_fpg = parameters(instant_str).gov.hhs.fpg + return p_fpg.first_person / MONTHS_IN_YEAR +``` + +--- + +## 5. Parameter Access + +### Standard Access +```python +def formula(entity, period, parameters): + # Parameters use current period + p = parameters(period).gov.program.benefit + return p.amount +``` + +### Specific Date Access +```python +def formula(entity, period, parameters): + # Access parameters at specific instant + p = parameters("2024-10-01").gov.hhs.fpg + return p.amount +``` + +**Important**: Never use `parameters(period.this_year)` - parameters always use the formula's period + +--- + +## 6. 
Testing with Different Periods + +### Critical Testing Rules + +**For MONTH period tests** (`period: 2025-01`): +- **Input** YEAR variables as **annual amounts** +- **Output** YEAR variables show **monthly values** (÷12) + +### Test Examples + +**Example 1: Basic MONTH Test** +```yaml +- name: Monthly income test + period: 2025-01 # MONTH period + input: + people: + person1: + employment_income: 12_000 # Input: Annual + output: + employment_income: 1_000 # Output: Monthly (12_000/12) +``` + +**Example 2: Mixed Variables** +```yaml +- name: Eligibility with age and income + period: 2024-01 # MONTH period + input: + age: 30 # Age doesn't convert + employment_income: 24_000 # Annual input + output: + age: 30 # Age stays same + employment_income: 2_000 # Monthly output + monthly_eligible: true +``` + +**Example 3: YEAR Period Test** +```yaml +- name: Annual calculation + period: 2024 # YEAR period + input: + employment_income: 18_000 # Annual + output: + employment_income: 18_000 # Annual output + annual_tax: 2_000 +``` + +### Testing Best Practices + +1. **Always specify period explicitly** +2. **Input YEAR variables as annual amounts** +3. **Expect monthly output for YEAR variables in MONTH tests** +4. **Use underscore separators**: `12_000` not `12000` +5. **Add calculation comments** in integration tests + +--- + +## 7. Common Mistakes and Solutions + +### ❌ Mistake 1: Not Using period.this_year +```python +# WRONG - From MONTH formula +def formula(person, period, parameters): + age = person("age", period) # Gets age/12! 
+ +# CORRECT +def formula(person, period, parameters): + age = person("age", period.this_year) # Gets actual age +``` + +### โŒ Mistake 2: Mixing Annual and Monthly +```python +# WRONG - Comparing different units +monthly_income = person("monthly_income", period) +annual_limit = parameters(period).gov.limit +if monthly_income < annual_limit: # BAD comparison + +# CORRECT - Convert to same units +monthly_income = person("monthly_income", period) +annual_limit = parameters(period).gov.limit +monthly_limit = annual_limit / MONTHS_IN_YEAR +if monthly_income < monthly_limit: # Good comparison +``` + +### โŒ Mistake 3: Wrong Test Expectations +```yaml +# WRONG - Expecting annual in MONTH test +period: 2024-01 +input: + employment_income: 12_000 +output: + employment_income: 12_000 # Wrong! + +# CORRECT +period: 2024-01 +input: + employment_income: 12_000 # Annual input +output: + employment_income: 1_000 # Monthly output +``` + +--- + +## 8. Quick Patterns Cheat Sheet + +### Accessing Variables +| Your Formula | Target Variable | Use | +|--------------|-----------------|-----| +| MONTH | YEAR | `period.this_year` | +| YEAR | MONTH | `period.first_month` | +| Any | ETERNITY | `period` | + +### Common Variables That Need period.this_year +- `age` +- `household_size`, `spm_unit_size` +- `cash_assets`, `vehicles_value` +- `state_name`, `state_code` +- Any demographic variable + +### Period Conversion +```python +# Annual to monthly +monthly = annual / MONTHS_IN_YEAR + +# Monthly to annual +annual = monthly * MONTHS_IN_YEAR + +# Get year/month numbers +year = period.start.year # 2024 +month = period.start.month # 1-12 +``` + +--- + +## 9. 
Real-World Example + +```python +class tanf_income_eligible(Variable): + value_type = bool + entity = SPMUnit + definition_period = MONTH # Monthly eligibility + + def formula(spm_unit, period, parameters): + # YEAR variables need period.this_year + household_size = spm_unit("spm_unit_size", period.this_year) + state = spm_unit.household("state_code", period.this_year) + + # MONTH variables use period + gross_income = spm_unit("tanf_gross_income", period) + + # Parameters use period + p = parameters(period).gov.states[state].tanf + + # Convert annual limit to monthly + annual_limit = p.income_limit[household_size] + monthly_limit = annual_limit / MONTHS_IN_YEAR + + return gross_income <= monthly_limit +``` + +--- + +## 10. Checklist for Period Handling + +When writing a formula: + +- [ ] Identify your formula's `definition_period` +- [ ] Check `definition_period` of accessed variables +- [ ] Use `period.this_year` for YEAR variables from MONTH formulas +- [ ] Use `period` for parameters (not `period.this_year`) +- [ ] Convert units when comparing (annual ↔ monthly) +- [ ] Test with appropriate period values + +--- + +## Related Skills + +- **policyengine-aggregation-skill**: For summing across entities with period handling +- **policyengine-core-skill**: For understanding variable and parameter systems + +--- + +## For Agents + +1. **Always check definition_period** before accessing variables +2. **Default to period.this_year** for demographic/stock variables from MONTH formulas +3. **Test thoroughly** - period mismatches cause subtle bugs +4. **Document period conversions** in comments +5. 
**Follow existing patterns** in similar variables \ No newline at end of file diff --git a/skills/technical-patterns/policyengine-review-patterns-skill/SKILL.md b/skills/technical-patterns/policyengine-review-patterns-skill/SKILL.md new file mode 100644 index 0000000..d875594 --- /dev/null +++ b/skills/technical-patterns/policyengine-review-patterns-skill/SKILL.md @@ -0,0 +1,376 @@ +--- +name: policyengine-review-patterns +description: PolicyEngine code review patterns - validation checklist, common issues, review standards +--- + +# PolicyEngine Review Patterns + +Comprehensive patterns for reviewing PolicyEngine implementations. + +## Understanding WHY, Not Just WHAT + +### Pattern Analysis Before Review + +When reviewing implementations that reference other states: + +**🔴 CRITICAL: Check WHY Variables Exist** + +Before approving any state-specific variable, verify: +1. **Does it have state-specific logic?** - Read the formula +2. **Are state parameters used?** - Check for `parameters(period).gov.states.XX` +3. **Is there transformation beyond aggregation?** - Look for calculations +4. 
**Would removing it break functionality?** - Test dependencies + +**Example Analysis:** +```python +# IL TANF has this variable: +class il_tanf_assistance_unit_size(Variable): + adds = ["il_tanf_payment_eligible_child", "il_tanf_payment_eligible_parent"] + # โœ… VALID: IL-specific eligibility rules + +# But IN TANF shouldn't copy it blindly: +class in_tanf_assistance_unit_size(Variable): + def formula(spm_unit, period): + return spm_unit("spm_unit_size", period) + # โŒ INVALID: No IN-specific logic, just wrapper +``` + +### Wrapper Variable Detection + +**Red Flags - Variables that shouldn't exist:** +- Formula is just `return entity("federal_variable", period)` +- Aggregates federal baseline with no transformation +- No state parameters accessed +- Comment says "use federal" but creates variable anyway + +**Action:** Request deletion of unnecessary wrapper variables + +--- + +## Priority Review Checklist + +### ๐Ÿ”ด CRITICAL - Automatic Failures + +These issues will cause crashes or incorrect results: + +#### 1. Vectorization Violations +```python +โŒ FAILS: +if household("income") > 1000: # Will crash with arrays + return 500 + +โœ… PASSES: +return where(household("income") > 1000, 500, 100) +``` + +#### 2. Hard-Coded Values +```python +โŒ FAILS: +benefit = min_(income * 0.33, 500) # Hard-coded 0.33 and 500 + +โœ… PASSES: +benefit = min_(income * p.rate, p.maximum) +``` + +#### 3. Missing Parameter Sources +```yaml +โŒ FAILS: +reference: + - title: State website + href: https://state.gov + +โœ… PASSES: +reference: + - title: Idaho Admin Code 16.05.03.205(3) + href: https://adminrules.idaho.gov/rules/current/16/160503.pdf#page=14 +``` + +--- + +### ๐ŸŸก MAJOR - Must Fix + +These affect accuracy or maintainability: + +#### 4. Test Quality Issues +```yaml +โŒ FAILS: +income: 50000 # No separator + +โœ… PASSES: +income: 50_000 # Proper formatting +``` + +#### 5. 
Calculation Accuracy +- Order of operations matches regulations +- Deductions applied in correct sequence +- Edge cases handled (negatives, zeros) + +#### 6. Description Style +```yaml +❌ FAILS: +description: The amount of SNAP benefits # Wordy filler phrase + +✅ PASSES: +description: SNAP benefits # Concise noun phrase +``` + +--- + +### 🟢 MINOR - Should Fix + +These improve code quality: + +#### 7. Code Organization +- One variable per file +- Proper use of `defined_for` +- Use of `adds` for simple sums + +#### 8. Documentation +- Clear references to regulation sections +- Changelog entry present + +--- + +## Common Issues Reference + +### Documentation Issues + +| Issue | Example | Fix | +|-------|---------|-----| +| No primary source | "See SNAP website" | Add USC/CFR citation | +| Wrong value | $198 vs $200 in source | Update parameter | +| Generic link | dol.gov | Link to specific regulation | +| Missing subsection | "7 CFR 273" | "7 CFR 273.9(d)(3)" | + +### Code Issues + +| Issue | Impact | Fix | +|-------|--------|-----| +| if-elif-else with data | Crashes microsim | Use where/select | +| Hard-coded values | Inflexible | Move to parameters | +| Missing defined_for | Inefficient | Add eligibility condition | +| Manual summing | Wrong pattern | Use adds attribute | + +### Test Issues + +| Issue | Example | Fix | +|-------|---------|-----| +| No separators | 100000 | 100_000 | +| No documentation | output: 500 | Add calculation comment | +| Wrong period | 2024-04 | Use 2024-01 or 2024 | +| Made-up variables | heating_expense | Use existing variables | + +--- + +## Source Verification Process + +### Step 1: Check Parameter Values + +For each parameter file: +```python +✓ Value matches source document +✓ Source is primary (statute > regulation > website) +✓ URL links to exact section with page anchor +✓ Effective dates correct +``` + +### Step 2: Validate References + +**Primary sources (preferred):** +- USC (United States Code) +- CFR (Code of Federal 
Regulations) +- State statutes +- State admin codes + +**Secondary sources (acceptable):** +- Official policy manuals +- State plan documents + +**Not acceptable alone:** +- Websites without specific sections +- Summaries or fact sheets +- News articles + +--- + +## Code Quality Checks + +### Vectorization Scan + +Search for these patterns: +```python +# Red flags that indicate scalar logic: +"if household" +"if person" +"elif" +"else:" +"and " (should be &) +"or " (should be |) +"not " (should be ~) +``` + +### Hard-Coding Scan + +Search for numeric literals: +```python +# Check for any number except: +# 0, 1, -1 (basic math) +# 12 (month conversion) +# Small indices (2, 3 for known structures) + +# Flag anything like: +"0.5" +"100" +"0.33" +"65" (unless it's a standard age) +``` + +--- + +## Review Response Templates + +### For Approval + +```markdown +## PolicyEngine Review: APPROVED โœ… + +### Verification Summary +- โœ… All parameters trace to primary sources +- โœ… Code is properly vectorized +- โœ… Tests document calculations +- โœ… No hard-coded values + +### Strengths +- Excellent USC/CFR citations +- Comprehensive test coverage +- Clear calculation logic + +### Minor Suggestions (optional) +- Consider adding edge case for zero income +``` + +### For Changes Required + +```markdown +## PolicyEngine Review: CHANGES REQUIRED โŒ + +### Critical Issues (Must Fix) + +1. **Non-vectorized code** - lines 45-50 + ```python + # Replace this: + if income > threshold: + benefit = high_amount + + # With this: + benefit = where(income > threshold, high_amount, low_amount) + ``` + +2. **Parameter value mismatch** - standard_deduction.yaml + - Source shows $200, parameter has $198 + - Reference: 7 CFR 273.9(d)(1), page 5 + +### Major Issues (Should Fix) + +3. **Missing primary source** - income_limit.yaml + - Add statute/regulation citation + - Current website link insufficient + +Please address these issues and re-request review. 
+``` + +--- + +## Test Validation + +### Check Test Structure + +```yaml +# Verify proper format: +- name: Case 1, description. # Numbered case with period + period: 2024-01 # Valid period (2024-01 or 2024) + input: + people: + person1: # Generic names + employment_income: 50_000 # Underscores + output: + # Calculation documented + # Income: $50,000/year = $4,167/month + program_benefit: 250 +``` + +### Run Test Commands + +```bash +# Unit tests +pytest policyengine_us/tests/policy/baseline/gov/ + +# Integration tests +policyengine-core test -c policyengine_us + +# Microsimulation +pytest policyengine_us/tests/microsimulation/ +``` + +--- + +## Review Priorities by Context + +### New Program Implementation +1. Parameter completeness +2. All documented scenarios tested +3. Eligibility paths covered +4. No hard-coded values + +### Bug Fixes +1. Root cause addressed +2. No regression potential +3. Tests prevent recurrence +4. Vectorization maintained + +### Refactoring +1. Functionality preserved +2. Tests still pass +3. Performance maintained +4. Code clarity improved + +--- + +## Quick Review Checklist + +**Parameters:** +- [ ] Values match sources +- [ ] References include subsections +- [ ] All metadata fields present +- [ ] Effective dates correct + +**Variables:** +- [ ] Properly vectorized (no if-elif-else) +- [ ] No hard-coded values +- [ ] Uses existing variables +- [ ] Includes proper metadata + +**Tests:** +- [ ] Proper period format +- [ ] Underscore separators +- [ ] Calculation comments +- [ ] Realistic scenarios + +**Overall:** +- [ ] Changelog entry +- [ ] Code formatted +- [ ] Tests pass +- [ ] Documentation complete + +--- + +## For Agents + +When reviewing code: +1. **Check vectorization first** - crashes are worst +2. **Verify parameter sources** - accuracy critical +3. **Scan for hard-coding** - maintainability issue +4. **Validate test quality** - ensures correctness +5. **Run all tests** - catch integration issues +6. 
**Document issues clearly** - help fixes +7. **Provide fix examples** - speed resolution \ No newline at end of file diff --git a/skills/technical-patterns/policyengine-testing-patterns-skill/SKILL.md b/skills/technical-patterns/policyengine-testing-patterns-skill/SKILL.md new file mode 100644 index 0000000..dbe52a2 --- /dev/null +++ b/skills/technical-patterns/policyengine-testing-patterns-skill/SKILL.md @@ -0,0 +1,412 @@ +--- +name: policyengine-testing-patterns +description: PolicyEngine testing patterns - YAML test structure, naming conventions, period handling, and quality standards +--- + +# PolicyEngine Testing Patterns + +Comprehensive patterns and standards for creating PolicyEngine tests. + +## Quick Reference + +### File Structure +``` +policyengine_us/tests/policy/baseline/gov/states/[state]/[agency]/[program]/ +├── [variable_name].yaml # Unit test for specific variable +├── [another_variable].yaml # Another unit test +└── integration.yaml # Integration test (NEVER prefixed) +``` + +### Period Restrictions +- ✅ `2024-01` - First month only +- ✅ `2024` - Whole year +- ❌ `2024-04` - Other months NOT supported +- ❌ `2024-01-01` - Full dates NOT supported + +### Naming Convention +- Files: `variable_name.yaml` (matches variable exactly) +- Integration: Always `integration.yaml` (never prefixed) +- Cases: `Case 1, description.` (numbered, comma, period) +- People: `person1`, `person2` (never descriptive names) + +--- + +## 1. 
Test File Organization + +### File Naming Rules + +**Unit tests** - Named after the variable they test: +``` +โœ… CORRECT: +az_liheap_eligible.yaml # Tests az_liheap_eligible variable +az_liheap_benefit.yaml # Tests az_liheap_benefit variable + +โŒ WRONG: +test_az_liheap.yaml # Wrong prefix +liheap_tests.yaml # Wrong pattern +``` + +**Integration tests** - Always named `integration.yaml`: +``` +โœ… CORRECT: +integration.yaml # Standard name + +โŒ WRONG: +az_liheap_integration.yaml # Never prefix integration +program_integration.yaml # Never prefix integration +``` + +### Folder Structure + +Follow state/agency/program hierarchy: +``` +gov/ +โ””โ”€โ”€ states/ + โ””โ”€โ”€ [state_code]/ + โ””โ”€โ”€ [agency]/ + โ””โ”€โ”€ [program]/ + โ”œโ”€โ”€ eligibility/ + โ”‚ โ””โ”€โ”€ income_eligible.yaml + โ”œโ”€โ”€ income/ + โ”‚ โ””โ”€โ”€ countable_income.yaml + โ””โ”€โ”€ integration.yaml +``` + +--- + +## 2. Period Format Restrictions + +### Critical: Only Two Formats Supported + +PolicyEngine test system ONLY supports: +- `2024-01` - First month of year +- `2024` - Whole year + +**Never use:** +- `2024-04` - April (will fail) +- `2024-10` - October (will fail) +- `2024-01-01` - Full date (will fail) + +### Handling Mid-Year Policy Changes + +If policy changes April 1, 2024: +```yaml +# Option 1: Test with first month +period: 2024-01 # Tests January with new policy + +# Option 2: Test next year +period: 2025-01 # When policy definitely active +``` + +--- + +## 3. Test Naming Conventions + +### Case Names + +Use numbered cases with descriptions: +```yaml +โœ… CORRECT: +- name: Case 1, single parent with one child. +- name: Case 2, two parents with two children. +- name: Case 3, income at threshold. 
+ +โŒ WRONG: +- name: Single parent test +- name: Test case for family +- name: Case 1 - single parent # Wrong punctuation +``` + +### Person Names + +Use generic sequential names: +```yaml +โœ… CORRECT: +people: + person1: + age: 30 + person2: + age: 10 + person3: + age: 8 + +โŒ WRONG: +people: + parent: + age: 30 + child1: + age: 10 +``` + +### Output Format + +Use simplified format without entity key: +```yaml +โœ… CORRECT: +output: + tx_tanf_eligible: true + tx_tanf_benefit: 250 + +โŒ WRONG: +output: + tx_tanf_eligible: + spm_unit: true # Don't nest under entity +``` + +--- + +## 4. Which Variables Need Tests + +### Variables That DON'T Need Tests + +Skip tests for simple composition variables using only `adds` or `subtracts`: +```python +# NO TEST NEEDED - just summing +class tx_tanf_countable_income(Variable): + adds = ["earned_income", "unearned_income"] + +# NO TEST NEEDED - simple arithmetic +class net_income(Variable): + adds = ["gross_income"] + subtracts = ["deductions"] +``` + +### Variables That NEED Tests + +Create tests for variables with: +- Conditional logic (`where`, `select`, `if`) +- Calculations/transformations +- Business logic +- Deductions/disregards +- Eligibility determinations + +```python +# NEEDS TEST - has logic +class tx_tanf_income_eligible(Variable): + def formula(spm_unit, period, parameters): + return where(enrolled, passes_test, other_test) +``` + +--- + +## 5. Period Conversion in Tests + +### Critical Rule for MONTH Tests + +When `period: 2025-01`: +- **Input**: YEAR variables as annual amounts +- **Output**: YEAR variables show monthly values (รท12) + +```yaml +- name: Case 1, income conversion. + period: 2025-01 # MONTH period + input: + people: + person1: + employment_income: 12_000 # Input: Annual + output: + employment_income: 1_000 # Output: Monthly (12_000/12) +``` + +--- + +## 6. 
Numeric Formatting + +### Always Use Underscore Separators + +```yaml +✅ CORRECT: +employment_income: 50_000 +cash_assets: 1_500 + +❌ WRONG: +employment_income: 50000 +cash_assets: 1500 +``` + +--- + +## 7. Integration Test Quality Standards + +### Inline Calculation Comments + +Document every calculation step: +```yaml +- name: Case 2, earnings with deductions. + period: 2025-01 + input: + people: + person1: + employment_income: 3_000 # $250/month + output: + # Person-level arrays + tx_tanf_gross_earned_income: [250, 0] + # Person1: 3,000/12 = 250 + + tx_tanf_earned_after_disregard: [87.1, 0] + # Person1: 250 - 120 = 130 + # Disregard: 130 * 0.33 = 42.9 + # After: 130 - 42.9 = 87.1 +``` + +### Comprehensive Scenarios + +Include 5-7 scenarios covering: +1. Basic eligible case +2. Earnings with deductions +3. Edge case at threshold +4. Mixed enrollment status +5. Special circumstances (SSI, immigration) +6. Ineligible case + +### Verify Intermediate Values + +Check 8-10 values per test: +```yaml +output: + # Income calculation chain + program_gross_income: 250 + program_earned_after_disregard: 87.1 + program_deductions: 200 + program_countable_income: 0 + + # Eligibility chain + program_income_eligible: true + program_resources_eligible: true + program_eligible: true + + # Final benefit + program_benefit: 320 +``` + +--- + +## 8. Common Variables to Use + +### Always Available +```yaml +# Demographics +age: 30 +is_disabled: false +is_pregnant: false + +# Income +employment_income: 50_000 +self_employment_income: 10_000 +social_security: 12_000 +ssi: 9_000 + +# Benefits +snap: 200 +tanf: 150 +medicaid: true + +# Location +state_code: CA +county_code: "06037" # String for FIPS +``` + +### Variables That DON'T Exist + +Never use these (not in PolicyEngine): +- `heating_expense` +- `utility_expense` +- `utility_shut_off_notice` +- `past_due_balance` +- `bulk_fuel_amount` +- `weatherization_needed` + +--- + +## 9. 
Enum Verification + +### Always Check Actual Enum Values + +Before using enums in tests: +```bash +# Find enum definition +grep -r "class ImmigrationStatus" --include="*.py" +``` + +```python +# Check actual values +class ImmigrationStatus(Enum): + CITIZEN = "Citizen" + LEGAL_PERMANENT_RESIDENT = "Legal Permanent Resident" # NOT "PERMANENT_RESIDENT" + REFUGEE = "Refugee" +``` + +```yaml +โœ… CORRECT: +immigration_status: LEGAL_PERMANENT_RESIDENT + +โŒ WRONG: +immigration_status: PERMANENT_RESIDENT # Doesn't exist +``` + +--- + +## 10. Test Quality Checklist + +Before submitting tests: +- [ ] All variables exist in PolicyEngine +- [ ] Period format is `2024-01` or `2024` only +- [ ] Numbers use underscore separators +- [ ] Integration tests have calculation comments +- [ ] 5-7 comprehensive scenarios in integration.yaml +- [ ] Enum values verified against actual definitions +- [ ] Output values realistic, not placeholders +- [ ] File names match variable names exactly + +--- + +## Common Test Patterns + +### Income Eligibility +```yaml +- name: Case 1, income exactly at threshold. + period: 2024-01 + input: + people: + person1: + employment_income: 30_360 # Annual limit + output: + program_income_eligible: true # At threshold = eligible +``` + +### Priority Groups +```yaml +- name: Case 2, elderly priority. + period: 2024-01 + input: + people: + person1: + age: 65 + output: + program_priority_group: true +``` + +### Categorical Eligibility +```yaml +- name: Case 3, SNAP categorical. + period: 2024-01 + input: + spm_units: + spm_unit: + snap: 200 # Receives SNAP + output: + program_categorical_eligible: true +``` + +--- + +## For Agents + +When creating tests: +1. **Check existing variables** before using any in tests +2. **Use only supported periods** (2024-01 or 2024) +3. **Document calculations** in integration tests +4. **Verify enum values** against actual code +5. **Follow naming conventions** exactly +6. **Include edge cases** at thresholds +7. 
**Test realistic scenarios** not placeholders \ No newline at end of file diff --git a/skills/technical-patterns/policyengine-vectorization-skill/SKILL.md b/skills/technical-patterns/policyengine-vectorization-skill/SKILL.md new file mode 100644 index 0000000..ee9243f --- /dev/null +++ b/skills/technical-patterns/policyengine-vectorization-skill/SKILL.md @@ -0,0 +1,303 @@ +--- +name: policyengine-vectorization +description: PolicyEngine vectorization patterns - NumPy operations, where/select usage, avoiding scalar logic with arrays +--- + +# PolicyEngine Vectorization Patterns + +Critical patterns for vectorized operations in PolicyEngine. Scalar logic with arrays will crash the microsimulation. + +## The Golden Rule + +**PolicyEngine processes multiple households simultaneously using NumPy arrays. NEVER use if-elif-else with entity data.** + +--- + +## 1. Critical: What Will Crash + +### โŒ NEVER: if-elif-else with Arrays + +```python +# THIS WILL CRASH - household data is an array +def formula(household, period, parameters): + income = household("income", period) + if income > 1000: # โŒ CRASH: "truth value of array is ambiguous" + return 500 + else: + return 100 +``` + +### โœ… ALWAYS: Vectorized Operations + +```python +# CORRECT - works with arrays +def formula(household, period, parameters): + income = household("income", period) + return where(income > 1000, 500, 100) # โœ… Vectorized +``` + +--- + +## 2. 
Common Vectorization Patterns + +### Pattern 1: Simple Conditions โ†’ `where()` + +```python +# Instead of if-else +โŒ if age >= 65: + amount = senior_amount +else: + amount = regular_amount + +โœ… amount = where(age >= 65, senior_amount, regular_amount) +``` + +### Pattern 2: Multiple Conditions โ†’ `select()` + +```python +# Instead of if-elif-else +โŒ if age < 18: + benefit = child_amount +elif age >= 65: + benefit = senior_amount +else: + benefit = adult_amount + +โœ… benefit = select( + [age < 18, age >= 65], + [child_amount, senior_amount], + default=adult_amount +) +``` + +### Pattern 3: Boolean Operations + +```python +# Combining conditions +eligible = (age >= 18) & (income < threshold) # Use & not 'and' +eligible = (is_disabled | is_elderly) # Use | not 'or' +eligible = ~is_excluded # Use ~ not 'not' +``` + +### Pattern 4: Clipping Values + +```python +# Instead of if for bounds checking +โŒ if amount < 0: + amount = 0 +elif amount > maximum: + amount = maximum + +โœ… amount = clip(amount, 0, maximum) +# Or: amount = max_(0, min_(amount, maximum)) +``` + +--- + +## 3. When if-else IS Acceptable + +### โœ… OK: Parameter-Only Conditions + +```python +# OK - parameters are scalars, not arrays +def formula(entity, period, parameters): + p = parameters(period).gov.program + + # This is fine - p.enabled is a scalar boolean + if p.enabled: + base = p.base_amount + else: + base = 0 + + # But must vectorize when using entity data + income = entity("income", period) + return where(income < p.threshold, base, 0) +``` + +### โœ… OK: Control Flow (Not Data) + +```python +# OK - controlling which calculation to use +def formula(entity, period, parameters): + year = period.start.year + + if year >= 2024: + # Use new formula (still vectorized) + return entity("new_calculation", period) + else: + # Use old formula (still vectorized) + return entity("old_calculation", period) +``` + +--- + +## 4. 
Common Vectorization Mistakes + +### Mistake 1: Scalar Comparison with Array + +```python +โŒ WRONG: +if household("income", period) > 1000: + # Error: truth value of array is ambiguous + +โœ… CORRECT: +income = household("income", period) +high_income = income > 1000 # Boolean array +benefit = where(high_income, low_benefit, high_benefit) +``` + +### Mistake 2: Using Python's and/or/not + +```python +โŒ WRONG: +eligible = is_elderly or is_disabled # Python's 'or' + +โœ… CORRECT: +eligible = is_elderly | is_disabled # NumPy's '|' +``` + +### Mistake 3: Nested if Statements + +```python +โŒ WRONG: +if eligible: + if income < threshold: + return full_benefit + else: + return partial_benefit +else: + return 0 + +โœ… CORRECT: +return where( + eligible, + where(income < threshold, full_benefit, partial_benefit), + 0 +) +``` + +--- + +## 5. Advanced Patterns + +### Pattern: Vectorized Lookup Tables + +```python +# Instead of if-elif for ranges +โŒ if size == 1: + amount = 100 +elif size == 2: + amount = 150 +elif size == 3: + amount = 190 + +โœ… # Using parameter brackets +amount = p.benefit_schedule.calc(size) + +โœ… # Or using select +amounts = [100, 150, 190, 220, 250] +amount = select( + [size == i for i in range(1, 6)], + amounts[:5], + default=amounts[-1] # 5+ people +) +``` + +### Pattern: Accumulating Conditions + +```python +# Building complex eligibility +income_eligible = income < p.income_threshold +resource_eligible = resources < p.resource_limit +demographic_eligible = (age < 18) | is_pregnant + +# Combine with & (not 'and') +eligible = income_eligible & resource_eligible & demographic_eligible +``` + +### Pattern: Conditional Accumulation + +```python +# Sum only for eligible members +person = household.members +is_eligible = person("is_eligible", period) +person_income = person("income", period) + +# Only count income of eligible members +eligible_income = where(is_eligible, person_income, 0) +total = household.sum(eligible_income) +``` + +--- + +## 
6. Performance Implications + +### Why Vectorization Matters + +- **Scalar logic**: Processes 1 household at a time → SLOW +- **Vectorized**: Processes 1000s of households simultaneously → FAST + +```python +# Performance comparison +❌ SLOW (if it worked): +for household in households: + if household.income > 1000: + household.benefit = 500 + +✅ FAST: +benefits = where(incomes > 1000, 500, 100) # All at once! +``` + +--- + +## 7. Testing for Vectorization Issues + +### Signs Your Code Isn't Vectorized + +**Error messages:** +- "The truth value of an array is ambiguous" +- "ValueError: The truth value of an array with more than one element" + +**Performance:** +- Tests run slowly +- Microsimulation times out + +### How to Test + +```python +# Your formula should work with arrays +def test_vectorization(): + # Create array inputs + incomes = np.array([500, 1500, 3000]) + + # Should return array output + benefits = formula_with_arrays(incomes) + assert len(benefits) == 3 +``` + +--- + +## Quick Reference Card + +| Operation | Scalar (WRONG) | Vectorized (CORRECT) | +|-----------|---------------|---------------------| +| Simple condition | `if x > 5:` | `where(x > 5, ...)` | +| Multiple conditions | `if-elif-else` | `select([...], [...])` | +| Boolean AND | `and` | `&` | +| Boolean OR | `or` | `\|` | +| Boolean NOT | `not` | `~` | +| Bounds checking | `if x < 0: x = 0` | `max_(0, x)` | +| Complex logic | Nested if | Nested where/select | + +--- + +## For Agents + +When implementing formulas: +1. **Never use if-elif-else** with entity data +2. **Always use where()** for simple conditions +3. **Use select()** for multiple conditions +4. **Use NumPy operators** (&, |, ~) not Python (and, or, not) +5. **Test with arrays** to ensure vectorization +6. **Parameter conditions** can use if-else (scalars) +7. 
**Entity data** must use vectorized operations \ No newline at end of file diff --git a/skills/tools-and-apis/policyengine-api-skill/SKILL.md b/skills/tools-and-apis/policyengine-api-skill/SKILL.md new file mode 100644 index 0000000..0f01100 --- /dev/null +++ b/skills/tools-and-apis/policyengine-api-skill/SKILL.md @@ -0,0 +1,478 @@ +--- +name: policyengine-api +description: PolicyEngine API - Flask REST service powering policyengine.org and programmatic access +--- + +# PolicyEngine API + +The PolicyEngine API is a Flask-based REST service that provides tax and benefit calculations for the web app and programmatic users. + +## For Users ๐Ÿ‘ฅ + +### What is the API? + +When you use policyengine.org, the API processes your calculations on our servers. + +**API base:** https://api.policyengine.org + +**What it does:** +- Runs tax and benefit calculations +- Stores and retrieves policy reforms +- Computes population-wide impacts +- Serves parameter and variable metadata + +### Public Access + +The API is publicly accessible with rate limits: +- **Unauthenticated:** 100 requests/minute +- **Authenticated:** 1,000 requests/minute + +**Try it:** +```bash +curl https://api.policyengine.org/us/policy/2 +``` + +### API Documentation + +**OpenAPI spec:** https://api.policyengine.org/docs + +**Interactive docs:** Swagger UI at API docs endpoint + +## For Analysts ๐Ÿ“Š + +### Using the API + +**Option 1: Python client (recommended)** +```python +# Use the policyengine package +# See policyengine-python-client-skill +``` + +**Option 2: Direct API calls** +```python +import requests + +# Calculate household impact +response = requests.post( + "https://api.policyengine.org/us/calculate", + json={ + "household": household_situation, + "policy_id": None # or reform_id + } +) +result = response.json() +``` + +### Key Endpoints + +**Household calculations:** +``` +POST /us/calculate +POST /uk/calculate +``` + +**Policy management:** +``` +GET /us/policy/{policy_id} +POST /us/policy 
+``` + +**Economy impacts:** +``` +GET /us/economy/{policy_id}/over/{baseline_policy_id} +``` + +**Metadata:** +``` +GET /us/parameters +GET /us/variables +GET /us/parameter/{parameter_name} +GET /us/variable/{variable_name} +``` + +### Rate Limits and Performance + +**Rate limits:** +- 100 req/min (unauthenticated) +- 1,000 req/min (authenticated - contact team) + +**Response times:** +- Household calculation: ~200-500ms +- Population impact: ~5-30 seconds +- Cached results: <100ms + +**Optimization:** +- Use the same policy_id for multiple requests (caching) +- Batch calculations when possible +- Use webhooks for long-running jobs (population impacts) + +## For Contributors ๐Ÿ’ป + +### Repository + +**Location:** PolicyEngine/policyengine-api + +**Clone:** +```bash +git clone https://github.com/PolicyEngine/policyengine-api +cd policyengine-api +``` + +### Current Architecture + +**To see current structure:** +```bash +tree policyengine_api/ + +# Key directories: +ls policyengine_api/ +# - endpoints/ - HTTP endpoint handlers +# - routes/ - Route registration +# - services/ - Business logic +# - compute_api/ - Calculation services +# - economy_api/ - Economy impact calculations +# - utils/ - Helpers (caching, validation) +# - data/ - Static data +``` + +### Current Implementation Patterns + +**Reference endpoint (read this first):** +```bash +cat policyengine_api/endpoints/economy.py +``` + +**This demonstrates:** +- Standard endpoint structure +- Request validation +- Caching pattern +- Error handling +- Response formatting + +**To find other endpoints:** +```bash +ls policyengine_api/endpoints/ +# - household.py +# - policy.py +# - economy.py +# - metadata.py +# - etc. 
+``` + +### Standard Endpoint Pattern (Stable) + +```python +from flask import Blueprint, request, jsonify +from policyengine_api.utils import cache + +blueprint = Blueprint("my_endpoint", __name__) + +@blueprint.route("/us/calculate", methods=["POST"]) +def calculate(): + """Standard pattern: validate, cache-check, compute, cache, return.""" + try: + # 1. Get and validate input + data = request.json + if not data: + return jsonify({"error": "No data provided"}), 400 + + # 2. Generate cache key + cache_key = f"calc_{hash(str(data))}" + + # 3. Check cache + cached = cache.get(cache_key) + if cached: + return jsonify(cached) + + # 4. Compute + result = perform_calculation(data) + + # 5. Cache result + cache.set(cache_key, result, expire=3600) + + # 6. Return + return jsonify(result) + + except Exception as e: + return jsonify({"error": str(e), "status": "error"}), 500 +``` + +**Current implementation details:** +```bash +# See actual endpoint for current pattern +cat policyengine_api/endpoints/household.py +``` + +### Caching Strategy + +**To see current caching implementation:** +```bash +# Redis configuration +cat policyengine_api/utils/cache.py + +# Find cache usage +grep -r "cache\." 
policyengine_api/endpoints/ +``` + +**Pattern:** +- Redis for caching +- Cache keys based on inputs +- TTL varies by endpoint (1 hour to 1 day) +- Clear cache on parameter changes + +### Background Jobs + +For long-running calculations (population impacts): + +**To see current implementation:** +```bash +# RQ (Redis Queue) usage +grep -r "@job" policyengine_api/ + +# Job patterns +ls policyengine_api/economy_api/ +``` + +**Pattern:** +- Use RQ for jobs > 5 seconds +- Return job_id immediately +- Poll for completion +- Cache results + +### Country Integration + +**How API loads country packages:** +```bash +cat policyengine_api/country.py +``` + +**Pattern:** +- Dynamically imports country packages +- Routes by country code (/us/, /uk/) +- Manages multiple model versions + +### Service Layer + +**Business logic separated from endpoints:** +```bash +ls policyengine_api/services/ +``` + +**Pattern:** +```python +# endpoints/household.py +from policyengine_api.services import household_service + +@app.route("/us/calculate", methods=["POST"]) +def calculate(): + result = household_service.calculate(request.json) + return jsonify(result) + +# services/household_service.py +def calculate(data): + # Business logic here + simulation = create_simulation(data) + return simulation.calculate(...) 
+``` + +### Testing + +**To see current test patterns:** +```bash +ls tests/ +cat tests/test_household.py +``` + +**Run tests:** +```bash +make test + +# Specific test +pytest tests/test_economy.py -v + +# With coverage +make test-coverage +``` + +### Development Server + +**Start locally:** +```bash +make debug +``` + +**Test endpoint:** +```bash +curl http://localhost:5000/us/policy/2 +``` + +### Deployment + +**To see deployment configuration:** +```bash +# Google Cloud Platform +cat app.yaml # App Engine config +cat cloudbuild.yaml # Cloud Build config + +# Environment variables +cat .env.example +``` + +**Current deployment:** +- Google App Engine +- Cloud SQL (PostgreSQL) +- Redis (caching) +- Cloud Build (CI/CD) + +### API Versions + +**To see versioning strategy:** +```bash +grep -r "version" policyengine_api/ +``` + +**Current approach:** +- API version in URLs (may add /v1/ prefix) +- Country package versions independent +- Breaking changes rare (backwards compatible) + +## Architecture Diagrams + +### Request Flow + +``` +User/App → API Gateway → Flask App → Country Package → Core Engine + ↓ + Redis Cache + ↓ + Background Job (if needed) + ↓ + PostgreSQL (storage) +``` + +### Dependencies + +``` +policyengine-core + ↓ +policyengine-us, policyengine-uk, etc. + ↓ +policyengine-api (you are here) + ↓ +policyengine-app (consumes API) +``` + +**To understand dependencies:** +- See `policyengine-core-skill` for engine patterns +- See `policyengine-us-skill` for country model usage +- See `policyengine-app-skill` for how app calls API + +## Common Development Tasks + +### Task 1: Add New Endpoint + +1. **Study reference implementation:** + ```bash + cat policyengine_api/endpoints/economy.py + ``` + +2. **Create new endpoint file:** + ```python + # policyengine_api/endpoints/my_endpoint.py + # Follow the pattern from economy.py + ``` + +3. 
**Register route:** + ```bash + # See route registration + cat policyengine_api/routes/__init__.py + ``` + +4. **Add tests:** + ```bash + # Follow test pattern + cat tests/test_economy.py + ``` + +### Task 2: Modify Caching Behavior + +**See current caching:** +```bash +cat policyengine_api/utils/cache.py +``` + +**Common changes:** +- Adjust TTL (time to live) +- Change cache key generation +- Add cache invalidation + +### Task 3: Update Country Package Version + +**To see how versions are managed:** +```bash +# Requirements +cat requirements.txt | grep policyengine- + +# Update and deploy +# See deployment docs in README +``` + +## Security and Best Practices + +### Input Validation + +**Always validate:** +- Country code (us, uk, ca) +- Policy ID format +- Household structure +- Parameter values + +**See validation examples:** +```bash +grep -r "validate" policyengine_api/endpoints/ +``` + +### Error Handling + +**Standard error response:** +```python +return jsonify({ + "error": "Error message", + "details": additional_context, + "status": "error" +}), status_code +``` + +**See error patterns:** +```bash +grep -A 5 "jsonify.*error" policyengine_api/endpoints/ +``` + +### Logging + +**To see logging configuration:** +```bash +cat policyengine_api/gcp_logging.py +``` + +**Pattern:** +- Google Cloud Logging +- Log all errors +- Log slow queries (>1s) +- Don't log sensitive data + +## Related Skills + +- **policyengine-python-client-skill** - Using the API +- **policyengine-core-skill** - Understanding the engine +- **policyengine-us-skill** - Country model integration +- **policyengine-app-skill** - How app consumes API +- **policyengine-standards-skill** - Code quality +- **policyengine-writing-skill** - API documentation style + +## Resources + +**Repository:** https://github.com/PolicyEngine/policyengine-api +**Live API:** https://api.policyengine.org +**Documentation:** https://api.policyengine.org/docs +**Status:** https://status.policyengine.org diff --git 
a/skills/tools-and-apis/policyengine-app-skill/SKILL.md b/skills/tools-and-apis/policyengine-app-skill/SKILL.md new file mode 100644 index 0000000..6bbf2db --- /dev/null +++ b/skills/tools-and-apis/policyengine-app-skill/SKILL.md @@ -0,0 +1,900 @@ +--- +name: policyengine-app +description: PolicyEngine React web application - the user interface at policyengine.org +--- + +# PolicyEngine App + +The PolicyEngine App is the React-based web application that users interact with at policyengine.org. + +## For Users 👥 + +### What is the App? + +The app at policyengine.org provides: +- Interactive household calculator +- Policy reform creator +- Population impact analysis +- Blog and research hub + +**Access:** https://policyengine.org + +### App Features + +**Calculator:** +- Enter household details +- See tax and benefit calculations +- Visualize marginal tax rates +- Compare scenarios + +**Policy designer:** +- Browse all parameters +- Create custom reforms +- Share via URL +- Download charts + +**Research hub:** +- Read policy analysis +- Explore modeled programs +- Access documentation + +## For Analysts 📊 + +### Understanding App URLs + +Reform URLs encode all policy changes in the query string, allowing sharing and reproducibility. + +**Example URL:** +``` +policyengine.org/us/policy? 
+ focus=policyOutput.policyBreakdown& + reform=67696& + region=enhanced_us& + timePeriod=2025& + baseline=2 +``` + +**Parameters:** +- `focus` - Which section to display +- `reform` - Reform ID from database +- `region` - Geographic scope (enhanced_us, CA, congressional districts) +- `timePeriod` - Year of analysis +- `baseline` - Baseline policy ID + +### Embedding PolicyEngine + +**iFrame integration:** +```html +<iframe src="https://policyengine.org/us/household?embedded=true" width="100%" height="800"></iframe> +``` + +**Parameter:** +- `embedded=true` - Removes navigation, optimizes for embedding + +### URL Structure + +**Household calculator:** +``` +/us/household?household=12345 +/uk/household?household=67890 +``` + +**Policy page:** +``` +/us/policy?reform=12345 +/uk/policy?reform=67890 +``` + +**Research/blog:** +``` +/us/research/article-slug +/uk/research/article-slug +``` + +## For Contributors 💻 + +### Repository + +**Location:** PolicyEngine/policyengine-app + +**Clone:** +```bash +git clone https://github.com/PolicyEngine/policyengine-app +cd policyengine-app +``` + +### Current Architecture + +**To see current structure:** +```bash +tree src/ -L 2 + +# Key directories: +ls src/ +# - pages/ - Page components +# - applets/ - Reusable UI modules +# - api/ - API integration +# - controls/ - Form controls +# - layout/ - Layout components +# - posts/ - Blog posts +# - routing/ - Routing configuration +# - hooks/ - Custom React hooks +# - data/ - Static data +``` + +### Technology Stack + +**Current dependencies:** +```bash +# See package.json for versions +cat package.json + +# Key dependencies: +# - React 18 +# - React Router v6 +# - Plotly.js +# - Ant Design +# - axios +``` + +### React Patterns (Critical) + +**✅ Functional components only (no classes):** +```javascript +// CORRECT +import { useState, useEffect } from "react"; + +export default function TaxCalculator({ income }) { + const [tax, setTax] = useState(0); + + useEffect(() => { + calculateTax(income).then(setTax); + }, [income]); + + return
<div>Tax: ${tax}</div>
; +} +``` + +**❌ Class components forbidden:** +```javascript +// WRONG - Don't use class components +class TaxCalculator extends Component { + // ... +} +``` + +**To find component examples:** +```bash +# Reference components +ls src/pages/ +ls src/applets/ + +# See a complete page +cat src/pages/HouseholdPage.jsx +``` + +### State Management + +**No global state (Redux, Context) - lift state up:** + +```javascript +// Parent manages state +function PolicyPage() { + const [reform, setReform] = useState({}); + const [impact, setImpact] = useState(null); + + return ( + <> + <PolicyEditor reform={reform} onChange={setReform} /> + <ImpactDisplay impact={impact} /> + </> + ); +} +``` + +**To see state patterns:** +```bash +# Find useState usage +grep -r "useState" src/pages/ | head -20 +``` + +### API Integration + +**To see current API patterns:** +```bash +cat src/api/call.js # Base API caller +cat src/api/variables.js # Variable metadata +cat src/api/parameters.js # Parameter metadata +``` + +**Standard pattern:** +```javascript +import { call } from "api/call"; + +// Fetch data +const result = await call( + `/us/calculate`, + { household: householdData }, + "POST" +); +``` + +### Routing + +**To see current routing:** +```bash +cat src/routing/routes.js + +# Routes defined with React Router v6 +# See examples: +grep -r "useNavigate" src/ +grep -r "useSearchParams" src/ +``` + +**URL parameters:** +```javascript +import { useSearchParams } from "react-router-dom"; + +const [searchParams, setSearchParams] = useSearchParams(); + +// Read +const reformId = searchParams.get("reform"); + +// Update +setSearchParams({ ...Object.fromEntries(searchParams), reform: newId }); +``` + +### Custom Hooks + +**To see PolicyEngine-specific hooks:** +```bash +ls src/hooks/ +# - useCountryId.js - Current country +# - useDisplayCategory.js +# - etc. 
+``` + +**Usage:** +```javascript +import { useCountryId } from "hooks/useCountryId"; + +function Component() { + const [countryId, setCountryId] = useCountryId(); + // countryId = "us", "uk", or "ca" +} +``` + +### Charts and Visualization + +**Plotly integration:** +```bash +# See chart components +ls src/pages/policy/output/ + +# Reference implementation +cat src/pages/policy/output/EconomyOutput.jsx +``` + +**Standard Plotly pattern:** +```javascript +import Plot from "react-plotly.js"; + +const layout = { + font: { family: "Roboto Serif" }, + plot_bgcolor: "white", + // PolicyEngine branding +}; + +; +``` + +### Blog Posts + +**To see blog post structure:** +```bash +ls src/posts/articles/ + +# Read a recent post +cat src/posts/articles/harris-eitc.md +``` + +**Blog posts:** +- Written in Markdown +- Stored in `src/posts/articles/` +- Include metadata (title, date, authors) +- Follow policyengine-writing-skill style + +**Adding a post:** +```bash +# Create new file +# src/posts/articles/my-analysis.md + +# Add to index (if needed) +# See existing posts for format +``` + +### Styling + +**Current styling approach:** +```bash +# See style configuration +ls src/style/ + +# Colors +cat src/style/colors.js + +# Ant Design theme +cat src/style/theme.js +``` + +**PolicyEngine colors:** +- Teal: `#39C6C0` (primary accent) +- Blue: `#2C6496` (charts, links) +- Dark gray: `#616161` (text) + +### Testing + +**To see current tests:** +```bash +ls src/__tests__/ + +# Run tests +make test + +# Test pattern +cat src/__tests__/example.test.js +``` + +**Testing libraries:** +- Jest (test runner) +- React Testing Library (component testing) +- User-centric testing (not implementation details) + +### Development Server + +**Start locally:** +```bash +make debug +# Opens http://localhost:3000 +``` + +**Environment:** +```bash +# Environment variables +cat .env.example + +# Config +ls src/config/ +``` + +### Building and Deployment + +**Build:** +```bash +make build +# Creates 
optimized production build +``` + +**Deployment:** +```bash +# See deployment config +cat netlify.toml # or appropriate hosting config +``` + +## Component Patterns + +### Standard Component Structure + +**To see well-structured components:** +```bash +# Example page +cat src/pages/HouseholdPage.jsx + +# Example applet +cat src/applets/PolicySearch.jsx +``` + +**Pattern:** +```javascript +import { useState, useEffect } from "react"; +import { useSearchParams } from "react-router-dom"; +import { useCountryId } from "hooks/useCountryId"; + +export default function MyComponent({ prop1, prop2 }) { + // 1. Hooks first + const [state, setState] = useState(initialValue); + const [countryId] = useCountryId(); + const [searchParams] = useSearchParams(); + + // 2. Effects + useEffect(() => { + // Side effects + }, [dependencies]); + + // 3. Event handlers + const handleClick = () => { + setState(newValue); + }; + + // 4. Render + return ( +
<div> + {/* JSX */} + </div>
+ ); +} +``` + +### Component Size Limit + +**Keep components under 150 lines after formatting.** + +**If component is too large:** +1. Extract sub-components +2. Move logic to custom hooks +3. Split into multiple files + +**To find large components:** +```bash +# Find files >150 lines +find src/ -name "*.jsx" -exec wc -l {} \; | sort -rn | head -20 +``` + +### File Naming + +**Components:** PascalCase.jsx +- `HouseholdPage.jsx` +- `PolicySearch.jsx` +- `ImpactChart.jsx` + +**Utilities:** camelCase.js +- `formatCurrency.js` +- `apiUtils.js` +- `chartHelpers.js` + +**Hooks:** camelCase.js with 'use' prefix +- `useCountryId.js` +- `usePolicy.js` + +## Common Development Tasks + +### Task 1: Add New Page + +1. **See page structure:** + ```bash + cat src/pages/HouseholdPage.jsx + ``` + +2. **Create new page:** + ```javascript + // src/pages/MyNewPage.jsx + export default function MyNewPage() { + return
<div>Content</div>
; + } + ``` + +3. **Add route:** + ```bash + # See routing + cat src/routing/routes.js + + # Add your route following the pattern + ``` + +### Task 2: Add New Chart + +1. **See chart examples:** + ```bash + ls src/pages/policy/output/ + cat src/pages/policy/output/DistributionalImpact.jsx + ``` + +2. **Create chart component:** + ```javascript + import Plot from "react-plotly.js"; + + export default function MyChart({ data }) { + return ( + + ); + } + ``` + +### Task 3: Add Blog Post + +1. **See post structure:** + ```bash + cat src/posts/articles/harris-eitc.md + ``` + +2. **Create post:** + ```bash + # Create markdown file + # src/posts/articles/my-analysis.md + + # Follow policyengine-writing-skill for style + ``` + +3. **Images:** + ```bash + # Store in public/images/posts/ + # Reference in markdown + ``` + +## API Integration Patterns + +### Fetching Data + +**To see API call patterns:** +```bash +cat src/api/call.js +``` + +**Standard pattern:** +```javascript +import { call } from "api/call"; + +const fetchData = async () => { + const result = await call( + `/us/calculate`, + { household: data }, + "POST" + ); + return result; +}; +``` + +### Loading States + +**Pattern:** +```javascript +const [loading, setLoading] = useState(false); +const [error, setError] = useState(null); +const [data, setData] = useState(null); + +useEffect(() => { + setLoading(true); + fetchData() + .then(setData) + .catch(setError) + .finally(() => setLoading(false)); +}, [dependencies]); + +if (loading) return ; +if (error) return ; +return ; +``` + +## Performance Patterns + +### Code Splitting + +**To see code splitting:** +```bash +grep -r "React.lazy" src/ +``` + +**Pattern:** +```javascript +import { lazy, Suspense } from "react"; + +const HeavyComponent = lazy(() => import("./HeavyComponent")); + +function Page() { + return ( + }> + + + ); +} +``` + +### Memoization + +**Use React.memo for expensive components:** +```javascript +import { memo } from "react"; + +const 
ExpensiveChart = memo(function ExpensiveChart({ data }) { + // Only re-renders if data changes + return ; +}); +``` + +## Accessibility + +**Requirements:** +- Semantic HTML elements +- ARIA labels for complex widgets +- Keyboard navigation +- Color contrast (WCAG AA) + +**To see accessibility patterns:** +```bash +grep -r "aria-" src/ +grep -r "role=" src/ +``` + +## Country-Specific Features + +### Country Switching + +**To see country switching:** +```bash +cat src/hooks/useCountryId.js +``` + +**Usage:** +```javascript +import { useCountryId } from "hooks/useCountryId"; + +function Component() { + const [countryId] = useCountryId(); // "us", "uk", or "ca" + + // Load country-specific data + const data = countryId === "us" ? usData : ukData; +} +``` + +### Country-Specific Content + +**Conditional rendering:** +```javascript +{countryId === "us" && } +{countryId === "uk" && } +``` + +**To find country-specific code:** +```bash +grep -r "countryId ===" src/ +``` + +## Development Workflow + +### Local Development + +**Start dev server:** +```bash +make debug +# App runs on http://localhost:3000 +# Connects to production API by default +``` + +**Connect to local API:** +```bash +# See environment configuration +cat src/config/environment.js + +# Or set environment variable +REACT_APP_API_URL=http://localhost:5000 make debug +``` + +### Testing + +**Run tests:** +```bash +make test +``` + +**Watch mode:** +```bash +npm test -- --watch +``` + +**Coverage:** +```bash +npm test -- --coverage +``` + +### Linting and Formatting + +**Format code (critical before committing):** +```bash +make format + +# Or manually +npm run lint -- --fix +npx prettier --write . +``` + +**Check linting (CI check):** +```bash +npm run lint -- --max-warnings=0 +``` + +## Current Implementation Reference + +### Component Structure + +**To see current page structure:** +```bash +ls src/pages/ +# - HouseholdPage.jsx +# - PolicyPage.jsx +# - HomePage.jsx +# - etc. 
+``` + +**To see a complete page:** +```bash +cat src/pages/PolicyPage.jsx +``` + +### API Call Patterns + +**To see current API integration:** +```bash +cat src/api/call.js # Base caller +cat src/api/variables.js # Variable metadata fetching +cat src/api/parameters.js # Parameter metadata fetching +``` + +### Routing Configuration + +**To see current routes:** +```bash +cat src/routing/routes.js +``` + +### Form Controls + +**To see PolicyEngine-specific form controls:** +```bash +ls src/controls/ +# - InputField.jsx +# - SearchParamControl.jsx +# - etc. +``` + +### Chart Components + +**To see chart implementations:** +```bash +ls src/pages/policy/output/ +# - BudgetaryImpact.jsx +# - DistributionalImpact.jsx +# - PovertyImpact.jsx +# - etc. +``` + +**Reference chart:** +```bash +cat src/pages/policy/output/DistributionalImpact.jsx +``` + +## Multi-Repository Integration + +### How App Relates to Other Repos + +``` +policyengine-core (engine) + ↓ +policyengine-us, policyengine-uk (country models) + ↓ +policyengine-api (backend) + ↓ +policyengine-app (you are here) +``` + +**Understanding the stack:** +- See `policyengine-core-skill` for engine concepts +- See `policyengine-us-skill` for what variables/parameters mean +- See `policyengine-api-skill` for API endpoints the app calls + +### Blog Posts Reference Country Models + +**Blog posts often reference variables:** +```bash +# Posts reference variables like "income_tax", "ctc" +# See policyengine-us-skill for variable definitions +cat src/posts/articles/harris-eitc.md +``` + +## Common Development Tasks + +### Task 1: Add New Parameter to UI + +1. **Understand parameter:** + ```bash + # See parameter in country model + cd ../policyengine-us + cat policyengine_us/parameters/gov/irs/credits/ctc/amount/base_amount.yaml + ``` + +2. **Find similar parameter in app:** + ```bash + cd ../policyengine-app + grep -r "ctc.*amount" src/pages/policy/ + ``` + +3. 
**Add UI control following pattern** + +### Task 2: Add New Chart + +1. **See existing charts:** + ```bash + cat src/pages/policy/output/DistributionalImpact.jsx + ``` + +2. **Create new chart component** + +3. **Add to policy output page** + +### Task 3: Fix Bug in Calculator + +1. **Find relevant component:** + ```bash + # Search for the feature + grep -r "keyword" src/pages/ + ``` + +2. **Read component code** + +3. **Make fix following React patterns** + +4. **Test with dev server:** + ```bash + make debug + ``` + +## Build and Deployment + +**Production build:** +```bash +make build +# Creates optimized bundle in build/ +``` + +**Deployment:** +```bash +# See deployment configuration +cat netlify.toml # or appropriate config +``` + +**Environment variables:** +```bash +# React env vars must have REACT_APP_ prefix +REACT_APP_API_URL=https://api.policyengine.org + +# Or use config file pattern (recommended) +cat src/config/environment.js +``` + +## Style Guide + +**Follow policyengine-standards-skill for:** +- ESLint configuration +- Prettier formatting +- Component size limits +- File organization + +**Follow policyengine-writing-skill for:** +- Blog post content +- Documentation +- UI copy + +## Resources + +**Repository:** https://github.com/PolicyEngine/policyengine-app +**Live app:** https://policyengine.org +**Staging:** https://staging.policyengine.org (if applicable) + +**Related skills:** +- **policyengine-api-skill** - Understanding the backend +- **policyengine-us-skill** - Understanding variables/parameters +- **policyengine-writing-skill** - Blog post style +- **policyengine-standards-skill** - Code quality diff --git a/skills/tools-and-apis/policyengine-core-skill/SKILL.md b/skills/tools-and-apis/policyengine-core-skill/SKILL.md new file mode 100644 index 0000000..e49044d --- /dev/null +++ b/skills/tools-and-apis/policyengine-core-skill/SKILL.md @@ -0,0 +1,487 @@ +--- +name: policyengine-core +description: PolicyEngine Core simulation engine - the 
foundation powering all PolicyEngine calculations +--- + +# PolicyEngine Core + +PolicyEngine Core is the microsimulation engine that powers all PolicyEngine calculations. It's a fork of OpenFisca-Core adapted for PolicyEngine's needs. + +## For Users 👥 + +### What is Core? + +When you use policyengine.org to calculate taxes or benefits, PolicyEngine Core is the "calculator" running behind the scenes. + +**Core provides:** +- The simulation engine that processes tax rules +- Variable and parameter management +- Entity relationships (person → family → household) +- Period handling (2024, 2025, etc.) + +You don't interact with Core directly - you use it through: +- **Web app:** policyengine.org +- **Python packages:** policyengine-us, policyengine-uk +- **API:** api.policyengine.org + +### Why Core Matters + +Core ensures: +- ✅ **Accuracy** - Calculations follow official rules exactly +- ✅ **Consistency** - Same rules applied everywhere +- ✅ **Transparency** - All rules traceable to legislation +- ✅ **Performance** - Vectorized calculations for speed + +## For Analysts 📊 + +### Understanding Core Concepts + +When writing PolicyEngine code, you'll encounter Core concepts: + +**Variables:** +- Represent quantities (income_tax, ctc, snap, etc.) +- Defined for specific entities (person, household, tax_unit) +- Calculated from formulas or set directly + +**Parameters:** +- Policy rules that change over time (tax rates, benefit amounts) +- Organized hierarchically (gov.irs.credits.ctc.amount.base_amount) +- Stored in YAML files + +**Entities:** +- Person: Individual +- Family: Family unit +- Tax unit: Tax filing unit +- Household: Physical household +- Marital unit: Marital status grouping +- SPM unit: Supplemental Poverty Measure unit + +**Periods:** +- Year: 2024, 2025, etc. +- Month: 2024-01, 2024-02, etc. 
+- Specific dates: 2024-06-15 + +### Core in Action + +```python +from policyengine_us import Simulation + +# When you create a simulation +sim = Simulation(situation=household) + +# Core manages: +# - Entity relationships +# - Variable dependencies +# - Parameter lookups +# - Period conversions + +# When you calculate +result = sim.calculate("income_tax", 2024) + +# Core: +# 1. Checks if already calculated +# 2. Identifies dependencies (income → AGI → taxable income → tax) +# 3. Calculates dependencies first +# 4. Applies formulas +# 5. Returns result +``` + +### Core vs Country Packages + +**Core (policyengine-core):** +- Generic simulation engine +- No specific tax/benefit rules +- Variable and parameter infrastructure + +**Country packages (policyengine-us, etc.):** +- Built on Core +- Contain specific tax/benefit rules +- Define variables and parameters for that country + +**Relationship:** +``` +policyengine-core (engine) + ↓ powers +policyengine-us (US rules) + ↓ used by +policyengine-api (REST API) + ↓ serves +policyengine-app (web interface) +``` + +## For Contributors 💻 + +### Repository + +**Location:** PolicyEngine/policyengine-core +**Origin:** Fork of OpenFisca-Core + +**Clone:** +```bash +git clone https://github.com/PolicyEngine/policyengine-core +``` + +### Current Architecture + +**To see current structure:** +```bash +tree policyengine_core/ + +# Key directories: +# - variables/ - Variable class and infrastructure +# - parameters/ - Parameter class and infrastructure +# - entities/ - Entity definitions +# - simulations/ - Simulation class +# - periods/ - Period handling +# - reforms/ - Reform application +``` + +**To understand a specific component:** +```bash +# Variable system +cat policyengine_core/variables/variable.py + +# Parameter system +cat policyengine_core/parameters/parameter.py + +# Simulation engine +cat policyengine_core/simulations/simulation.py + +# Entity system +cat policyengine_core/entities/entity.py +``` + 
+### Key Classes + +**Variable:** +```python +# To see Variable class implementation +cat policyengine_core/variables/variable.py + +# Variables in country packages inherit from this: +from policyengine_core.variables import Variable + +class income_tax(Variable): + value_type = float + entity = Person + label = "Income tax" + definition_period = YEAR + + def formula(person, period, parameters): + # Vectorized formula + return calculate_tax(...) +``` + +**Simulation:** +```python +# To see Simulation class implementation +cat policyengine_core/simulations/simulation.py + +# Manages calculation graph and caching +sim = Simulation(situation=situation) +sim.calculate("variable", period) +``` + +**Parameters:** +```python +# To see Parameter handling +cat policyengine_core/parameters/parameter_node.py + +# Access in formulas: +parameters(period).gov.irs.credits.ctc.amount.base_amount +``` + +### Vectorization (Critical!) + +Core requires vectorized operations - no if-elif-else with arrays: + +**❌ Wrong (scalar logic):** +```python +if age < 18: + eligible = True +else: + eligible = False +``` + +**✅ Correct (vectorized):** +```python +eligible = age < 18 # NumPy boolean array +``` + +**Why:** Core processes many households simultaneously for performance. 
+ +**To see vectorization examples:** +```bash +# Search for where() usage (vectorized if-then-else) +grep -r "np.where" policyengine_core/ + +# Find select() usage (vectorized case statements) +grep -r "select" policyengine_core/ +``` + +### Formula Dependencies + +Core automatically resolves variable dependencies: + +```python +class taxable_income(Variable): + def formula(person, period, parameters): + # Core automatically calculates these first: + agi = person("adjusted_gross_income", period) + deduction = person("standard_deduction", period) + return agi - deduction + +class income_tax(Variable): + def formula(person, period, parameters): + # Core knows to calculate taxable_income first + taxable = person("taxable_income", period) + return apply_brackets(taxable, ...) +``` + +**To see dependency resolution:** +```bash +# Find trace functionality +grep -r "trace" policyengine_core/simulations/ + +# Enable in your code: +simulation.trace = True +simulation.calculate("income_tax", 2024) +``` + +### Period Handling + +**To see period implementation:** +```bash +cat policyengine_core/periods/period.py + +# Period types: +# - YEAR: 2024 +# - MONTH: 2024-01 +# - ETERNITY: permanent values +``` + +**Usage in variables:** +```python +# Annual variable +definition_period = YEAR # Called with 2024 + +# Monthly variable +definition_period = MONTH # Called with "2024-01" + +# Convert periods +yearly_value = person("monthly_income", period.this_year) * 12 +``` + +### Testing Core Changes + +**To run Core tests:** +```bash +cd policyengine-core +make test + +# Specific test +pytest tests/core/test_variables.py -v +``` + +**To test in country package:** +```bash +# Changes to Core affect all country packages +cd policyengine-us +pip install -e ../policyengine-core # Local development install +make test +``` + +### Key Differences from OpenFisca + +PolicyEngine Core differs from OpenFisca-Core: + +**To see PolicyEngine changes:** +```bash +# Compare to OpenFisca +# Core fork 
diverged to add: +# - Enhanced performance +# - Better error messages +# - PolicyEngine-specific features + +# See commit history for PolicyEngine changes +git log --oneline +``` + +## Core Development Workflow + +### Making Changes to Core + +1. **Clone repo:** + ```bash + git clone https://github.com/PolicyEngine/policyengine-core + ``` + +2. **Install for development:** + ```bash + make install + ``` + +3. **Make changes** to variable.py, simulation.py, etc. + +4. **Test locally:** + ```bash + make test + ``` + +5. **Test in country package:** + ```bash + cd ../policyengine-us + pip install -e ../policyengine-core + make test + ``` + +6. **Format and commit:** + ```bash + make format + git commit -m "Description" + ``` + +### Understanding Impact + +Changes to Core affect: +- ✅ All country packages (US, UK, Canada, IL, NG) +- ✅ The API +- ✅ The web app +- ✅ All analysis tools + +**Critical:** Always test in multiple country packages before merging. + +## Common Core Patterns + +### Pattern 1: Adding a New Variable Type + +**Current variable types:** +```bash +# See supported types +grep "value_type" policyengine_core/variables/variable.py +``` + +**Types:** int, float, bool, str, Enum, date + +### Pattern 2: Custom Formulas + +**Formula signature:** +```python +def formula(entity, period, parameters): + # entity: Person, TaxUnit, Household, etc. + # period: 2024, "2024-01", etc. 
+ # parameters: Parameter tree for period + return calculated_value +``` + +**To see formula examples:** +```bash +# Search country packages for formulas +grep -A 10 "def formula" ../policyengine-us/policyengine_us/variables/ | head -50 +``` + +### Pattern 3: Parameter Access + +**Accessing parameters in formulas:** +```python +# Navigate parameter tree +param = parameters(period).gov.irs.credits.ctc.amount.base_amount + +# Parameters automatically valid for period +# No need to check dates manually +``` + +**To see parameter structure:** +```bash +# Example from country package +tree ../policyengine-us/policyengine_us/parameters/gov/ +``` + +## Advanced Topics + +### Formula Caching + +Core caches calculations automatically: +```python +# First call calculates +tax1 = sim.calculate("income_tax", 2024) + +# Second call returns cached value +tax2 = sim.calculate("income_tax", 2024) # Instant +``` + +### Neutralizing Variables + +```python +# Set variable to zero in reform +reform = { + "income_tax": { + "2024-01-01.2100-12-31": 0 + } +} +``` + +### Adding Variables + +Country packages add variables by inheriting from Core's Variable class. 
+ +**See policyengine-us-skill for variable creation patterns.** + +## Resources + +**Repository:** https://github.com/PolicyEngine/policyengine-core + +**Documentation:** +- Core API docs (see README in repo) +- OpenFisca docs (original): https://openfisca.org/doc/ + +**Related skills:** +- **policyengine-us-skill** - Using Core through country packages +- **policyengine-standards-skill** - Code quality standards + +## Troubleshooting + +### Common Issues + +**Variable not found:** +```python +# Error: Variable 'income_tax' not found +# Solution: Variable is defined in country package, not Core +# Use policyengine-us, not policyengine-core directly +``` + +**Scalar vs array operations:** +```python +# Error: truth value of array is ambiguous +# Solution: Use np.where() instead of if-else +# See vectorization section above +``` + +**Period mismatch:** +```python +# Error: Cannot compute variable_name for period 2024-01 +# Solution: Check definition_period matches request +# YEAR variables need YEAR periods (2024, not "2024-01") +``` + +**To debug:** +```python +# Enable tracing +sim.trace = True +sim.calculate("variable", period) +# See calculation dependency tree +``` + +## Contributing to Core + +**Before contributing:** +1. Read Core README +2. Understand OpenFisca architecture +3. Test changes in multiple country packages +4. 
Follow policyengine-standards-skill + +**Development standards:** +- Python 3.10-3.13 +- Black formatting (79-char) +- Comprehensive tests +- No breaking changes without discussion diff --git a/skills/tools-and-apis/policyengine-python-client-skill/SKILL.md b/skills/tools-and-apis/policyengine-python-client-skill/SKILL.md new file mode 100644 index 0000000..0ae7387 --- /dev/null +++ b/skills/tools-and-apis/policyengine-python-client-skill/SKILL.md @@ -0,0 +1,356 @@ +--- +name: policyengine-python-client +description: Using PolicyEngine programmatically via Python client or REST API +--- + +# PolicyEngine Python Client + +This skill covers programmatic access to PolicyEngine for analysts and researchers. + +## Installation + +```bash +# Install the Python client +pip install policyengine + +# Or for local development +pip install policyengine-us # Just the US model (offline) +``` + +## Quick Start: Python Client + +```python +from policyengine import Simulation + +# Create a household +household = { + "people": { + "you": { + "age": {"2024": 30}, + "employment_income": {"2024": 50000} + } + }, + "households": { + "your household": { + "members": ["you"], + "state_name": {"2024": "CA"} + } + } +} + +# Run simulation +sim = Simulation(situation=household, country_id="us") +income_tax = sim.calculate("income_tax", "2024") +``` + +## For Users: Why Use Python? 
+ +**Web app limitations:** +- ✅ Great for exploring policies interactively +- ❌ Can't analyze many households at once +- ❌ Can't automate repetitive analyses +- ❌ Limited customization of charts + +**Python benefits:** +- ✅ Analyze thousands of households in batch +- ✅ Automate regular policy analysis +- ✅ Create custom visualizations +- ✅ Integrate with other data sources +- ✅ Reproducible research + +## For Analysts: Common Workflows + +### Workflow 1: Calculate Your Own Taxes + +```python +from policyengine import Simulation + +# Your household (more complex than web app) +household = { + "people": { + "you": { + "age": {"2024": 35}, + "employment_income": {"2024": 75000}, + "qualified_dividend_income": {"2024": 5000}, + "charitable_cash_donations": {"2024": 3000} + }, + "spouse": { + "age": {"2024": 33}, + "employment_income": {"2024": 60000} + }, + "child1": {"age": {"2024": 8}}, + "child2": {"age": {"2024": 5}} + }, + # ... entities setup (see policyengine-us-skill) +} + +sim = Simulation(situation=household, country_id="us") + +# Calculate specific values +federal_income_tax = sim.calculate("income_tax", "2024") +state_income_tax = sim.calculate("state_income_tax", "2024") +ctc = sim.calculate("ctc", "2024") +eitc = sim.calculate("eitc", "2024") + +print(f"Federal income tax: ${federal_income_tax:,.0f}") +print(f"State income tax: ${state_income_tax:,.0f}") +print(f"Child Tax Credit: ${ctc:,.0f}") +print(f"EITC: ${eitc:,.0f}") +``` + +### Workflow 2: Analyze a Policy Reform + +```python +from policyengine import Simulation + +# Define reform (increase CTC to $5,000) +reform = { + "gov.irs.credits.ctc.amount.base_amount": { + "2024-01-01.2100-12-31": 5000 + } +} + +# Compare baseline vs reform +household = create_household() # Your household definition + +sim_baseline = Simulation(situation=household, country_id="us") +sim_reform = Simulation(situation=household, country_id="us", reform=reform) + +ctc_baseline = 
sim_baseline.calculate("ctc", "2024") +ctc_reform = sim_reform.calculate("ctc", "2024") + +print(f"CTC baseline: ${ctc_baseline:,.0f}") +print(f"CTC reform: ${ctc_reform:,.0f}") +print(f"Increase: ${ctc_reform - ctc_baseline:,.0f}") +``` + +### Workflow 3: Batch Analysis + +```python +import pandas as pd +from policyengine import Simulation + +# Analyze multiple households +households = [ + {"income": 30000, "children": 0}, + {"income": 50000, "children": 2}, + {"income": 100000, "children": 3}, +] + +results = [] +for h in households: + situation = create_household(income=h["income"], num_children=h["children"]) + sim = Simulation(situation=situation, country_id="us") + + results.append({ + "income": h["income"], + "children": h["children"], + "income_tax": sim.calculate("income_tax", "2024"), + "ctc": sim.calculate("ctc", "2024"), + "eitc": sim.calculate("eitc", "2024") + }) + +df = pd.DataFrame(results) +print(df) +``` + +## Using the REST API Directly + +### Authentication + +**Public access:** +- 100 requests per minute (unauthenticated) +- No API key needed for basic use + +**Authenticated access:** +- 1,000 requests per minute +- Contact hello@policyengine.org for API key + +### Key Endpoints + +**Calculate household impact:** +```python +import requests + +url = "https://api.policyengine.org/us/calculate" +payload = { + "household": household_dict, + "policy_id": reform_id # or None for baseline +} + +response = requests.post(url, json=payload) +result = response.json() +``` + +**Get policy details:** +```python +# Get policy metadata +response = requests.get("https://api.policyengine.org/us/policy/12345") +policy = response.json() +``` + +**Get parameter values:** +```python +# Get current parameter value +response = requests.get( + "https://api.policyengine.org/us/parameter/gov.irs.credits.ctc.amount.base_amount" +) +parameter = response.json() +``` + +### For Full API Documentation + +**OpenAPI spec:** https://api.policyengine.org/docs + +**To explore:** 
+```bash +# View all endpoints +curl https://api.policyengine.org/docs + +# Test calculate endpoint +curl -X POST https://api.policyengine.org/us/calculate \ + -H "Content-Type: application/json" \ + -d '{"household": {...}}' +``` + +## Limitations and Considerations + +### Rate Limits + +**Unauthenticated:** +- 100 requests/minute +- Good for exploratory analysis + +**Authenticated:** +- 1,000 requests/minute +- Required for production use + +### Data Privacy + +- PolicyEngine does not store household data +- All calculations happen server-side and are not logged +- Reform URLs are public (don't include personal info in reforms) + +### Performance + +**API calls:** +- Simple household: ~200-500ms +- Population impact: ~5-30 seconds (varies by reform) +- Use caching for repeated calculations + +**Local simulation (policyengine-us):** +- Faster for batch analysis +- No rate limits +- No network dependency +- Limited to one country per package + +## Choosing Local vs API + +### Use Local (policyengine-us package) + +**When:** +- Batch analysis of many households +- Need offline capability +- Analyzing parameter sweeps (axes) +- Development/testing + +**Install:** +```bash +pip install policyengine-us # US only +pip install policyengine-uk # UK only +``` + +**Example:** +```python +from policyengine_us import Simulation + +# Works offline +sim = Simulation(situation=household) +``` + +### Use API (policyengine or requests) + +**When:** +- Multi-country analysis +- Using latest model version +- Don't want to manage dependencies +- Integration with web services + +**Example:** +```python +import requests + +# Requires internet +response = requests.post("https://api.policyengine.org/us/calculate", ...) 
+``` + +## For Contributors: Understanding the Client + +**Repository:** PolicyEngine/policyengine.py + +**To see implementation:** +```bash +# Clone the client +git clone https://github.com/PolicyEngine/policyengine.py + +# See the Simulation class +cat policyengine/simulation.py + +# See API integration +cat policyengine/api.py +``` + +**Architecture:** +- `Simulation` class wraps API calls +- `calculate()` method handles caching +- Transparent fallback between API and local + +## Advanced: Direct Country Package Usage + +For maximum control and performance, use country packages directly: + +```python +from policyengine_us import Simulation + +# Full control over situation structure +situation = { + # Complete situation dictionary + # See policyengine-us-skill for patterns +} + +sim = Simulation(situation=situation) +result = sim.calculate("variable_name", 2024) +``` + +**Benefits:** +- No API dependency +- Faster (no network) +- Full access to all variables +- Use axes for parameter sweeps + +**See policyengine-us-skill for detailed patterns.** + +## Examples and Tutorials + +**PolicyEngine documentation:** +- US: https://policyengine.org/us/docs +- UK: https://policyengine.org/uk/docs + +**Example notebooks:** +- Repository: PolicyEngine/analysis-notebooks +- See policyengine-analysis-skill for analysis patterns + +**Community examples:** +- Blog posts: policyengine.org/us/research +- GitHub discussions: github.com/PolicyEngine discussions + +## Getting Help + +**For usage questions:** +- GitHub Discussions: https://github.com/PolicyEngine/policyengine-us/discussions + +**For bugs:** +- File issues in appropriate repo (policyengine-us, policyengine.py, etc.) + +**For collaboration:** +- Email: hello@policyengine.org