From 335acf9d10a192a0408f2470e4a34a8d641dbecf Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 08:59:19 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 12 + README.md | 3 + plugin.lock.json | 85 + skills/.gitkeep | 0 skills/using-python-engineering/SKILL.md | 398 ++++ .../async-patterns-and-concurrency.md | 1131 ++++++++++ .../debugging-and-profiling.md | 1047 ++++++++++ .../ml-engineering-workflows.md | 1072 ++++++++++ .../modern-syntax-and-types.md | 848 ++++++++ .../project-structure-and-tooling.md | 1593 ++++++++++++++ .../resolving-mypy-errors.md | 1120 ++++++++++ .../scientific-computing-foundations.md | 981 +++++++++ .../systematic-delinting.md | 1506 ++++++++++++++ .../testing-and-quality.md | 1848 +++++++++++++++++ 14 files changed, 11644 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 plugin.lock.json create mode 100644 skills/.gitkeep create mode 100644 skills/using-python-engineering/SKILL.md create mode 100644 skills/using-python-engineering/async-patterns-and-concurrency.md create mode 100644 skills/using-python-engineering/debugging-and-profiling.md create mode 100644 skills/using-python-engineering/ml-engineering-workflows.md create mode 100644 skills/using-python-engineering/modern-syntax-and-types.md create mode 100644 skills/using-python-engineering/project-structure-and-tooling.md create mode 100644 skills/using-python-engineering/resolving-mypy-errors.md create mode 100644 skills/using-python-engineering/scientific-computing-foundations.md create mode 100644 skills/using-python-engineering/systematic-delinting.md create mode 100644 skills/using-python-engineering/testing-and-quality.md diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..354d5c7 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "axiom-python-engineering", + "description": "Modern Python 3.12+ engineering: types, testing, async, scientific computing, ML workflows - 10 skills", + "version": "1.1.1", + "author": { + "name": "tachyon-beep", + "email": "zhongweili@tubi.tv" + }, + "skills": [ + "./skills" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..9dc63bf --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# axiom-python-engineering + +Modern Python 3.12+ engineering: types, testing, async, scientific computing, ML workflows - 10 skills diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..f0e96ac --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,85 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:tachyon-beep/skillpacks:plugins/axiom-python-engineering", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "d2e2cd2da633625dc09d9fa35a3dc0f60c3cafe5", + "treeHash": "0d643778d6d2de81b6df637544298a6b9a3214de1a3ca03e3ba9d6eaf0cd3f4c", + "generatedAt": "2025-11-28T10:28:30.940625Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "axiom-python-engineering", + "description": "Modern Python 3.12+ engineering: types, testing, async, scientific computing, ML workflows - 10 skills", + "version": "1.1.1" + }, + "content": { + "files": [ + { + "path": 
"README.md", + "sha256": "501cc915afac6582b68359b9cd06b82b4f156ca4439348d16be2515a17d4425e" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "f8c9d4521cad5f45e5aade5cc4aa8041a256579a1c8b00041d146bcb830a01b8" + }, + { + "path": "skills/.gitkeep", + "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + }, + { + "path": "skills/using-python-engineering/resolving-mypy-errors.md", + "sha256": "f5ff38011ecf1304093b84cfa59f41daf585d36a9d75598ac4a045e7c41ceafa" + }, + { + "path": "skills/using-python-engineering/ml-engineering-workflows.md", + "sha256": "3b45246f1477f341e770877a95fd5b67f25774e1b5e7c21898aba2f1b27fa8d0" + }, + { + "path": "skills/using-python-engineering/project-structure-and-tooling.md", + "sha256": "3e225c744c5138ec6c4945f60e6bc959aac1663d1a4cfb741efaf0e622351dc2" + }, + { + "path": "skills/using-python-engineering/modern-syntax-and-types.md", + "sha256": "56a51be261616cc49041af9dcb5943f6e5b3f2424b84669a6f7df84a5b6458c3" + }, + { + "path": "skills/using-python-engineering/systematic-delinting.md", + "sha256": "57df2647863de7e4937b4c5d92cc4832559e10f0a77917f64802bf4bf89ace83" + }, + { + "path": "skills/using-python-engineering/testing-and-quality.md", + "sha256": "9515f2638edfaaedf0d8664beb141de376f5f6d233aad0fd128588c1fffc257d" + }, + { + "path": "skills/using-python-engineering/scientific-computing-foundations.md", + "sha256": "2f1157d97cbc98ed3b7fbf2489b9e5ef8a6c0c05847095bd5b0acb2d45f4cb71" + }, + { + "path": "skills/using-python-engineering/SKILL.md", + "sha256": "f265281bc5cd8efd8e3e034ddcbad83038485b2789aa01e0480024cf9f34aee4" + }, + { + "path": "skills/using-python-engineering/async-patterns-and-concurrency.md", + "sha256": "83003bd109a5393c689415fe9529a2fb8b77cbc10e4aaf5ec706a609e1122b50" + }, + { + "path": "skills/using-python-engineering/debugging-and-profiling.md", + "sha256": "9073f36ae95bcc55458bc78aedacf6e005d1fb6b5d60b883fc7ff6b1e4d61260" + } + ], + "dirSha256": "0d643778d6d2de81b6df637544298a6b9a3214de1a3ca03e3ba9d6eaf0cd3f4c" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/.gitkeep b/skills/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/skills/using-python-engineering/SKILL.md b/skills/using-python-engineering/SKILL.md new file mode 100644 index 0000000..481758b --- /dev/null +++ b/skills/using-python-engineering/SKILL.md @@ -0,0 +1,398 @@ +--- +name: using-python-engineering +description: Routes to appropriate Python specialist skill based on symptoms and problem type +mode: true +--- + +# Using Python Engineering + +## Overview + +This meta-skill routes you to the right Python specialist based on symptoms. Python engineering problems fall into distinct categories that require specialized knowledge. Load this skill when you encounter Python-specific issues but aren't sure which specialized skill to use. + +**Core Principle**: Different Python problems require different specialists. Match symptoms to the appropriate specialist skill. Don't guess at solutions—route to the expert. 
+ +## When to Use + +Load this skill when: +- Working with Python and encountering problems +- User mentions: "Python", "type hints", "mypy", "pytest", "async", "pandas", "numpy" +- Need to implement Python projects or optimize performance +- Setting up Python tooling or fixing lint warnings +- Debugging Python code or profiling performance + +**Don't use for**: Non-Python languages, algorithm theory (not Python-specific), deployment infrastructure (not Python-specific) + +--- + +## Routing by Symptom + +### Type Errors and Type Hints + +**Symptoms - Learning Type Syntax**: +- "How to use type hints?" +- "Python 3.12 type syntax" +- "Generic types" +- "Protocol vs ABC" +- "TypeVar usage" +- "Configure mypy/pyright" + +**Route to**: See [modern-syntax-and-types.md](modern-syntax-and-types.md) for comprehensive type system guidance. + +**Why**: Learning type hint syntax, patterns, and configuration. + +**Symptoms - Fixing Type Errors**: +- "mypy error: Incompatible types" +- "mypy error: Argument has incompatible type" +- "How to fix mypy errors?" +- "100+ mypy errors, where to start?" +- "When to use type: ignore?" +- "Add types to legacy code" +- "Understanding mypy error messages" + +**Route to**: See [resolving-mypy-errors.md](resolving-mypy-errors.md) for systematic mypy error resolution. + +**Why**: Resolving type errors requires systematic methodology, understanding error messages, and knowing when to fix vs ignore. + +**Example queries**: +- "Getting mypy error about incompatible types" → [resolving-mypy-errors.md](resolving-mypy-errors.md) +- "How to use Python 3.12 type parameter syntax?" → [modern-syntax-and-types.md](modern-syntax-and-types.md) +- "Fix 150 mypy errors systematically" → [resolving-mypy-errors.md](resolving-mypy-errors.md) + +--- + +### Project Setup and Tooling + +**Symptoms**: +- "How to structure my Python project?" +- "Setup pyproject.toml" +- "Configure ruff/black/mypy" +- "Dependency management" +- "Pre-commit hooks" +- "Package my project" +- "src layout vs flat layout" + +**Route to**: [project-structure-and-tooling.md](project-structure-and-tooling.md) + +**Why**: Project setup involves multiple tools (ruff, mypy, pre-commit) and architectural decisions (src vs flat layout). Need comprehensive setup guide. + +**Example queries**: +- "Starting new Python project, how to set up?" +- "Configure ruff for my team" +- "Should I use poetry or pip-tools?" + +--- + +### Lint Warnings and Delinting + +**Symptoms**: +- "Too many lint warnings" +- "Fix ruff errors" +- "How to delint legacy code?" +- "Systematic approach to fixing lint" +- "Don't want to disable warnings" +- "Clean up codebase lint" + +**Route to**: [systematic-delinting.md](systematic-delinting.md) + +**Why**: Delinting requires systematic methodology to fix warnings without disabling them or over-refactoring. Process-driven approach needed. + +**Example queries**: +- "1000+ lint warnings, where to start?" +- "Fix lint warnings systematically" +- "Legacy code has no linting" + +**Note**: If setting UP linting (not fixing), route to [project-structure-and-tooling.md](project-structure-and-tooling.md) first. + +--- + +### Testing Issues + +**Symptoms**: +- "pytest not working" +- "Flaky tests" +- "How to structure tests?" 
+- "Fixture issues" +- "Mock/patch problems" +- "Test coverage" +- "Property-based testing" + +**Route to**: [testing-and-quality.md](testing-and-quality.md) + +**Why**: Testing requires understanding pytest architecture, fixture scopes, mocking patterns, and test organization strategies. + +**Example queries**: +- "Tests fail intermittently" +- "How to use pytest fixtures properly?" +- "Improve test coverage" + +--- + +### Async/Await Issues + +**Symptoms**: +- "asyncio not working" +- "async/await errors" +- "Event loop issues" +- "Blocking the event loop" +- "TaskGroup (Python 3.11+)" +- "Async context managers" +- "When to use async?" + +**Route to**: [async-patterns-and-concurrency.md](async-patterns-and-concurrency.md) + +**Why**: Async programming has unique patterns, pitfalls (blocking event loop), and requires understanding structured concurrency. + +**Example queries**: +- "Getting 'coroutine never awaited' error" +- "How to use Python 3.11 TaskGroup?" +- "Async code is slow" + +--- + +### Performance and Profiling + +**Symptoms**: +- "Python code is slow" +- "How to profile?" +- "Memory leak" +- "Optimize performance" +- "Bottleneck identification" +- "CPU profiling" +- "Memory profiling" + +**Route to**: [debugging-and-profiling.md](debugging-and-profiling.md) FIRST + +**Why**: MUST profile before optimizing. Many "performance" problems are actually I/O or algorithm issues. Profile to identify the real bottleneck. + +**After profiling**, may route to: +- [async-patterns-and-concurrency.md](async-patterns-and-concurrency.md) if I/O-bound +- [scientific-computing-foundations.md](scientific-computing-foundations.md) if array operations slow +- Same skill for optimization strategies + +**Example queries**: +- "Code is slow, how to speed up?" +- "Find bottleneck in my code" +- "Memory usage too high" + +--- + +### Array and DataFrame Operations + +**Symptoms**: +- "NumPy operations" +- "Pandas DataFrame slow" +- "Vectorization" +- "Array performance" +- "Replace loops with numpy" +- "DataFrame best practices" +- "Large dataset processing" + +**Route to**: [scientific-computing-foundations.md](scientific-computing-foundations.md) + +**Why**: NumPy/pandas have specific patterns for vectorization, memory efficiency, and avoiding anti-patterns (iterrows). + +**Example queries**: +- "How to vectorize this loop?" +- "Pandas operation too slow" +- "DataFrame memory usage high" + +--- + +### ML Experiment Tracking and Workflows + +**Symptoms**: +- "Track ML experiments" +- "MLflow setup" +- "Reproducible ML pipelines" +- "ML model lifecycle" +- "Hyperparameter management" +- "ML monitoring" +- "Data versioning" + +**Route to**: [ml-engineering-workflows.md](ml-engineering-workflows.md) + +**Why**: ML workflows require experiment tracking, reproducibility patterns, configuration management, and monitoring strategies. + +**Example queries**: +- "How to track experiments with MLflow?" +- "Make ML training reproducible" +- "Monitor model in production" + +--- + +## Cross-Cutting Scenarios + +### Multiple Skills Needed + +Some scenarios require multiple specialized skills in sequence: + +**New Python project setup with ML**: +1. Route to [project-structure-and-tooling.md](project-structure-and-tooling.md) (setup) +2. THEN [ml-engineering-workflows.md](ml-engineering-workflows.md) (ML specifics) + +**Legacy code cleanup**: +1. Route to [project-structure-and-tooling.md](project-structure-and-tooling.md) (setup linting) +2. 
THEN [systematic-delinting.md](systematic-delinting.md) (fix warnings) + +**Slow pandas code**: +1. Route to [debugging-and-profiling.md](debugging-and-profiling.md) (profile) +2. THEN [scientific-computing-foundations.md](scientific-computing-foundations.md) (optimize) + +**Type hints for existing code**: +1. Route to [project-structure-and-tooling.md](project-structure-and-tooling.md) (setup mypy) +2. THEN `modern-syntax-and-types` (add types) + +**Load in order of execution**: Setup before optimization, diagnosis before fixes, structure before specialization. + +--- + +## Ambiguous Queries - Ask First + +When symptom unclear, ASK ONE clarifying question: + +**"Fix my Python code"** +→ Ask: "What specific issue? Type errors? Lint warnings? Tests failing? Performance?" + +**"Optimize my code"** +→ Ask: "Optimize what? Speed? Memory? Code quality?" + +**"Setup Python project"** +→ Ask: "General project or ML-specific? Starting fresh or fixing existing?" + +**"My code doesn't work"** +→ Ask: "What's broken? Import errors? Type errors? Runtime errors? Tests?" + +**Never guess when ambiguous. Ask once, route accurately.** + +--- + +## Common Routing Mistakes + +| Symptom | Wrong Route | Correct Route | Why | +|---------|-------------|---------------|-----| +| "Code slow" | async-patterns | debugging-and-profiling FIRST | Don't optimize without profiling | +| "Setup linting and fix" | systematic-delinting only | project-structure THEN delinting | Setup before fixing | +| "Pandas slow" | debugging only | debugging THEN scientific-computing | Profile then vectorize | +| "Add type hints" | modern-syntax only | project-structure THEN modern-syntax | Setup mypy first | +| "Fix 1000 lint warnings" | project-structure | systematic-delinting | Process for fixing, not setup | +| "Fix mypy errors" | modern-syntax-and-types | resolving-mypy-errors | Syntax vs resolution process | +| "100 mypy errors" | modern-syntax-and-types | resolving-mypy-errors | Need systematic approach | + +**Key principle**: Diagnosis before solutions, setup before optimization, profile before performance fixes. + +--- + +## Red Flags - Stop and Route + +If you catch yourself about to: +- Suggest "use async" for slow code → Route to [debugging-and-profiling.md](debugging-and-profiling.md) to profile first +- Show pytest example → Route to [testing-and-quality.md](testing-and-quality.md) for complete patterns +- Suggest "just fix the lint warnings" → Route to [systematic-delinting.md](systematic-delinting.md) for methodology +- Show type hint syntax → Route to `modern-syntax-and-types` for comprehensive guide +- Suggest "use numpy instead" → Route to [scientific-computing-foundations.md](scientific-computing-foundations.md) for vectorization patterns + +**All of these mean: You're about to give incomplete advice. Route to the specialist instead.** + +--- + +## Common Rationalizations (Don't Do These) + +| Excuse | Reality | What To Do | +|--------|---------|------------| +| "User is rushed, skip routing" | Routing takes 5 seconds. Wrong fix wastes hours. | Route anyway - specialists have quick answers | +| "Simple question" | Simple questions deserve complete answers. | Route to specialist for comprehensive coverage | +| "Just need quick syntax" | Syntax without context leads to misuse. | Route to get syntax + patterns + anti-patterns | +| "User sounds experienced" | Experience in one area ≠ expertise in all Python. | Route based on symptoms, not perceived skill | +| "Already tried X" | May have done X wrong or incompletely. 
| Route to specialist to verify X properly |
| "Too many skills" | 9 focused skills > 1 overwhelming wall of text. | Use router to navigate - that's its purpose |

**If you catch yourself thinking ANY of these, STOP and route to the specialist.**

---

## Red Flags Checklist - Self-Check Before Answering

Before giving ANY Python advice, ask yourself:

1. ❓ **Did I identify the symptom?**
   - If no → Read query again, identify symptoms

2. ❓ **Is this symptom in my routing table?**
   - If yes → Route to that specialist
   - If no → Ask clarifying question

3. ❓ **Am I about to give advice directly?**
   - If yes → STOP. Why am I not routing?
   - Check rationalization table - am I making excuses?

4. ❓ **Is this a diagnosis issue or solution issue?**
   - Diagnosis → Route to profiling/debugging skill FIRST
   - Solution → Route to appropriate implementation skill

5. ❓ **Is query ambiguous?**
   - If yes → Ask ONE clarifying question
   - If no → Route confidently

6. ❓ **Am I feeling pressure to skip routing?**
   - Time pressure → Route anyway (faster overall)
   - Complexity → Route anyway (specialists handle complexity)
   - User confidence → Route anyway (verify assumptions)
   - "Simple" question → Route anyway (simple deserves correct)

**If you failed ANY check above, do NOT give direct advice. Route to specialist or ask clarifying question.**

---

## Python Engineering Specialist Skills

After routing, load the appropriate specialist skill for detailed guidance:

1. [modern-syntax-and-types.md](modern-syntax-and-types.md) - Type hints, mypy/pyright, Python 3.10-3.12 features, generics, protocols
2. [resolving-mypy-errors.md](resolving-mypy-errors.md) - Systematic mypy error resolution, type: ignore best practices, typing legacy code
3. [project-structure-and-tooling.md](project-structure-and-tooling.md) - pyproject.toml, ruff, pre-commit, dependency management, packaging
4. [systematic-delinting.md](systematic-delinting.md) - Process for fixing lint warnings without disabling or over-refactoring
5. [testing-and-quality.md](testing-and-quality.md) - pytest patterns, fixtures, mocking, coverage, property-based testing
6. [async-patterns-and-concurrency.md](async-patterns-and-concurrency.md) - async/await, asyncio, TaskGroup, structured concurrency, threading
7. [scientific-computing-foundations.md](scientific-computing-foundations.md) - NumPy/pandas, vectorization, memory efficiency, large datasets
8. [ml-engineering-workflows.md](ml-engineering-workflows.md) - MLflow, experiment tracking, reproducibility, monitoring, model lifecycle
9. [debugging-and-profiling.md](debugging-and-profiling.md) - pdb/debugpy, cProfile, memory_profiler, optimization strategies

---

## When NOT to Use Python Skills

**Skip Python pack when**:
- Non-Python language (use appropriate language pack)
- Algorithm selection (use computer science / algorithms pack)
- Infrastructure/deployment (use DevOps/infrastructure pack)
- Database design (use database pack)

**Python pack is for**: Python-specific implementation, tooling, patterns, debugging, and optimization.
+ +--- + +## Diagnosis-First Principle + +**Critical**: Many Python issues require diagnosis before solutions: + +| Issue Type | Diagnosis Skill | Then Solution Skill | +|------------|----------------|---------------------| +| Performance | debugging-and-profiling | async or scientific-computing | +| Slow arrays | debugging-and-profiling | scientific-computing-foundations | +| Type errors | modern-syntax-and-types | modern-syntax-and-types (same) | +| Lint warnings | systematic-delinting | systematic-delinting (same) | + +**If unclear what's wrong, route to diagnostic skill first.** + +--- + +## Integration Notes + +**Phase 1 - Standalone**: Python skills are self-contained + +**Future cross-references**: +- superpowers:test-driven-development (TDD methodology before implementing) +- superpowers:systematic-debugging (systematic debugging before profiling) + +**Current focus**: Route within Python pack only. Other packs handle other concerns. diff --git a/skills/using-python-engineering/async-patterns-and-concurrency.md b/skills/using-python-engineering/async-patterns-and-concurrency.md new file mode 100644 index 0000000..095b870 --- /dev/null +++ b/skills/using-python-engineering/async-patterns-and-concurrency.md @@ -0,0 +1,1131 @@ + +# Async Patterns and Concurrency + +## Overview + +**Core Principle:** Async code is about I/O concurrency, not CPU parallelism. Use async when waiting for network, files, or databases. Don't use async to speed up CPU-bound work. + +Python's async/await (asyncio) enables single-threaded concurrency through cooperative multitasking. Structured concurrency (TaskGroup in 3.11+) makes async code safer and easier to reason about. The most common mistake: blocking the event loop with synchronous operations. + +## When to Use + +**Use this skill when:** +- "asyncio not working" +- "async/await errors" +- "Event loop issues" +- "Coroutine never awaited" +- "How to use TaskGroup?" +- "When to use async?" +- "Async code is slow" +- "Blocking the event loop" + +**Don't use when:** +- CPU-bound work (use multiprocessing or threads) +- Setting up project (use project-structure-and-tooling) +- Profiling needed (use debugging-and-profiling first) + +**Symptoms triggering this skill:** +- RuntimeWarning: coroutine was never awaited +- Event loop errors +- Async functions not running concurrently +- Need to parallelize I/O operations + + +## Async Fundamentals + +### When to Use Async vs Sync + +```python +# ❌ WRONG: Using async for CPU-bound work +async def calculate_fibonacci(n: int) -> int: + if n < 2: + return n + return await calculate_fibonacci(n-1) + await calculate_fibonacci(n-2) +# Problem: No I/O, just CPU work. Async adds overhead without benefit. 

# ✅ CORRECT: Use regular function for CPU work
def calculate_fibonacci(n: int) -> int:
    if n < 2:
        return n
    return calculate_fibonacci(n-1) + calculate_fibonacci(n-2)

# ✅ CORRECT: Use async for I/O-bound work (share one session across calls)
async def fetch_user(session: aiohttp.ClientSession, user_id: int) -> dict:
    async with session.get(f"https://api.example.com/users/{user_id}") as resp:
        return await resp.json()
# Async shines: waiting for network response, can do other work

# ✅ CORRECT: Use async when orchestrating multiple I/O operations
async def fetch_all_users(user_ids: list[int]) -> list[dict]:
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_user(session, uid) for uid in user_ids]
        return await asyncio.gather(*tasks)
# Multiple network calls run concurrently
```

**Why this matters**: Async adds complexity. Only use when you benefit from I/O concurrency. For CPU work, use threads or multiprocessing.

### Basic async/await Syntax

```python
# ❌ WRONG: Forgetting await
async def get_data():
    return fetch_from_api()  # Returns coroutine, doesn't execute!

result = get_data()  # RuntimeWarning: coroutine never awaited
print(result)  # Prints <coroutine object get_data at 0x...>, not data

# ✅ CORRECT: Always await async functions
async def get_data():
    return await fetch_from_api()

# ✅ CORRECT: Running from sync code
import asyncio

def main():
    result = asyncio.run(get_data())
    print(result)

# ✅ CORRECT: Running from async code
async def main():
    result = await get_data()
    print(result)

asyncio.run(main())
```

**Why this matters**: Async functions return coroutines. Must `await` them to execute. `asyncio.run()` bridges sync and async worlds.

### Running the Event Loop

```python
# ❌ WRONG: Running event loop multiple times
import asyncio

asyncio.run(task1())
asyncio.run(task2())  # Creates new event loop, inefficient

# ✅ CORRECT: Single event loop for all async work
async def main():
    await task1()
    await task2()

asyncio.run(main())

# ❌ WRONG: Mixing asyncio.run and manual loop management
loop = asyncio.get_event_loop()
loop.run_until_complete(task1())
asyncio.run(task2())  # Error-prone: two loop-management styles fighting each other

# ✅ CORRECT: Use asyncio.run() (Python 3.7+)
asyncio.run(main())

# ✅ CORRECT: For advanced cases, manual loop management
async def main():
    await task1()
    await task2()

loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(main())
finally:
    loop.close()
```

**Why this matters**: `asyncio.run()` handles loop creation and cleanup automatically. Prefer it unless you need fine-grained control.


## Structured Concurrency with TaskGroup (Python 3.11+)

### TaskGroup Basics

```python
# ❌ WRONG: Creating tasks without proper cleanup (old style)
async def fetch_all_old(urls: list[str]) -> list[str]:
    tasks = []
    for url in urls:
        task = asyncio.create_task(fetch(url))
        tasks.append(task)

    results = await asyncio.gather(*tasks)
    return results
# Problem: If one task fails, others continue. No automatic cleanup.

# ✅ CORRECT: TaskGroup (Python 3.11+)
async def fetch_all(urls: list[str]) -> list[str]:
    async with asyncio.TaskGroup() as tg:
        tasks = [tg.create_task(fetch(url)) for url in urls]

    # When exiting context, all tasks guaranteed complete or cancelled
    return [task.result() for task in tasks]

# Why this matters: TaskGroup ensures:
# 1. All tasks complete before proceeding
# 2. If any task fails, all others cancelled
# 3.
Automatic cleanup, no leaked tasks +``` + +### Handling Errors with TaskGroup + +```python +# ❌ WRONG: Silent failures with gather +async def process_all_gather(items: list[str]) -> list[str]: + tasks = [asyncio.create_task(process(item)) for item in items] + results = await asyncio.gather(*tasks, return_exceptions=True) + return [r for r in results if not isinstance(r, Exception)] +# Problem: Errors silently ignored, hard to debug + +# ✅ CORRECT: TaskGroup raises ExceptionGroup +async def process_all(items: list[str]) -> list[str]: + async with asyncio.TaskGroup() as tg: + tasks = [tg.create_task(process(item)) for item in items] + return [task.result() for task in tasks] + +# Usage with error handling +try: + results = await process_all(items) +except* ValueError as eg: + # Handle all ValueErrors + for exc in eg.exceptions: + log.error(f"Validation error: {exc}") +except* ConnectionError as eg: + # Handle all ConnectionErrors + for exc in eg.exceptions: + log.error(f"Network error: {exc}") + +# ✅ CORRECT: Selective error handling with gather +async def process_with_fallback(items: list[str]) -> list[str]: + tasks = [asyncio.create_task(process(item)) for item in items] + results = await asyncio.gather(*tasks, return_exceptions=True) + + processed = [] + for item, result in zip(items, results): + if isinstance(result, Exception): + log.warning(f"Failed to process {item}: {result}") + processed.append(None) # Or default value + else: + processed.append(result) + return processed +``` + +**Why this matters**: TaskGroup provides structured concurrency with automatic cleanup. Use `gather` when you need partial results despite failures. + +### Timeout Handling + +```python +# ❌ WRONG: No timeout on I/O operations +async def fetch_data(url: str) -> str: + async with aiohttp.ClientSession() as session: + async with session.get(url) as resp: + return await resp.text() +# Problem: Can hang forever if server doesn't respond + +# ✅ CORRECT: Timeout with asyncio.timeout (Python 3.11+) +async def fetch_data(url: str) -> str: + async with asyncio.timeout(10.0): # 10 second timeout + async with aiohttp.ClientSession() as session: + async with session.get(url) as resp: + return await resp.text() +# Raises TimeoutError after 10 seconds + +# ✅ CORRECT: Timeout on TaskGroup +async def fetch_all_with_timeout(urls: list[str]) -> list[str]: + async with asyncio.timeout(30.0): # Total timeout + async with asyncio.TaskGroup() as tg: + tasks = [tg.create_task(fetch_data(url)) for url in urls] + return [task.result() for task in tasks] + +# ✅ CORRECT: Individual timeouts (Python <3.11) +async def fetch_with_timeout_old(url: str) -> str: + try: + return await asyncio.wait_for(fetch_data(url), timeout=10.0) + except asyncio.TimeoutError: + log.error(f"Timeout fetching {url}") + raise +``` + +**Why this matters**: Always timeout I/O operations. Network calls can hang indefinitely. `asyncio.timeout()` (3.11+) is cleaner than `wait_for()`. + + +## Async Context Managers + +### Basic Async Context Manager + +```python +# ❌ WRONG: Using sync context manager in async code +class DatabaseConnection: + def __enter__(self): + self.conn = connect_to_db() # Blocking I/O! + return self.conn + + def __exit__(self, exc_type, exc_val, exc_tb): + self.conn.close() # Blocking I/O! 
+ +async def query(): + with DatabaseConnection() as conn: # Blocks event loop + return await conn.query("SELECT * FROM users") + +# ✅ CORRECT: Async context manager +class AsyncDatabaseConnection: + async def __aenter__(self): + self.conn = await async_connect_to_db() + return self.conn + + async def __aexit__(self, exc_type, exc_val, exc_tb): + await self.conn.close() + +async def query(): + async with AsyncDatabaseConnection() as conn: + return await conn.query("SELECT * FROM users") +``` + +### Using contextlib for Async Context Managers + +```python +from contextlib import asynccontextmanager + +# ✅ CORRECT: Simple async context manager with decorator +@asynccontextmanager +async def database_connection(host: str): + conn = await connect_to_database(host) + try: + yield conn + finally: + await conn.close() + +# Usage +async def fetch_users(): + async with database_connection("localhost") as conn: + return await conn.query("SELECT * FROM users") + +# ✅ CORRECT: Resource pool management +@asynccontextmanager +async def http_session(): + session = aiohttp.ClientSession() + try: + yield session + finally: + await session.close() + +async def fetch_multiple(urls: list[str]): + async with http_session() as session: + tasks = [fetch_url(session, url) for url in urls] + return await asyncio.gather(*tasks) +``` + +**Why this matters**: Async context managers ensure resources cleaned up properly. Use `@asynccontextmanager` for simple cases, `__aenter__/__aexit__` for complex ones. + + +## Async Iterators and Generators + +### Async Iterators + +```python +# ❌ WRONG: Sync iterator doing async work +class DataFetcher: + def __init__(self, ids: list[int]): + self.ids = ids + self.index = 0 + + def __iter__(self): + return self + + def __next__(self): + if self.index >= len(self.ids): + raise StopIteration + data = asyncio.run(fetch_data(self.ids[self.index])) # Don't do this! + self.index += 1 + return data + +# ✅ CORRECT: Async iterator +class AsyncDataFetcher: + def __init__(self, ids: list[int]): + self.ids = ids + self.index = 0 + + def __aiter__(self): + return self + + async def __anext__(self): + if self.index >= len(self.ids): + raise StopAsyncIteration + data = await fetch_data(self.ids[self.index]) + self.index += 1 + return data + +# Usage +async def process_all(): + async for data in AsyncDataFetcher([1, 2, 3, 4]): + print(data) +``` + +### Async Generators + +```python +# ✅ CORRECT: Async generator (simpler than iterator) +async def fetch_users_paginated(page_size: int = 100): + page = 0 + while True: + users = await fetch_page(page, page_size) + if not users: + break + for user in users: + yield user + page += 1 + +# Usage +async def process_all_users(): + async for user in fetch_users_paginated(): + await process_user(user) + +# ✅ CORRECT: Async generator with cleanup +async def stream_file_lines(path: str): + async with aiofiles.open(path) as f: + async for line in f: + yield line.strip() + +# Usage with async comprehension +async def load_data(path: str) -> list[str]: + return [line async for line in stream_file_lines(path)] +``` + +**Why this matters**: Async iterators/generators enable streaming I/O-bound data without loading everything into memory. Essential for large datasets. + + +## Common Async Pitfalls + +### Blocking the Event Loop + +```python +# ❌ WRONG: Blocking operation in async function +import time +import requests + +async def fetch_data(url: str) -> str: + # Blocks entire event loop for 2 seconds! 
+ time.sleep(2) + + # Also blocks event loop (requests is synchronous) + response = requests.get(url) + return response.text + +# ✅ CORRECT: Use async sleep and async HTTP +import asyncio +import aiohttp + +async def fetch_data(url: str) -> str: + await asyncio.sleep(2) # Non-blocking sleep + + async with aiohttp.ClientSession() as session: + async with session.get(url) as resp: + return await resp.text() + +# ✅ CORRECT: If must use blocking code, run in executor +import asyncio +import requests + +async def fetch_data_sync(url: str) -> str: + loop = asyncio.get_running_loop() + + # Run blocking code in thread pool + response = await loop.run_in_executor( + None, # Use default executor + requests.get, + url + ) + return response.text + +# ✅ CORRECT: CPU-bound work in process pool +async def heavy_computation(data: bytes) -> bytes: + loop = asyncio.get_running_loop() + + # Run in process pool for CPU work + with concurrent.futures.ProcessPoolExecutor() as pool: + result = await loop.run_in_executor(pool, process_data, data) + return result +``` + +**Why this matters**: Blocking the event loop stops ALL async code. Use async libraries (aiohttp not requests), async sleep, or run_in_executor for blocking code. + +### Forgetting to Await + +```python +# ❌ WRONG: Not awaiting async functions +async def main(): + fetch_data() # Returns coroutine, doesn't run! + print("Done") + +# ✅ CORRECT: Always await +async def main(): + await fetch_data() + print("Done") + +# ❌ WRONG: Collecting coroutines without running them +async def process_all(items: list[str]): + results = [process_item(item) for item in items] # List of coroutines! + return results + +# ✅ CORRECT: Await or gather +async def process_all(items: list[str]): + tasks = [asyncio.create_task(process_item(item)) for item in items] + return await asyncio.gather(*tasks) + +# ✅ BETTER: TaskGroup (Python 3.11+) +async def process_all(items: list[str]): + async with asyncio.TaskGroup() as tg: + tasks = [tg.create_task(process_item(item)) for item in items] + return [task.result() for task in tasks] +``` + +### Shared Mutable State + +```python +# ❌ WRONG: Shared mutable state without locks +counter = 0 + +async def increment(): + global counter + temp = counter + await asyncio.sleep(0) # Yield control + counter = temp + 1 # Race condition! + +async def main(): + await asyncio.gather(*[increment() for _ in range(100)]) + print(counter) # Not 100! Lost updates due to race + +# ✅ CORRECT: Use asyncio.Lock +counter = 0 +lock = asyncio.Lock() + +async def increment(): + global counter + async with lock: + temp = counter + await asyncio.sleep(0) + counter = temp + 1 + +async def main(): + await asyncio.gather(*[increment() for _ in range(100)]) + print(counter) # 100, as expected + +# ✅ BETTER: Avoid shared state +async def increment(current: int) -> int: + await asyncio.sleep(0) + return current + 1 + +async def main(): + results = await asyncio.gather(*[increment(i) for i in range(100)]) + print(sum(results)) +``` + +**Why this matters**: Async code is concurrent. Race conditions exist. Use locks or avoid shared mutable state. + + +## Async Patterns + +### Fire and Forget + +```python +# ❌ WRONG: Creating task without tracking it +async def main(): + asyncio.create_task(background_job()) # Task may not complete! 
    return "Done"

# ✅ CORRECT: Track background tasks
background_tasks = set()

async def main():
    task = asyncio.create_task(background_job())
    background_tasks.add(task)
    task.add_done_callback(background_tasks.discard)
    return "Done"

# ✅ CORRECT: Wait for background tasks before exit
async def main():
    task = asyncio.create_task(background_job())
    try:
        return "Done"
    finally:
        await task
```

### Retry with Exponential Backoff

```python
import asyncio
import random

# ❌ WRONG: Retry without delay
async def fetch_with_retry(url: str, max_retries: int = 3) -> str:
    for attempt in range(max_retries):
        try:
            return await fetch_data(url)
        except Exception:
            if attempt == max_retries - 1:
                raise
    # Hammers server, no backoff

# ✅ CORRECT: Exponential backoff with jitter
async def fetch_with_retry(
    url: str,
    max_retries: int = 3,
    base_delay: float = 1.0
) -> str:
    for attempt in range(max_retries):
        try:
            return await fetch_data(url)
        except Exception as e:
            if attempt == max_retries - 1:
                raise

            # Exponential backoff with jitter
            delay = base_delay * (2 ** attempt) + random.uniform(0, 1)
            log.warning(f"Retry {attempt + 1}/{max_retries} after {delay:.2f}s: {e}")
            await asyncio.sleep(delay)

    raise RuntimeError("Unreachable")
```

### Rate Limiting

```python
import asyncio
import time

# ❌ WRONG: No rate limiting
async def fetch_all(urls: list[str]) -> list[str]:
    tasks = [asyncio.create_task(fetch(url)) for url in urls]
    return await asyncio.gather(*tasks)
# Can overwhelm server with 1000s of concurrent requests

# ✅ CORRECT: Semaphore for concurrent request limit
async def fetch_all(urls: list[str], max_concurrent: int = 10) -> list[str]:
    semaphore = asyncio.Semaphore(max_concurrent)

    async def fetch_with_sem(url: str) -> str:
        async with semaphore:
            return await fetch(url)

    tasks = [asyncio.create_task(fetch_with_sem(url)) for url in urls]
    return await asyncio.gather(*tasks)

# ✅ CORRECT: Token bucket rate limiting
class RateLimiter:
    def __init__(self, rate: float, capacity: int):
        self.rate = rate  # Tokens per second
        self.capacity = capacity
        self.tokens = capacity
        self.last_update = time.monotonic()  # Monotonic clock; safe to construct outside a running loop
        self.lock = asyncio.Lock()

    async def acquire(self):
        async with self.lock:
            now = time.monotonic()
            elapsed = now - self.last_update
            self.tokens = min(self.capacity, self.tokens + elapsed * self.rate)
            self.last_update = now

            if self.tokens < 1:
                wait_time = (1 - self.tokens) / self.rate
                await asyncio.sleep(wait_time)
                self.tokens = 0
            else:
                self.tokens -= 1

# Usage
rate_limiter = RateLimiter(rate=10.0, capacity=10)  # 10 req/sec

async def fetch_with_limit(url: str) -> str:
    await rate_limiter.acquire()
    return await fetch(url)
```

**Why this matters**: Rate limiting prevents overwhelming servers and respects API limits. Semaphore limits concurrency, token bucket smooths bursts.
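
To make the token-bucket behavior concrete, here is a minimal usage sketch that drives the `RateLimiter` above from `asyncio.gather`; `fetch_page` is a hypothetical stand-in for any real rate-limited request:

```python
import asyncio

async def fetch_page(url: str) -> str:
    await asyncio.sleep(0.05)  # Hypothetical I/O standing in for a real HTTP request
    return f"body of {url}"

async def main() -> None:
    limiter = RateLimiter(rate=5.0, capacity=5)  # Bursts up to 5, then ~5 req/sec

    async def fetch_limited(url: str) -> str:
        await limiter.acquire()  # Each request waits for a token first
        return await fetch_page(url)

    urls = [f"https://example.com/{i}" for i in range(20)]
    results = await asyncio.gather(*(fetch_limited(u) for u in urls))
    print(f"fetched {len(results)} pages")  # 20 requests spread over roughly 3-4 seconds

asyncio.run(main())
```
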
### Async Queue for Producer/Consumer

```python
# ✅ CORRECT: Producer/consumer with asyncio.Queue
import asyncio

async def producer(queue: asyncio.Queue, items: list[str]):
    for item in items:
        await queue.put(item)
        await asyncio.sleep(0.1)  # Simulate work

    # Signal completion
    await queue.put(None)

async def consumer(queue: asyncio.Queue, consumer_id: int):
    while True:
        item = await queue.get()

        if item is None:
            # Re-queue sentinel for other consumers
            await queue.put(None)
            break

        print(f"Consumer {consumer_id} processing {item}")
        await asyncio.sleep(0.2)  # Simulate work
        queue.task_done()

async def main():
    queue = asyncio.Queue(maxsize=10)
    items = [f"item_{i}" for i in range(20)]

    # Start producer and consumers
    async with asyncio.TaskGroup() as tg:
        tg.create_task(producer(queue, items))
        for i in range(3):
            tg.create_task(consumer(queue, i))

    # TaskGroup exit guarantees producer and all consumers finished,
    # so every item has been processed. Don't call queue.join() here:
    # the sentinel puts are never task_done()'d, so join() would hang.

# ✅ CORRECT: Multiple producers, multiple consumers
async def worker(name: str, queue: asyncio.Queue):
    while True:
        item = await queue.get()
        if item is None:
            break

        await process_item(item)
        queue.task_done()

async def main():
    queue = asyncio.Queue()

    # Create workers
    workers = [asyncio.create_task(worker(f"worker-{i}", queue)) for i in range(5)]

    # Add work
    for item in items:
        await queue.put(item)

    # Wait for all work done
    await queue.join()

    # Stop workers
    for _ in workers:
        await queue.put(None)
    await asyncio.gather(*workers)
```

**Why this matters**: asyncio.Queue is async-safe (though not thread-safe; use `queue.Queue` across threads). Perfect for producer/consumer patterns in async code.


## Threading vs Async vs Multiprocessing

### When to Use What

```python
import asyncio
import concurrent.futures

# CPU-bound work: Use multiprocessing
def cpu_bound(n: int) -> int:
    return sum(i * i for i in range(n))

async def process_cpu_tasks(data: list[int]) -> list[int]:
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as pool:
        results = await asyncio.gather(*[
            loop.run_in_executor(pool, cpu_bound, n) for n in data
        ])
    return results

# I/O-bound work: Use async
async def io_bound(url: str) -> str:
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            return await resp.text()

async def process_io_tasks(urls: list[str]) -> list[str]:
    return await asyncio.gather(*[io_bound(url) for url in urls])

# Blocking I/O (no async library): Use threads
def blocking_io(path: str) -> str:
    with open(path) as f:  # Blocking file I/O
        return f.read()

async def process_files(paths: list[str]) -> list[str]:
    loop = asyncio.get_running_loop()
    with concurrent.futures.ThreadPoolExecutor() as pool:
        results = await asyncio.gather(*[
            loop.run_in_executor(pool, blocking_io, path) for path in paths
        ])
    return results
```

**Decision tree:**
```
Is work CPU-bound?
├─ Yes → multiprocessing (ProcessPoolExecutor)
└─ No → I/O-bound
    ├─ Async library available? → async/await
    └─ Only sync library?
→ threads (ThreadPoolExecutor) +``` + +### Combining Async and Threads + +```python +# ✅ CORRECT: Running async code in thread +import threading + +def run_async_in_thread(coro): + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + try: + return loop.run_until_complete(coro) + finally: + loop.close() + +def sync_function(): + result = run_async_in_thread(async_operation()) + return result + +# ✅ CORRECT: Thread-safe async queue +class AsyncThreadSafeQueue: + def __init__(self): + self._queue = queue.Queue() + + async def get(self): + loop = asyncio.get_running_loop() + return await loop.run_in_executor(None, self._queue.get) + + async def put(self, item): + loop = asyncio.get_running_loop() + await loop.run_in_executor(None, self._queue.put, item) +``` + + +## Debugging Async Code + +### Common Errors and Solutions + +```python +# Error: RuntimeWarning: coroutine 'fetch' was never awaited +# ❌ WRONG: +async def main(): + fetch_data() # Missing await + +# ✅ CORRECT: +async def main(): + await fetch_data() + +# Error: RuntimeError: Event loop is closed +# ❌ WRONG: +asyncio.run(coro1()) +asyncio.run(coro2()) # Creates new loop, first loop closed + +# ✅ CORRECT: +async def main(): + await coro1() + await coro2() +asyncio.run(main()) + +# Error: RuntimeError: Task got Future attached to different loop +# ❌ WRONG: +loop1 = asyncio.new_event_loop() +task = loop1.create_task(coro()) +loop2 = asyncio.new_event_loop() +loop2.run_until_complete(task) # Task from different loop! + +# ✅ CORRECT: Use same loop +loop = asyncio.new_event_loop() +task = loop.create_task(coro()) +loop.run_until_complete(task) +``` + +### Enabling Debug Mode + +```python +# Enable asyncio debug mode for better errors +import asyncio +import logging + +# Method 1: Environment variable +# PYTHONASYNCIODEBUG=1 python script.py + +# Method 2: In code +asyncio.run(main(), debug=True) + +# Method 3: For existing loop +loop = asyncio.get_event_loop() +loop.set_debug(True) + +# Configure logging +logging.basicConfig(level=logging.DEBUG) + +# Debug mode enables: +# - Warnings for slow callbacks (>100ms) +# - Warnings for coroutines never awaited +# - Better stack traces +``` + +### Detecting Blocking Code + +```python +# ✅ CORRECT: Monitor event loop lag +import asyncio +import time + +class LoopMonitor: + def __init__(self, threshold: float = 0.1): + self.threshold = threshold + self.last_check = time.monotonic() + + async def monitor(self): + while True: + now = time.monotonic() + lag = now - self.last_check - 1.0 # Expecting 1 second sleep + + if lag > self.threshold: + log.warning(f"Event loop blocked for {lag:.3f}s") + + self.last_check = now + await asyncio.sleep(1.0) + +async def main(): + monitor = LoopMonitor() + asyncio.create_task(monitor.monitor()) + + # Your async code here + await run_application() +``` + + +## Async Libraries Ecosystem + +### Essential Async Libraries + +```python +# HTTP client +import aiohttp + +async def fetch(url: str) -> str: + async with aiohttp.ClientSession() as session: + async with session.get(url) as resp: + return await resp.text() + +# File I/O +import aiofiles + +async def read_file(path: str) -> str: + async with aiofiles.open(path) as f: + return await f.read() + +# Database (PostgreSQL) +import asyncpg + +async def query_db(): + conn = await asyncpg.connect('postgresql://user@localhost/db') + try: + rows = await conn.fetch('SELECT * FROM users') + return rows + finally: + await conn.close() + +# Redis +import aioredis + +async def cache_get(key: str) -> str | None: + 
redis = await aioredis.create_redis_pool('redis://localhost') + try: + value = await redis.get(key) + return value.decode() if value else None + finally: + redis.close() + await redis.wait_closed() +``` + +### Async Testing with pytest-asyncio + +```python +# Install: pip install pytest-asyncio + +import pytest + +# Mark async test +@pytest.mark.asyncio +async def test_fetch_data(): + result = await fetch_data("https://api.example.com") + assert result is not None + +# Async fixture +@pytest.fixture +async def http_session(): + async with aiohttp.ClientSession() as session: + yield session + +@pytest.mark.asyncio +async def test_with_session(http_session): + async with http_session.get("https://api.example.com") as resp: + assert resp.status == 200 +``` + + +## Anti-Patterns + +### Async Over Everything + +```python +# ❌ WRONG: Making everything async without reason +async def calculate_total(prices: list[float]) -> float: + total = 0.0 + for price in prices: + total += price # No I/O, no benefit from async + return total + +# ✅ CORRECT: Keep sync when no I/O +def calculate_total(prices: list[float]) -> float: + return sum(prices) + +# ❌ WRONG: Async wrapper for sync function +async def async_sum(numbers: list[int]) -> int: + return sum(numbers) # Why? + +# ✅ CORRECT: Only async when doing I/O +async def fetch_and_sum(urls: list[str]) -> int: + results = await asyncio.gather(*[fetch_number(url) for url in urls]) + return sum(results) # sum() is sync, that's fine +``` + +### Creating Too Many Tasks + +```python +# ❌ WRONG: Creating millions of tasks +async def process_all(items: list[str]): # 1M items + tasks = [asyncio.create_task(process(item)) for item in items] + return await asyncio.gather(*tasks) +# Problem: Creates 1M tasks, high memory usage + +# ✅ CORRECT: Batch processing with semaphore +async def process_all(items: list[str], max_concurrent: int = 100): + semaphore = asyncio.Semaphore(max_concurrent) + + async def process_with_sem(item: str): + async with semaphore: + return await process(item) + + return await asyncio.gather(*[process_with_sem(item) for item in items]) + +# ✅ BETTER: Process in batches +async def process_all(items: list[str], batch_size: int = 100): + results = [] + for i in range(0, len(items), batch_size): + batch = items[i:i + batch_size] + batch_results = await asyncio.gather(*[process(item) for item in batch]) + results.extend(batch_results) + return results +``` + +### Mixing Sync and Async Poorly + +```python +# ❌ WRONG: Calling asyncio.run inside async function +async def bad_function(): + result = asyncio.run(some_async_function()) # Error! + return result + +# ✅ CORRECT: Just await +async def good_function(): + result = await some_async_function() + return result + +# ❌ WRONG: Sync wrapper calling async repeatedly +def process_all_sync(items: list[str]) -> list[str]: + return [asyncio.run(process(item)) for item in items] +# Creates new event loop for each item! + +# ✅ CORRECT: Single event loop +def process_all_sync(items: list[str]) -> list[str]: + async def process_all_async(): + return await asyncio.gather(*[process(item) for item in items]) + + return asyncio.run(process_all_async()) +``` + + +## Decision Trees + +### Should I Use Async? + +``` +Does my code do I/O? (network, files, database) +├─ No → Don't use async (CPU-bound work) +└─ Yes → Does an async library exist? + ├─ Yes → Use async/await + └─ No → Can I use sync library with threads? 
+ ├─ Yes → Use run_in_executor with ThreadPoolExecutor + └─ No → Rethink approach or write async wrapper +``` + +### Concurrent Execution Strategy + +``` +What am I waiting for? +├─ Network/database → async/await (asyncio) +├─ File I/O → async/await with aiofiles +├─ CPU computation → multiprocessing (ProcessPoolExecutor) +├─ Blocking library (no async version) → threads (ThreadPoolExecutor) +└─ Nothing (pure computation) → Regular sync code +``` + +### Error Handling in Concurrent Tasks + +``` +Do I need all results? +├─ Yes → TaskGroup (3.11+) or gather without return_exceptions +│ └─ Fails fast on first error +└─ No (partial results OK) → gather with return_exceptions=True + └─ Filter exceptions from results +``` + + +## Integration with Other Skills + +**After using this skill:** +- If profiling async code → See @debugging-and-profiling for async profiling +- If testing async code → See @testing-and-quality for pytest-asyncio +- If setting up project → See @project-structure-and-tooling for async dependencies + +**Before using this skill:** +- If code is slow → Use @debugging-and-profiling to verify it's I/O-bound first +- If starting project → Use @project-structure-and-tooling to set up dependencies + + +## Quick Reference + +### Python 3.11+ Features + +| Feature | Description | When to Use | +|---------|-------------|-------------| +| TaskGroup | Structured concurrency | Multiple concurrent tasks, automatic cleanup | +| asyncio.timeout() | Context manager for timeouts | Cleaner than wait_for() | +| except* | Exception group handling | Handle multiple concurrent errors | + +### Common Async Patterns + +```python +# Concurrent execution +async with asyncio.TaskGroup() as tg: + tasks = [tg.create_task(func(x)) for x in items] +results = [t.result() for t in tasks] + +# Timeout +async with asyncio.timeout(10.0): + result = await long_operation() + +# Rate limiting +semaphore = asyncio.Semaphore(10) +async with semaphore: + await rate_limited_operation() + +# Retry with backoff +for attempt in range(max_retries): + try: + return await operation() + except Exception: + await asyncio.sleep(2 ** attempt) +``` + +### When NOT to Use Async + +- Pure computation (no I/O) +- Single I/O operation (overhead not worth it) +- CPU-bound work (use multiprocessing) +- When sync code is simpler and performance is acceptable diff --git a/skills/using-python-engineering/debugging-and-profiling.md b/skills/using-python-engineering/debugging-and-profiling.md new file mode 100644 index 0000000..5f5464a --- /dev/null +++ b/skills/using-python-engineering/debugging-and-profiling.md @@ -0,0 +1,1047 @@ + +# Debugging and Profiling + +## Overview + +**Core Principle:** Profile before optimizing. Humans are terrible at guessing where code is slow. Always measure before making changes. + +Python debugging and profiling enables systematic problem diagnosis and performance optimization. Use debugpy/pdb for step-through debugging, cProfile for CPU profiling, memory_profiler for memory analysis. The biggest mistake: optimizing code without profiling first—you'll likely optimize the wrong thing. + +## When to Use + +**Use this skill when:** +- "Code is slow" +- "How to profile Python?" +- "Memory leak" +- "Debugging not working" +- "Find bottleneck" +- "Optimize performance" +- "Step through code" +- "Where is my code spending time?" + +**Don't use when:** +- Setting up project (use project-structure-and-tooling) +- Already know what to optimize (but still profile to verify!) 
+- Algorithm selection (different skill domain) + +**Symptoms triggering this skill:** +- Code runs slower than expected +- Memory usage growing over time +- Need to understand execution flow +- Performance degraded after changes + + +## Debugging Fundamentals + +### Using debugpy with VS Code + +```python +# ✅ CORRECT: debugpy for remote debugging +import debugpy + +# Allow VS Code to attach +debugpy.listen(5678) +print("Waiting for debugger to attach...") +debugpy.wait_for_client() + +# Your code here +def process_data(data): + result = [] + for item in data: + # Set breakpoint in VS Code on this line + transformed = transform(item) + result.append(transformed) + return result + +# VS Code launch.json configuration: +""" +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Attach", + "type": "python", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + } + } + ] +} +""" +``` + +### Using pdb (Python Debugger) + +```python +# ✅ CORRECT: pdb for interactive debugging +import pdb + +def buggy_function(data): + result = [] + for i, item in enumerate(data): + # Drop into debugger + pdb.set_trace() # Or: breakpoint() in Python 3.7+ + + processed = item * 2 + result.append(processed) + return result + +# pdb commands: +# n (next): Execute next line +# s (step): Step into function +# c (continue): Continue execution +# p variable: Print variable +# pp variable: Pretty print variable +# l (list): Show current location in code +# w (where): Show stack trace +# q (quit): Quit debugger +``` + +### Conditional Breakpoints + +```python +# ❌ WRONG: Breaking on every iteration +def process_items(items): + for item in items: + pdb.set_trace() # Breaks 10000 times! + process(item) + +# ✅ CORRECT: Conditional breakpoint +def process_items(items): + for i, item in enumerate(items): + if i == 5000: # Only break on specific iteration + breakpoint() + process(item) + +# ✅ BETTER: Use pdb.set_trace with condition +def process_items(items): + for item in items: + if item.value < 0: # Break only when problematic + breakpoint() + process(item) +``` + +### Post-Mortem Debugging + +```python +# ✅ CORRECT: Debug after exception +import pdb + +def main(): + try: + # Code that might raise exception + result = risky_operation() + except Exception: + # Drop into debugger at exception point + pdb.post_mortem() + +# ✅ CORRECT: Auto post-mortem for unhandled exceptions +import sys + +def custom_excepthook(type, value, traceback): + pdb.post_mortem(traceback) + +sys.excepthook = custom_excepthook + +# Now unhandled exceptions drop into pdb automatically +``` + +**Why this matters**: Breakpoints let you inspect state at exact point of failure. Conditional breakpoints avoid noise. Post-mortem debugging examines crashes. + + +## CPU Profiling + +### cProfile for Function-Level Profiling + +```python +import cProfile +import pstats + +# ❌ WRONG: Guessing which function is slow +def slow_program(): + # "I think this loop is the problem..." 
+ for i in range(1000): + process_data(i) + +# ✅ CORRECT: Profile to find actual bottleneck +def slow_program(): + for i in range(1000): + process_data(i) + +# Profile the function +cProfile.run('slow_program()', 'profile_stats') + +# Analyze results +stats = pstats.Stats('profile_stats') +stats.strip_dirs() +stats.sort_stats('cumulative') +stats.print_stats(20) # Top 20 functions by cumulative time + +# ✅ CORRECT: Profile with context manager +from contextlib import contextmanager +import cProfile + +@contextmanager +def profiled(): + pr = cProfile.Profile() + pr.enable() + yield + pr.disable() + + stats = pstats.Stats(pr) + stats.strip_dirs() + stats.sort_stats('cumulative') + stats.print_stats(20) + +# Usage +with profiled(): + slow_program() +``` + +### Profiling Specific Code Blocks + +```python +# ✅ CORRECT: Profile specific section +import cProfile + +pr = cProfile.Profile() + +# Normal code +setup_data() + +# Profile this section +pr.enable() +expensive_operation() +pr.disable() + +# More normal code +cleanup() + +# View results +pr.print_stats(sort='cumulative') +``` + +### Line-Level Profiling with line_profiler + +```python +# Install: pip install line_profiler + +# ✅ CORRECT: Line-by-line profiling +from line_profiler import LineProfiler + +@profile # Use @profile decorator +def slow_function(): + total = 0 + for i in range(10000): + total += i ** 2 + return total + +# Run with kernprof: +# kernprof -l -v script.py + +# Or programmatically: +lp = LineProfiler() +lp.add_function(slow_function) +lp.enable() +slow_function() +lp.disable() +lp.print_stats() + +# Output shows time spent per line: +# Line # Hits Time Per Hit % Time Line Contents +# ============================================================== +# 1 def slow_function(): +# 2 1 2.0 2.0 0.0 total = 0 +# 3 10001 15234.0 1.5 20.0 for i in range(10000): +# 4 10000 60123.0 6.0 80.0 total += i ** 2 +# 5 1 1.0 1.0 0.0 return total +``` + +**Why this matters**: cProfile shows which functions are slow. line_profiler shows which lines within functions. Both essential for optimization. 
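
One practical note: on real codebases the top of a cProfile report is often dominated by framework internals. `pstats` accepts regex filters, so you can restrict output to your own code; a minimal sketch, where `'myproject'` and `'process_data'` are hypothetical names standing in for your package directory and a suspect function:

```python
import cProfile
import pstats

cProfile.run('main()', 'profile_stats')

stats = pstats.Stats('profile_stats')
stats.sort_stats('cumulative')

# print_stats treats string arguments as regex filters on file/function names,
# so this limits the report to frames from your own package
stats.print_stats('myproject')

# Show which callers account for the time spent in one suspect function
stats.print_callers('process_data')
```
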
### Visualizing Profiles with SnakeViz

```bash
# Install: pip install snakeviz

# Profile code
python -m cProfile -o program.prof script.py

# Visualize
snakeviz program.prof

# Opens browser with interactive visualization:
# - Sunburst chart showing call hierarchy
# - Icicle chart showing time distribution
# - Click functions to zoom in
```


## Memory Profiling

### Memory Usage with memory_profiler

```python
# Install: pip install memory_profiler

from memory_profiler import profile

# ✅ CORRECT: Track memory usage per line
@profile
def memory_hungry_function():
    # Line-by-line memory usage shown
    big_list = [i for i in range(1000000)]  # Allocates roughly 40MB
    big_dict = {i: i**2 for i in range(1000000)}  # Roughly 80MB more
    return len(big_list), len(big_dict)

# Run with:
# python -m memory_profiler script.py

# Output (numbers indicative):
# Line #    Mem usage    Increment   Line Contents
# ================================================
#      3     38.3 MiB     38.3 MiB   @profile
#      4                             def memory_hungry_function():
#      5     76.8 MiB     38.5 MiB       big_list = [i for i in range(1000000)]
#      6    153.9 MiB     77.1 MiB       big_dict = {i: i**2 for i in range(1000000)}
#      7    153.9 MiB      0.0 MiB       return len(big_list), len(big_dict)
```

### Finding Memory Leaks

```python
# ✅ CORRECT: Detect memory leaks with tracemalloc
import tracemalloc

# Start tracing
tracemalloc.start()

# Take snapshot before
snapshot1 = tracemalloc.take_snapshot()

# Run code that might leak
problematic_function()

# Take snapshot after
snapshot2 = tracemalloc.take_snapshot()

# Compare snapshots
top_stats = snapshot2.compare_to(snapshot1, 'lineno')

print("Top 10 memory increases:")
for stat in top_stats[:10]:
    print(stat)

tracemalloc.stop()

# ✅ CORRECT: Track specific objects
import gc
import sys

def find_memory_leak():
    # Force garbage collection
    gc.collect()

    # Track objects before
    before = len(gc.get_objects())

    # Run potentially leaky code
    for _ in range(100):
        leaky_operation()

    # Force GC again
    gc.collect()

    # Track objects after
    after = len(gc.get_objects())

    if after > before:
        print(f"Potential leak: {after - before} objects not collected")

        # Find what's keeping objects alive
        for obj in gc.get_objects():
            if isinstance(obj, MyClass):  # Suspect class
                print(f"Found {type(obj)}: {sys.getrefcount(obj)} references")
                print(gc.get_referrers(obj))
```

### Profiling Memory with objgraph

```python
# Install: pip install objgraph

import objgraph

# ✅ CORRECT: Find most common objects
def analyze_memory():
    objgraph.show_most_common_types()
    # Output:
    # dict          12453
    # function      8234
    # list          6789
    # ...

# ✅ CORRECT: Track object growth
objgraph.show_growth()
potentially_leaky_function()
objgraph.show_growth()  # Shows objects that increased

# ✅ CORRECT: Visualize object references
objgraph.show_refs([my_object], filename='refs.png')
# Creates graph showing what references my_object
```

**Why this matters**: Memory leaks cause gradual performance degradation. tracemalloc and memory_profiler help find exactly where memory is allocated.
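To quantify a memory fix before and after, `tracemalloc.get_traced_memory()` returns current and peak traced bytes. A minimal sketch comparing a materialized list to a generator (numbers are indicative):

```python
import tracemalloc

# Materialized list: every element alive at once
tracemalloc.start()
total = sum([i * i for i in range(1_000_000)])
_, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print(f"list peak: {peak / 1024 / 1024:.1f} MiB")

# Generator: one element alive at a time
tracemalloc.start()
total = sum(i * i for i in range(1_000_000))
_, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print(f"generator peak: {peak / 1024 / 1024:.1f} MiB")
```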

## Profiling Async Code

### Profiling Async Functions

```python
import asyncio
import cProfile

# ❌ WRONG: cProfile doesn't work well with async
async def slow_async():
    await asyncio.sleep(1)
    await process_data()

cProfile.run('asyncio.run(slow_async())')  # Misleading results

# ✅ CORRECT: Use yappi for async profiling
# Install: pip install yappi
import yappi

async def slow_async():
    await asyncio.sleep(1)
    await process_data()

yappi.set_clock_type("wall")  # Use wall time, not CPU time
yappi.start()

asyncio.run(slow_async())

yappi.stop()

# Print stats
stats = yappi.get_func_stats()
stats.sort("totaltime", "desc")
stats.print_all()

# ✅ CORRECT: Profile coroutines specifically
stats = yappi.get_func_stats(filter_callback=lambda x: 'coroutine' in x.name)
stats.print_all()
```

### Detecting Blocking Code in Async

```python
# ✅ CORRECT: Detect event loop blocking
import asyncio
import time

class LoopMonitor:
    def __init__(self, threshold: float = 0.1):
        self.threshold = threshold

    async def monitor(self):
        while True:
            start = time.monotonic()
            await asyncio.sleep(0.01)  # Very short sleep
            elapsed = time.monotonic() - start

            if elapsed > self.threshold:
                print(f"WARNING: Event loop blocked for {elapsed:.3f}s")

async def main():
    # Start monitor
    monitor = LoopMonitor(threshold=0.1)
    monitor_task = asyncio.create_task(monitor.monitor())

    # Run your async code
    await your_async_function()

    monitor_task.cancel()

# ✅ CORRECT: Use asyncio debug mode
asyncio.run(main(), debug=True)
# Warns about slow callbacks (>100ms)
```


## Performance Optimization Strategies

### Optimization Workflow

```python
# ✅ CORRECT: Systematic optimization approach

# 1. Profile to find bottleneck
import cProfile
import pstats
import timeit

cProfile.run('main()', 'profile_stats')

# 2. Analyze results
stats = pstats.Stats('profile_stats')
stats.sort_stats('cumulative')
stats.print_stats(10)  # Focus on top 10

# 3. Identify specific slow function
def slow_function(data):
    # Original implementation
    result = []
    for item in data:
        if is_valid(item):
            result.append(transform(item))
    return result

# 4. Create benchmark
data = create_test_data(10000)

time_taken = timeit.timeit(lambda: slow_function(data), number=100)
print(f"Average time: {time_taken / 100:.4f}s")  # Baseline: 0.1234s

# 5. Optimize
def optimized_function(data):
    # Use list comprehension (faster)
    return [transform(item) for item in data if is_valid(item)]

# 6. Benchmark again
time_taken = timeit.timeit(lambda: optimized_function(data), number=100)
print(f"Average time: {time_taken / 100:.4f}s")  # 0.0789s - 36% faster!

# 7. Verify correctness
assert slow_function(data) == optimized_function(data)

# 8. Re-profile entire program to verify improvement
cProfile.run('main()', 'profile_stats_optimized')
```

**Why this matters**: Without profiling, you might optimize code that takes 1% of runtime, ignoring the 90% bottleneck. Always measure.

### Common Optimizations

```python
import re

# ❌ WRONG: Repeated expensive operations
def process_items(items):
    for item in items:
        # Regex compiled every iteration!
        pattern = re.compile(r'\d+')
        match = pattern.search(item)

# ✅ CORRECT: Move expensive operations outside loop
def process_items(items):
    pattern = re.compile(r'\d+')  # Compile once
    for item in items:
        match = pattern.search(item)

# ❌ WRONG: Growing list with repeated concatenation
def build_large_list():
    result = []
    for i in range(100000):
        result = result + [i]  # Creates new list each time! O(n²)

# ✅ CORRECT: Use append
def build_large_list():
    result = []
    for i in range(100000):
        result.append(i)  # O(n)

# ❌ WRONG: Checking membership in list
def filter_items(items, blacklist):
    return [item for item in items if item not in blacklist]
    # O(n * m) if blacklist is a list

# ✅ CORRECT: Use set for membership checks
def filter_items(items, blacklist):
    blacklist_set = set(blacklist)  # O(m)
    return [item for item in items if item not in blacklist_set]
    # O(n) for iteration + O(1) per lookup = O(n)
```

### Caching Results

```python
from functools import lru_cache

# ❌ WRONG: Recomputing expensive results
def fibonacci(n):
    if n < 2:
        return n
    return fibonacci(n-1) + fibonacci(n-2)
# O(2^n) - recalculates same values repeatedly

# ✅ CORRECT: Cache results
@lru_cache(maxsize=None)
def fibonacci(n):
    if n < 2:
        return n
    return fibonacci(n-1) + fibonacci(n-2)
# O(n) - each value computed once

# ✅ CORRECT: Custom caching for unhashable arguments
import hashlib
from functools import wraps

def cache_dataframe_results(func):
    cache = {}

    @wraps(func)
    def wrapper(df):
        # Use hash of dataframe content as key
        key = hashlib.md5(df.to_csv(index=False).encode()).hexdigest()

        if key not in cache:
            cache[key] = func(df)

        return cache[key]

    return wrapper

@cache_dataframe_results
def expensive_dataframe_operation(df):
    # Complex computation
    return df.groupby('category').agg({'value': 'sum'})
```


## Systematic Diagnosis

### Performance Degradation Diagnosis

```python
# ✅ CORRECT: Diagnose performance regression
import cProfile
import pstats

def diagnose_slowdown():
    """Compare current vs baseline performance."""

    # Profile current code
    cProfile.run('main()', 'current_profile.prof')

    # Load baseline profile (from git history or previous run)
    # git show main:profile.prof > baseline_profile.prof

    current = pstats.Stats('current_profile.prof')
    baseline = pstats.Stats('baseline_profile.prof')

    print("=== CURRENT ===")
    current.sort_stats('cumulative')
    current.print_stats(10)

    print("\n=== BASELINE ===")
    baseline.sort_stats('cumulative')
    baseline.print_stats(10)

    # Look for functions that got slower
    # Compare cumulative times
```

### Memory Leak Diagnosis

```python
# ✅ CORRECT: Systematic memory leak detection
import tracemalloc
import gc

def diagnose_memory_leak():
    """Run function multiple times and check memory growth."""

    gc.collect()
    tracemalloc.start()

    # Baseline
    snapshot1 = tracemalloc.take_snapshot()

    # Run 100 times
    for _ in range(100):
        potentially_leaky_function()
        gc.collect()

    # Check memory
    snapshot2 = tracemalloc.take_snapshot()

    top_stats = snapshot2.compare_to(snapshot1, 'lineno')

    print("Top 10 memory allocations:")
    for stat in top_stats[:10]:
        print(f"{stat.traceback}: +{stat.size_diff / 1024:.1f} KB")

    tracemalloc.stop()
```

### I/O vs CPU Bound Diagnosis

```python
# ✅ CORRECT: Determine if I/O or CPU bound
import time

def diagnose_bottleneck():
    """Determine if program is I/O or CPU bound."""

    # Measure wall-clock and CPU time over the same run
    start_wall = time.time()
    start_cpu = time.process_time()
    main()
    wall_time = time.time() - start_wall
    cpu_time = time.process_time() - start_cpu

    print(f"Wall time: {wall_time:.2f}s")
    print(f"CPU time: {cpu_time:.2f}s")

    if cpu_time / wall_time > 0.9:
        print("CPU bound - optimize computation")
        # Consider: Cython, NumPy, multiprocessing
    else:
        print("I/O bound - optimize I/O")
        # Consider: async/await, caching, batching
```


## Common Bottlenecks and Solutions

### String Concatenation

```python
# ❌ WRONG: String concatenation in loop
def build_string(items):
    result = ""
    for item in items:
        result += str(item) + "\n"  # Creates new string each time
    return result
# O(n²) time complexity

# ✅ CORRECT: Use join
def build_string(items):
    return "\n".join(str(item) for item in items)
# O(n) time complexity

# Benchmark:
# 1000 items: 0.0015s (join) vs 0.0234s (concatenation) - 15x faster
# 10000 items: 0.015s (join) vs 2.341s (concatenation) - 156x faster
```

### List Comprehension vs Map/Filter

```python
import timeit

# ✅ CORRECT: List comprehension (usually fastest)
def with_list_comp(data):
    return [x * 2 for x in data if x > 0]

# ✅ CORRECT: Generator (memory efficient for large data)
def with_generator(data):
    return (x * 2 for x in data if x > 0)

# Map/filter (sometimes faster for simple operations)
def with_map_filter(data):
    return map(lambda x: x * 2, filter(lambda x: x > 0, data))

# Benchmark
data = list(range(1000000))
print(timeit.timeit(lambda: list(with_list_comp(data)), number=10))
print(timeit.timeit(lambda: list(with_generator(data)), number=10))
print(timeit.timeit(lambda: list(with_map_filter(data)), number=10))

# Results: List comprehension usually fastest for complex logic
# Generator best when you don't need all results at once
```

### Dictionary Lookups vs List Searches

```python
# ❌ WRONG: Searching in list
def find_users_list(user_ids, all_users_list):
    results = []
    for user_id in user_ids:
        for user in all_users_list:  # O(n) per lookup
            if user['id'] == user_id:
                results.append(user)
                break
    return results
# O(n * m) time complexity

# ✅ CORRECT: Use dictionary
def find_users_dict(user_ids, all_users_dict):
    return [all_users_dict[uid] for uid in user_ids if uid in all_users_dict]
# O(n) time complexity

# Benchmark:
# 1000 lookups in 10000 items:
# List: 1.234s
# Dict: 0.001s - 1234x faster!
```

### DataFrame Iteration Anti-Pattern

```python
import pandas as pd
import numpy as np

# ❌ WRONG: Iterating over DataFrame rows
def process_rows_iterrows(df):
    results = []
    for idx, row in df.iterrows():  # VERY SLOW
        if row['value'] > 0:
            results.append(row['value'] * 2)
    return results

# ✅ CORRECT: Vectorized operations
def process_rows_vectorized(df):
    mask = df['value'] > 0
    return (df.loc[mask, 'value'] * 2).tolist()

# Benchmark with 100,000 rows:
# iterrows: 15.234s
# vectorized: 0.015s - 1000x faster!
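
# Middle ground when vectorization isn't possible (assumption: row logic
# must call external code): itertuples() yields plain namedtuples instead
# of the Series objects iterrows() builds, and is typically an order of
# magnitude faster than iterrows - though still far slower than vectorizing.
def process_rows_itertuples(df):
    return [row.value * 2 for row in df.itertuples(index=False) if row.value > 0]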
```


## Profiling Tools Comparison

### When to Use Which Tool

| Tool | Use Case | Output |
|------|----------|--------|
| cProfile | Function-level CPU profiling | Which functions take the most time |
| line_profiler | Line-level CPU profiling | Which lines within a function are slow |
| memory_profiler | Line-level memory profiling | Memory usage per line |
| tracemalloc | Memory allocation tracking | Where memory is allocated |
| yappi | Async/multithreaded profiling | Profile concurrent code |
| py-spy | Sampling profiler (no code changes) | Profile running processes |
| scalene | CPU+GPU+memory profiling | Comprehensive profiling |

### py-spy for Production Profiling

```bash
# Install: pip install py-spy

# Profile running process (no code changes needed!)
py-spy record -o profile.svg --pid 12345

# Profile for 60 seconds
py-spy record -o profile.svg --duration 60 -- python script.py

# Top-like view of running process
py-spy top --pid 12345

# Why use py-spy:
# - No code changes needed
# - Minimal overhead
# - Can attach to running process
# - Great for production debugging
```


## Anti-Patterns

### Premature Optimization

```python
# ❌ WRONG: Optimizing before measuring
def process_data(data):
    # "Let me make this fast with complex caching..."
    # Hours spent optimizing a function that takes 0.1% of runtime
    ...

# ✅ CORRECT: Profile first
cProfile.run('main()', 'profile.prof')
# Oh, process_data only takes 0.1% of time
# The real bottleneck is database queries (90% of time)
# Optimize database queries instead!
```

### Micro-Optimizations

```python
# ❌ WRONG: Micro-optimizing at expense of readability
def calculate(x, y):
    # "Using bit shift instead of multiply by 2 for speed!"
    return (x << 1) + (y << 1)
# Saved: ~0.0000001 seconds per call
# Cost: Unreadable code

# ✅ CORRECT: Clear code first
def calculate(x, y):
    return 2 * x + 2 * y
# The nanoseconds saved never justify the readability cost
# Only optimize if profiler shows this is a bottleneck
```

### Not Benchmarking Changes

```python
# ❌ WRONG: Assuming optimization worked
def slow_function():
    # Original code
    pass

def optimized_function():
    # "Optimized" code
    pass

# Assume optimized_function is faster without measuring

# ✅ CORRECT: Benchmark before and after
import timeit

before = timeit.timeit(slow_function, number=1000)
after = timeit.timeit(optimized_function, number=1000)

print(f"Before: {before:.4f}s")
print(f"After: {after:.4f}s")
print(f"Speedup: {before/after:.2f}x")

# Verify correctness
assert slow_function() == optimized_function()
```


## Decision Trees

### What Tool to Use for Profiling?

```
What do I need to profile?
├─ CPU time
│   ├─ Function-level → cProfile
│   ├─ Line-level → line_profiler
│   └─ Async code → yappi
├─ Memory usage
│   ├─ Line-level → memory_profiler
│   ├─ Allocation tracking → tracemalloc
│   └─ Object types → objgraph
└─ Running process (no code changes) → py-spy
```

### Optimization Strategy

```
Is code slow?
├─ Yes → Profile to find bottleneck
│   ├─ CPU bound → Profile with cProfile
│   │   └─ Optimize hot functions (vectorize, cache, algorithms)
│   └─ I/O bound → Profile with timing
│       └─ Use async/await, caching, batching
└─ No → Don't optimize (focus on features/correctness)
```

### Memory Issue Diagnosis

```
Is memory usage high?
+├─ Yes → Profile with memory_profiler +│ ├─ Growing over time → Memory leak +│ │ └─ Use tracemalloc to find leak +│ └─ High but stable → Large data structures +│ └─ Optimize data structures (generators, efficient types) +└─ No → Monitor but don't optimize yet +``` + + +## Integration with Other Skills + +**After using this skill:** +- If I/O bound → See @async-patterns-and-concurrency for async optimization +- If data processing slow → See @scientific-computing-foundations for vectorization +- If need to track improvements → See @ml-engineering-workflows for metrics + +**Before using this skill:** +- If unsure code is slow → Use this skill to profile and confirm! +- If setting up profiling → See @project-structure-and-tooling for dependencies + + +## Quick Reference + +### Essential Profiling Commands + +```python +# CPU profiling +import cProfile +cProfile.run('main()', 'profile.prof') + +# View results +import pstats +stats = pstats.Stats('profile.prof') +stats.sort_stats('cumulative') +stats.print_stats(20) + +# Memory profiling +import tracemalloc +tracemalloc.start() +# ... code ... +snapshot = tracemalloc.take_snapshot() +top_stats = snapshot.statistics('lineno') +for stat in top_stats[:10]: + print(stat) +``` + +### Debugging Commands + +```python +# Set breakpoint +breakpoint() # Python 3.7+ +# or +import pdb; pdb.set_trace() + +# pdb commands: +# n - next line +# s - step into +# c - continue +# p var - print variable +# l - list code +# w - where am I +# q - quit +``` + +### Optimization Checklist + +- [ ] Profile before optimizing (use cProfile) +- [ ] Identify bottleneck (top 20% of time) +- [ ] Create benchmark for bottleneck +- [ ] Optimize bottleneck +- [ ] Benchmark again to verify improvement +- [ ] Re-profile entire program +- [ ] Verify correctness (tests still pass) + +### Common Optimizations + +| Problem | Solution | Speedup | +|---------|----------|---------| +| String concatenation in loop | Use str.join() | 10-100x | +| List membership checks | Use set | 100-1000x | +| DataFrame iteration | Vectorize with NumPy/pandas | 100-1000x | +| Repeated expensive computation | Cache with @lru_cache | ∞ (depends on cache hits) | +| I/O bound | Use async/await | 10-100x | +| CPU bound with parallelizable work | Use multiprocessing | ~number of cores | + +### Red Flags + +If you find yourself: +- Optimizing before profiling → STOP, profile first +- Spending hours on micro-optimizations → Check if it's bottleneck +- Making code unreadable for speed → Benchmark the benefit +- Assuming what's slow → Profile to verify + +**Always measure. Never assume.** diff --git a/skills/using-python-engineering/ml-engineering-workflows.md b/skills/using-python-engineering/ml-engineering-workflows.md new file mode 100644 index 0000000..61a63d1 --- /dev/null +++ b/skills/using-python-engineering/ml-engineering-workflows.md @@ -0,0 +1,1072 @@ + +# ML Engineering Workflows + +## Overview + +**Core Principle:** Experiments must be reproducible. Track everything: code, data, parameters, metrics, environment. Without reproducibility, ML experiments are just random number generation. + +ML engineering is about systematic experimentation and production deployment. Track experiments with MLflow/Weights & Biases, manage configuration with Hydra, ensure reproducible data splits, monitor models in production. The biggest mistake: running experiments without tracking parameters or random seeds. 
+ +## When to Use + +**Use this skill when:** +- "Track ML experiments" +- "MLflow setup" +- "Reproducible ML" +- "Model lifecycle" +- "Hyperparameter management" +- "ML monitoring" +- "ML project structure" +- "Experiment comparison" + +**Don't use when:** +- Setting up Python project (use project-structure-and-tooling first) +- NumPy/pandas optimization (use scientific-computing-foundations) +- Profiling ML code (use debugging-and-profiling) + +**Symptoms triggering this skill:** +- Can't reproduce results +- Lost track of which parameters produced which metrics +- Need to compare many experiments +- Deploying model to production + + +## Experiment Tracking with MLflow + +### Basic MLflow Setup + +```python +import mlflow +import mlflow.sklearn +from sklearn.ensemble import RandomForestClassifier +from sklearn.metrics import accuracy_score, f1_score + +# ❌ WRONG: Not tracking experiments +model = RandomForestClassifier(n_estimators=100, max_depth=10) +model.fit(X_train, y_train) +accuracy = accuracy_score(y_test, model.predict(X_test)) +print(f"Accuracy: {accuracy}") # Lost forever after terminal closes + +# ✅ CORRECT: Track with MLflow +mlflow.set_experiment("my_experiment") + +with mlflow.start_run(): + # Log parameters + params = {"n_estimators": 100, "max_depth": 10} + mlflow.log_params(params) + + # Train model + model = RandomForestClassifier(**params) + model.fit(X_train, y_train) + + # Log metrics + y_pred = model.predict(X_test) + mlflow.log_metric("accuracy", accuracy_score(y_test, y_pred)) + mlflow.log_metric("f1_score", f1_score(y_test, y_pred, average='weighted')) + + # Log model + mlflow.sklearn.log_model(model, "model") + +# ✅ CORRECT: Log artifacts (plots, confusion matrix) +import matplotlib.pyplot as plt +from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay + +with mlflow.start_run(): + mlflow.log_params(params) + model.fit(X_train, y_train) + y_pred = model.predict(X_test) + + # Create and save confusion matrix + cm = confusion_matrix(y_test, y_pred) + disp = ConfusionMatrixDisplay(cm) + disp.plot() + plt.savefig("confusion_matrix.png") + mlflow.log_artifact("confusion_matrix.png") + plt.close() + + mlflow.log_metrics({ + "accuracy": accuracy_score(y_test, y_pred), + "f1_score": f1_score(y_test, y_pred, average='weighted') + }) +``` + +**Why this matters**: MLflow tracks all experiments with parameters and metrics. Can compare runs, reproduce results, and deploy best model. 
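Once runs are logged, comparison doesn't require the UI. A minimal sketch, assuming the `my_experiment` name from above and a recent MLflow 2.x (`search_runs` returns a pandas DataFrame):

```python
import mlflow

# Fetch all runs for the experiment, best accuracy first
runs = mlflow.search_runs(
    experiment_names=["my_experiment"],
    order_by=["metrics.accuracy DESC"],
)
print(runs[["run_id", "params.n_estimators", "metrics.accuracy"]].head())
```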
### Nested Runs for Cross-Validation

```python
# ❌ WRONG: CV results not tracked properly
from sklearn.model_selection import cross_val_score
scores = cross_val_score(model, X, y, cv=5)
print(f"Mean: {scores.mean()}")  # Lost context

# ✅ CORRECT: Track CV with nested runs
import numpy as np
from sklearn.model_selection import KFold

with mlflow.start_run(run_name="rf_cv_experiment") as parent_run:
    mlflow.log_params(params)

    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
        with mlflow.start_run(run_name=f"fold_{fold}", nested=True):
            X_train_fold, X_val_fold = X[train_idx], X[val_idx]
            y_train_fold, y_val_fold = y[train_idx], y[val_idx]

            model = RandomForestClassifier(**params, random_state=42)
            model.fit(X_train_fold, y_train_fold)

            score = accuracy_score(y_val_fold, model.predict(X_val_fold))
            cv_scores.append(score)

            mlflow.log_metric("accuracy", score)
            mlflow.log_metric("fold", fold)

    # Log aggregate metrics in parent run
    mlflow.log_metric("cv_mean_accuracy", np.mean(cv_scores))
    mlflow.log_metric("cv_std_accuracy", np.std(cv_scores))
```

### Hyperparameter Tuning with Tracking

```python
from sklearn.model_selection import GridSearchCV

# ❌ WRONG: GridSearchCV without tracking
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 20]
}
grid_search = GridSearchCV(RandomForestClassifier(), param_grid, cv=5)
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)  # Only get best, lose all other trials

# ✅ CORRECT: Track all hyperparameter trials
with mlflow.start_run(run_name="grid_search"):
    for n_est in [50, 100, 200]:
        for max_d in [5, 10, 20]:
            with mlflow.start_run(nested=True):
                params = {"n_estimators": n_est, "max_depth": max_d}
                mlflow.log_params(params)

                model = RandomForestClassifier(**params, random_state=42)
                model.fit(X_train, y_train)

                score = accuracy_score(y_test, model.predict(X_test))
                mlflow.log_metric("accuracy", score)

# ✅ BETTER: Use MLflow with Optuna for Bayesian optimization
import optuna
from optuna.integration.mlflow import MLflowCallback

def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 50, 200),
        "max_depth": trial.suggest_int("max_depth", 5, 20),
    }

    model = RandomForestClassifier(**params, random_state=42)
    model.fit(X_train, y_train)
    return accuracy_score(y_test, model.predict(X_test))

mlflc = MLflowCallback(tracking_uri="mlruns", metric_name="accuracy")
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50, callbacks=[mlflc])
```

**Why this matters**: Hyperparameter tuning generates many experiments. Tracking all trials enables comparison and understanding of parameter importance.


## Configuration Management with Hydra

### Basic Hydra Configuration

```python
# ❌ WRONG: Hardcoded parameters
def train():
    learning_rate = 0.001
    batch_size = 32
    epochs = 100
    # What if we want to try different values? Edit code each time?

# ✅ CORRECT: Hydra configuration
# File: config.yaml
"""
model:
  learning_rate: 0.001
  batch_size: 32
  epochs: 100

data:
  train_path: data/train.csv
  test_path: data/test.csv
"""

# File: train.py
import hydra
from omegaconf import DictConfig

@hydra.main(config_path=".", config_name="config", version_base=None)
def train(cfg: DictConfig):
    print(f"Learning rate: {cfg.model.learning_rate}")
    print(f"Batch size: {cfg.model.batch_size}")

    # Access config values
    model = create_model(
        lr=cfg.model.learning_rate,
        batch_size=cfg.model.batch_size
    )

if __name__ == "__main__":
    train()

# Run with overrides:
# python train.py model.learning_rate=0.01 model.batch_size=64
```

### Structured Configs with Dataclasses

```python
# ✅ CORRECT: Type-safe configs with dataclasses
import hydra
from dataclasses import dataclass, field
from hydra.core.config_store import ConfigStore

@dataclass
class ModelConfig:
    learning_rate: float = 0.001
    batch_size: int = 32
    epochs: int = 100
    hidden_dim: int = 256

@dataclass
class DataConfig:
    train_path: str = "data/train.csv"
    test_path: str = "data/test.csv"
    val_split: float = 0.2

@dataclass
class Config:
    # default_factory is required: dataclass instances are unhashable by
    # default, so using them directly raises the mutable-default error
    model: ModelConfig = field(default_factory=ModelConfig)
    data: DataConfig = field(default_factory=DataConfig)

cs = ConfigStore.instance()
cs.store(name="config", node=Config)

@hydra.main(config_path=None, config_name="config", version_base=None)
def train(cfg: Config):
    # Type hints work!
    lr: float = cfg.model.learning_rate
    batch_size: int = cfg.model.batch_size

if __name__ == "__main__":
    train()
```

**Why this matters**: Hydra enables command-line overrides without code changes. Structured configs provide type safety and IDE autocomplete.

### Multi-Run Sweeps

```python
# ✅ CORRECT: Hydra multirun for hyperparameter sweeps
# config.yaml
"""
defaults:
  - override hydra/launcher: basic

model:
  learning_rate: 0.001
  batch_size: 32
"""

# Run multiple experiments:
# python train.py -m model.learning_rate=0.001,0.01,0.1 model.batch_size=32,64,128
# Creates 9 runs (3 x 3)

@hydra.main(config_path=".", config_name="config", version_base=None)
def train(cfg: DictConfig):
    with mlflow.start_run():
        mlflow.log_params({
            "learning_rate": cfg.model.learning_rate,
            "batch_size": cfg.model.batch_size
        })

        model = train_model(cfg)
        metrics = evaluate_model(model, test_data)
        mlflow.log_metrics(metrics)
```


## Reproducibility Best Practices

### Random Seed Management

```python
import random
import numpy as np
import torch

# ❌ WRONG: No random seed
model = create_model()
model.fit(X_train, y_train)
# Different results every run!

# ✅ CORRECT: Set all random seeds
def set_seed(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # For full reproducibility with CUDA
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# In training script
set_seed(42)
model = create_model()
model.fit(X_train, y_train)

# ✅ CORRECT: Track seed in MLflow
with mlflow.start_run():
    seed = 42
    mlflow.log_param("random_seed", seed)
    set_seed(seed)
    # ... training code ...
```

### Reproducible Data Splits

```python
# ❌ WRONG: Non-reproducible split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# Different split every time!

# ✅ CORRECT: Fixed random seed for splits
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ✅ BETTER: Hash-based deterministic split (stable across runs)
import hashlib

def hash_split(df, test_size=0.2, id_column='id'):
    """Deterministic split based on ID hash."""
    def test_set_check(identifier, test_size):
        hash_val = int(hashlib.md5(str(identifier).encode()).hexdigest(), 16)
        return hash_val % 100 < test_size * 100

    is_test = df[id_column].apply(lambda x: test_set_check(x, test_size))
    return df[~is_test], df[is_test]

train_df, test_df = hash_split(df, test_size=0.2, id_column='user_id')
# Same split even if data order changes or new rows added
```

### Environment Reproducibility

```python
# ✅ CORRECT: Log environment info
import mlflow
import os
import platform
import sys

with mlflow.start_run():
    # Log Python version
    mlflow.log_param("python_version", sys.version)

    # Log package versions
    import sklearn
    import pandas
    import numpy
    mlflow.log_params({
        "sklearn_version": sklearn.__version__,
        "pandas_version": pandas.__version__,
        "numpy_version": numpy.__version__,
    })

    # Log system info
    mlflow.log_params({
        "platform": platform.platform(),
        "cpu_count": os.cpu_count()
    })

# ✅ BETTER: Use conda/docker for full reproducibility
# conda env export > environment.yml
# Log environment file as artifact
with mlflow.start_run():
    mlflow.log_artifact("environment.yml")
```

**Why this matters**: Reproducibility requires controlling all randomness sources. Different package versions or Python versions can produce different results.


## Data Versioning and Lineage

### Data Versioning with DVC

```bash
# Initialize DVC
dvc init

# Track large data files
dvc add data/train.csv
git add data/train.csv.dvc data/.gitignore
git commit -m "Track training data"

# Configure remote storage (S3, GCS, Azure, etc.)
dvc remote add -d myremote s3://mybucket/dvcstore
dvc push

# Retrieve specific version
git checkout v1.0
dvc pull
```

### Logging Data Info in MLflow

```python
# ✅ CORRECT: Log data characteristics
import hashlib
import pandas as pd
import mlflow

with mlflow.start_run():
    # Load data
    df = pd.read_csv("data/train.csv")

    # Log data info
    mlflow.log_params({
        "n_samples": len(df),
        "n_features": len(df.columns),
        "class_balance": df['target'].value_counts().to_dict(),
        "data_version": "v1.0",  # Track data version
        "data_hash": hashlib.md5(df.to_csv(index=False).encode()).hexdigest()
    })

    # Log sample of data
    df.head(100).to_csv("data_sample.csv", index=False)
    mlflow.log_artifact("data_sample.csv")
```

### Feature Engineering Pipeline Tracking

```python
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# ✅ CORRECT: Track entire preprocessing pipeline
with mlflow.start_run():
    # Define pipeline
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('pca', PCA(n_components=50)),
        ('classifier', RandomForestClassifier(n_estimators=100))
    ])

    # Log pipeline parameters
    mlflow.log_params({
        "scaler": "StandardScaler",
        "pca_components": 50,
        "classifier": "RandomForestClassifier",
        "n_estimators": 100
    })

    # Fit pipeline
    pipeline.fit(X_train, y_train)

    # Log entire pipeline
    mlflow.sklearn.log_model(pipeline, "model_pipeline")

    # Evaluate
    score = pipeline.score(X_test, y_test)
    mlflow.log_metric("accuracy", score)
```


## Model Lifecycle Management

### Model Registry

```python
# ✅ CORRECT: Register model in MLflow
with mlflow.start_run() as run:
    model = train_model(X_train, y_train)
    mlflow.sklearn.log_model(model, "model")

    # Register model
    model_uri = f"runs:/{run.info.run_id}/model"
    mlflow.register_model(model_uri, "my_model")

# ✅ CORRECT: Promote model to production
from mlflow.tracking import MlflowClient

client = MlflowClient()

# Get latest version
latest_version = client.get_latest_versions("my_model", stages=["None"])[0]

# Transition to staging
client.transition_model_version_stage(
    name="my_model",
    version=latest_version.version,
    stage="Staging"
)

# After testing, promote to production
client.transition_model_version_stage(
    name="my_model",
    version=latest_version.version,
    stage="Production"
)

# ✅ CORRECT: Load production model
model = mlflow.pyfunc.load_model(
    model_uri="models:/my_model/Production"
)
predictions = model.predict(X_new)
```

### Model Metadata and Tags

```python
# ✅ CORRECT: Add tags for searchability
with mlflow.start_run() as run:
    mlflow.set_tag("model_type", "random_forest")
    mlflow.set_tag("task", "classification")
    mlflow.set_tag("dataset", "customer_churn")
    mlflow.set_tag("owner", "data_science_team")

    # Train and log model
    model = train_model(X_train, y_train)
    mlflow.sklearn.log_model(model, "model")

    # Add version tag
    mlflow.set_tag("version", "v2.1.0")

# Search for runs with tags
from mlflow.tracking import MlflowClient
client = MlflowClient()
runs = client.search_runs(
    experiment_ids=["0"],
    filter_string="tags.model_type = 'random_forest' AND metrics.accuracy > 0.85"
)
```


## Metrics and Logging

### Structured Logging

```python
import logging
import mlflow

# ✅ CORRECT: Structured logging with MLflow
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

with mlflow.start_run():
    logger.info("Starting training")
    mlflow.log_param("learning_rate", 0.001)

    best_val_loss = float("inf")
    patience, patience_counter = 5, 0

    for epoch in range(num_epochs):
        train_loss = train_epoch(model, train_loader)
        val_loss = validate(model, val_loader)

        # Log metrics per epoch
        mlflow.log_metrics({
            "train_loss": train_loss,
            "val_loss": val_loss
        }, step=epoch)

        logger.info(f"Epoch {epoch}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}")

        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter > patience:
                logger.info(f"Early stopping at epoch {epoch}")
                mlflow.set_tag("early_stopped", "true")
                mlflow.log_param("stopped_epoch", epoch)
                break
```

### Custom Metrics

```python
from sklearn.metrics import accuracy_score, f1_score

# ✅ CORRECT: Define and log custom metrics
def business_metric(y_true, y_pred):
    """Custom metric: cost of false positives vs false negatives."""
    fp_cost = 10   # Cost of false positive
    fn_cost = 100  # Cost of false negative

    fp = ((y_pred == 1) & (y_true == 0)).sum()
    fn = ((y_pred == 0) & (y_true == 1)).sum()

    return fp * fp_cost + fn * fn_cost

with mlflow.start_run():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Log standard metrics
    mlflow.log_metrics({
        "accuracy": accuracy_score(y_test, y_pred),
        "f1": f1_score(y_test, y_pred)
    })

    # Log custom business metric
    cost = business_metric(y_test, y_pred)
    mlflow.log_metric("business_cost", cost)
```

### Metric Visualization

```python
# ✅ CORRECT: Log plots and visualizations
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import roc_curve, auc

with mlflow.start_run():
    # Train model
    model.fit(X_train, y_train)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # ROC curve
    fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
    roc_auc = auc(fpr, tpr)

    plt.figure()
    plt.plot(fpr, tpr, label=f'ROC curve (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.legend()
    plt.savefig("roc_curve.png")
    mlflow.log_artifact("roc_curve.png")
    plt.close()

    # Feature importance
    importances = model.feature_importances_
    indices = np.argsort(importances)[::-1]

    plt.figure(figsize=(10, 6))
    plt.bar(range(len(importances)), importances[indices])
    plt.xlabel('Feature Index')
    plt.ylabel('Importance')
    plt.title('Feature Importances')
    plt.savefig("feature_importance.png")
    mlflow.log_artifact("feature_importance.png")
    plt.close()

    mlflow.log_metric("roc_auc", roc_auc)
```


## Production Monitoring

### Model Performance Monitoring

```python
# ✅ CORRECT: Monitor model performance in production
import mlflow
from datetime import datetime
from mlflow.tracking import MlflowClient

class ModelMonitor:
    def __init__(self, model_name: str, model_version: str):
        self.model_name = model_name
        self.model_version = model_version
        self.mlflow_client = MlflowClient()

    def log_prediction(self, features, prediction, actual=None):
        """Log prediction for monitoring."""
        with mlflow.start_run(run_name=f"prediction_{datetime.now().isoformat()}"):
            mlflow.log_param("model_name", self.model_name)
            mlflow.log_param("model_version", self.model_version)

            # Log feature statistics
            mlflow.log_params({
                f"feature_{i}_mean": float(features[:, i].mean())
                for i in range(features.shape[1])
            })

            # Log prediction
            mlflow.log_metric("prediction", float(prediction))

            # If actual available (for online evaluation)
            if actual is not None:
                mlflow.log_metric("actual", float(actual))
                mlflow.log_metric("error", abs(float(prediction - actual)))

    def check_data_drift(self, current_data, reference_data):
        """Detect data drift using KS test."""
        from scipy.stats import ks_2samp

        drift_detected = False
        drift_features = []

        with mlflow.start_run(run_name="drift_check"):
            for i in range(current_data.shape[1]):
                stat, p_value = ks_2samp(
                    reference_data[:, i],
                    current_data[:, i]
                )

                mlflow.log_metric(f"feature_{i}_ks_stat", stat)
                mlflow.log_metric(f"feature_{i}_p_value", p_value)

                if p_value < 0.05:  # Significant drift
                    drift_detected = True
                    drift_features.append(i)

            mlflow.log_param("drift_detected", drift_detected)
            mlflow.log_param("drift_features", drift_features)

        return drift_detected, drift_features
```

### Alerting and Anomaly Detection

```python
# ✅ CORRECT: Monitor for anomalies in predictions
import mlflow
import numpy as np

class PredictionMonitor:
    def __init__(self, threshold_std: float = 3.0):
        self.threshold_std = threshold_std
        self.recent_predictions = []
        self.window_size = 1000

    def check_anomaly(self, prediction: float) -> bool:
        """Check if prediction is anomalous."""
        self.recent_predictions.append(prediction)

        if len(self.recent_predictions) > self.window_size:
            self.recent_predictions.pop(0)

        if len(self.recent_predictions) < 100:
            return False  # Not enough data

        mean = np.mean(self.recent_predictions)
        std = np.std(self.recent_predictions)

        if std == 0:
            return False  # No variation yet; z-score undefined

        z_score = abs(prediction - mean) / std

        is_anomaly = z_score > self.threshold_std

        # Log to MLflow
        mlflow.log_metrics({
            "prediction": prediction,
            "rolling_mean": mean,
            "rolling_std": std,
            "z_score": z_score,
            "is_anomaly": int(is_anomaly)
        })

        return is_anomaly
```


## ML Project Structure

### Standard Project Layout

```
ml_project/
├── data/
│   ├── raw/              # Original immutable data
│   ├── processed/        # Cleaned, transformed data
│   └── features/         # Engineered features
├── notebooks/            # Exploratory notebooks
│   └── eda.ipynb
├── src/
│   ├── __init__.py
│   ├── data/
│   │   ├── __init__.py
│   │   ├── load.py       # Data loading
│   │   └── preprocess.py # Preprocessing
│   ├── features/
│   │   ├── __init__.py
│   │   └── build.py      # Feature engineering
│   ├── models/
│   │   ├── __init__.py
│   │   ├── train.py      # Training
│   │   ├── predict.py    # Inference
│   │   └── evaluate.py   # Evaluation
│   └── utils/
│       ├── __init__.py
│       └── config.py     # Configuration
├── tests/
│   ├── test_data.py
│   ├── test_features.py
│   └── test_models.py
├── configs/              # Hydra configs
│   ├── config.yaml
│   ├── model/
│   │   ├── rf.yaml
│   │   └── xgboost.yaml
│   └── data/
│       └── default.yaml
├── mlruns/               # MLflow tracking
├── outputs/              # Hydra outputs
├── requirements.txt
├── setup.py
└── README.md
```

### Makefile for Common Tasks

```makefile
# ✅ CORRECT: Makefile for reproducible workflows
.PHONY: data features train evaluate

data:
	python src/data/load.py
	python src/data/preprocess.py

features: data
	python src/features/build.py

train: features
	python src/models/train.py

evaluate: train
	python src/models/evaluate.py

clean:
	rm -rf data/processed/*
	rm -rf mlruns/*

test:
	pytest tests/

lint:
	ruff check src/
	mypy src/
```


## Integration Patterns

### MLflow + Hydra Integration

```python
# ✅ CORRECT: Combine MLflow tracking with Hydra config
import hydra
from omegaconf import DictConfig, OmegaConf
import mlflow

@hydra.main(config_path="configs", config_name="config", version_base=None)
def
train(cfg: DictConfig): + # Set MLflow experiment + mlflow.set_experiment(cfg.experiment_name) + + with mlflow.start_run(): + # Log all Hydra config as parameters + mlflow.log_params(OmegaConf.to_container(cfg, resolve=True)) + + # Log Hydra config file as artifact + config_path = ".hydra/config.yaml" + mlflow.log_artifact(config_path) + + # Train model + model = create_model(cfg.model) + model.fit(X_train, y_train) + + # Log metrics + metrics = evaluate_model(model, X_test, y_test) + mlflow.log_metrics(metrics) + + # Log model + mlflow.sklearn.log_model(model, "model") + +if __name__ == "__main__": + train() +``` + +### Continuous Training Pipeline + +```python +# ✅ CORRECT: Automated retraining pipeline +from datetime import datetime +import mlflow + +def continuous_training_pipeline(): + """Retrain model if performance degrades.""" + # Load production model + prod_model = mlflow.pyfunc.load_model("models:/my_model/Production") + + # Load recent data + recent_data = load_recent_data() + + # Evaluate production model on recent data + prod_metrics = evaluate_model(prod_model, recent_data) + + # Check if retraining needed + if prod_metrics['accuracy'] < 0.85: # Threshold + print("Performance degraded, retraining...") + + with mlflow.start_run(run_name=f"retrain_{datetime.now().isoformat()}"): + # Log why retraining + mlflow.set_tag("retrain_reason", "accuracy_below_threshold") + mlflow.log_metric("prod_accuracy", prod_metrics['accuracy']) + + # Train new model + new_model = train_model(load_training_data()) + + # Evaluate new model + new_metrics = evaluate_model(new_model, recent_data) + mlflow.log_metrics(new_metrics) + + # If better, register and promote + if new_metrics['accuracy'] > prod_metrics['accuracy']: + mlflow.sklearn.log_model(new_model, "model") + + # Register new version + model_uri = f"runs:/{mlflow.active_run().info.run_id}/model" + model_version = mlflow.register_model(model_uri, "my_model") + + # Promote to production + client = MlflowClient() + client.transition_model_version_stage( + name="my_model", + version=model_version.version, + stage="Production", + archive_existing_versions=True + ) +``` + + +## Anti-Patterns + +### Not Tracking Experiments + +```python +# ❌ WRONG: No tracking +for lr in [0.001, 0.01, 0.1]: + model = train_model(lr) + print(f"LR={lr}, Accuracy={evaluate(model)}") +# Which LR was best? Lost after terminal closes. + +# ✅ CORRECT: Track everything +for lr in [0.001, 0.01, 0.1]: + with mlflow.start_run(): + mlflow.log_param("learning_rate", lr) + model = train_model(lr) + acc = evaluate(model) + mlflow.log_metric("accuracy", acc) + mlflow.sklearn.log_model(model, "model") +``` + +### Non-Reproducible Data Splits + +```python +# ❌ WRONG: Random split without seed +X_train, X_test = train_test_split(X, y, test_size=0.2) +# Different split every run! + +# ✅ CORRECT: Fixed seed +X_train, X_test = train_test_split(X, y, test_size=0.2, random_state=42) +``` + +### Hardcoded Paths and Parameters + +```python +# ❌ WRONG: Hardcoded values +data = pd.read_csv("/home/user/data/train.csv") +model = RandomForestClassifier(n_estimators=100, max_depth=10) + +# ✅ CORRECT: Config-driven +@hydra.main(config_path=".", config_name="config", version_base=None) +def train(cfg: DictConfig): + data = pd.read_csv(cfg.data.train_path) + model = RandomForestClassifier( + n_estimators=cfg.model.n_estimators, + max_depth=cfg.model.max_depth + ) +``` + + +## Decision Trees + +### Should I Track This Experiment? + +``` +Is this a throwaway experiment? 
├─ Yes (just testing) → Maybe skip tracking
└─ No → ALWAYS TRACK
    ├─ Comparing models → Track
    ├─ Tuning hyperparameters → Track
    ├─ Production candidate → Track
    └─ Debugging → Track (helps identify issues)
```

### When to Register a Model?

```
Is model for production use?
├─ Yes → Register in model registry
│   ├─ Test in staging first
│   └─ Promote to production after validation
└─ No (experiment only) → Log but don't register
```


## Integration with Other Skills

**After using this skill:**
- If profiling ML code → See @debugging-and-profiling
- If optimizing data processing → See @scientific-computing-foundations
- If setting up CI/CD → See @project-structure-and-tooling

**Before using this skill:**
- If setting up project → Use @project-structure-and-tooling first
- If data processing slow → Use @scientific-computing-foundations to optimize


## Quick Reference

### MLflow Essential Commands

```python
# Start run
with mlflow.start_run():
    mlflow.log_param("param_name", value)
    mlflow.log_metric("metric_name", value)
    mlflow.log_artifact("file.png")
    mlflow.sklearn.log_model(model, "model")

# Register model (URI format: runs:/<run_id>/<artifact_path>)
mlflow.register_model("runs:/<run_id>/model", "model_name")

# Load model
model = mlflow.pyfunc.load_model("models:/model_name/Production")
```

### Hydra Essential Patterns

```python
# Basic config
@hydra.main(config_path=".", config_name="config", version_base=None)
def main(cfg: DictConfig):
    print(cfg.param)

# Override from CLI
# python script.py param=value

# Multirun
# python script.py -m param=1,2,3
```

### Reproducibility Checklist

- [ ] Set random seeds (Python, NumPy, PyTorch)
- [ ] Use fixed random_state in train_test_split
- [ ] Track data version/hash
- [ ] Log package versions
- [ ] Track preprocessing steps
- [ ] Version control code
- [ ] Use config files (don't hardcode)
diff --git a/skills/using-python-engineering/modern-syntax-and-types.md b/skills/using-python-engineering/modern-syntax-and-types.md
new file mode 100644
index 0000000..ccfb383
--- /dev/null
+++ b/skills/using-python-engineering/modern-syntax-and-types.md
@@ -0,0 +1,848 @@

# Modern Python Syntax and Types

## Overview

**Core Principle:** Type hints make code self-documenting and catch bugs before runtime. Python 3.10-3.12 introduced powerful type system features and syntax improvements. Use them.

Modern Python is statically typed (optionally), with match statements, structural pattern matching, and cleaner syntax. The type system evolved dramatically: `|` union syntax (3.10), exception groups (3.11), PEP 695 generics (3.12). Master these to write production-quality Python.

## When to Use

**Use this skill when:**
- "mypy error: ..." or "pyright error: ..."
- Adding type hints to existing code
- Using Python 3.10+ features (match, | unions, generics)
- Configuring static type checkers
- Type errors with generics, protocols, or TypedDict

**Don't use when:**
- Setting up project structure (use project-structure-and-tooling)
- Runtime type checking needed (use pydantic or similar)
- Performance optimization (use debugging-and-profiling)

**Symptoms triggering this skill:**
- "Incompatible type" errors
- "How to type hint X?"
- "Use Python 3.12 features"
- "Configure mypy strict mode"


## Type Hints Fundamentals

### Basic Annotations

```python
# ❌ WRONG: No type hints
def calculate_total(prices, tax_rate):
    return sum(prices) * (1 + tax_rate)

# ✅ CORRECT: Clear types
def calculate_total(prices: list[float], tax_rate: float) -> float:
    return sum(prices) * (1 + tax_rate)

# Why this matters: Type checker catches calculate_total([1, 2], "0.1")
# immediately instead of failing at runtime with TypeError
```

### Built-in Collection Types (Python 3.9+)

```python
# ❌ WRONG: Using typing.List, typing.Dict (deprecated)
from typing import List, Dict, Tuple

def process(items: List[str]) -> Dict[str, int]:
    return {item: len(item) for item in items}

# ✅ CORRECT: Use built-in types directly (Python 3.9+)
def process(items: list[str]) -> dict[str, int]:
    return {item: len(item) for item in items}

# ✅ More complex built-ins
def transform(data: dict[str, list[int]]) -> tuple[int, ...]:
    all_values = [v for values in data.values() for v in values]
    return tuple(all_values)
```

**Why this matters**: Python 3.9+ supports `list[T]` directly. Using `typing.List` is deprecated and adds unnecessary imports.

### Optional and None

```python
# ❌ WRONG: Using Optional without understanding
from typing import Optional

def get_user(id: int) -> Optional[dict]:
    # Returns dict or None, but which dict structure?
    ...

# ✅ CORRECT: Use | None (Python 3.10+) with specific types
from dataclasses import dataclass

@dataclass
class User:
    id: int
    name: str
    email: str

def get_user(id: int) -> User | None:
    # Clear: Returns User or None
    if user_exists(id):
        return User(id=id, name="...", email="...")
    return None

# Using the result
user = get_user(123)
if user is not None:  # Type checker knows user is User here
    print(user.name)
```

**Why this matters**: `Optional[X]` is just `X | None`. Python 3.10+ syntax is clearer. TypedDict or dataclass is better than raw dict.

### Union Types

```python
# ❌ WRONG: Old-style Union (Python <3.10)
from typing import Union

def process(value: Union[str, int, float]) -> Union[str, bool]:
    ...

# ✅ CORRECT: Use | operator (Python 3.10+)
def process(value: str | int | float) -> str | bool:
    if isinstance(value, str):
        return value.upper()
    return value > 0

# ✅ Multiple returns with | None
import json

def parse_config(path: str) -> dict[str, str] | None:
    try:
        with open(path) as f:
            return json.load(f)
    except FileNotFoundError:
        return None
```

**Why this matters**: `|` is PEP 604, available Python 3.10+. Cleaner, more readable, Pythonic. No imports needed.

### Type Aliases

```python
# ❌ WRONG: Reusing complex types
def process_users(users: list[dict[str, str | int]]) -> dict[str, list[dict[str, str | int]]]:
    ...

# ✅ CORRECT: Type alias for readability
UserDict = dict[str, str | int]
UserMap = dict[str, list[UserDict]]

def process_users(users: list[UserDict]) -> UserMap:
    return {"active": [u for u in users if u.get("active")]}

# ✅ BETTER: Use TypedDict for structure
from typing import TypedDict

class User(TypedDict):
    id: int
    name: str
    email: str
    active: bool

def process_users(users: list[User]) -> dict[str, list[User]]:
    return {"active": [u for u in users if u["active"]]}
```

**Why this matters**: Type aliases improve readability. TypedDict provides structure validation for dict types.
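On Python 3.12+, the `type` statement (PEP 695) makes alias intent explicit to both readers and type checkers. A small sketch of the aliases above in that form:

```python
# Python 3.12+ explicit aliases: lazily evaluated, and recognized as
# TypeAliasType by type checkers (no plain-assignment ambiguity)
type UserDict = dict[str, str | int]
type UserMap = dict[str, list[UserDict]]

def process_users(users: list[UserDict]) -> UserMap:
    return {"active": [u for u in users if u.get("active")]}
```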

## Advanced Typing

### Generics with TypeVar

```python
from typing import Generic, TypeVar

T = TypeVar('T')

# ✅ Generic function
def first(items: list[T]) -> T | None:
    return items[0] if items else None

# Usage: type checker knows the return type
names: list[str] = ["Alice", "Bob"]
first_name: str | None = first(names)  # Type checker infers str | None

numbers: list[int] = [1, 2, 3]
first_num: int | None = first(numbers)  # Type checker infers int | None

# ✅ Generic class (old style)
class Container(Generic[T]):
    def __init__(self, value: T) -> None:
        self.value = value

    def get(self) -> T:
        return self.value

# Usage
container: Container[int] = Container(42)
value: int = container.get()  # Type checker knows it's int
```

### Python 3.12+ Generics (PEP 695)

```python
# ❌ WRONG: Old-style generic syntax (still works but verbose)
from typing import TypeVar, Generic

T = TypeVar('T')

class Container(Generic[T]):
    def __init__(self, value: T) -> None:
        self.value = value

# ✅ CORRECT: Python 3.12+ PEP 695 syntax
class Container[T]:
    def __init__(self, value: T) -> None:
        self.value = value

    def get(self) -> T:
        return self.value

# ✅ Generic function with PEP 695
def first[T](items: list[T]) -> T | None:
    return items[0] if items else None

# ✅ Multiple type parameters
class Pair[T, U]:
    def __init__(self, first: T, second: U) -> None:
        self.first = first
        self.second = second

    def get_first(self) -> T:
        return self.first

    def get_second(self) -> U:
        return self.second

# Usage
pair: Pair[str, int] = Pair("answer", 42)
```

**Why this matters**: PEP 695 (Python 3.12+) simplifies generic syntax. No TypeVar needed. Cleaner, more readable.

### Bounded TypeVars

```python
# ✅ TypeVar with bounds (works with old and new syntax)
from typing import TypeVar

# Old-style equivalent of the PEP 695 bound used below
T_Number = TypeVar('T_Number', bound=int | float)

def add[T: int | float](a: T, b: T) -> T:  # Python 3.12+ syntax
    return a + b  # Type checker knows a and b support +


# ✅ Constrained to specific types only
T_Scalar = TypeVar('T_Scalar', int, float, str)

def format_value(value: T_Scalar) -> str:
    return str(value)

# Usage
result: int = add(1, 2)  # OK
result2: float = add(1.5, 2.5)  # OK
# result3 = add("a", "b")  # mypy error: str not compatible with int | float
```

### Protocol (Structural Subtyping)

```python
from typing import Protocol

# ✅ Define protocol for duck typing
class Drawable(Protocol):
    def draw(self) -> None: ...

class Circle:
    def draw(self) -> None:
        print("Drawing circle")

class Square:
    def draw(self) -> None:
        print("Drawing square")

# Works without inheritance - structural typing
def render(shape: Drawable) -> None:
    shape.draw()

# Usage - no need to inherit from Drawable
circle = Circle()
square = Square()
render(circle)  # OK
render(square)  # OK

# ❌ WRONG: Using ABC when Protocol is better
from abc import ABC, abstractmethod

class DrawableABC(ABC):
    @abstractmethod
    def draw(self) -> None: ...

# Now Circle must inherit from DrawableABC - too rigid!
```

**Why this matters**: Protocol enables structural typing (duck typing with type safety). No inheritance needed. More Pythonic than ABC for many cases.
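Protocols are static-only by default; `@runtime_checkable` additionally enables `isinstance()` checks (these verify method presence only, not signatures). A short sketch:

```python
from typing import Protocol, runtime_checkable

@runtime_checkable
class Closeable(Protocol):
    def close(self) -> None: ...

class Connection:
    def close(self) -> None:
        print("closing")

conn = Connection()
if isinstance(conn, Closeable):  # structural check: has a close() method
    conn.close()
```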
+ +### TypedDict + +```python +from typing import TypedDict + +# ✅ Define structured dict types +class UserDict(TypedDict): + id: int + name: str + email: str + active: bool + +def create_user(data: UserDict) -> UserDict: + # Type checker ensures all required keys present + return data + +# Usage +user: UserDict = { + "id": 1, + "name": "Alice", + "email": "alice@example.com", + "active": True +} + +# mypy error: Missing key "active" +# bad_user: UserDict = {"id": 1, "name": "Alice", "email": "alice@example.com"} + +# ✅ Optional fields +class UserDictOptional(TypedDict, total=False): + bio: str + avatar_url: str + +# ✅ Combining required and optional +class User(TypedDict): + id: int + name: str + +class UserWithOptional(User, total=False): + email: str + bio: str +``` + +**Why this matters**: TypedDict provides structure for dict types. Better than `dict[str, Any]`. Type checker validates keys and value types. + + +## Python 3.10+ Features + +### Match Statements (Structural Pattern Matching) + +```python +# ❌ WRONG: Long if-elif chains +def handle_response(response): + if response["status"] == 200: + return response["data"] + elif response["status"] == 404: + return None + elif response["status"] in [500, 502, 503]: + raise ServerError() + else: + raise UnknownError() + +# ✅ CORRECT: Match statement (Python 3.10+) +def handle_response(response: dict[str, Any]) -> Any: + match response["status"]: + case 200: + return response["data"] + case 404: + return None + case 500 | 502 | 503: + raise ServerError() + case _: + raise UnknownError() + +# ✅ Pattern matching with structure +def process_command(command: dict[str, Any]) -> str: + match command: + case {"action": "create", "type": "user", "data": data}: + return create_user(data) + case {"action": "delete", "type": "user", "id": user_id}: + return delete_user(user_id) + case {"action": action, "type": type_}: + return f"Unknown action {action} for {type_}" + case _: + return "Invalid command" + +# ✅ Matching class instances +from dataclasses import dataclass + +@dataclass +class Point: + x: float + y: float + +def describe_point(point: Point) -> str: + match point: + case Point(x=0, y=0): + return "Origin" + case Point(x=0, y=y): + return f"On Y-axis at {y}" + case Point(x=x, y=0): + return f"On X-axis at {x}" + case Point(x=x, y=y) if x == y: + return f"On diagonal at ({x}, {y})" + case Point(x=x, y=y): + return f"At ({x}, {y})" +``` + +**Why this matters**: Match statements are more readable than if-elif chains for complex conditionals. Pattern matching extracts values directly. 
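Match also destructures sequences, which the mapping and class examples above don't cover. A small sketch:

```python
def describe(values: list[int]) -> str:
    match values:
        case []:
            return "empty"
        case [only]:
            return f"single: {only}"
        case [first, *rest]:
            return f"first={first}, plus {len(rest)} more"
    return "unreachable"  # the patterns above are exhaustive for lists
```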
+ + +## Python 3.11 Features + +### Exception Groups + +```python +# ❌ WRONG: Can't handle multiple exceptions from concurrent tasks +async def fetch_all(urls: list[str]) -> list[str]: + results = [] + for url in urls: + try: + results.append(await fetch(url)) + except Exception as e: + # Only logs first error, continues + log.error(f"Failed to fetch {url}: {e}") + return results + +# ✅ CORRECT: Python 3.11 exception groups +async def fetch_all(urls: list[str]) -> list[str]: + async with asyncio.TaskGroup() as tg: + tasks = [tg.create_task(fetch(url)) for url in urls] + # If any fail, TaskGroup raises ExceptionGroup + return [task.result() for task in tasks] + +# Handling exception groups +try: + results = await fetch_all(urls) +except* TimeoutError as e: + # Handle all TimeoutErrors + log.error(f"Timeouts: {e.exceptions}") +except* ConnectionError as e: + # Handle all ConnectionErrors + log.error(f"Connection errors: {e.exceptions}") + +# ✅ Creating exception groups manually +errors = [ValueError("Invalid user 1"), ValueError("Invalid user 2")] +raise ExceptionGroup("Validation errors", errors) +``` + +**Why this matters**: Exception groups handle multiple exceptions from concurrent operations. Essential for structured concurrency (TaskGroup). + +### Better Error Messages + +Python 3.11 improved error messages significantly: + +```python +# Python 3.10 error: +# TypeError: 'NoneType' object is not subscriptable + +# Python 3.11 error with exact location: +# TypeError: 'NoneType' object is not subscriptable +# user["name"] +# ^^^^^^^^^^^^ + +# Helpful for nested expressions +result = data["users"][0]["profile"]["settings"]["theme"] +# Python 3.11 shows exactly which part is None +``` + +**Why this matters**: Better error messages speed up debugging. Exact location highlighted. + + +## Python 3.12 Features + +### PEP 695 Type Parameter Syntax + +Already covered in Generics section above. Key improvement: cleaner syntax for generic classes and functions. + +```python +# Old style (still works) +from typing import TypeVar, Generic +T = TypeVar('T') +class Box(Generic[T]): + ... + +# Python 3.12+ style +class Box[T]: + ... +``` + +### @override Decorator + +```python +from typing import override + +class Base: + def process(self) -> None: + print("Base process") + +class Derived(Base): + @override + def process(self) -> None: # OK - overriding Base.process + print("Derived process") + + @override + def compute(self) -> None: # mypy error: Base has no method 'compute' + print("New method") + +# Why use @override: +# 1. Documents intent explicitly +# 2. Type checker catches typos (processs vs process) +# 3. Catches issues when base class changes +``` + +**Why this matters**: @override makes intent explicit and catches errors when base class changes or method names have typos. 
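+
+The other half of PEP 695, not shown in the generics section above, is the `type` statement for declaring type aliases, including generic ones. A minimal sketch with hypothetical aliases:
+
+```python
+# Python 3.12+ `type` statement (also part of PEP 695)
+type UserId = int
+type Pairs[T] = list[tuple[T, T]]  # aliases can take type parameters
+
+def lookup(user_id: UserId) -> str:
+    return f"user-{user_id}"
+
+def dedupe[T](pairs: Pairs[T]) -> set[tuple[T, T]]:
+    return set(pairs)
+```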
+ +### f-string Improvements + +```python +# Python 3.12 allows more complex expressions in f-strings + +# ✅ Reusing quotes in f-strings +value = "test" +result = f"Value is {value.replace('t', 'T')}" # Works in 3.12 + +# ✅ Multi-line f-strings with backslashes +message = f"Processing { + len(items) +} items" + +# ✅ f-string debugging with = (since 3.8, improved in 3.12) +x = 42 +print(f"{x=}") # Output: x=42 +print(f"{x * 2=}") # Output: x * 2=84 +``` + + +## Static Analysis Setup + +### mypy Configuration + +**File:** `pyproject.toml` + +```toml +[tool.mypy] +python_version = "3.12" +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +warn_redundant_casts = true +warn_unused_ignores = true +warn_no_return = true +warn_unreachable = true +strict_equality = true +strict = true + +# Per-module options +[[tool.mypy.overrides]] +module = "tests.*" +disallow_untyped_defs = false # Tests can be less strict + +[[tool.mypy.overrides]] +module = "third_party.*" +ignore_missing_imports = true +``` + +**Strict mode breakdown:** + +- `strict = true`: Enables all strict checks +- `disallow_untyped_defs`: All functions must have type hints +- `warn_return_any`: Warn when returning Any type +- `warn_unused_ignores`: Warn on unnecessary `# type: ignore` + +**When to use strict mode:** +- New projects: Start strict from day 1 +- Existing projects: Enable incrementally per module + +### pyright Configuration + +**File:** `pyproject.toml` + +```toml +[tool.pyright] +pythonVersion = "3.12" +typeCheckingMode = "strict" +reportMissingTypeStubs = false +reportUnknownMemberType = false + +# Stricter checks +reportUnusedImport = true +reportUnusedVariable = true +reportDuplicateImport = true + +# Exclude patterns +exclude = [ + "**/__pycache__", + "**/node_modules", + ".venv", +] +``` + +**pyright vs mypy:** +- pyright: Faster, better IDE integration, stricter by default +- mypy: More configurable, wider adoption, plugin ecosystem + +**Recommendation**: Use both if possible. pyright in IDE, mypy in CI. + +### Dealing with Untyped Libraries + +```python +# ❌ WRONG: Silencing all errors +import untyped_lib # type: ignore + +# ✅ CORRECT: Create stub file +# File: stubs/untyped_lib.pyi +def important_function(x: int, y: str) -> bool: ... +class ImportantClass: + def method(self, value: int) -> None: ... + +# Configure mypy to find stubs +# pyproject.toml: +# mypy_path = "stubs" + +# ✅ Use # type: ignore with explanation +from untyped_lib import obscure_function # type: ignore[import] # TODO: Add stub + +# ✅ Use cast when library returns Any +from typing import cast +result = cast(list[int], untyped_lib.get_items()) +``` + +**Why this matters**: Stubs preserve type safety even with untyped libraries. Type: ignore should be specific and documented. 
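+
+A third option for untyped libraries is a thin typed wrapper module, so casts and ignores stay confined to one boundary file. A sketch reusing the hypothetical `untyped_lib` from above:
+
+```python
+# File: wrappers/items.py - the only module that imports untyped_lib
+from typing import cast
+
+import untyped_lib  # type: ignore[import]
+
+def get_items() -> list[int]:
+    """Typed facade over untyped_lib.get_items(); the cast is confined here."""
+    return cast(list[int], untyped_lib.get_items())
+```
+
+The rest of the codebase imports from the wrapper and stays fully typed.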
+
+
+## Common Type Errors and Fixes
+
+### Incompatible Types
+
+```python
+# mypy error: Incompatible types in assignment (expression has type "str | None", variable has type "str")
+
+# ❌ WRONG: Ignoring the error
+name: str = get_name()  # type: ignore
+
+# ✅ CORRECT: Handle None case
+name: str | None = get_name()
+if name is not None:
+    process_name(name)
+
+# ✅ CORRECT: Provide default
+name: str = get_name() or "default"
+
+# ✅ CORRECT: Assert if you're certain
+name = get_name()
+assert name is not None
+process_name(name)
+```
+
+### List/Dict Invariance
+
+```python
+# mypy error: Argument has incompatible type "list[int]"; expected "list[float]"
+
+def process_numbers(numbers: list[float]) -> None:
+    ...
+
+int_list: list[int] = [1, 2, 3]
+# process_numbers(int_list)  # mypy error!
+
+# Why: Lists are mutable. If process_numbers did numbers.append(3.14),
+# it would break int_list type safety
+
+# ✅ CORRECT: Use Sequence for read-only
+from collections.abc import Sequence
+
+def process_numbers(numbers: Sequence[float]) -> None:
+    # Can't modify, so safe to accept list[int]
+    ...
+
+process_numbers(int_list)  # OK now
+```
+
+### Missing Return Type
+
+```python
+# mypy error: Function is missing a return type annotation
+
+# ❌ WRONG: No return type
+def calculate(x, y):
+    return x + y
+
+# ✅ CORRECT: Add return type
+def calculate(x: int, y: int) -> int:
+    return x + y
+
+# ✅ Functions that don't return
+def log_message(message: str) -> None:
+    print(message)
+```
+
+### Generic Type Issues
+
+```python
+# mypy error: Need type annotation for 'items'
+
+# ❌ WRONG: No type for empty container
+items = []
+items.append(1)  # mypy can't infer type
+
+# ✅ CORRECT: Explicit type annotation
+items: list[int] = []
+items.append(1)
+
+# ✅ CORRECT: Initialize with values
+items = [1, 2, 3]  # mypy infers list[int]
+```
+
+
+## Anti-Patterns
+
+### Over-Typing
+
+```python
+# ❌ WRONG: Too specific, breaks flexibility
+def process_items(items: list[str]) -> list[str]:
+    return [item.upper() for item in items]
+
+# Can't pass tuple, generator, or other iterables
+
+# ✅ CORRECT: Use abstract types
+from collections.abc import Sequence
+
+def process_items(items: Sequence[str]) -> list[str]:
+    return [item.upper() for item in items]
+
+# Now works with list, tuple, etc.
+```
+
+### Type: Ignore Abuse
+
+```python
+# ❌ WRONG: Blanket ignore
+def sketchy_function(data):  # type: ignore
+    return data["key"]
+
+# ✅ CORRECT: Specific error code plus explanation, on the line mypy flags
+def legacy_integration(data):  # type: ignore[no-untyped-def]  # TODO(#123): Add proper types
+    return data["key"]
+
+# ✅ BETTER: Fix the issue
+def fixed_integration(data: dict[str, str]) -> str:
+    return data["key"]
+```
+
+### Using Any Everywhere
+
+```python
+# ❌ WRONG: Any defeats the purpose of types
+def process(data: Any) -> Any:
+    return data.transform()
+
+# ✅ CORRECT: Use specific types
+from typing import Protocol
+
+class Transformable(Protocol):
+    def transform(self) -> str: ...
+ +def process(data: Transformable) -> str: + return data.transform() +``` + +### Incompatible Generics + +```python +# ❌ WRONG: Generic type mismatch +T = TypeVar('T') + +def combine(a: list[T], b: list[T]) -> list[T]: + return a + b + +ints: list[int] = [1, 2] +strs: list[str] = ["a", "b"] +# result = combine(ints, strs) # mypy error: incompatible types + +# ✅ CORRECT: Different type parameters +T1 = TypeVar('T1') +T2 = TypeVar('T2') + +def combine_any[T1, T2](a: list[T1], b: list[T2]) -> list[T1 | T2]: + return a + b # type: ignore[return-value] # Runtime works, typing is complex + +# ✅ BETTER: Keep types consistent +result_ints = combine(ints, [3, 4]) # OK: both list[int] +``` + + +## Decision Trees + +### When to Use Which Type? + +**For functions accepting sequences:** +``` +Read-only? → Sequence[T] +Need indexing? → Sequence[T] +Need mutation? → list[T] +Large data? → Iterator[T] or Generator[T] +``` + +**For dictionary-like types:** +``` +Known structure? → TypedDict +Dynamic keys? → dict[K, V] +Protocol needed? → Mapping[K, V] (read-only) +Need mutation? → MutableMapping[K, V] +``` + +**For optional values:** +``` +Can be None? → T | None +Has default? → T with default parameter +Really optional? → T | None in TypedDict(total=False) +``` + + +## Integration with Other Skills + +**After using this skill:** +- If setting up project → See @project-structure-and-tooling for mypy in pyproject.toml +- If fixing lint → See @systematic-delinting for type-related lint rules +- If testing typed code → See @testing-and-quality for pytest type checking + +**Before using this skill:** +- Setup mypy → Use @project-structure-and-tooling first + + +## Quick Reference + +| Python Version | Key Type Features | +|----------------|-------------------| +| 3.9 | Built-in generics (`list[T]` instead of `List[T]`) | +| 3.10 | Union with `|`, match statements, ParamSpec | +| 3.11 | Exception groups, Self type, better errors | +| 3.12 | PEP 695 generics, @override decorator | + +**Most impactful features:** +1. `| None` instead of `Optional` (3.10+) +2. Built-in generics: `list[T]` not `List[T]` (3.9+) +3. PEP 695: `class Box[T]` not `class Box(Generic[T])` (3.12+) +4. Match statements for complex conditionals (3.10+) +5. @override for explicit method overriding (3.12+) diff --git a/skills/using-python-engineering/project-structure-and-tooling.md b/skills/using-python-engineering/project-structure-and-tooling.md new file mode 100644 index 0000000..2e59b08 --- /dev/null +++ b/skills/using-python-engineering/project-structure-and-tooling.md @@ -0,0 +1,1593 @@ + +# Project Structure and Tooling + +## Overview + +**Core Principle:** Project setup is infrastructure. Good infrastructure is invisible when working, painful when missing. Set it up once, benefit forever. + +Modern Python projects use `pyproject.toml` for all configuration, `ruff` for linting and formatting, `mypy` for type checking, and `pre-commit` for automated quality gates. The choice between src layout and flat layout determines import patterns and package discoverability. + +This skill covers SETUP of tooling. For FIXING lint warnings systematically, see `systematic-delinting`. + +## When to Use + +**Use this skill when:** +- Starting a new Python project +- "How should I structure my project?" +- Setting up pyproject.toml +- Configuring ruff, mypy, or pre-commit +- "What dependency manager should I use?" 
+- Packaging Python projects for distribution + +**Don't use when:** +- Fixing existing lint warnings (use systematic-delinting) +- Writing type hints (use modern-syntax-and-types) +- Setting up tests (use testing-and-quality) + +**Symptoms triggering this skill:** +- "New Python project setup" +- "Configure ruff/black/mypy" +- "src layout vs flat layout" +- "Poetry vs pip-tools" +- "Package my project" + + +## Project Layout Decisions + +### Src Layout vs Flat Layout + +**Decision tree:** +``` +Distributing as package? → src layout +Testing import behavior? → src layout +Simple script/app? → flat layout +Learning project? → flat layout +Production library? → src layout +``` + +### Flat Layout + +``` +my_project/ +├── pyproject.toml +├── README.md +├── my_package/ +│ ├── __init__.py +│ ├── module1.py +│ └── module2.py +└── tests/ + ├── __init__.py + ├── test_module1.py + └── test_module2.py +``` + +**Pros:** +- Simpler structure +- Easier to understand for beginners +- Fewer directories + +**Cons:** +- Can accidentally import from source instead of installed package +- Harder to test actual install behavior +- Package and project root mixed + +**Use when:** +- Simple applications +- Learning projects +- Not distributing as package + +### Src Layout (Recommended for Libraries) + +``` +my_project/ +├── pyproject.toml +├── README.md +├── src/ +│ └── my_package/ +│ ├── __init__.py +│ ├── module1.py +│ └── module2.py +└── tests/ + ├── __init__.py + ├── test_module1.py + └── test_module2.py +``` + +**Pros:** +- Forces testing against installed package +- Clear separation: src/ is package, tests/ is tests +- Prevents accidental imports from source +- Industry standard for libraries + +**Cons:** +- One extra directory level +- Slightly more complex + +**Use when:** +- Creating a library +- Distributing on PyPI +- Want production-quality setup + +**Why this matters**: Src layout forces you to install your package in editable mode (`pip install -e .`), ensuring tests run against the installed package, not loose Python files. Catches import issues early. + + +## pyproject.toml Fundamentals + +### Basic Structure + +**File:** `pyproject.toml` + +```toml +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "my-package" +version = "0.1.0" +description = "A short description" +authors = [ + {name = "Your Name", email = "your.email@example.com"} +] +readme = "README.md" +requires-python = ">=3.12" +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "requests>=2.31.0", + "pydantic>=2.0.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.4.0", + "mypy>=1.5.0", + "ruff>=0.1.0", +] + +[project.urls] +Homepage = "https://github.com/username/my-package" +Documentation = "https://my-package.readthedocs.io" +Repository = "https://github.com/username/my-package" + +[tool.ruff] +target-version = "py312" +line-length = 140 + +[tool.mypy] +python_version = "3.12" +strict = true +``` + +**Why this matters**: Single file for all configuration. No setup.py, setup.cfg, or scattered config files. Modern standard (PEP 621). 
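+
+Once the package is installed, the metadata declared here is queryable at runtime. A small sketch, assuming the `my-package` example above:
+
+```python
+from importlib.metadata import metadata, version
+
+# Reads the installed distribution's metadata, as declared in pyproject.toml
+print(version("my-package"))              # "0.1.0"
+print(metadata("my-package")["Summary"])  # "A short description"
+```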
+ +### Build System Selection + +**hatchling (recommended for most projects):** +```toml +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" +``` + +**setuptools (traditional, still common):** +```toml +[build-system] +requires = ["setuptools>=68.0.0", "wheel"] +build-backend = "setuptools.build_meta" +``` + +**poetry (if using Poetry for dependencies):** +```toml +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" +``` + +**Decision tree:** +``` +Using Poetry for deps? → poetry-core +Need advanced features? → setuptools +Simple project? → hatchling +``` + +**Why hatchling?** +- Modern, fast, minimal configuration +- Good defaults +- Works with standard tools +- No legacy baggage + +### Version Management + +**Static version:** +```toml +[project] +version = "0.1.0" +``` + +**Dynamic version from file:** +```toml +[project] +dynamic = ["version"] + +[tool.hatch.version] +path = "src/my_package/__init__.py" +``` + +**File:** `src/my_package/__init__.py` +```python +__version__ = "0.1.0" +``` + +**Dynamic version from git tag:** +```toml +[build-system] +requires = ["hatchling", "hatch-vcs"] + +[tool.hatch.version] +source = "vcs" +``` + +**Recommendation**: Start with static version. Add dynamic versioning when you need it. + + +## Ruff Configuration + +### Core Configuration + +**File:** `pyproject.toml` + +```toml +[tool.ruff] +target-version = "py312" +line-length = 140 # Note: 140, not default 88 + +# Exclude patterns +exclude = [ + ".git", + ".venv", + "__pycache__", + "build", + "dist", + "*.egg-info", +] + +[tool.ruff.lint] +# Enable rule sets +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "N", # pep8-naming + "UP", # pyupgrade + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "SIM", # flake8-simplify + "RUF", # ruff-specific +] + +# Ignore specific rules +ignore = [ + "E501", # Line too long (handled by formatter) +] + +# Per-file ignores +[tool.ruff.lint.per-file-ignores] +"tests/**/*.py" = [ + "S101", # Allow assert in tests +] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +line-ending = "auto" +``` + +**Why line-length = 140?** +- Modern screens are wide +- Default 88 is too restrictive for complex type hints +- 140 balances readability and fitting multiple windows +- Industry trend toward 100-140 + +**Rule set breakdown:** + +| Set | Purpose | Example Rules | +|-----|---------|---------------| +| E/W | PEP 8 style | Whitespace, indentation | +| F | Logical errors | Undefined names, unused imports | +| I | Import sorting | isort compatibility | +| N | Naming | PEP 8 naming conventions | +| UP | Python upgrades | Use Python 3.10+ features | +| B | Bug detection | Likely bugs (mutable defaults) | +| C4 | Comprehensions | Better list/dict comprehensions | +| SIM | Simplification | Simplify complex code | +| RUF | Ruff-specific | Ruff's custom checks | + +### Import Sorting (isort compatibility) + +```toml +[tool.ruff.lint.isort] +known-first-party = ["my_package"] +known-third-party = ["numpy", "pandas"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] +lines-after-imports = 2 +``` + +**Expected import order:** +```python +# Future imports +from __future__ import annotations + +# Standard library +import json +import sys +from pathlib import Path + +# Third-party +import numpy as np +import pandas as pd +import requests + +# First-party +from my_package import utils +from my_package.core 
import Engine + + +def my_function(): + ... +``` + +**Why this matters**: Consistent import ordering improves readability and prevents merge conflicts. + +### Advanced Configuration + +```toml +[tool.ruff.lint.flake8-bugbear] +# Extend immutable calls (prevent mutation) +extend-immutable-calls = ["fastapi.Depends", "fastapi.Query"] + +[tool.ruff.lint.flake8-quotes] +docstring-quotes = "double" +inline-quotes = "double" + +[tool.ruff.lint.mccabe] +max-complexity = 10 + +[tool.ruff.lint.pydocstyle] +convention = "google" # or "numpy", "pep257" +``` + +**Complexity limit explanation:** +- Complexity < 10: Good +- 10-15: Acceptable, monitor +- 15+: Refactor + + +## Type Checking with mypy + +### Strict Configuration + +**File:** `pyproject.toml` + +```toml +[tool.mypy] +python_version = "3.12" +strict = true + +# Strict mode includes: +# - warn_return_any +# - warn_unused_configs +# - disallow_untyped_defs +# - disallow_any_generics +# - disallow_subclassing_any +# - disallow_untyped_calls +# - disallow_untyped_decorators +# - disallow_incomplete_defs +# - check_untyped_defs +# - warn_redundant_casts +# - warn_unused_ignores +# - warn_no_return +# - warn_unreachable +# - strict_equality + +# Exclude patterns +exclude = [ + "^build/", + "^dist/", +] + +# Per-module overrides +[[tool.mypy.overrides]] +module = "tests.*" +disallow_untyped_defs = false # Tests can be less strict + +[[tool.mypy.overrides]] +module = "third_party.*" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "untyped_library" +ignore_missing_imports = true +``` + +### Incremental Adoption + +**Start lenient, get stricter:** + +```toml +# Phase 1: Basic type checking +[tool.mypy] +python_version = "3.12" +warn_return_any = true +warn_unused_configs = true + +# Phase 2: Add more checks +check_untyped_defs = true +warn_redundant_casts = true +warn_unused_ignores = true + +# Phase 3: Require types +disallow_untyped_defs = true +disallow_incomplete_defs = true + +# Phase 4: Full strict +strict = true +``` + +**Per-module migration:** + +```toml +[tool.mypy] +python_version = "3.12" +# Default: lenient + +[[tool.mypy.overrides]] +module = "my_package.new_module" +strict = true # New code is strict + +[[tool.mypy.overrides]] +module = "my_package.legacy" +ignore_errors = true # TODO: Fix legacy code +``` + +**Why this matters**: Incremental adoption prevents overwhelming backlog of type errors. Strict mode for new code, lenient for legacy. + + +## Dependency Management + +### pip-tools + +**Recommended for most projects. 
Simple, standard, no lock-in.** + +**Setup:** +```bash +pip install pip-tools +``` + +**File:** `requirements.in` (high-level dependencies) +``` +requests>=2.31.0 +pydantic>=2.0.0 +``` + +**Generate locked requirements:** +```bash +pip-compile requirements.in +# Creates requirements.txt with exact versions +``` + +**File:** `requirements.txt` (auto-generated) +``` +certifi==2023.7.22 + # via requests +charset-normalizer==3.2.0 + # via requests +idna==3.4 + # via requests +pydantic==2.3.0 + # via -r requirements.in +pydantic-core==2.6.3 + # via pydantic +requests==2.31.0 + # via -r requirements.in +urllib3==2.0.4 + # via requests +``` + +**Development dependencies:** + +**File:** `requirements-dev.in` +``` +-c requirements.txt # Constrain to production versions +pytest>=7.4.0 +mypy>=1.5.0 +ruff>=0.1.0 +``` + +**Compile:** +```bash +pip-compile requirements-dev.in +``` + +**Sync environment:** +```bash +pip-sync requirements.txt requirements-dev.txt +``` + +**Why pip-tools?** +- Uses standard requirements.txt format +- No proprietary lock file +- Simple mental model +- Works everywhere +- No lock-in + +### Poetry + +**Better for libraries, more features, heavier.** + +**Setup:** +```bash +curl -sSL https://install.python-poetry.org | python3 - +``` + +**File:** `pyproject.toml` +```toml +[tool.poetry] +name = "my-package" +version = "0.1.0" +description = "" +authors = ["Your Name "] + +[tool.poetry.dependencies] +python = "^3.12" +requests = "^2.31.0" +pydantic = "^2.0.0" + +[tool.poetry.group.dev.dependencies] +pytest = "^7.4.0" +mypy = "^1.5.0" +ruff = "^0.1.0" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" +``` + +**Commands:** +```bash +poetry install # Install dependencies +poetry add requests # Add dependency +poetry add --group dev pytest # Add dev dependency +poetry update # Update dependencies +poetry lock # Update lock file +poetry build # Build package +poetry publish # Publish to PyPI +``` + +**Why Poetry?** +- Manages dependencies AND build system +- Better dependency resolution +- Built-in virtual environment management +- Integrated publishing + +**Why NOT Poetry?** +- Heavier tool +- Proprietary lock format +- Slower than pip-tools +- Lock-in to Poetry workflow + +### Comparison Decision Tree + +``` +Publishing to PyPI? → Poetry (integrated workflow) +Simple project? → pip-tools (minimal) +Need reproducible builds? → Either (both lock) +Team unfamiliar with tools? → pip-tools (simpler) +Complex dependency constraints? → Poetry (better resolver) +CI/CD integration? 
→ pip-tools (faster) +``` + + +## Pre-commit Hooks + +### Setup + +**Install:** +```bash +pip install pre-commit +``` + +**File:** `.pre-commit-config.yaml` + +```yaml +repos: + # Ruff for linting and formatting + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.6 + hooks: + # Run linter + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + # Run formatter + - id: ruff-format + + # mypy for type checking + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.7.0 + hooks: + - id: mypy + additional_dependencies: [types-requests] + + # Standard pre-commit hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-toml + - id: check-added-large-files + args: [--maxkb=1000] + - id: check-merge-conflict + - id: check-case-conflict + + # Python-specific + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + name: isort (python) +``` + +**Install hooks:** +```bash +pre-commit install +``` + +**Run manually:** +```bash +pre-commit run --all-files +``` + +**Update hooks:** +```bash +pre-commit autoupdate +``` + +### Hook Selection Strategy + +**Essential hooks (always use):** +- `ruff` - Linting and formatting +- `trailing-whitespace` - Clean files +- `end-of-file-fixer` - Proper file endings +- `check-yaml` - YAML syntax +- `check-merge-conflict` - Prevent merge markers + +**Recommended hooks:** +- `mypy` - Type checking +- `check-toml` - pyproject.toml syntax +- `check-added-large-files` - Prevent large files + +**Optional hooks:** +- `pytest` - Run tests (slow!) +- `bandit` - Security checks +- `interrogate` - Docstring coverage + +**Why NOT include slow hooks:** +```yaml +# ❌ WRONG: Tests in pre-commit (too slow) +- repo: local + hooks: + - id: pytest + name: pytest + entry: pytest + language: system + pass_filenames: false +``` + +**Why this matters**: Pre-commit hooks run on EVERY commit. Keep them fast (<5 seconds total). Run tests in CI, not pre-commit. + +### Skipping Hooks + +**Skip all hooks (use sparingly):** +```bash +git commit --no-verify -m "Quick fix" +``` + +**Skip specific hook:** +```bash +SKIP=mypy git commit -m "WIP: type errors to fix" +``` + +**When to skip:** +- WIP commits on feature branch (will fix before PR) +- Emergency hotfixes (fix hooks after) +- Known false positives (fix hook config instead) + +**When NOT to skip:** +- Merging to main +- Creating PR +- "Too lazy to fix" ← Never valid reason + + +## Formatting and Linting Workflow + +### Ruff as Formatter and Linter + +**Ruff replaces: black, isort, flake8, pyupgrade, and more.** + +**Format code:** +```bash +ruff format . +``` + +**Check linting:** +```bash +ruff check . +``` + +**Fix auto-fixable issues:** +```bash +ruff check --fix . +``` + +**Show what would fix without changing:** +```bash +ruff check --fix --diff . 
+``` + +### IDE Integration + +**VS Code** (`.vscode/settings.json`): +```json +{ + "python.linting.enabled": true, + "python.linting.ruffEnabled": true, + "python.formatting.provider": "none", + "editor.formatOnSave": true, + "editor.defaultFormatter": "charliermarsh.ruff", + "[python]": { + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.fixAll": true, + "source.organizeImports": true + }, + "editor.defaultFormatter": "charliermarsh.ruff" + } +} +``` + +**PyCharm:** +- Install Ruff plugin +- Settings → Tools → Ruff → Enable +- Settings → Tools → Actions on Save → Ruff format + +**Why this matters**: Format on save prevents formatting commits. Linting in IDE catches issues before commit. + + +## Packaging and Distribution + +### Minimal Package + +**File structure:** +``` +my_package/ +├── pyproject.toml +├── README.md +├── LICENSE +└── src/ + └── my_package/ + ├── __init__.py + └── main.py +``` + +**File:** `pyproject.toml` +```toml +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "my-package" +version = "0.1.0" +description = "A short description" +readme = "README.md" +requires-python = ">=3.12" +license = {text = "MIT"} +authors = [ + {name = "Your Name", email = "your.email@example.com"} +] +classifiers = [ + "Programming Language :: Python :: 3.12", +] +dependencies = [] + +[project.urls] +Homepage = "https://github.com/username/my-package" +``` + +**Build:** +```bash +pip install build +python -m build +``` + +**Creates:** +``` +dist/ +├── my_package-0.1.0-py3-none-any.whl +└── my_package-0.1.0.tar.gz +``` + +### Publishing to PyPI + +**Test on TestPyPI first:** + +```bash +pip install twine + +# Upload to TestPyPI +twine upload --repository testpypi dist/* + +# Test install +pip install --index-url https://test.pypi.org/simple/ my-package +``` + +**Publish to real PyPI:** + +```bash +twine upload dist/* +``` + +**Better: Use GitHub Actions** + +**File:** `.github/workflows/publish.yml` +```yaml +name: Publish to PyPI + +on: + release: + types: [published] + +jobs: + publish: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: "3.12" + + - name: Install build + run: pip install build twine + + - name: Build package + run: python -m build + + - name: Publish to PyPI + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: twine upload dist/* +``` + +**Why this matters**: Automated publishing on GitHub release. Consistent process, no manual uploads. 
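+
+Before uploading, a quick smoke test in a fresh virtual environment confirms the built wheel actually imports. A minimal sketch, assuming the `my-package`/`my_package` names from above:
+
+```python
+# smoke_test.py - run after installing the wheel from dist/
+from importlib.metadata import version
+
+import my_package  # assumed import name
+
+assert version("my-package") == "0.1.0", "installed version mismatch"
+print("wheel installs and imports cleanly")
+```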
+ +### Entry Points + +**Console scripts:** + +```toml +[project.scripts] +my-cli = "my_package.cli:main" +my-tool = "my_package.tools:run" +``` + +**Creates command-line tools:** +```bash +pip install my-package +my-cli --help # Runs my_package.cli:main() +``` + +**File:** `src/my_package/cli.py` +```python +def main() -> None: + print("Hello from my-cli!") + +if __name__ == "__main__": + main() +``` + + +## Complete Example: Production Project + +### Project Structure + +``` +awesome_project/ +├── .github/ +│ └── workflows/ +│ ├── ci.yml +│ └── publish.yml +├── .pre-commit-config.yaml +├── pyproject.toml +├── README.md +├── LICENSE +├── .gitignore +├── src/ +│ └── awesome_project/ +│ ├── __init__.py +│ ├── core.py +│ ├── utils.py +│ └── py.typed +└── tests/ + ├── __init__.py + ├── test_core.py + └── test_utils.py +``` + +### pyproject.toml (Complete) + +```toml +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "awesome-project" +version = "0.1.0" +description = "An awesome Python project" +readme = "README.md" +requires-python = ">=3.12" +license = {text = "MIT"} +authors = [ + {name = "Your Name", email = "your.email@example.com"} +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3 :: Only", + "Typing :: Typed", +] +dependencies = [ + "requests>=2.31.0", + "pydantic>=2.0.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.4.0", + "pytest-cov>=4.1.0", + "mypy>=1.5.0", + "ruff>=0.1.0", + "pre-commit>=3.5.0", + "types-requests>=2.31.0", +] + +[project.urls] +Homepage = "https://github.com/username/awesome-project" +Documentation = "https://awesome-project.readthedocs.io" +Repository = "https://github.com/username/awesome-project" +Issues = "https://github.com/username/awesome-project/issues" + +[project.scripts] +awesome = "awesome_project.cli:main" + +# Ruff configuration +[tool.ruff] +target-version = "py312" +line-length = 140 + +exclude = [ + ".git", + ".venv", + "__pycache__", + "build", + "dist", + "*.egg-info", +] + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "W", # pycodestyle warnings + "F", # pyflakes + "I", # isort + "N", # pep8-naming + "UP", # pyupgrade + "B", # flake8-bugbear + "C4", # flake8-comprehensions + "SIM", # flake8-simplify + "RUF", # ruff-specific +] + +ignore = [ + "E501", # Line too long (handled by formatter) +] + +[tool.ruff.lint.per-file-ignores] +"tests/**/*.py" = [ + "S101", # Allow assert in tests +] + +[tool.ruff.lint.isort] +known-first-party = ["awesome_project"] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" + +# mypy configuration +[tool.mypy] +python_version = "3.12" +strict = true +warn_return_any = true +warn_unused_configs = true +disallow_untyped_defs = true + +[[tool.mypy.overrides]] +module = "tests.*" +disallow_untyped_defs = false + +# pytest configuration +[tool.pytest.ini_options] +testpaths = ["tests"] +python_files = ["test_*.py"] +python_functions = ["test_*"] +addopts = [ + "--strict-markers", + "--strict-config", + "--cov=awesome_project", + "--cov-report=term-missing", +] + +# Coverage configuration +[tool.coverage.run] +source = ["src"] +omit = ["tests/*"] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "raise AssertionError", + "raise NotImplementedError", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] +``` + +### 
.pre-commit-config.yaml (Complete) + +```yaml +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.6 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format + + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.7.0 + hooks: + - id: mypy + additional_dependencies: [types-requests, pydantic] + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-toml + - id: check-added-large-files + - id: check-merge-conflict +``` + +### .gitignore (Complete) + +```gitignore +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +venv/ +env/ +ENV/ +.venv + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Testing +.pytest_cache/ +.coverage +htmlcov/ +.tox/ + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Ruff +.ruff_cache/ + +# OS +.DS_Store +Thumbs.db +``` + +### CI Workflow + +**File:** `.github/workflows/ci.yml` + +```yaml +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: ["3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + pip install -e ".[dev]" + + - name: Run ruff (lint) + run: ruff check . + + - name: Run ruff (format check) + run: ruff format --check . + + - name: Run mypy + run: mypy src/ + + - name: Run pytest + run: pytest --cov --cov-report=xml + + - name: Upload coverage + uses: codecov/codecov-action@v3 + if: matrix.os == 'ubuntu-latest' +``` + + +## Anti-Patterns + +### Scattered Configuration Files + +``` +# ❌ WRONG: Configuration in multiple files +setup.py +setup.cfg +requirements.txt +requirements-dev.txt +.flake8 +mypy.ini +pytest.ini +.isort.cfg +``` + +```toml +# ✅ CORRECT: Single pyproject.toml +# All configuration in one place +[tool.ruff] +... + +[tool.mypy] +... + +[tool.pytest.ini_options] +... +``` + +**Why this matters**: Single source of truth. Easier to maintain, version control, and share. + +### Not Using Src Layout for Libraries + +``` +# ❌ WRONG: Flat layout for distributed package +my_package/ +├── my_package/ +│ └── __init__.py +└── tests/ +``` + +**Problem**: Tests might pass locally but fail when installed: +```bash +# Works locally (imports from source) +pytest # PASS + +# Fails when installed (package not installed correctly) +pip install . +python -c "import my_package" # ImportError +``` + +``` +# ✅ CORRECT: Src layout forces proper install +my_package/ +├── src/ +│ └── my_package/ +│ └── __init__.py +└── tests/ +``` + +**Why this matters**: Src layout catches packaging issues early by forcing editable install. + +### Too Many Dependencies + +```toml +# ❌ WRONG: Kitchen sink approach +dependencies = [ + "requests", + "httpx", # Both requests and httpx? + "urllib3", # Already included with requests + "pandas", + "polars", # Both pandas and polars? + "numpy", # Included with pandas + # ... 
50 more +] +``` + +```toml +# ✅ CORRECT: Minimal direct dependencies +dependencies = [ + "requests>=2.31.0", # Only what YOU directly use + "pydantic>=2.0.0", +] + +# Transitive deps (requests → urllib3) handled automatically +``` + +**Why this matters**: More dependencies = more conflict risk, slower installs, larger attack surface. + +### Ignoring Lock Files + +```bash +# ❌ WRONG: Install from requirements.in +pip install -r requirements.in +``` + +**Problem**: Gets different versions each time, breaks reproducibility. + +```bash +# ✅ CORRECT: Install from locked requirements +pip install -r requirements.txt +``` + +**Why this matters**: Locked dependencies ensure reproducible builds and deployments. + +### Pre-commit Hooks Too Slow + +```yaml +# ❌ WRONG: Run full test suite on every commit +repos: + - repo: local + hooks: + - id: pytest + name: pytest + entry: pytest tests/ + language: system + pass_filenames: false +``` + +**Problem**: 5-minute test suite blocks every commit. Developers will skip hooks. + +```yaml +# ✅ CORRECT: Fast checks only +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.6 + hooks: + - id: ruff + - id: ruff-format +``` + +**Why this matters**: Pre-commit must be fast (<5s total). Run tests in CI, not pre-commit. + + +## Decision Trees + +### Choosing Project Layout + +``` +├─ Distributing as package? +│ ├─ Yes → src layout +│ └─ No +│ ├─ Complex project? → src layout (future-proof) +│ └─ Simple script? → flat layout +``` + +### Choosing Dependency Manager + +``` +├─ Publishing to PyPI? +│ ├─ Yes → Poetry (integrated workflow) +│ └─ No +│ ├─ Need simple workflow? → pip-tools +│ ├─ Complex constraints? → Poetry +│ └─ Existing requirements.txt? → pip-tools +``` + +### Choosing Build Backend + +``` +├─ Using Poetry? → poetry-core +├─ Need setuptools features? → setuptools +└─ Simple project? → hatchling +``` + +### Line Length Configuration + +``` +├─ Team preference for 88? → 88 +├─ Complex type hints? → 120-140 +├─ Modern screens? → 120-140 +└─ No strong opinion? → 120 +``` + + +## Common Workflows + +### New Project from Scratch + +```bash +# 1. Create structure +mkdir my_project +cd my_project +git init + +# 2. Create directory structure +mkdir -p src/my_project tests + +# 3. Create pyproject.toml (see example above) +# 4. Create .pre-commit-config.yaml (see example above) +# 5. Create .gitignore (see example above) + +# 6. Initialize package +cat > src/my_project/__init__.py << 'EOF' +"""My awesome project.""" +__version__ = "0.1.0" +EOF + +# 7. Create py.typed marker for type checking +touch src/my_project/py.typed + +# 8. Install in editable mode +pip install -e ".[dev]" + +# 9. Install pre-commit hooks +pre-commit install + +# 10. First commit +git add . +git commit -m "feat: Initial project structure" +``` + +### Adding Ruff to Existing Project + +```bash +# 1. Install ruff +pip install ruff + +# 2. Add to pyproject.toml +cat >> pyproject.toml << 'EOF' +[tool.ruff] +target-version = "py312" +line-length = 140 + +[tool.ruff.lint] +select = ["E", "W", "F", "I", "N", "UP", "B", "C4", "SIM", "RUF"] +ignore = ["E501"] +EOF + +# 3. Check what would change +ruff check --diff . + +# 4. Apply fixes +ruff check --fix . + +# 5. Format code +ruff format . + +# 6. 
Add to pre-commit +cat >> .pre-commit-config.yaml << 'EOF' +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.6 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format +EOF + +pre-commit install +``` + +### Migrating from Black/Flake8 to Ruff + +```bash +# 1. Install ruff +pip install ruff + +# 2. Remove old tools +pip uninstall black flake8 isort pyupgrade + +# 3. Convert black config to ruff +# Old .flake8: +# [flake8] +# max-line-length = 88 +# ignore = E203, W503 + +# New pyproject.toml: +[tool.ruff] +line-length = 88 + +[tool.ruff.lint] +ignore = ["E203", "W503"] + +# 4. Remove old config files +rm .flake8 .isort.cfg + +# 5. Update pre-commit +# Replace black, isort, flake8 hooks with ruff + +# 6. Reformat everything +ruff format . +``` + + +## Integration with Other Skills + +**Before using this skill:** +- No prerequisites (start here for new projects) + +**After using this skill:** +- Fix lint warnings → See `systematic-delinting` +- Add type hints → See `modern-syntax-and-types` +- Setup testing → See `testing-and-quality` +- Add CI/CD → (Future skill) + +**Cross-references:** +- Type checking setup → `modern-syntax-and-types` for type hint patterns +- Delinting process → `systematic-delinting` for fixing warnings +- Testing setup → `testing-and-quality` for pytest configuration + + +## Quick Reference + +### Essential Commands + +```bash +# Project setup +pip install -e ".[dev]" # Editable install with dev deps +pre-commit install # Install git hooks + +# Daily workflow +ruff check . # Lint +ruff check --fix . # Lint and auto-fix +ruff format . # Format +mypy src/ # Type check +pytest # Run tests + +# Pre-commit +pre-commit run --all-files # Run all hooks manually +pre-commit autoupdate # Update hook versions + +# Dependency management (pip-tools) +pip-compile requirements.in # Lock dependencies +pip-compile requirements-dev.in # Lock dev dependencies +pip-sync requirements.txt requirements-dev.txt # Sync environment + +# Building and publishing +python -m build # Build package +twine upload dist/* # Upload to PyPI +``` + +### Configuration Checklist + +**Minimum viable pyproject.toml:** +- [x] `[build-system]` - hatchling or setuptools +- [x] `[project]` - name, version, dependencies +- [x] `[tool.ruff]` - target-version, line-length +- [x] `[tool.mypy]` - python_version, strict + +**Production-ready additions:** +- [x] `[project.optional-dependencies]` - dev dependencies +- [x] `[project.scripts]` - console scripts +- [x] `[tool.ruff.lint]` - rule selection +- [x] `[tool.pytest.ini_options]` - test configuration +- [x] `.pre-commit-config.yaml` - automated checks +- [x] `.gitignore` - ignore build artifacts +- [x] `src/package/py.typed` - typed package marker + +### Ruff Rule Sets Quick Reference + +| Code | Name | Purpose | +|------|------|---------| +| E/W | pycodestyle | PEP 8 style | +| F | Pyflakes | Logical errors | +| I | isort | Import ordering | +| N | pep8-naming | Naming conventions | +| UP | pyupgrade | Modern syntax | +| B | flake8-bugbear | Bug detection | +| C4 | flake8-comprehensions | Better comprehensions | +| SIM | flake8-simplify | Code simplification | +| RUF | Ruff | Ruff-specific | + +**Enable progressively:** +1. Start: `["E", "W", "F"]` - Core errors +2. Add: `["I", "N", "UP"]` - Style and modernization +3. Add: `["B", "C4", "SIM"]` - Quality improvements +4. 
Add: `["RUF"]` - Ruff-specific checks + + +## Why This Matters: Real-World Impact + +**Good tooling setup prevents:** +- ❌ "Works on my machine" - Locked dependencies ensure consistency +- ❌ Import errors in production - Src layout catches packaging issues +- ❌ Style arguments in PRs - Automated formatting ends debates +- ❌ Type errors in production - mypy catches before deploy +- ❌ Breaking dependencies - Lock files ensure reproducibility +- ❌ Manual quality checks - Pre-commit automates enforcement + +**Good tooling setup enables:** +- ✅ Fast onboarding - `pip install -e ".[dev]"` gets developers running +- ✅ Consistent code style - Ruff format ensures uniformity +- ✅ Early bug detection - Type checking and linting catch issues +- ✅ Confident refactoring - Types and tests enable safe changes +- ✅ Automated publishing - CI/CD handles releases +- ✅ Professional polish - Well-configured projects attract contributors + +**Time investment:** +- Initial setup: 1-2 hours +- Saved per month: 10+ hours (no style debates, fewer bugs, faster onboarding) +- ROI: Positive after first month, compounds over project lifetime diff --git a/skills/using-python-engineering/resolving-mypy-errors.md b/skills/using-python-engineering/resolving-mypy-errors.md new file mode 100644 index 0000000..744f07e --- /dev/null +++ b/skills/using-python-engineering/resolving-mypy-errors.md @@ -0,0 +1,1120 @@ + +# Resolving Mypy Errors + +## Overview + +**Core Principle:** Type errors are discovered through static analysis but must be resolved systematically. Don't play whack-a-mole with type errors. Understand the root cause, fix categories of errors together, and build type safety incrementally. + +Mypy errors indicate mismatches between your code's runtime behavior and its static type annotations. Each error is a potential runtime bug caught at development time. Resolving mypy errors is not about silencing the checker—it's about making implicit contracts explicit and catching bugs before they reach production. + +This skill covers the PROCESS of resolving mypy errors. For type hint SYNTAX and patterns, see `modern-syntax-and-types`. For initial mypy SETUP, see `project-structure-and-tooling`. + +## When to Use + +**Use this skill when:** +- Facing mypy errors after running `mypy .` +- "mypy found 150 errors" and need systematic approach +- Don't understand what mypy error means +- Deciding between fixing vs `# type: ignore` +- Adding types to legacy untyped code +- Type errors after refactoring +- Configuring mypy strictness levels + +**Don't use when:** +- Learning type hint syntax (use `modern-syntax-and-types`) +- Initial project setup (use `project-structure-and-tooling`) +- Runtime type checking needed (use pydantic or similar) + +**Symptoms triggering this skill:** +- "error: Incompatible types in assignment" +- "error: Argument has incompatible type" +- "error: Function is missing a return type annotation" +- "How to fix 100+ mypy errors?" +- "When should I use type: ignore?" +- "Add types to legacy code" + + +## Understanding Mypy Error Messages + +### Error Message Anatomy + +```python +# Example code +def greet(name: str) -> str: + return f"Hello, {name.upper()}" + +result: int = greet("Alice") # Type error! +``` + +**Mypy output:** +``` +example.py:4: error: Incompatible types in assignment (expression has type "str", variable has type "int") [assignment] +``` + +**Anatomy breakdown:** +``` +example.py:4: ← File and line number +error: ← Severity (error, note, warning) +Incompatible types... 
← Human-readable description +(expression has...) ← Detailed context +[assignment] ← Error code for filtering +``` + +**Use error codes to:** +- Filter specific errors: `mypy --disable-error-code=assignment` +- Ignore specific error types: `# type: ignore[assignment]` +- Research error meaning: Search "mypy assignment error code" + +### Common Error Categories + +**1. Incompatible Types** +```python +# error: Incompatible types in assignment +x: int = "hello" # str assigned to int + +# Fix: Match the types +x: str = "hello" +# OR provide correct type +x: int = 42 +``` + +**2. Missing Type Annotations** +```python +# error: Function is missing a return type annotation +def calculate(x, y): # No types! + return x + y + +# Fix: Add type hints +def calculate(x: int, y: int) -> int: + return x + y +``` + +**3. Argument Type Mismatch** +```python +def process(value: int) -> None: + print(value * 2) + +# error: Argument 1 has incompatible type "str"; expected "int" +process("hello") + +# Fix: Pass correct type +process(42) +# OR change function signature if needed +def process(value: int | str) -> None: + if isinstance(value, int): + print(value * 2) + else: + print(value) +``` + +**4. None/Optional Issues** +```python +def get_user(id: int) -> dict | None: + if id > 0: + return {"name": "Alice"} + return None + +# error: Item "None" has no attribute "get" +user = get_user(1) +name = user.get("name") # user might be None! + +# Fix: Check for None +user = get_user(1) +if user is not None: # Type narrowing + name = user.get("name") # OK: user is dict here +``` + +**5. List/Dict Invariance** +```python +def process_numbers(nums: list[float]) -> None: + nums.append(3.14) + +# error: Argument 1 has incompatible type "list[int]"; expected "list[float]" +int_list: list[int] = [1, 2, 3] +process_numbers(int_list) # Would break int_list! + +# Fix: Use Sequence for read-only +from collections.abc import Sequence + +def process_numbers(nums: Sequence[float]) -> None: + # Can't modify, so safe + total = sum(nums) +``` + + +## Systematic Resolution Process + +### Phase 1: Assess the Scope + +**Run mypy with summary:** +```bash +mypy . --show-error-codes --show-error-context + +# Count errors by type +mypy . --show-error-codes 2>&1 | grep -o '\[.*\]' | sort | uniq -c | sort -rn +``` + +**Output example:** +``` + 45 [assignment] + 32 [arg-type] + 28 [return-value] + 15 [union-attr] + 12 [var-annotated] + 8 [no-untyped-def] +``` + +**Prioritize by:** +1. High-impact errors (no-untyped-def, return-value) +2. High-frequency errors (most common first) +3. Related errors (fix patterns together) + +### Phase 2: Fix by Category + +**Strategy: Fix one error TYPE at a time, not one file at a time.** + +**Category 1: Missing Annotations (no-untyped-def)** + +Easiest to fix, highest impact. These are functions without type hints. + +```python +# ❌ WRONG: No types +def calculate_total(items, tax_rate): + return sum(item.price for item in items) * (1 + tax_rate) + +# ✅ CORRECT: Add types +def calculate_total(items: list[Item], tax_rate: float) -> float: + return sum(item.price for item in items) * (1 + tax_rate) +``` + +**Workflow:** +```bash +# Find all no-untyped-def errors +mypy . 2>&1 | grep '\[no-untyped-def\]' > untyped.txt + +# Fix them systematically +# Use IDE to jump to each location +``` + +**Category 2: Return Type Issues (return-value)** + +Function returns wrong type or inconsistent types. 
+ +```python +# ❌ WRONG: Inconsistent returns +def get_user(id: int) -> dict: # Says always dict + if id < 0: + return None # But sometimes None! + return {"id": id} + +# ✅ CORRECT: Accurate return type +def get_user(id: int) -> dict | None: + if id < 0: + return None + return {"id": id} +``` + +**Category 3: Argument Type Mismatches (arg-type)** + +Called function with wrong argument types. + +```python +# ❌ WRONG: Passing wrong type +def double(x: int) -> int: + return x * 2 + +result = double("5") # String, not int! + +# ✅ FIX 1: Pass correct type +result = double(5) + +# ✅ FIX 2: Convert at call site +result = double(int("5")) + +# ✅ FIX 3: Change function to accept both +def double(x: int | str) -> int: + if isinstance(x, str): + x = int(x) + return x * 2 +``` + +**Category 4: Union/Optional Handling (union-attr)** + +Accessing attributes on union types without narrowing. + +```python +# ❌ WRONG: No type narrowing +def process(value: int | str) -> str: + return value.upper() # Error: int has no upper() + +# ✅ CORRECT: Type narrowing with isinstance +def process(value: int | str) -> str: + if isinstance(value, str): + return value.upper() + return str(value) + +# ✅ CORRECT: Type narrowing with match (Python 3.10+) +def process(value: int | str) -> str: + match value: + case str(): + return value.upper() + case int(): + return str(value) +``` + +### Phase 3: Handle Edge Cases + +After fixing categories, tackle one-off errors: + +**One-off errors strategy:** +1. Read error carefully +2. Understand root cause +3. Fix properly (not with type: ignore) +4. Test the fix + +**Example: Generic type inference failure** +```python +# error: Need type annotation for "items" (hint: "items: list[] = ...") +items = [] # Mypy can't infer type +items.append(1) + +# Fix: Annotate empty containers +items: list[int] = [] +items.append(1) +``` + +### Phase 4: Verify and Test + +```bash +# Run mypy again +mypy . + +# Run tests to ensure types match runtime +pytest + +# Check specific file +mypy path/to/file.py +``` + +**If tests pass but mypy fails: Types are inaccurate.** +**If mypy passes but tests fail: Logic bug (types were correct).** + + +## When to Use `# type: ignore` + +### Decision Tree + +``` +Is this error in your code? +├─ Yes +│ ├─ Can you fix it properly? → Fix it (don't ignore) +│ ├─ Is it a false positive? → Consider refactoring or use type: ignore +│ └─ Is it temporary WIP? → Use type: ignore with TODO +└─ No (external library) + ├─ Library has no types? → Use type: ignore[import] OR create stub + └─ Library types are wrong? → Create stub file +``` + +### Legitimate Uses + +**1. Untyped Third-Party Libraries** + +```python +# ✅ OK: Library has no type stubs +from untyped_lib import magic_function # type: ignore[import] + +# Better: Create stub file (see Stub Files section) +``` + +**2. Known False Positives** + +```python +# ✅ OK: Mypy limitation, you verified behavior +# mypy doesn't understand this pattern but it's correct +result = some_complex_generic_operation() # type: ignore[misc] # False positive, verified behavior +``` + +**3. 
Temporary WIP** + +```python +# ✅ OK: Will fix, tracking with TODO +def legacy_function(data): # type: ignore[no-untyped-def] # TODO(#123): Add types during refactor + return data.process() +``` + +### Type: Ignore Best Practices + +```python +# ❌ WRONG: Blanket ignore +def sketchy(): # type: ignore + return "something" + +# ❌ WRONG: No explanation +result = some_call() # type: ignore + +# ✅ CORRECT: Specific error code +result = some_call() # type: ignore[arg-type] + +# ✅ CORRECT: Specific error + explanation +result = some_call() # type: ignore[arg-type] # Mypy bug #12345 + +# ✅ CORRECT: Specific error + TODO +result = some_call() # type: ignore[arg-type] # TODO(#789): Fix after library update + +# ✅ CORRECT: Line-specific ignore +x: int = "hello" # type: ignore[assignment] # Test expects str, runtime converts +``` + +**Always use specific error codes:** +```python +# Instead of: # type: ignore +# Use: # type: ignore[assignment] +# Use: # type: ignore[arg-type] +# Use: # type: ignore[return-value] +``` + + +## Typing Legacy Code + +### Incremental Strategy + +**Don't type everything at once. Use phased approach:** + +**Phase 1: Core Types (Public API)** +```python +# Start with public interfaces +class UserManager: + def get_user(self, user_id: int) -> User | None: # Type this + return self._fetch_user(user_id) # Can leave internal untyped for now + + def _fetch_user(self, user_id): # Internal, type later + pass +``` + +**Phase 2: Gradually Enable Strictness** + +**File: `pyproject.toml`** +```toml +[tool.mypy] +python_version = "3.12" + +# Start lenient +warn_return_any = true +warn_unused_configs = true + +# Module overrides - strict for new code +[[tool.mypy.overrides]] +module = "myapp.new_feature" +strict = true + +[[tool.mypy.overrides]] +module = "myapp.legacy" +# No extra strictness yet +disallow_untyped_defs = false +``` + +**Phase 3: Module by Module** + +```bash +# Check coverage +mypy --html-report mypy_report . + +# Focus on one module at a time +mypy myapp/users.py --strict + +# When clean, add to strict modules in pyproject.toml +``` + +### Typing Strategies for Legacy Code + +**Strategy 1: Type from the Bottom Up** + +Start with leaf functions (no dependencies), work up to complex functions. 
+ +```python +# Step 1: Type simple helpers +def format_name(first: str, last: str) -> str: + return f"{last}, {first}" + +# Step 2: Type functions using helpers +def create_user_display(user_data: dict) -> str: + # user_data still untyped, but progress made + return format_name(user_data["first"], user_data["last"]) + +# Step 3: Type the data structures +class User(TypedDict): + first: str + last: str + email: str + +def create_user_display(user_data: User) -> str: + return format_name(user_data["first"], user_data["last"]) +``` + +**Strategy 2: Use Any as Temporary Bridge** + +```python +from typing import Any + +# ❌ WRONG: Leave completely untyped +def process(data): + return data.transform() + +# ✅ INTERMEDIATE: Use Any temporarily +def process(data: Any) -> Any: # TODO: Type this properly + return data.transform() + +# ✅ CORRECT: Proper types +def process(data: Transformable) -> TransformResult: + return data.transform() +``` + +**Use Any to:** +- Mark functions you're aware are untyped +- Track progress (search for "Any" to find what needs typing) +- Enable mypy checking on rest of codebase + +**Don't use Any to:** +- Avoid thinking about types +- Permanently work around typing issues + +**Strategy 3: Stub Out Complex Types First** + +```python +# Create minimal types for complex legacy objects +class LegacyRequest(TypedDict, total=False): + """Minimal type for legacy request object. + + Only includes fields we actually use. + Marked total=False because legacy code is inconsistent. + """ + user_id: int + action: str + data: dict + +def handle_request(req: LegacyRequest) -> None: + # Now type-checked for fields we care about + user_id = req.get("user_id", 0) + action = req.get("action", "") +``` + + +## Stub Files + +### What Are Stubs? + +Stub files (`.pyi`) contain type information without implementation. Used for: +1. Adding types to untyped third-party libraries +2. Separating interface from implementation +3. Type checking compiled extensions + +### Creating Stub Files + +**Example: Untyped library `magic_lib`** + +```python +# Your code: magic_lib has no types +import magic_lib + +result = magic_lib.do_magic("hello", 42) +# mypy error: Library 'magic_lib' has no type hints +``` + +**Solution: Create stub file** + +**File structure:** +``` +myproject/ +├── stubs/ +│ └── magic_lib.pyi ← Stub file +├── pyproject.toml +└── src/ +``` + +**File: `stubs/magic_lib.pyi`** +```python +"""Type stubs for magic_lib.""" + +def do_magic(text: str, count: int) -> list[str]: ... + +class MagicClass: + def __init__(self, value: int) -> None: ... + def transform(self, input: str) -> str: ... +``` + +**Configure mypy to find stubs:** + +**File: `pyproject.toml`** +```toml +[tool.mypy] +mypy_path = "stubs" +``` + +**Now mypy uses your stub types:** +```python +import magic_lib + +result = magic_lib.do_magic("hello", 42) # OK: mypy knows the signature +bad = magic_lib.do_magic(42, "hello") # Error: Arguments swapped! +``` + +### Stub File Best Practices + +```python +# ✅ CORRECT: Minimal stubs - only type what you use +def function_you_use(x: int) -> str: ... +# Don't stub every function in the library + +# ✅ CORRECT: Use ellipsis (...) for body +def some_function(x: int) -> None: ... + +# ✅ CORRECT: Stub classes you interact with +class ImportantClass: + attribute: str + def method(self, x: int) -> bool: ... + +# ✅ CORRECT: Use Any for complex types you don't understand yet +from typing import Any + +def complex_function(x: Any) -> Any: ... 
+# Better than no stub at all +``` + +### Contributing Stubs Upstream + +Many libraries accept type stubs: + +```bash +# 1. Create complete stub +# 2. Test it +mypy . --strict + +# 3. Contribute to typeshed or library +# Search: "python typeshed contributing" +``` + + +## Advanced Type Checking + +### Narrowing Types + +**Type Guards (isinstance)** + +```python +def process(value: int | str | None) -> str: + # Mypy tracks type narrowing + if value is None: + return "empty" + # value is now int | str + + if isinstance(value, str): + return value.upper() + # value is now int + + return str(value) +``` + +**Custom Type Guards (Python 3.10+)** + +```python +from typing import TypeGuard # Python 3.10+ + +def is_string_list(val: list[object]) -> TypeGuard[list[str]]: + """Type guard to check if all elements are strings.""" + return all(isinstance(x, str) for x in val) + +def process_items(items: list[object]) -> None: + if is_string_list(items): + # items is now list[str] here + result = [item.upper() for item in items] +``` + +### Literal Types + +```python +from typing import Literal + +# Only allow specific values +def set_mode(mode: Literal["read", "write", "append"]) -> None: + pass + +set_mode("read") # OK +set_mode("delete") # mypy error: Argument must be "read", "write", or "append" +``` + +### Overloads + +```python +from typing import overload + +# Define multiple signatures +@overload +def process(value: int) -> int: ... + +@overload +def process(value: str) -> str: ... + +# Implementation +def process(value: int | str) -> int | str: + if isinstance(value, int): + return value * 2 + return value.upper() + +# Mypy knows exact return type +result1: int = process(42) # OK: returns int +result2: str = process("hello") # OK: returns str +``` + +### reveal_type for Debugging + +```python +from typing import reveal_type + +def process(value: int | str) -> None: + if isinstance(value, str): + reveal_type(value) # mypy will print: "Revealed type is 'builtins.str'" + else: + reveal_type(value) # mypy will print: "Revealed type is 'builtins.int'" +``` + +**Use reveal_type to:** +- Debug what mypy thinks a type is +- Verify type narrowing works +- Understand complex type inference + +**Remove reveal_type before committing** - it's for debugging only. + + +## Anti-Patterns + +### Using Any Everywhere + +```python +# ❌ WRONG: Any defeats type checking +def process_user(user: Any) -> Any: + return user.transform() + +# ✅ CORRECT: Specific types +from typing import Protocol + +class Transformable(Protocol): + def transform(self) -> dict: ... + +def process_user(user: Transformable) -> dict: + return user.transform() +``` + +### Type: Ignore Without Error Code + +```python +# ❌ WRONG: Silences all errors +result = some_call() # type: ignore + +# ✅ CORRECT: Specific error code +result = some_call() # type: ignore[arg-type] +``` + +### Casting Instead of Fixing + +```python +# ❌ WRONG: Cast to hide the problem +from typing import cast + +def get_value() -> int | None: + return None + +value = cast(int, get_value()) # Lies to mypy! +result = value + 1 # Runtime error! + +# ✅ CORRECT: Handle None properly +value = get_value() +if value is not None: + result = value + 1 +``` + +### Over-Specific Types + +```python +# ❌ WRONG: Too specific, inflexible +def process_items(items: list[str]) -> list[str]: + return [item.upper() for item in items] + +# Can't pass tuple, set, etc. 
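# e.g. process_items(("a", "b")) fails type checking:
# incompatible type "tuple[str, str]"; expected "list[str]"  [arg-type]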
+ +# ✅ CORRECT: Use Sequence for read-only iteration +from collections.abc import Sequence + +def process_items(items: Sequence[str]) -> list[str]: + return [item.upper() for item in items] +``` + +### Ignoring Invariance + +```python +# ❌ WRONG: Ignoring variance rules +def add_float(numbers: list[float]) -> None: + numbers.append(3.14) + +int_list: list[int] = [1, 2, 3] +add_float(int_list) # mypy error! Would add float to int list + +# ✅ CORRECT: Understand variance +# Use Sequence for read-only (covariant) +from collections.abc import Sequence + +def sum_floats(numbers: Sequence[float]) -> float: + return sum(numbers) # Can't modify, so safe + +sum_floats(int_list) # OK: list[int] is valid Sequence[float] +``` + + +## Decision Trees + +### Should I Fix or Ignore This Error? + +``` +Is error in your code? +├─ Yes +│ ├─ Understand the error? +│ │ ├─ Yes → Fix it properly +│ │ └─ No → Read error carefully, search docs, THEN fix +│ └─ False positive? +│ ├─ Verified false positive → type: ignore with explanation +│ └─ Not sure → Fix it (probably not false positive) +└─ No (third-party library) + ├─ Missing types? → Create stub OR type: ignore[import] + └─ Wrong types? → Create stub with correct types +``` + +### Which Type to Use? + +``` +For function parameters: +├─ Read-only sequence? → Sequence[T] +├─ Need to modify? → list[T] +├─ Read-only mapping? → Mapping[K, V] +├─ Need to modify mapping? → MutableMapping[K, V] or dict[K, V] +└─ Union of types? → T1 | T2 | T3 + +For return types: +├─ Can return None? → ReturnType | None +├─ Multiple possible types? → Type1 | Type2 +├─ Always same type? → Specific type +└─ Complex? → Consider TypedDict or dataclass +``` + +### Fixing Legacy Code Order + +``` +1. Public API first + ├─ Public functions and methods + └─ Return types and parameters + +2. Internal implementation later + ├─ Private methods + └─ Helper functions + +3. Complex types last + ├─ Generic classes + └─ Complex unions + +Enable strictness per module after fixing. 
+``` + + +## Common Error Patterns and Solutions + +### Pattern 1: Optional Chaining + +**Problem:** +```python +def get_name(user_id: int) -> str: + user = database.get_user(user_id) # Returns User | None + return user.name # Error: "None" has no attribute "name" +``` + +**Solutions:** +```python +# Solution 1: Check for None +def get_name(user_id: int) -> str | None: + user = database.get_user(user_id) + if user is None: + return None + return user.name + +# Solution 2: Provide default +def get_name(user_id: int) -> str: + user = database.get_user(user_id) + if user is None: + return "Unknown" + return user.name + +# Solution 3: Raise exception +def get_name(user_id: int) -> str: + user = database.get_user(user_id) + if user is None: + raise ValueError(f"User {user_id} not found") + return user.name +``` + +### Pattern 2: Dict Access + +**Problem:** +```python +def process(data: dict[str, str]) -> str: + # Error: Dict.get returns str | None, but we assign to str + value: str = data.get("key") + return value.upper() +``` + +**Solutions:** +```python +# Solution 1: Handle None +def process(data: dict[str, str]) -> str: + value = data.get("key") + if value is None: + return "" + return value.upper() + +# Solution 2: Provide default +def process(data: dict[str, str]) -> str: + value = data.get("key", "") # Default to empty string + return value.upper() + +# Solution 3: Use __getitem__ if key must exist +def process(data: dict[str, str]) -> str: + value = data["key"] # Raises KeyError if missing + return value.upper() +``` + +### Pattern 3: List Comprehension Type Inference + +**Problem:** +```python +# Mypy can't infer return type +def get_ids(users): + return [user.id for user in users] +``` + +**Solutions:** +```python +# Solution 1: Annotate parameters +def get_ids(users: list[User]) -> list[int]: + return [user.id for user in users] + +# Solution 2: Annotate return +def get_ids(users: list[User]) -> list[int]: + result: list[int] = [user.id for user in users] + return result +``` + +### Pattern 4: Callback Type Hints + +**Problem:** +```python +# How to type this callback? +def process_async(callback) -> None: + result = do_work() + callback(result) +``` + +**Solution:** +```python +from collections.abc import Callable + +def process_async(callback: Callable[[int], None]) -> None: + result: int = do_work() + callback(result) + +# More complex: callback returns value +def process_with_transform(callback: Callable[[int], str]) -> str: + result: int = do_work() + return callback(result) +``` + + +## Integration with Other Skills + +**Before using this skill:** +- Set up mypy → See `project-structure-and-tooling` for mypy configuration +- Understand type syntax → See `modern-syntax-and-types` for type hint patterns + +**After using this skill:** +- Run systematic delinting → See `systematic-delinting` for fixing lint warnings +- Add tests for typed code → See `testing-and-quality` for pytest with types + +**Cross-references:** +- Type hint syntax → `modern-syntax-and-types` +- Mypy configuration → `project-structure-and-tooling` +- Delinting process → `systematic-delinting` +- Testing typed code → `testing-and-quality` + + +## Quick Reference + +### Mypy Commands + +```bash +# Basic check +mypy . + +# With error codes and context +mypy . --show-error-codes --show-error-context + +# Specific file +mypy path/to/file.py + +# Strict mode +mypy . --strict + +# Generate HTML report +mypy --html-report mypy_report . + +# Count errors by type +mypy . 
--show-error-codes 2>&1 | grep -o '\[.*\]' | sort | uniq -c | sort -rn + +# Disable specific error code +mypy . --disable-error-code=assignment + +# Check specific error code only +mypy . --enable-error-code=unused-awaitable +``` + +### Mypy Plugins for Frameworks + +Popular frameworks have mypy plugins for better type checking: + +```bash +# SQLAlchemy +pip install sqlalchemy[mypy] + +# Django +pip install django-stubs[compatible-mypy] + +# Pydantic (built-in support) +pip install pydantic +``` + +**Configure in pyproject.toml:** + +```toml +[tool.mypy] +plugins = [ + "sqlalchemy.ext.mypy.plugin", + "mypy_django_plugin.main", +] +``` + +**Why use plugins:** + +- SQLAlchemy plugin understands ORM models and relationships +- Django plugin knows about models, querysets, and settings +- Pydantic provides automatic type inference for models + +### Performance Tips for Large Codebases + +```bash +# Use cache directory (enabled by default) +mypy --cache-dir=.mypy_cache . + +# Run mypy daemon for faster repeated checks +dmypy run -- . + +# Incremental mode (enabled by default) +mypy --incremental . + +# Parallel checking (experimental) +mypy --fast-module-lookup . +``` + +**For CI/CD:** + +```yaml +# Cache .mypy_cache directory between runs +- name: Cache mypy + uses: actions/cache@v3 + with: + path: .mypy_cache + key: mypy-${{ hashFiles('**/*.py') }} +``` + +### Common Error Codes + +| Code | Meaning | Common Cause | +|------|---------|--------------| +| `assignment` | Wrong type in assignment | `x: int = "str"` | +| `arg-type` | Wrong argument type | `func(str_val)` expects int | +| `return-value` | Wrong return type | Return str, declared int | +| `union-attr` | Access attr on union | `x.method()` but x is `int \| str` | +| `no-untyped-def` | Missing annotations | Function has no types | +| `var-annotated` | Variable needs annotation | `x = []` needs type | +| `import` | Import from untyped lib | Library has no stubs | +| `no-any-return` | Returning Any | Function returns Any | + +### Type: Ignore Patterns + +```python +# Specific error code (preferred) +x = func() # type: ignore[arg-type] + +# With explanation +x = func() # type: ignore[arg-type] # TODO: Fix after lib update + +# Multiple error codes +x = func() # type: ignore[arg-type, return-value] + +# Unused ignore warning +# If error is fixed, mypy warns about unused ignore +x = func() # type: ignore[arg-type] # Warns if no longer needed +``` + +### Resolution Checklist + +**For each mypy error:** + +- [ ] Read error message carefully +- [ ] Identify error code +- [ ] Understand what mypy thinks vs what code does +- [ ] Decide: Fix or ignore? +- [ ] If fix: Update code and annotations +- [ ] If ignore: Use specific error code + explanation +- [ ] Verify fix: Run mypy again +- [ ] Verify runtime: Run tests + +**For large batch of errors:** + +- [ ] Run mypy with error codes +- [ ] Count errors by category +- [ ] Prioritize: high-impact → high-frequency +- [ ] Fix one category at a time +- [ ] Verify after each category +- [ ] Track progress (errors remaining) + + +## Why This Matters: Real-World Impact + +**Type checking catches bugs before production:** + +- ❌ None handling: Catch `AttributeError` before deploy +- ❌ Wrong argument types: Catch `TypeError` before runtime +- ❌ Missing return: Catch incomplete refactors +- ❌ Union type issues: Catch invalid state handling + +**Type errors indicate:** + +1. **Actual bugs** - Code will fail at runtime +2. **Incomplete refactors** - Changed signature but not all callers +3. 
**Unclear contracts** - Function doesn't match its documentation +4. **Design issues** - Complex types → simplify design + +**Time investment:** + +- Initial typing: 20-40% time overhead +- Maintenance: 5-10% time overhead +- Bugs prevented: 15-40% reduction in runtime errors +- Refactoring confidence: 50-80% faster with types + +**ROI: Positive after 3-6 months on medium projects. Essential for large codebases.** + +**Don't silence type checkers. Make types match reality.** diff --git a/skills/using-python-engineering/scientific-computing-foundations.md b/skills/using-python-engineering/scientific-computing-foundations.md new file mode 100644 index 0000000..758e07e --- /dev/null +++ b/skills/using-python-engineering/scientific-computing-foundations.md @@ -0,0 +1,981 @@ + +# Scientific Computing Foundations + +## Overview + +**Core Principle:** Vectorize operations, avoid loops. NumPy and pandas are built on C/Fortran code that's orders of magnitude faster than Python loops. The biggest performance gains come from eliminating iteration over rows/elements. + +Scientific computing in Python centers on NumPy (arrays) and pandas (dataframes). These libraries enable fast numerical computation on large datasets through vectorized operations and efficient memory layouts. The most common mistake: using Python loops when vectorized operations exist. + +## When to Use + +**Use this skill when:** +- "NumPy operations" +- "Pandas DataFrame slow" +- "Vectorization" +- "How to avoid loops?" +- "DataFrame iteration" +- "Array performance" +- "Memory usage too high" +- "Large dataset processing" + +**Don't use when:** +- Setting up project (use project-structure-and-tooling) +- Profiling needed first (use debugging-and-profiling) +- ML pipeline orchestration (use ml-engineering-workflows) + +**Symptoms triggering this skill:** +- Slow DataFrame operations +- High memory usage with arrays +- Using loops over DataFrame rows +- Need to process large datasets efficiently + + +## NumPy Fundamentals + +### Array Creation and Types + +```python +import numpy as np + +# ❌ WRONG: Creating arrays from Python lists in loop +data = [] +for i in range(1000000): + data.append(i * 2) +arr = np.array(data) + +# ✅ CORRECT: Use NumPy functions +arr = np.arange(1000000) * 2 + +# ✅ CORRECT: Pre-allocate for known size +arr = np.empty(1000000, dtype=np.int64) +for i in range(1000000): + arr[i] = i * 2 # Still slow, but better than list + +# ✅ BETTER: Fully vectorized +arr = np.arange(1000000, dtype=np.int64) * 2 + +# ✅ CORRECT: Specify dtype for memory efficiency +# float64 (default): 8 bytes per element +# float32: 4 bytes per element +large_arr = np.zeros(1000000, dtype=np.float32) # Half the memory + +# Why this matters: dtype affects both memory usage and performance +# Use smallest dtype that fits your data +``` + +### Vectorized Operations + +```python +# ❌ WRONG: Loop over array elements +arr = np.arange(1000000) +result = np.empty(1000000) +for i in range(len(arr)): + result[i] = arr[i] ** 2 + 2 * arr[i] + 1 + +# ✅ CORRECT: Vectorized operations +arr = np.arange(1000000) +result = arr ** 2 + 2 * arr + 1 + +# Speed difference: ~100x faster with vectorization + +# ❌ WRONG: Element-wise comparison in loop +matches = [] +for val in arr: + if val > 100: + matches.append(val) +result = np.array(matches) + +# ✅ CORRECT: Boolean indexing +result = arr[arr > 100] + +# ✅ CORRECT: Complex conditions +result = arr[(arr > 100) & (arr < 200)] # Note: & not 'and' +result = arr[(arr < 50) | (arr > 150)] # Note: | not 'or' +``` + 
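
To see the gap on your own machine, time both forms (a minimal sketch; absolute numbers vary with hardware and array size):

```python
import timeit

import numpy as np

arr = np.arange(1_000_000)

# One pass through the Python interpreter per element
loop_time = timeit.timeit("[x ** 2 for x in arr]", globals=globals(), number=1)

# One call into NumPy's compiled loop
vec_time = timeit.timeit("arr ** 2", globals=globals(), number=1)

print(f"loop: {loop_time:.3f}s  vectorized: {vec_time:.4f}s")
```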
**Why this matters**: Vectorized operations run in C, avoiding Python interpreter overhead. 10-100x speedup typical.

### Broadcasting

```python
# Broadcasting: Operating on arrays of different shapes

# ✅ CORRECT: Scalar broadcasting
arr = np.array([1, 2, 3, 4])
result = arr + 10  # [11, 12, 13, 14]

# ✅ CORRECT: 1D array broadcast to 2D
matrix = np.array([[1, 2, 3],
                   [4, 5, 6],
                   [7, 8, 9]])

row_vector = np.array([10, 20, 30])
result = matrix + row_vector
# [[11, 22, 33],
#  [14, 25, 36],
#  [17, 28, 39]]

# ✅ CORRECT: Column vector broadcasting
col_vector = np.array([[10],
                       [20],
                       [30]])
result = matrix + col_vector
# [[11, 12, 13],
#  [24, 25, 26],
#  [37, 38, 39]]

# ✅ CORRECT: Add axis for broadcasting
row = np.array([1, 2, 3])
col = row[:, np.newaxis]  # Convert to column vector
# col shape: (3, 1)

# Outer product via broadcasting
outer = row[np.newaxis, :] * col
# [[1, 2, 3],
#  [2, 4, 6],
#  [3, 6, 9]]

# ❌ WRONG: Manual broadcasting with loops
result = np.empty_like(matrix)
for i in range(matrix.shape[0]):
    for j in range(matrix.shape[1]):
        result[i, j] = matrix[i, j] + row_vector[j]

# Why this matters: Broadcasting eliminates loops and is much faster
```

### Memory-Efficient Operations

```python
# ❌ WRONG: Creating unnecessary copies
large_arr = np.random.rand(10000, 10000)  # ~800MB
result1 = large_arr + 1  # Creates new 800MB array
result2 = result1 * 2  # Creates another 800MB array
# Total: 2.4GB memory usage

# ✅ CORRECT: In-place operations
large_arr = np.random.rand(10000, 10000)
large_arr += 1  # Modifies in-place, no copy
large_arr *= 2  # Modifies in-place, no copy
# Total: 800MB memory usage

# ✅ CORRECT: Use 'out' parameter
result = np.empty_like(large_arr)
np.add(large_arr, 1, out=result)
np.multiply(result, 2, out=result)

# ❌ WRONG: Unnecessary array copies
arr = np.arange(1000000)
subset = arr[::2].copy()  # Explicit copy needed? Check first
subset[0] = 999  # Doesn't affect arr

# ✅ CORRECT: Views avoid copies (when possible)
arr = np.arange(1000000)
view = arr[::2]  # View, not copy (shares memory)
view[0] = 999  # Modifies arr too!

# Check whether an array is a view or owns its memory
print(view.base is None)  # False: view has a base (it is a view of arr)
print(arr.base is None)   # True: arr owns its memory
```

**Why this matters**: Large arrays consume lots of memory. In-place operations and views avoid copies, reducing memory usage significantly.
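When it's unclear whether an operation returned a view or a copy, `np.shares_memory` gives a definitive check (a minimal sketch):

```python
import numpy as np

arr = np.arange(1_000_000)
view = arr[::2]        # Basic slicing returns a view
copy = arr[arr > 10]   # Boolean indexing always returns a copy

print(np.shares_memory(arr, view))  # True: writing to view changes arr
print(np.shares_memory(arr, copy))  # False: copy has its own buffer
```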
+ +### Aggregations and Reductions + +```python +# ✅ CORRECT: Axis-aware aggregations +matrix = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]) + +# Sum all elements +total = matrix.sum() # 45 + +# Sum along axis 0 (columns) +col_sums = matrix.sum(axis=0) # [12, 15, 18] + +# Sum along axis 1 (rows) +row_sums = matrix.sum(axis=1) # [6, 15, 24] + +# ❌ WRONG: Manual aggregation +total = 0 +for row in matrix: + for val in row: + total += val + +# ✅ CORRECT: Multiple aggregations +matrix.mean() +matrix.std() +matrix.min() +matrix.max() +matrix.argmin() # Index of minimum +matrix.argmax() # Index of maximum + +# ✅ CORRECT: Conditional aggregations +# Sum only positive values +positive_sum = matrix[matrix > 0].sum() + +# Count elements > 5 +count = (matrix > 5).sum() + +# Percentage > 5 +percentage = (matrix > 5).mean() * 100 +``` + + +## pandas Fundamentals + +### DataFrame Creation + +```python +import pandas as pd + +# ❌ WRONG: Building DataFrame row by row +df = pd.DataFrame() +for i in range(10000): + df = pd.concat([df, pd.DataFrame({'a': [i], 'b': [i*2]})], ignore_index=True) +# Extremely slow: O(n²) complexity + +# ✅ CORRECT: Create from dict of lists +data = { + 'a': list(range(10000)), + 'b': [i * 2 for i in range(10000)] +} +df = pd.DataFrame(data) + +# ✅ BETTER: Use NumPy arrays +df = pd.DataFrame({ + 'a': np.arange(10000), + 'b': np.arange(10000) * 2 +}) + +# ✅ CORRECT: From records +records = [{'a': i, 'b': i*2} for i in range(10000)] +df = pd.DataFrame.from_records(records) +``` + +### The Iteration Anti-Pattern + +```python +# ❌ WRONG: iterrows() - THE MOST COMMON MISTAKE +df = pd.DataFrame({ + 'value': np.random.rand(100000), + 'category': np.random.choice(['A', 'B', 'C'], 100000) +}) + +result = [] +for idx, row in df.iterrows(): # VERY SLOW + if row['value'] > 0.5: + result.append(row['value'] * 2) + +# ✅ CORRECT: Vectorized operations +mask = df['value'] > 0.5 +result = df.loc[mask, 'value'] * 2 + +# Speed difference: ~100x faster + +# ❌ WRONG: apply() on axis=1 (still row-by-row) +df['result'] = df.apply( + lambda row: row['value'] * 2 if row['value'] > 0.5 else 0, + axis=1 +) +# Still slow: applies Python function to each row + +# ✅ CORRECT: Vectorized with np.where +df['result'] = np.where(df['value'] > 0.5, df['value'] * 2, 0) + +# ✅ CORRECT: Boolean indexing + assignment +df['result'] = 0 +df.loc[df['value'] > 0.5, 'result'] = df['value'] * 2 +``` + +**Why this matters**: `iterrows()` is the single biggest DataFrame performance killer. ALWAYS look for vectorized alternatives. 
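When a condition has more than two branches, `np.select` keeps the same vectorized style as `np.where` (a sketch; the column name and thresholds are arbitrary):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame({"value": np.random.rand(100_000)})

# Conditions are checked in order; the first match wins
conditions = [df["value"] < 0.25, df["value"] < 0.75]
choices = ["low", "mid"]
df["band"] = np.select(conditions, choices, default="high")
```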
+ +### Efficient Filtering and Selection + +```python +df = pd.DataFrame({ + 'A': np.random.rand(100000), + 'B': np.random.rand(100000), + 'C': np.random.choice(['X', 'Y', 'Z'], 100000) +}) + +# ❌ WRONG: Chaining filters inefficiently +df_filtered = df[df['A'] > 0.5] +df_filtered = df_filtered[df_filtered['B'] < 0.3] +df_filtered = df_filtered[df_filtered['C'] == 'X'] + +# ✅ CORRECT: Single boolean mask +mask = (df['A'] > 0.5) & (df['B'] < 0.3) & (df['C'] == 'X') +df_filtered = df[mask] + +# ✅ CORRECT: query() for complex filters (cleaner syntax) +df_filtered = df.query('A > 0.5 and B < 0.3 and C == "X"') + +# ✅ CORRECT: isin() for multiple values +df_filtered = df[df['C'].isin(['X', 'Y'])] + +# ❌ WRONG: String matching in loop +matches = [] +for val in df['C']: + if 'X' in val: + matches.append(True) + else: + matches.append(False) +df_filtered = df[matches] + +# ✅ CORRECT: Vectorized string operations +df_filtered = df[df['C'].str.contains('X')] +``` + +### GroupBy Operations + +```python +df = pd.DataFrame({ + 'category': np.random.choice(['A', 'B', 'C'], 100000), + 'value': np.random.rand(100000), + 'count': np.random.randint(1, 100, 100000) +}) + +# ❌ WRONG: Manual grouping +groups = {} +for idx, row in df.iterrows(): + cat = row['category'] + if cat not in groups: + groups[cat] = [] + groups[cat].append(row['value']) + +results = {cat: sum(vals) / len(vals) for cat, vals in groups.items()} + +# ✅ CORRECT: GroupBy +results = df.groupby('category')['value'].mean() + +# ✅ CORRECT: Multiple aggregations +results = df.groupby('category').agg({ + 'value': ['mean', 'std', 'min', 'max'], + 'count': 'sum' +}) + +# ✅ CORRECT: Named aggregations (pandas 0.25+) +results = df.groupby('category').agg( + mean_value=('value', 'mean'), + std_value=('value', 'std'), + total_count=('count', 'sum') +) + +# ✅ CORRECT: Custom aggregation function +def range_func(x): + return x.max() - x.min() + +results = df.groupby('category')['value'].agg(range_func) + +# ✅ CORRECT: Transform (keeps original shape) +df['value_centered'] = df.groupby('category')['value'].transform( + lambda x: x - x.mean() +) +``` + +**Why this matters**: GroupBy is highly optimized. Much faster than manual grouping. Use built-in aggregations when possible. + + +## Performance Anti-Patterns + +### Anti-Pattern 1: DataFrame Iteration + +```python +# ❌ WRONG: Iterating over rows +for idx, row in df.iterrows(): + df.at[idx, 'new_col'] = row['a'] + row['b'] + +# ✅ CORRECT: Vectorized column operation +df['new_col'] = df['a'] + df['b'] + +# ❌ WRONG: Itertuples (better than iterrows, but still slow) +for row in df.itertuples(): + # Process row... 
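    # itertuples yields namedtuples (row.Index, row.a, ...): roughly an
    # order of magnitude faster than iterrows, but still a Python-level loop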
+ +# ✅ CORRECT: Use vectorized operations or apply to columns +``` + +### Anti-Pattern 2: Repeated Concatenation + +```python +# ❌ WRONG: Growing DataFrame in loop +df = pd.DataFrame() +for i in range(10000): + df = pd.concat([df, new_row_df], ignore_index=True) +# O(n²) complexity, extremely slow + +# ✅ CORRECT: Collect data, then create DataFrame +data = [] +for i in range(10000): + data.append({'a': i, 'b': i*2}) +df = pd.DataFrame(data) + +# ✅ CORRECT: Pre-allocate NumPy array +arr = np.empty((10000, 2)) +for i in range(10000): + arr[i] = [i, i*2] +df = pd.DataFrame(arr, columns=['a', 'b']) +``` + +### Anti-Pattern 3: Using apply When Vectorized Exists + +```python +# ❌ WRONG: apply() for simple operations +df['result'] = df['value'].apply(lambda x: x * 2) + +# ✅ CORRECT: Direct vectorized operation +df['result'] = df['value'] * 2 + +# ❌ WRONG: apply() for conditions +df['category'] = df['value'].apply(lambda x: 'high' if x > 0.5 else 'low') + +# ✅ CORRECT: np.where or pd.cut +df['category'] = np.where(df['value'] > 0.5, 'high', 'low') + +# ✅ CORRECT: pd.cut for binning +df['category'] = pd.cut(df['value'], bins=[0, 0.5, 1.0], labels=['low', 'high']) + +# When apply IS appropriate: +# - Complex logic not vectorizable +# - Need to call external function per row +# But verify vectorization truly impossible first +``` + +### Anti-Pattern 4: Not Using Categorical Data + +```python +# ❌ WRONG: String columns for repeated values +df = pd.DataFrame({ + 'category': ['A'] * 10000 + ['B'] * 10000 + ['C'] * 10000 +}) +# Memory: ~240KB (each string stored separately) + +# ✅ CORRECT: Categorical type +df['category'] = pd.Categorical(df['category']) +# Memory: ~30KB (integers + small string table) + +# ✅ CORRECT: Define categories at creation +df = pd.DataFrame({ + 'category': pd.Categorical( + ['A'] * 10000 + ['B'] * 10000, + categories=['A', 'B', 'C'] + ) +}) + +# When to use categorical: +# - Limited number of unique values (< 50% of rows) +# - Repeated string/object values +# - Memory constraints +# - Faster groupby operations +``` + + +## Memory Optimization + +### Choosing Appropriate dtypes + +```python +# ❌ WRONG: Default dtypes waste memory +df = pd.DataFrame({ + 'int_col': [1, 2, 3, 4, 5], # int64 by default + 'float_col': [1.0, 2.0, 3.0], # float64 by default + 'str_col': ['a', 'b', 'c', 'd', 'e'] # object dtype +}) + +print(df.memory_usage(deep=True)) + +# ✅ CORRECT: Optimize dtypes +df = pd.DataFrame({ + 'int_col': pd.array([1, 2, 3, 4, 5], dtype='int8'), # -128 to 127 + 'float_col': pd.array([1.0, 2.0, 3.0], dtype='float32'), + 'str_col': pd.Categorical(['a', 'b', 'c', 'd', 'e']) +}) + +# ✅ CORRECT: Downcast after loading +df = pd.read_csv('data.csv') +df['int_col'] = pd.to_numeric(df['int_col'], downcast='integer') +df['float_col'] = pd.to_numeric(df['float_col'], downcast='float') + +# Integer dtype ranges: +# int8: -128 to 127 +# int16: -32,768 to 32,767 +# int32: -2.1B to 2.1B +# int64: -9.2E18 to 9.2E18 + +# Float dtype precision: +# float16: ~3 decimal digits (rarely used) +# float32: ~7 decimal digits +# float64: ~15 decimal digits +``` + +### Chunked Processing for Large Files + +```python +# ❌ WRONG: Loading entire file into memory +df = pd.read_csv('huge_file.csv') # 10GB file, OOM! 
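# All 10GB (plus parsing overhead) must fit in RAM before
# process_dataframe ever runs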
+df_processed = process_dataframe(df) + +# ✅ CORRECT: Process in chunks +chunk_size = 100000 +results = [] + +for chunk in pd.read_csv('huge_file.csv', chunksize=chunk_size): + processed = process_dataframe(chunk) + results.append(processed) + +df_final = pd.concat(results, ignore_index=True) + +# ✅ CORRECT: Streaming aggregation +totals = {'A': 0, 'B': 0, 'C': 0} + +for chunk in pd.read_csv('huge_file.csv', chunksize=chunk_size): + for col in totals: + totals[col] += chunk[col].sum() + +# ✅ CORRECT: Only load needed columns +df = pd.read_csv('huge_file.csv', usecols=['col1', 'col2', 'col3']) +``` + +### Using Sparse Data Structures + +```python +# ❌ WRONG: Dense array for sparse data +# Data with 99% zeros +dense = np.zeros(1000000) +dense[::100] = 1 # Only 1% non-zero +# Memory: 8MB (float64 * 1M) + +# ✅ CORRECT: Sparse array +from scipy.sparse import csr_matrix +sparse = csr_matrix(dense) +# Memory: ~80KB (only stores non-zero values + indices) + +# ✅ CORRECT: Sparse DataFrame +df = pd.DataFrame({ + 'A': pd.arrays.SparseArray([0] * 100 + [1] + [0] * 100), + 'B': pd.arrays.SparseArray([0] * 50 + [2] + [0] * 150) +}) +``` + + +## Data Pipeline Patterns + +### Method Chaining + +```python +# ❌ WRONG: Many intermediate variables +df = pd.read_csv('data.csv') +df = df[df['value'] > 0] +df = df.groupby('category')['value'].mean() +df = df.reset_index() +df = df.rename(columns={'value': 'mean_value'}) + +# ✅ CORRECT: Method chaining +df = ( + pd.read_csv('data.csv') + .query('value > 0') + .groupby('category')['value'] + .mean() + .reset_index() + .rename(columns={'value': 'mean_value'}) +) + +# ✅ CORRECT: Pipe for custom functions +def remove_outliers(df, column, n_std=3): + mean = df[column].mean() + std = df[column].std() + return df[ + (df[column] > mean - n_std * std) & + (df[column] < mean + n_std * std) + ] + +df = ( + pd.read_csv('data.csv') + .pipe(remove_outliers, 'value', n_std=2) + .groupby('category')['value'] + .mean() +) +``` + +### Efficient Merges and Joins + +```python +# ❌ WRONG: Multiple small merges +for small_df in list_of_dfs: + main_df = main_df.merge(small_df, on='key') +# Inefficient: creates many intermediate copies + +# ✅ CORRECT: Merge all at once +df_merged = pd.concat(list_of_dfs, ignore_index=True) + +# ✅ CORRECT: Optimize merge with sorted/indexed data +df1 = df1.set_index('key').sort_index() +df2 = df2.set_index('key').sort_index() +result = df1.merge(df2, left_index=True, right_index=True) + +# ✅ CORRECT: Use indicator to track merge sources +result = df1.merge(df2, on='key', how='outer', indicator=True) +print(result['_merge'].value_counts()) +# Shows: left_only, right_only, both + +# ❌ WRONG: Cartesian product by accident +# df1: 1000 rows, df2: 1000 rows +result = df1.merge(df2, on='wrong_key') +# result: 1,000,000 rows! 
(if all keys match) + +# ✅ CORRECT: Validate merge +result = df1.merge(df2, on='key', validate='1:1') +# Raises error if not one-to-one relationship +``` + +### Handling Missing Data + +```python +# ❌ WRONG: Dropping all rows with any NaN +df_clean = df.dropna() # Might lose most of data + +# ✅ CORRECT: Drop rows with NaN in specific columns +df_clean = df.dropna(subset=['important_col1', 'important_col2']) + +# ✅ CORRECT: Fill NaN with appropriate values +df['numeric_col'] = df['numeric_col'].fillna(df['numeric_col'].mean()) +df['category_col'] = df['category_col'].fillna('Unknown') + +# ✅ CORRECT: Forward/backward fill for time series +df['value'] = df['value'].fillna(method='ffill') # Forward fill + +# ✅ CORRECT: Interpolation +df['value'] = df['value'].interpolate(method='linear') + +# ❌ WRONG: Not checking for NaN before operations +result = df['value'].mean() # NaN propagates, might return NaN + +# ✅ CORRECT: Explicit NaN handling +result = df['value'].mean(skipna=True) # Default, but explicit is better +``` + + +## Advanced NumPy Techniques + +### Universal Functions (ufuncs) + +```python +# ✅ CORRECT: Using built-in ufuncs +arr = np.random.rand(1000000) + +# Trigonometric +result = np.sin(arr) +result = np.cos(arr) + +# Exponential +result = np.exp(arr) +result = np.log(arr) + +# Comparison +result = np.maximum(arr, 0.5) # Element-wise max with scalar +result = np.minimum(arr, 0.5) + +# ✅ CORRECT: Custom ufunc with @vectorize +from numba import vectorize + +@vectorize +def custom_func(x): + if x > 0.5: + return x ** 2 + else: + return x ** 3 + +result = custom_func(arr) # Runs at C speed +``` + +### Advanced Indexing + +```python +# ✅ CORRECT: Fancy indexing +arr = np.arange(100) +indices = [0, 5, 10, 15, 20] +result = arr[indices] # Select specific indices + +# ✅ CORRECT: Boolean indexing with multiple conditions +arr = np.random.rand(1000000) +mask = (arr > 0.3) & (arr < 0.7) +result = arr[mask] + +# ✅ CORRECT: np.where for conditional replacement +arr = np.random.rand(1000) +result = np.where(arr > 0.5, arr, 0) # Replace values <= 0.5 with 0 + +# ✅ CORRECT: Multi-dimensional indexing +matrix = np.random.rand(100, 100) +rows = [0, 10, 20] +cols = [5, 15, 25] +result = matrix[rows, cols] # Select specific elements + +# Get diagonal +diagonal = matrix[np.arange(100), np.arange(100)] +# Or use np.diag +diagonal = np.diag(matrix) +``` + +### Linear Algebra Operations + +```python +# ✅ CORRECT: Matrix multiplication +A = np.random.rand(1000, 500) +B = np.random.rand(500, 200) +C = A @ B # Python 3.5+ matrix multiply operator + +# Or +C = np.dot(A, B) +C = np.matmul(A, B) + +# ✅ CORRECT: Solve linear system Ax = b +A = np.random.rand(100, 100) +b = np.random.rand(100) +x = np.linalg.solve(A, b) + +# ✅ CORRECT: Eigenvalues and eigenvectors +eigenvalues, eigenvectors = np.linalg.eig(A) + +# ✅ CORRECT: SVD (Singular Value Decomposition) +U, s, Vt = np.linalg.svd(A) + +# ✅ CORRECT: Inverse +A_inv = np.linalg.inv(A) + +# ❌ WRONG: Using inverse for solving Ax = b +x = np.linalg.inv(A) @ b # Slower and less numerically stable + +# ✅ CORRECT: Use solve directly +x = np.linalg.solve(A, b) +``` + + +## Type Hints for NumPy and pandas + +### NumPy Type Hints + +```python +import numpy as np +from numpy.typing import NDArray + +# ✅ CORRECT: Type hint for NumPy arrays +def process_array(arr: NDArray[np.float64]) -> NDArray[np.float64]: + return arr * 2 + +# ✅ CORRECT: Generic array type +def normalize(arr: NDArray) -> NDArray: + return (arr - arr.mean()) / arr.std() + +# ✅ CORRECT: Shape-specific 
type hints (Python 3.11+) +from typing import TypeAlias + +Vector: TypeAlias = NDArray[np.float64] # 1D array +Matrix: TypeAlias = NDArray[np.float64] # 2D array + +def matrix_multiply(A: Matrix, B: Matrix) -> Matrix: + return A @ B +``` + +### pandas Type Hints + +```python +import pandas as pd + +# ✅ CORRECT: Type hints for Series and DataFrame +def process_series(s: pd.Series) -> pd.Series: + return s * 2 + +def process_dataframe(df: pd.DataFrame) -> pd.DataFrame: + return df[df['value'] > 0] + +# ✅ CORRECT: More specific DataFrame types (using Protocols) +from typing import Protocol + +class DataFrameWithColumns(Protocol): + """DataFrame with specific columns.""" + def __getitem__(self, key: str) -> pd.Series: ... + +def analyze_data(df: DataFrameWithColumns) -> float: + return df['value'].mean() +``` + + +## Real-World Patterns + +### Time Series Operations + +```python +# ✅ CORRECT: Efficient time series resampling +df = pd.DataFrame({ + 'timestamp': pd.date_range('2024-01-01', periods=1000000, freq='1s'), + 'value': np.random.rand(1000000) +}) + +df = df.set_index('timestamp') + +# Resample to 1-minute intervals +df_resampled = df.resample('1min').agg({ + 'value': ['mean', 'std', 'min', 'max'] +}) + +# ✅ CORRECT: Rolling window operations +df['rolling_mean'] = df['value'].rolling(window=60).mean() +df['rolling_std'] = df['value'].rolling(window=60).std() + +# ✅ CORRECT: Lag features +df['value_lag1'] = df['value'].shift(1) +df['value_lag60'] = df['value'].shift(60) + +# ✅ CORRECT: Difference for stationarity +df['value_diff'] = df['value'].diff() +``` + +### Multi-Index Operations + +```python +# ✅ CORRECT: Creating multi-index DataFrame +df = pd.DataFrame({ + 'country': ['USA', 'USA', 'UK', 'UK'], + 'city': ['NYC', 'LA', 'London', 'Manchester'], + 'value': [100, 200, 150, 175] +}) + +df = df.set_index(['country', 'city']) + +# Accessing with multi-index +df.loc['USA'] # All USA cities +df.loc[('USA', 'NYC')] # Specific city + +# ✅ CORRECT: Cross-section +df.xs('USA', level='country') +df.xs('London', level='city') + +# ✅ CORRECT: GroupBy with multi-index +df.groupby(level='country').mean() +``` + +### Parallel Processing with Dask + +```python +# For datasets larger than memory, use Dask (not in plan detail, but worth mentioning) +import dask.dataframe as dd + +# ✅ CORRECT: Dask for out-of-core processing +df = dd.read_csv('huge_file.csv') +result = df.groupby('category')['value'].mean().compute() + +# Dask uses same API as pandas, but lazy evaluation +# Only computes when .compute() is called +``` + + +## Anti-Pattern Summary + +### Top 5 Performance Killers + +1. **iterrows()** - Use vectorized operations +2. **Growing DataFrame in loop** - Collect data, then create DataFrame +3. **apply() for simple operations** - Use vectorized alternatives +4. **Not using categorical for strings** - Convert to categorical +5. **Loading entire file when chunking works** - Use chunksize parameter + +### Memory Usage Mistakes + +1. **Using float64 when float32 sufficient** - Halves memory +2. **Not using categorical for repeated strings** - 10x memory savings +3. **Creating unnecessary copies** - Use in-place operations +4. **Loading all columns when few needed** - Use usecols parameter + + +## Decision Trees + +### Should I Use NumPy or pandas? + +``` +What's your data structure? +├─ Homogeneous numeric array → NumPy +├─ Heterogeneous tabular data → pandas +├─ Time series → pandas +└─ Linear algebra → NumPy +``` + +### How to Optimize DataFrame Operation? + +``` +Can I vectorize? 
+├─ Yes → Use vectorized pandas/NumPy operations +└─ No → Can I use groupby? + ├─ Yes → Use groupby with built-in aggregations + └─ No → Can I use apply on columns (not rows)? + ├─ Yes → Use apply on Series + └─ No → Use itertuples (last resort) +``` + +### Memory Optimization Strategy + +``` +Is memory usage high? +├─ Yes → Check dtypes (downcast if possible) +│ └─ Still high? → Use categorical for strings +│ └─ Still high? → Process in chunks +└─ No → Continue with current approach +``` + + +## Integration with Other Skills + +**After using this skill:** +- If need ML pipelines → See @ml-engineering-workflows for experiment tracking +- If performance issues persist → See @debugging-and-profiling for profiling +- If type hints needed → See @modern-syntax-and-types for advanced typing + +**Before using this skill:** +- If unsure if slow → Use @debugging-and-profiling to profile first +- If setting up project → Use @project-structure-and-tooling for dependencies + + +## Quick Reference + +### NumPy Quick Wins + +```python +# Vectorization +result = arr ** 2 + 2 * arr + 1 # Not: for loop + +# Boolean indexing +result = arr[arr > 0] # Not: list comprehension + +# Broadcasting +result = matrix + row_vector # Not: loop over rows + +# In-place operations +arr += 1 # Not: arr = arr + 1 +``` + +### pandas Quick Wins + +```python +# Never iterrows +df['new'] = df['a'] + df['b'] # Not: iterrows + +# Vectorized conditions +df['category'] = np.where(df['value'] > 0.5, 'high', 'low') + +# Categorical for strings +df['category'] = pd.Categorical(df['category']) + +# Query for complex filters +df.query('A > 0.5 and B < 0.3') # Not: multiple [] +``` + +### Memory Optimization Checklist + +- [ ] Use smallest dtype that fits data +- [ ] Convert repeated strings to categorical +- [ ] Use chunking for files > available RAM +- [ ] Avoid unnecessary copies (use views or in-place ops) +- [ ] Only load needed columns (usecols in read_csv) diff --git a/skills/using-python-engineering/systematic-delinting.md b/skills/using-python-engineering/systematic-delinting.md new file mode 100644 index 0000000..ffbe39e --- /dev/null +++ b/skills/using-python-engineering/systematic-delinting.md @@ -0,0 +1,1506 @@ + +# Systematic Delinting + +## Overview + +**Core Principle:** Fix warnings systematically, NEVER disable them. Delinting is about making existing code compliant with standards through minimal, focused changes. It is NOT refactoring. + +Lint warnings represent technical debt and code quality issues. A systematic delinting process tackles this debt incrementally, starting with high-value, low-effort fixes. The goal: clean, standards-compliant code without architectural changes or risky refactoring. + +**This skill teaches the PROCESS of delinting, not just how to configure linters.** + +## When to Use + +**Use this skill when:** +- Codebase has 50+ lint warnings +- Inheriting legacy code with no linting +- Enabling strict linting on existing projects +- Team wants to adopt linting standards systematically +- Need to reduce technical debt incrementally +- Want to fix warnings without disabling rules + +**Don't use when:** +- Setting up linting for NEW projects (use project-structure-and-tooling) +- Less than 50 warnings (just fix them directly) +- Need to refactor code (that's architecture work, not delinting) +- Want quick fixes via disable comments (anti-pattern) + +**Symptoms triggering this skill:** +- "1000+ lint warnings, where to start?" 
+- "Legacy code needs linting cleanup" +- "How to fix warnings without breaking code?" +- "Systematic approach to reducing lint debt" +- "Team pushback on enabling linting" + + +## Delinting Philosophy + +### Fix, Never Disable + +**The Golden Rule**: Fix warnings by changing code to comply with the rule. NEVER disable warnings with `# noqa`, `# type: ignore`, `# pylint: disable`, or configuration exclusions. + +```python +# ❌ WRONG: Disabling warnings +def calculateTotal(prices): # noqa: N802 - disable naming warning + total = 0 + for price in prices: + total += price # noqa: PERF401 - disable perf warning + return total + +# ✅ CORRECT: Fixing warnings +def calculate_total(prices: list[float]) -> float: + return sum(prices) +``` + +**Why this matters**: +- Disabling hides problems, doesn't fix them +- Disabled warnings accumulate as more technical debt +- Future developers don't know if disable is still valid +- Linters lose effectiveness when widely disabled + +**The only exception**: Third-party code you can't modify (and even then, prefer type stubs). + +### Delinting ≠ Refactoring + +**Critical distinction**: Delinting makes minimal changes to satisfy linting rules. Refactoring changes architecture, algorithms, or design. + +```python +# Example: E501 (line too long) violation +# Original line (150 chars): +result = some_function(very_long_argument_name, another_long_argument, third_argument, fourth_argument, fifth_argument) + +# ❌ WRONG: Refactoring during delinting +# This changes the API and logic - NOT delinting +def process_items(items): + processor = ItemProcessor(items) + return processor.process() + +# ✅ CORRECT: Minimal fix for E501 +result = some_function( + very_long_argument_name, + another_long_argument, + third_argument, + fourth_argument, + fifth_argument, +) +``` + +**Why this matters**: +- Refactoring introduces risk and requires testing +- Delinting should be low-risk, mechanical fixes +- Mixing delinting and refactoring makes code review impossible +- Delinting can be done incrementally; refactoring often can't + +**When refactoring IS needed**: Create separate tickets/PRs for refactoring. Don't hide refactoring in "delinting" commits. + +### Technical Debt Perspective + +Lint warnings = measurable technical debt. Each warning represents: +- Code that doesn't follow team standards +- Potential bugs (unused imports, variables) +- Maintenance burden (complex functions, long lines) +- Onboarding friction (inconsistent style) + +**Debt paydown strategy**: +1. Stop accumulating (enable linting in CI for new code) +2. Measure baseline (count warnings by rule) +3. Pay down systematically (fix highest-value rules first) +4. Track progress (watch warning count decrease) + +**Like financial debt**: Small, consistent payments compound. One file/rule at a time. 
+ + +## Ruff Configuration + +### Core Configuration for Delinting + +**File**: `pyproject.toml` + +```toml +[tool.ruff] +# Line length: 140 chars (NOT ignored, enforced at 140) +line-length = 140 + +# Python version +target-version = "py312" + +[tool.ruff.lint] +# Start with safe, auto-fixable rules +select = [ + "F", # Pyflakes (undefined names, unused imports) + "E", # pycodestyle errors + "W", # pycodestyle warnings + "I", # isort (import sorting) + "N", # pep8-naming + "UP", # pyupgrade (modern Python syntax) + "YTT", # flake8-2020 (sys.version) + "S", # flake8-bandit (security) + "B", # flake8-bugbear (bug patterns) + "A", # flake8-builtins (shadowing) + "C4", # flake8-comprehensions + "T10", # flake8-debugger + "ISC", # flake8-implicit-str-concat + "ICN", # flake8-import-conventions + "PIE", # flake8-pie (misc lints) + "PT", # flake8-pytest-style + "Q", # flake8-quotes + "RSE", # flake8-raise + "RET", # flake8-return + "SIM", # flake8-simplify + "TID", # flake8-tidy-imports + "ARG", # flake8-unused-arguments + "PTH", # flake8-use-pathlib + "ERA", # eradicate (commented code) + "PL", # Pylint + "PERF", # Perflint (performance) + "RUF", # Ruff-specific rules +] + +# Don't ignore any rules during delinting +# (You can ignore specific rules TEMPORARILY while working on others) +ignore = [] + +# Auto-fix settings +fix = true +fixable = ["ALL"] +unfixable = [] + +[tool.ruff.lint.per-file-ignores] +# Tests can have different standards +"tests/**/*.py" = [ + "S101", # Allow assert in tests + "ARG", # Allow unused arguments (fixtures) + "PLR2004", # Allow magic values +] + +[tool.ruff.lint.isort] +known-first-party = ["your_package_name"] + +[tool.ruff.lint.mccabe] +max-complexity = 10 # Warn on complex functions +``` + +**Key decisions**: +- **Line length = 140**: Not 79, not 88, not ignored. 140 is enforced. +- **Fix = true**: Auto-fix what's safe to auto-fix +- **Select all rules**: Enable comprehensive rule set +- **Ignore = []**: Start with nothing ignored (ignore temporarily during delinting) + +### Rule Sets by Project Maturity + +**New project (no legacy code)**: +```toml +select = ["ALL"] # Everything +ignore = [] # Nothing +``` + +**Existing project (legacy code)**: +```toml +# Phase 1: Critical rules only +select = ["F", "E", "W"] + +# Phase 2: Add safety and bugs +select = ["F", "E", "W", "S", "B"] + +# Phase 3: Add style and best practices +select = ["F", "E", "W", "S", "B", "N", "UP", "I"] + +# Phase 4: Add performance and complexity +select = ["F", "E", "W", "S", "B", "N", "UP", "I", "PERF", "PL"] + +# Phase 5: Everything +select = ["ALL"] +``` + +**Large legacy codebase (5000+ lines)**: +```toml +# Start MINIMAL - only undefined names and syntax errors +select = ["F821", "E999"] + +# Expand rule-by-rule based on triage +``` + +### Why Line-Length = 140, Not 79/88 + +```python +# 79 chars is too restrictive for modern codebases +def process_user_data(user_id: int, + user_name: str, # Already line-wrapping at 79! 
+ user_email: str) -> UserData: + pass + +# 88 (Black default) is better but still restrictive +def process_user_data( + user_id: int, user_name: str, user_email: str +) -> UserData: # Forces awkward wrapping + pass + +# 140 is pragmatic - allows natural code flow +def process_user_data(user_id: int, user_name: str, user_email: str, user_preferences: dict[str, Any]) -> UserData: + return UserData(id=user_id, name=user_name, email=user_email, preferences=user_preferences) +``` + +**Why 140**: +- Modern screens are wide (1920+ pixels) +- Reduces unnecessary line wrapping +- Improves readability for function signatures +- Still fits in side-by-side diffs +- Matches common team conventions (120-140 range) + +**Why NOT ignore line-length**: Code without any line-length limit becomes unreadable. 140 is the enforced limit. + + +## Pylint Configuration + +### When to Use Pylint vs Ruff + +**Use Ruff for**: +- Fast, auto-fixable rules (imports, formatting, syntax) +- Performance linting +- Modern Python (3.12+) features +- CI/CD (faster execution) + +**Use Pylint for**: +- Complex code quality checks (too-many-arguments, too-many-locals) +- Design linting (abstract methods, inheritance) +- Documentation checking (missing docstrings) +- Deep analysis (slower, more thorough) + +**Recommended approach**: +1. Use Ruff as primary linter +2. Add Pylint for specific quality checks Ruff doesn't cover +3. Run Ruff in CI (fast), Pylint locally or nightly (slow) + +### Pylint Configuration + +**File**: `pyproject.toml` + +```toml +[tool.pylint.main] +py-version = "3.12" +jobs = 0 # Auto-detect CPU cores + +[tool.pylint.messages_control] +# Start with minimal set +enable = [ + "too-many-arguments", + "too-many-locals", + "too-many-branches", + "too-many-statements", + "too-complex", + "missing-module-docstring", + "missing-class-docstring", + "missing-function-docstring", +] + +disable = [ + "C0103", # invalid-name (Ruff N-rules handle this) + "C0114", # missing-module-docstring (enable selectively) + "C0115", # missing-class-docstring (enable selectively) + "C0116", # missing-function-docstring (enable selectively) + "R0903", # too-few-public-methods (often wrong) + "W0212", # protected-access (too strict for tests) +] + +[tool.pylint.design] +max-args = 5 +max-locals = 15 +max-branches = 12 +max-statements = 50 +max-complexity = 10 + +[tool.pylint.format] +max-line-length = 140 # Match Ruff +``` + +**Key decisions**: +- Disable rules that Ruff handles better (naming, formatting) +- Enable complexity metrics (too-many-X) +- Match line-length with Ruff (140) +- Relax docstring requirements initially + + +## Triage Methodology + +### Step 1: Baseline Assessment + +Run linting and capture baseline statistics: + +```bash +# Ruff statistics +ruff check . --statistics > lint-baseline.txt + +# Example output: +# 423 F401 [*] `module` imported but unused +# 156 E501 [*] Line too long (152 > 140 characters) +# 89 N806 Variable `userId` in function should be lowercase +# 67 B008 Do not perform function calls in argument defaults +# 45 ARG001 Unused function argument: `kwargs` +# ... +``` + +**Analyze baseline**: +1. Total warning count +2. Rules with most violations +3. Rules marked `[*]` (auto-fixable) +4. Rules by severity (errors vs warnings) + +**Document baseline**: Commit `lint-baseline.txt` to track progress. 
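A short script can turn that baseline into a ranked summary for planning (a minimal sketch, assuming the `count code message` layout shown in the example output above):

```python
from collections import Counter
from pathlib import Path

counts: Counter[str] = Counter()
for line in Path("lint-baseline.txt").read_text().splitlines():
    parts = line.split()
    if len(parts) >= 2 and parts[0].isdigit():
        counts[parts[1]] += int(parts[0])  # e.g. "423 F401 [*] ..."

for rule, total in counts.most_common():
    print(f"{total:5d}  {rule}")
```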
+ +### Step 2: Categorize by Effort + +| Category | Effort | Examples | Strategy | +|----------|--------|----------|----------| +| **Auto-fixable** | Low | F401 (unused imports), I001 (import sorting), W291 (whitespace) | Fix immediately with `--fix` | +| **Mechanical** | Low | N806 (naming), E501 (line-length), Q000 (quotes) | Fix systematically, file-by-file | +| **Requires thought** | Medium | ARG001 (unused args), B008 (mutable defaults), RET504 (unnecessary assignment) | Review case-by-case | +| **Architectural** | High | C901 (too complex), PLR0913 (too many args), B006 (mutable default) | Defer or create refactoring tickets | + +**Triage output**: +``` +Quick wins (auto-fixable): 423 warnings +Mechanical fixes: 245 warnings +Requires review: 112 warnings +Architectural (defer): 45 warnings +Total: 825 warnings +``` + +### Step 3: Prioritize by Value + +**High value** (fix first): +1. **Bugs and security**: S, B-rules (actual bugs) +2. **Unused code**: F401, F841, ERA (dead code removal) +3. **Import problems**: F401, I001 (import hygiene) +4. **Type issues**: Type-related rules + +**Medium value**: +1. **Naming**: N-rules (consistency) +2. **Style**: E, W rules (readability) +3. **Performance**: PERF rules (optimizations) + +**Low value** (fix last): +1. **Complexity**: C901, PLR-rules (architectural) +2. **Docstrings**: D-rules (documentation) +3. **Comments**: ERA rules (cleanup) + +### Step 4: Create Delinting Plan + +**Template**: +```markdown +# Delinting Plan + +## Baseline +- Total warnings: 825 +- Baseline file: lint-baseline.txt +- Date: 2025-11-03 + +## Phase 1: Auto-fixable (Target: 1 day) +- [ ] F401: Remove unused imports (423 warnings) +- [ ] I001: Sort imports (45 warnings) +- [ ] W291: Remove trailing whitespace (23 warnings) + +## Phase 2: Mechanical fixes (Target: 3 days) +- [ ] N806: Fix variable naming (89 warnings) +- [ ] E501: Fix line-length (156 warnings) +- [ ] Q000: Fix quote style (34 warnings) + +## Phase 3: Requires review (Target: 5 days) +- [ ] ARG001: Review unused arguments (45 warnings) +- [ ] B008: Fix function call in defaults (67 warnings) +- [ ] RET504: Remove unnecessary assignments (15 warnings) + +## Phase 4: Deferred (Create tickets) +- [ ] C901: Reduce complexity (30 warnings) → Ticket #123 +- [ ] PLR0913: Reduce arguments (15 warnings) → Ticket #124 + +## Progress Tracking +- Day 1: 825 → 357 warnings (-468) +- Day 2: 357 → 268 warnings (-89) +- ... +``` + + +## Systematic Workflow + +### Rule-by-Rule Approach (Recommended) + +**Process**: Fix one rule type completely across entire codebase, commit, repeat. + +```bash +# Step 1: Baseline +ruff check . --statistics + +# Step 2: Pick highest-volume auto-fixable rule +# Example: F401 (unused imports) - 423 violations + +# Step 3: Fix that rule only +ruff check . --select F401 --fix + +# Step 4: Review changes +git diff + +# Step 5: Run tests +pytest + +# Step 6: Commit +git add . +git commit -m "fix: Remove unused imports (F401) + +Removed 423 unused import statements across codebase. +Auto-fixed with ruff --fix. + +Before: 825 total warnings +After: 402 total warnings" + +# Step 7: Repeat with next rule +ruff check . --statistics # Get updated counts +ruff check . 
--select I001 --fix # Fix next rule +``` + +**Why rule-by-rule**: +- Small, reviewable commits +- Easy to revert if something breaks +- Clear progress tracking +- Focused changes (one problem at a time) +- Easier for team to review + +### File-by-File Approach (Alternative) + +**Process**: Fix all warnings in one file, commit, repeat. + +```bash +# Step 1: List files with most warnings +ruff check . --output-format=concise | cut -d':' -f1 | sort | uniq -c | sort -rn + +# Example output: +# 45 src/core/processor.py +# 32 src/utils/helpers.py +# 28 src/models/user.py + +# Step 2: Fix one file +ruff check src/core/processor.py --fix + +# Step 3: Manual fixes for non-auto-fixable +# Edit src/core/processor.py + +# Step 4: Verify file is clean +ruff check src/core/processor.py # Should show 0 warnings + +# Step 5: Test +pytest tests/test_processor.py + +# Step 6: Commit +git add src/core/processor.py +git commit -m "fix: Delint src/core/processor.py + +Fixed all lint warnings in processor module: +- Removed unused imports (F401) +- Fixed line-length violations (E501) +- Renamed variables to snake_case (N806) + +Before: 45 warnings in file +After: 0 warnings in file" + +# Step 7: Repeat with next file +``` + +**When to use file-by-file**: +- Small codebase (<5000 lines) +- Modular architecture (isolated files) +- Want to fully clean specific modules +- Team owns specific files + +**Disadvantages**: +- Harder to track progress by rule type +- May mix different types of fixes in one commit +- Less systematic than rule-by-rule + +### Hybrid Approach + +Combine both approaches: + +1. **Phase 1**: Rule-by-rule for auto-fixable (F401, I001, W291) +2. **Phase 2**: File-by-file for core modules +3. **Phase 3**: Rule-by-rule for remaining mechanical fixes + + +## Common Rule Fixes + +### F401: Module Imported But Unused + +```python +# ❌ WRONG: Unused import +import json +import sys +import os # Unused! + +def process(data: str) -> dict: + return json.loads(data) + +# ✅ CORRECT: Remove unused +import json +import sys + +def process(data: str) -> dict: + return json.loads(data) + +# ❌ WRONG: Import used in type hint but looks unused +from typing import List +def get_items() -> List[int]: # List is used! 
    return [1, 2, 3]

# ✅ CORRECT: Use TYPE_CHECKING to fix false positives
from __future__ import annotations  # Required: defers annotation evaluation

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from typing import List

def get_items() -> List[int]:  # Without the __future__ import, List would be undefined at runtime
    return [1, 2, 3]

# ✅ BETTER: Use built-in (Python 3.9+)
def get_items() -> list[int]:
    return [1, 2, 3]
```

**Auto-fix**: `ruff check --select F401 --fix`

### E501: Line Too Long

```python
# ❌ WRONG: Line exceeds 140 chars (this is 145 chars)
result = some_function(very_long_argument_name, another_long_argument, third_argument, fourth_argument, fifth_argument, sixth_argument)

# ✅ CORRECT: Break at function call
result = some_function(
    very_long_argument_name,
    another_long_argument,
    third_argument,
    fourth_argument,
    fifth_argument,
    sixth_argument,
)

# ✅ CORRECT: Break in dictionary
config = {
    "very_long_key_name": "very_long_value_that_makes_line_too_long",
    "another_key": "another_value",
}

# ✅ CORRECT: Break in list comprehension
filtered_items = [
    item.process()
    for item in long_list_of_items
    if item.matches_very_long_condition_name()
]

# ❌ WRONG: Breaking string literals incorrectly
message = "This is a very long string that exceeds the line limit and should be broken up somehow but I don't know how"

# ✅ CORRECT: Implicit string concatenation
message = (
    "This is a very long string that exceeds the line limit "
    "and should be broken up somehow but I don't know how"
)

# ✅ CORRECT: f-strings stay readable
message = (
    f"Processing {user_name} with ID {user_id} "
    f"and email {user_email} for tenant {tenant_name}"
)
```

**NOT auto-fixable**: Requires manual judgment on where to break.

### N806: Variable in Function Should Be Lowercase

```python
# ❌ WRONG: camelCase names
def process_user(userId: int, userName: str) -> None:  # Parameters: N803
    userEmail = f"{userName}@example.com"  # Local variable: N806
    print(f"User {userId}: {userEmail}")

# ✅ CORRECT: snake_case
def process_user(user_id: int, user_name: str) -> None:
    user_email = f"{user_name}@example.com"
    print(f"User {user_id}: {user_email}")

# ❌ WRONG: PascalCase for variable
def calculate():
    TotalAmount = 100
    return TotalAmount * 1.1

# ✅ CORRECT: snake_case
def calculate():
    total_amount = 100
    return total_amount * 1.1
```

**NOT auto-fixable**: Requires renaming variables (affects all usage sites). Parameter names like `userId` are flagged as N803 and fixed the same way.

**Systematic fix process**:
1. Identify all N806 violations: `ruff check --select N806`
2. Fix one file at a time (easier than one variable at a time)
3. Use IDE refactoring (rename symbol) if available
4. Test after each file
5. Commit per file or per module

### ARG001: Unused Function Argument

```python
# ❌ WRONG: Unused argument (but might be part of API)
def process_data(data: str, format: str, encoding: str) -> dict:
    # encoding is never used!
+ return json.loads(data) + +# ✅ CORRECT: Remove if truly unused +def process_data(data: str, format: str) -> dict: + return json.loads(data) + +# ✅ CORRECT: Prefix with _ if required by API/interface +def process_data(data: str, format: str, _encoding: str) -> dict: + # _encoding signals intentionally unused + return json.loads(data) + +# ✅ CORRECT: Use *args/**kwargs if API needs flexibility +def process_data(data: str, **kwargs) -> dict: + # format and encoding available in kwargs if needed + return json.loads(data) + +# ❌ WRONG: Callback with unused parameter +def on_click(event, extra_param): + print("Clicked!") # extra_param unused + +# ✅ CORRECT: Prefix with _ +def on_click(event, _extra_param): + print("Clicked!") +``` + +**Requires thought**: Determine if argument is truly unused or required by interface. + +### B006: Mutable Default Argument + +```python +# ❌ WRONG: Mutable default (list) +def add_item(item: str, items: list[str] = []) -> list[str]: + items.append(item) + return items + +# Bug: Same list reused across calls! +print(add_item("a")) # ['a'] +print(add_item("b")) # ['a', 'b'] - NOT ['b']! + +# ✅ CORRECT: Use None and create inside +def add_item(item: str, items: list[str] | None = None) -> list[str]: + if items is None: + items = [] + items.append(item) + return items + +# ❌ WRONG: Mutable default (dict) +def configure(options: dict[str, Any] = {}) -> dict[str, Any]: + options["processed"] = True + return options + +# ✅ CORRECT: Use None +def configure(options: dict[str, Any] | None = None) -> dict[str, Any]: + if options is None: + options = {} + options["processed"] = True + return options + +# ✅ CORRECT: Immutable defaults are fine +def greet(name: str = "World", count: int = 1) -> str: + return f"Hello {name}!" * count +``` + +**Mechanical fix**: Replace `= []` with `= None`, add `if param is None: param = []`. + +### B008: Function Call in Argument Default + +```python +# ❌ WRONG: Function call in default (evaluated once at definition) +def log_event(timestamp: datetime = datetime.now()) -> None: + print(f"Event at {timestamp}") + +# Bug: timestamp is fixed at function definition time! + +# ✅ CORRECT: Use None and call inside +def log_event(timestamp: datetime | None = None) -> None: + if timestamp is None: + timestamp = datetime.now() + print(f"Event at {timestamp}") + +# ❌ WRONG: List comprehension in default +def process(items: list[int] = [x * 2 for x in range(10)]) -> list[int]: + return items + +# ✅ CORRECT: Compute inside +def process(items: list[int] | None = None) -> list[int]: + if items is None: + items = [x * 2 for x in range(10)] + return items +``` + +**Mechanical fix**: Move function call inside function body, use None as default. + +### E741: Ambiguous Variable Name + +```python +# ❌ WRONG: Single letter 'l' looks like '1' or 'I' +l = [1, 2, 3] +for l in items: # Very confusing! + print(l) + +# ✅ CORRECT: Use descriptive name +items_list = [1, 2, 3] +for item in items: + print(item) + +# ❌ WRONG: Capital 'O' looks like zero +O = 0 # Is this O or 0? + +# ✅ CORRECT: Descriptive name +offset = 0 +``` + +**Mechanical fix**: Rename `l` → `item`/`items`, `O` → `offset`/`obj`, `I` → `index`/`iterator`. 
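+
+For the rename-style rules above (N806, E741), a small shell loop can drive the file-by-file workflow. A minimal sketch, assuming a pytest suite and the `concise` output format shown earlier; the commit message follows the convention described under Progress Tracking below:
+
+```bash
+# Sketch: fix manual-only rules one file at a time, testing after each file
+for f in $(ruff check . --select E741,N806 --output-format=concise | cut -d: -f1 | sort -u); do
+    echo "=== $f ==="
+    ruff check "$f" --select E741,N806   # list the violations to fix by hand
+    ${EDITOR:-vi} "$f"                   # rename variables manually
+    pytest || break                      # stop the loop if tests regress
+    git add "$f" && git commit -m "fix: [Delinting] E741/N806 - $f"
+done
+```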
+ +### C901 / PLR0912: Function Too Complex + +```python +# ❌ WRONG: Too complex (complexity > 10) +def process_order(order_type: str, items: list, user: User) -> Order: + if order_type == "standard": + if user.is_premium: + if len(items) > 10: + discount = 0.2 + else: + discount = 0.1 + else: + discount = 0 + elif order_type == "express": + if user.is_premium: + if user.has_express: + discount = 0.15 + else: + discount = 0.05 + else: + discount = 0 + else: + discount = 0 + # ... more nested conditions + return calculate_order(items, discount) + +# ✅ CORRECT: Extract helper functions +def calculate_discount(order_type: str, user: User, item_count: int) -> float: + if order_type == "standard": + return _standard_discount(user, item_count) + elif order_type == "express": + return _express_discount(user) + return 0.0 + +def _standard_discount(user: User, item_count: int) -> float: + if not user.is_premium: + return 0.0 + return 0.2 if item_count > 10 else 0.1 + +def _express_discount(user: User) -> float: + if not user.is_premium: + return 0.0 + return 0.15 if user.has_express else 0.05 + +def process_order(order_type: str, items: list, user: User) -> Order: + discount = calculate_discount(order_type, user, len(items)) + return calculate_order(items, discount) +``` + +**NOT mechanical**: Requires refactoring (architectural). Create separate ticket. + +### RET504: Unnecessary Variable Assignment Before Return + +```python +# ❌ WRONG: Unnecessary variable +def calculate_total(prices: list[float]) -> float: + total = sum(prices) + return total + +# ✅ CORRECT: Return directly +def calculate_total(prices: list[float]) -> float: + return sum(prices) + +# ❌ WRONG: Unnecessary variable in expression +def is_valid(value: int) -> bool: + result = value > 0 and value < 100 + return result + +# ✅ CORRECT: Return directly +def is_valid(value: int) -> bool: + return 0 < value < 100 + +# ✅ CORRECT: Keep variable if it improves readability +def calculate_price(base: float, tax_rate: float, discount: float) -> float: + subtotal = base - discount + total_with_tax = subtotal * (1 + tax_rate) + return total_with_tax # OK: shows calculation steps +``` + +**Auto-fixable** in simple cases: `ruff check --select RET504 --fix` + +### PERF401: Use List Comprehension Instead of For Loop + +```python +# ❌ WRONG: Manual loop to build list +items = [] +for x in range(10): + items.append(x * 2) + +# ✅ CORRECT: List comprehension +items = [x * 2 for x in range(10)] + +# ❌ WRONG: Manual loop with condition +result = [] +for item in items: + if item.is_valid(): + result.append(item.value) + +# ✅ CORRECT: List comprehension with filter +result = [item.value for item in items if item.is_valid()] + +# ✅ CORRECT: Keep loop if more readable +# Complex logic - loop is clearer +result = [] +for item in items: + processed = item.process() + if processed.is_valid() and processed.score > threshold: + transformed = transform(processed) + result.append(transformed) +# This is fine - don't force into comprehension +``` + +**Mostly auto-fixable**: `ruff check --select PERF401 --fix` + + +## CI Integration + +### Ratcheting: Don't Get Worse + +**Principle**: New code must be clean. Existing warnings can be fixed incrementally. + +**Strategy**: +1. Capture baseline warning count +2. CI fails if warning count INCREASES +3. 
CI passes if warning count stays same or DECREASES + +**Implementation** (GitHub Actions): + +```yaml +name: Lint Ratcheting + +on: [push, pull_request] + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Need history for baseline + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install dependencies + run: | + pip install ruff + + - name: Get baseline warning count (main branch) + run: | + git checkout main + ruff check . --output-format=concise | wc -l > /tmp/baseline_count.txt + echo "Baseline warnings: $(cat /tmp/baseline_count.txt)" + + - name: Get current warning count + run: | + git checkout ${{ github.sha }} + ruff check . --output-format=concise | wc -l > /tmp/current_count.txt + echo "Current warnings: $(cat /tmp/current_count.txt)" + + - name: Compare counts + run: | + baseline=$(cat /tmp/baseline_count.txt) + current=$(cat /tmp/current_count.txt) + + if [ "$current" -gt "$baseline" ]; then + echo "❌ Lint warnings increased: $baseline → $current" + echo "New warnings introduced. Please fix or remove." + exit 1 + elif [ "$current" -lt "$baseline" ]; then + echo "✅ Lint warnings decreased: $baseline → $current" + echo "Great work reducing technical debt!" + exit 0 + else + echo "✅ Lint warnings unchanged: $baseline" + echo "No new warnings introduced." + exit 0 + fi +``` + +**Why ratcheting**: +- Allows incremental improvement +- Doesn't block work on legacy code +- Prevents accumulating more debt +- Clear progress metric (warnings decreasing) + +### Pre-commit for New Code Only + +**Goal**: All NEW code is clean. Don't auto-fix existing code on commit. + +**File**: `.pre-commit-config.yaml` + +```yaml +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.6 + hooks: + - id: ruff + args: [--fix] # Auto-fix what we can + - id: ruff-format # Format new code + + # Only lint files changed in this commit + - repo: local + hooks: + - id: ruff-check-changed + name: Ruff lint changed files only + entry: bash -c 'ruff check "$@" --fix' -- + language: system + types: [python] + pass_filenames: true # Only run on staged files +``` + +**Install**: +```bash +pip install pre-commit +pre-commit install +``` + +**Behavior**: +- Only runs on files you're committing +- Auto-fixes what it can +- Fails if unfixable warnings in your changes +- Doesn't touch other files + +**Why pre-commit**: +- Catches issues before code review +- Ensures new code is clean +- Doesn't reformat entire codebase +- Fast feedback loop + +### Blocking New Violations + +**CI configuration to block new violations**: + +```yaml +# Only fail on files changed in PR +- name: Lint changed files only + run: | + # Get list of changed Python files + git diff --name-only origin/main...HEAD | grep '\.py$' > changed_files.txt + + # Lint only changed files + if [ -s changed_files.txt ]; then + ruff check $(cat changed_files.txt) + exit_code=$? + + if [ $exit_code -ne 0 ]; then + echo "❌ Lint violations in changed files" + exit 1 + fi + fi +``` + +**Alternative: Use diff-cover**: + +```bash +pip install diff-cover + +# Generate baseline +ruff check . 
--output-format=json > baseline.json
+
+# Check only changed lines (diff-quality computes the diff against the compare branch itself)
+diff-quality --violations=ruff --compare-branch=origin/main --fail-under=100
+```
+
+
+## Team Adoption
+
+### Introducing Linting to Legacy Project
+
+**Phase 1: Discussion (Week 1)**
+- Present linting value proposition
+- Show example of bugs caught by linting
+- Discuss team standards (line-length, naming)
+- Get buy-in before enforcing
+
+**Phase 2: Configuration (Week 2)**
+- Set up `pyproject.toml` with agreed rules
+- Start with MINIMAL rules (F, E only)
+- Document exceptions (tests, third-party)
+- No CI enforcement yet
+
+**Phase 3: Baseline (Week 3)**
+- Run linting on entire codebase
+- Generate baseline report
+- Triage warnings by category
+- Create delinting plan
+
+**Phase 4: Quick Wins (Weeks 4-5)**
+- Fix auto-fixable rules (F401, I001)
+- Commit frequently
+- Demonstrate reduced warning count
+- Build momentum
+
+**Phase 5: CI Integration (Week 6)**
+- Add ratcheting CI check
+- Block new violations only
+- Monitor for false positives
+- Adjust rules as needed
+
+**Phase 6: Gradual Strictness (Weeks 7+)**
+- Add one rule set per week
+- Fix existing violations before enabling
+- Incrementally increase strictness
+- Track progress publicly
+
+### Handling Team Pushback
+
+**Objection**: "Linting is just bikeshedding"
+**Response**: Show a bug caught by the linter (unused imports, wrong variable names). Linting finds real bugs, not just style issues.
+
+**Objection**: "We don't have time to fix 1000 warnings"
+**Response**: We don't fix them all at once. Ratcheting prevents NEW warnings. Fix old ones incrementally (10 mins/day).
+
+**Objection**: "Linter is too strict"
+**Response**: We control the rules. Start minimal (F, E). Add rules when the team is ready. Always fix, never disable.
+
+**Objection**: "My code is readable, why change it?"
+**Response**: Consistency across the team > individual preference. Linting enforces team standards, not personal opinion.
+
+**Objection**: "This will slow down development"
+**Response**: Short-term: yes (fixing warnings). Long-term: no (fewer bugs, easier onboarding, faster reviews).
+
+### Gradual Strictness Strategy
+
+```toml
+# Week 1: Absolute minimum
+select = ["F821", "E999"]  # Undefined names, syntax errors
+
+# Week 2: Add imports
+select = ["F", "E999"]  # All pyflakes
+
+# Week 3: Add style
+select = ["F", "E", "W"]  # Add pycodestyle
+
+# Week 4: Add import sorting
+select = ["F", "E", "W", "I"]  # Add isort
+
+# Week 5: Add naming
+select = ["F", "E", "W", "I", "N"]  # Add pep8-naming
+
+# Week 6: Add bugs
+select = ["F", "E", "W", "I", "N", "B"]  # Add bugbear
+
+# Week 8: Add security
+select = ["F", "E", "W", "I", "N", "B", "S"]  # Add bandit
+
+# Week 10: Add performance
+select = ["F", "E", "W", "I", "N", "B", "S", "PERF"]
+
+# Week 12: Add everything
+select = ["ALL"]
+```
+
+**Each week**:
+1. Add the new rule set to the config
+2. Run linting to see new violations
+3. Fix violations before merging the config change
+4. Update CI to enforce the new rules
+
+**Pace**: Adjust based on team capacity. Some teams can do this in 4 weeks, others need 6 months.
+
+
+## Progress Tracking
+
+### Tracking Warning Counts
+
+**Daily/Weekly tracking**:
+
+```bash
+#!/bin/bash
+# track-lint-progress.sh
+
+date=$(date +%Y-%m-%d)
+count=$(ruff check . 
--output-format=concise | wc -l)
+
+echo "$date,$count" >> lint-progress.csv
+
+echo "Lint warnings: $count"
+```
+
+**Visualize progress**:
+
+```python
+import pandas as pd
+import matplotlib.pyplot as plt
+
+df = pd.read_csv("lint-progress.csv", names=["date", "count"])
+df["date"] = pd.to_datetime(df["date"])
+
+plt.figure(figsize=(10, 6))
+plt.plot(df["date"], df["count"])
+plt.title("Lint Warning Reduction Progress")
+plt.xlabel("Date")
+plt.ylabel("Warning Count")
+plt.grid(True)
+plt.savefig("lint-progress.png")
+```
+
+### Dashboard Metrics
+
+**Key metrics to track**:
+
+1. **Total warnings**: Overall count
+2. **Warnings by rule**: Which rules have most violations
+3. **Warnings by file**: Which files need most work
+4. **Auto-fixable %**: Percentage that can be auto-fixed
+5. **Time to zero**: Estimated date to reach zero warnings
+6. **Burn-down rate**: Warnings fixed per day/week
+
+**Example dashboard**:
+```
+Lint Status Dashboard (2025-11-03)
+====================================
+Total warnings: 387 (was 825 on Oct 1)
+Reduction: -438 (-53%)
+Daily rate: -13.3 warnings/day
+Est. zero date: 2025-12-02 (29 days)
+
+By Rule:
+  E501 (line-length): 156 (40%)
+  N806 (naming): 89 (23%)
+  B008 (defaults): 67 (17%)
+  ARG001 (unused): 45 (12%)
+  Other: 30 (8%)
+
+By File:
+  src/core/processor.py: 45
+  src/utils/helpers.py: 32
+  src/models/user.py: 28
+  Other: 282
+```
+
+### Commit Message Convention
+
+**Template**:
+```
+fix: [Delinting] <RULE> - <short description>
+
+<What changed and how: auto-fix command used, scope of manual edits>
+
+Rule: <code> (<rule name>)
+Fixed: <count> violations
+Impact: <why the fix matters>
+
+Before: <count> total warnings
+After: <count> total warnings
+```
+
+**Examples**:
+
+```
+fix: [Delinting] F401 - Remove unused imports
+
+Removed unused import statements across entire codebase.
+Auto-fixed with `ruff check --select F401 --fix`.
+
+Rule: F401 (unused-import)
+Fixed: 423 violations
+Impact: Cleaner imports, faster import time, reduced confusion
+
+Before: 825 total warnings
+After: 402 total warnings
+```
+
+```
+fix: [Delinting] N806 - Rename variables to snake_case
+
+Renamed variables in src/core/processor.py to follow snake_case
+convention. Changes limited to local variables, no API changes.
+
+Rule: N806 (non-lowercase-variable-in-function)
+Fixed: 45 violations (in one file)
+Impact: Consistent naming convention, improved readability
+
+Before: 89 N806 violations
+After: 44 N806 violations
+```
+
+
+## Anti-Patterns
+
+### Disabling Instead of Fixing
+
+```python
+# ❌ WRONG: Disabling warnings
+def calculateTotal(prices):  # noqa: N802
+    total = 0
+    for price in prices:  # noqa: PERF401
+        total += price
+    return total
+
+# ✅ CORRECT: Fixing warnings
+def calculate_total(prices: list[float]) -> float:
+    return sum(prices)
+```
+
+**Why wrong**: Disabling accumulates as technical debt. Every `# noqa` is a warning you're ignoring forever.
+
+**Exception**: Temporarily disable during delinting:
+```toml
+# pyproject.toml - temporarily during delinting
+[tool.ruff.lint]
+ignore = ["E501"]  # Ignoring line-length WHILE fixing other rules
+
+# Remove this after fixing other rules!
+```
+
+### Over-Refactoring During Delinting
+
+```python
+# Original code with E501 violation (line too long)
+result = calculate_user_statistics(user_id, include_deleted=False, include_inactive=True, date_range="last_30_days")
+
+# ❌ WRONG: Over-refactoring (NOT delinting)
+class UserStatisticsCalculator:
+    def __init__(self, user_id: int):
+        self.user_id = user_id
+
+    def calculate(self, options: StatisticsOptions) -> Statistics:
+        # Completely rewrote the code!
+        ...
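+        # ...dozens of new lines, a new public API, and every call site to migrate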
+ +# ✅ CORRECT: Minimal fix for E501 +result = calculate_user_statistics( + user_id, + include_deleted=False, + include_inactive=True, + date_range="last_30_days", +) +``` + +**Why wrong**: Refactoring introduces risk. Delinting should be mechanical and safe. Save refactoring for separate PRs. + +### Fixing Everything at Once + +```bash +# ❌ WRONG: Fix all 825 warnings in one commit +ruff check . --fix +git add . +git commit -m "Fix all lint warnings" +``` + +**Why wrong**: +- Impossible to review +- High risk of breaking something +- Hard to revert if needed +- No tracking of progress + +**✅ CORRECT**: Fix one rule type at a time, commit, repeat. + +### Ignoring Tests in Linting + +```toml +# ❌ WRONG: Excluding all tests +[tool.ruff.lint] +exclude = ["tests/"] +``` + +**Why wrong**: Tests need linting too! They're code, not special. + +**✅ CORRECT**: Different standards for tests, not no standards: +```toml +[tool.ruff.lint.per-file-ignores] +"tests/**/*.py" = [ + "S101", # Allow assert + "ARG001", # Allow unused fixtures + "PLR2004", # Allow magic values in tests +] +``` + +### Batch Fixing Without Testing + +```bash +# ❌ WRONG: Fix multiple rules without testing +ruff check --select F401,F841,I001,W291 --fix +git commit -m "Fix lint" # No testing! +``` + +**Why wrong**: Auto-fixes can break code (rare but possible). Always test. + +**✅ CORRECT**: +```bash +ruff check --select F401 --fix +pytest # Test! +git commit -m "fix: Remove unused imports (F401)" + +ruff check --select I001 --fix +pytest # Test again! +git commit -m "fix: Sort imports (I001)" +``` + + +## Decision Trees + +### Should I Fix This Warning? + +``` +Is it a real bug? (unused variable, undefined name) +└─ YES → Fix immediately +└─ NO ↓ + +Is it auto-fixable? +└─ YES → Run `ruff --fix`, test, commit +└─ NO ↓ + +Is it mechanical? (naming, line-length) +└─ YES → Fix systematically, file-by-file +└─ NO ↓ + +Does it require refactoring? (too-complex, too-many-args) +└─ YES → Create separate ticket, defer +└─ NO ↓ + +Is it a false positive? +└─ YES → Investigate rule, maybe adjust config +└─ NO → Fix case-by-case +``` + +### Rule-by-Rule vs File-by-File? + +``` +Codebase size: +├─ < 5,000 lines → File-by-file works fine +├─ 5,000 - 50,000 lines → Rule-by-rule recommended +└─ > 50,000 lines → Rule-by-rule mandatory + +Auto-fixable ratio: +├─ > 50% auto-fixable → Rule-by-rule (fix all auto-fixable first) +└─ < 50% auto-fixable → File-by-file (mix of auto and manual) + +Team structure: +├─ Modular ownership (each dev owns files) → File-by-file +└─ Shared codebase → Rule-by-rule +``` + +### When to Enable Rule in CI? 
+ +``` +Rule violations in codebase: +├─ 0 violations → Enable immediately +├─ 1-10 violations → Fix first, then enable +├─ 10-50 violations → Fix in 1-2 PRs, then enable +├─ 50-200 violations → Fix incrementally over 1-2 weeks, then enable +└─ > 200 violations → Use ratcheting, don't enable full rule yet +``` + + +## Integration with Other Skills + +**After using this skill:** +- If need to set up linting → See @project-structure-and-tooling +- If refactoring needed → See architecture patterns (separate from delinting) +- If tests failing → See @testing-and-quality + +**Before using this skill:** +- Should have linting configured → Use @project-structure-and-tooling first +- Should have CI setup → Use project tooling docs + +**Related skills:** +- @modern-syntax-and-types for type-related lint rules +- @testing-and-quality for test linting standards +- @debugging-and-profiling for finding issues that linting misses + + +## Quick Reference + +### Essential Commands + +```bash +# Baseline assessment +ruff check . --statistics + +# Fix specific rule +ruff check --select F401 --fix + +# Fix all auto-fixable +ruff check . --fix + +# Check specific file +ruff check path/to/file.py + +# Output formats +ruff check . --output-format=concise # File:line:code +ruff check . --output-format=json # JSON output +ruff check . --output-format=github # GitHub Actions +``` + +### Rule Categories Quick Reference + +| Code | Category | Auto-fix | Priority | +|------|----------|----------|----------| +| F | Pyflakes (imports, names) | Mostly | HIGH | +| E/W | Style (pycodestyle) | Mostly | MEDIUM | +| N | Naming (pep8-naming) | No | MEDIUM | +| I | Import sorting | Yes | HIGH | +| B | Bug patterns (bugbear) | Some | HIGH | +| S | Security (bandit) | No | HIGH | +| PERF | Performance | Mostly | MEDIUM | +| ARG | Unused arguments | No | MEDIUM | +| C901 | Complexity | No | LOW (defer) | +| PLR | Pylint refactor | No | LOW (defer) | + +### Common Fix Patterns + +| Rule | Pattern | Fix | +|------|---------|-----| +| F401 | Unused import | Remove import | +| E501 | Line too long | Break at call/comprehension | +| N806 | Wrong variable name | Rename to snake_case | +| B006 | Mutable default | Use None, create inside | +| B008 | Call in default | Use None, call inside | +| ARG001 | Unused argument | Remove or prefix with _ | +| RET504 | Unnecessary variable | Return directly | +| PERF401 | Manual loop | Use list comprehension | + +### Delinting Checklist + +**Before starting**: +- [ ] Linting configured in pyproject.toml +- [ ] Baseline captured (`ruff check . --statistics > baseline.txt`) +- [ ] Tests passing +- [ ] Git working tree clean + +**For each rule type**: +- [ ] Fix violations (`ruff check --select RULE --fix`) +- [ ] Review changes (`git diff`) +- [ ] Run tests (`pytest`) +- [ ] Commit (`git commit -m "fix: [Delinting] RULE - description"`) +- [ ] Update progress tracking + +**After delinting**: +- [ ] All targeted rules fixed +- [ ] Tests passing +- [ ] CI passing +- [ ] Documentation updated +- [ ] Team notified of new standards diff --git a/skills/using-python-engineering/testing-and-quality.md b/skills/using-python-engineering/testing-and-quality.md new file mode 100644 index 0000000..ffd69ce --- /dev/null +++ b/skills/using-python-engineering/testing-and-quality.md @@ -0,0 +1,1848 @@ + +# Testing and Quality + +## Overview + +**Core Principle:** Test behavior, not implementation. Tests are executable documentation that ensure code works as expected and continues to work as it evolves. 
+ +Modern Python testing centers on pytest: simple syntax, powerful fixtures, comprehensive plugins. Good tests enable confident refactoring, catch regressions early, and document expected behavior. Bad tests are brittle, slow, and create maintenance burden without providing value. + +## When to Use + +**Use this skill when:** +- "Tests are failing" +- "How to write pytest tests?" +- "Fixture scope issues" +- "Mock not working" +- "Flaky tests" +- "Improve test coverage" +- "Tests too slow" +- "How to test X?" + +**Don't use when:** +- Setting up testing infrastructure (use project-structure-and-tooling first) +- Debugging production code (use debugging-and-profiling) +- Performance optimization (use debugging-and-profiling to profile first) + +**Symptoms triggering this skill:** +- pytest errors or failures +- Need to add tests to existing code +- Tests passing locally but failing in CI +- Coverage gaps identified +- Difficulty testing complex scenarios + + +## pytest Fundamentals + +### Basic Test Structure + +```python +# ❌ WRONG: Using unittest (verbose, requires class) +import unittest + +class TestCalculator(unittest.TestCase): + def test_addition(self): + self.assertEqual(add(2, 3), 5) + + def test_subtraction(self): + self.assertEqual(subtract(5, 3), 2) + +if __name__ == '__main__': + unittest.main() + +# ✅ CORRECT: Using pytest (simple, clear) +def test_addition(): + assert add(2, 3) == 5 + +def test_subtraction(): + assert subtract(5, 3) == 2 + +# Why this matters: pytest uses plain assert, no class needed, cleaner syntax +``` + +### Test Discovery + +```python +# pytest discovers tests automatically using these conventions: + +# ✅ Test file naming +# test_*.py or *_test.py +test_calculator.py # ✓ +calculator_test.py # ✓ +tests.py # ✗ Won't be discovered + +# ✅ Test function naming +def test_addition(): # ✓ Discovered + pass + +def addition_test(): # ✗ Not discovered (must start with test_) + pass + +def testAddition(): # ✗ Not discovered (use snake_case) + pass + +# ✅ Test class naming (optional) +class TestCalculator: # Must start with Test + def test_add(self): # Method must start with test_ + pass +``` + +### Assertions and Error Messages + +```python +# ❌ WRONG: No context for failure +def test_user_creation(): + user = create_user("alice", "alice@example.com") + assert user.name == "alice" + assert user.email == "alice@example.com" + +# ✅ CORRECT: Descriptive assertions +def test_user_creation(): + user = create_user("alice", "alice@example.com") + + # pytest shows actual vs expected on failure + assert user.name == "alice", f"Expected name 'alice', got '{user.name}'" + assert user.email == "alice@example.com" + assert user.active is True # Boolean assertions are clear + +# ✅ CORRECT: Using pytest helpers for better errors +import pytest + +def test_exception_raised(): + with pytest.raises(ValueError, match="Invalid email"): + create_user("alice", "not-an-email") + +def test_approximate_equality(): + # For floats, use approx + result = calculate_pi() + assert result == pytest.approx(3.14159, rel=1e-5) + +# ✅ CORRECT: Testing multiple conditions +def test_user_validation(): + with pytest.raises(ValueError) as exc_info: + create_user("", "alice@example.com") + + assert "name cannot be empty" in str(exc_info.value) +``` + +**Why this matters:** Clear assertions make test failures immediately understandable. pytest's introspection shows actual values without manual formatting. 
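+
+`pytest.approx` also compares element-wise inside sequences and dicts of numbers, which keeps float-heavy assertions readable. A short sketch (`normalize` and `summary_stats` are hypothetical functions under test):
+
+```python
+import pytest
+
+def test_vector_normalization():
+    # approx applies the tolerance to every element - no manual loop needed
+    assert normalize([3.0, 4.0]) == pytest.approx([0.6, 0.8])
+
+def test_summary_stats():
+    # also works on dict values
+    assert summary_stats([1.0, 2.0]) == pytest.approx({"mean": 1.5, "std": 0.5})
+```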
+ +### Test Organization + +```python +# ✅ CORRECT: Group related tests in classes +class TestUserCreation: + """Tests for user creation logic.""" + + def test_valid_user(self): + user = create_user("alice", "alice@example.com") + assert user.name == "alice" + + def test_invalid_email(self): + with pytest.raises(ValueError): + create_user("alice", "invalid") + + def test_empty_name(self): + with pytest.raises(ValueError): + create_user("", "alice@example.com") + +class TestUserUpdate: + """Tests for user update logic.""" + + def test_update_email(self): + user = create_user("alice", "old@example.com") + user.update_email("new@example.com") + assert user.email == "new@example.com" + +# ✅ Directory structure +tests/ +├── __init__.py +├── conftest.py # Shared fixtures +├── test_users.py # User-related tests +├── test_auth.py # Auth-related tests +└── integration/ + ├── __init__.py + └── test_api.py # Integration tests +``` + + +## Fixtures + +### Basic Fixtures + +```python +import pytest + +# ❌ WRONG: Repeating setup in each test +def test_user_creation(): + db = Database("test.db") + db.connect() + user = create_user(db, "alice", "alice@example.com") + assert user.name == "alice" + db.disconnect() + +def test_user_deletion(): + db = Database("test.db") + db.connect() + user = create_user(db, "alice", "alice@example.com") + delete_user(db, user.id) + db.disconnect() + +# ✅ CORRECT: Use fixture for shared setup +@pytest.fixture +def db(): + """Provide a test database connection.""" + database = Database("test.db") + database.connect() + yield database # Test runs here + database.disconnect() # Cleanup + +def test_user_creation(db): + user = create_user(db, "alice", "alice@example.com") + assert user.name == "alice" + +def test_user_deletion(db): + user = create_user(db, "alice", "alice@example.com") + delete_user(db, user.id) + assert not db.get_user(user.id) +``` + +**Why this matters:** Fixtures reduce duplication, ensure cleanup happens, and make test intent clear. + +### Fixture Scopes + +```python +# ❌ WRONG: Function scope for expensive setup (slow tests) +@pytest.fixture # Default scope="function" - runs for each test +def expensive_resource(): + resource = ExpensiveResource() # Takes 5 seconds to initialize + resource.initialize() + yield resource + resource.cleanup() + +# 100 tests × 5 seconds = 500 seconds just for setup! 
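+# (pytest --durations=10 reports the slowest setup/call/teardown phases,
+# which is a quick way to spot fixture costs like this)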
+ +# ✅ CORRECT: Appropriate scope for resource lifecycle +@pytest.fixture(scope="session") # Once per test session +def expensive_resource(): + """Expensive resource initialized once for all tests.""" + resource = ExpensiveResource() + resource.initialize() + yield resource + resource.cleanup() + +@pytest.fixture(scope="module") # Once per test module +def database(): + """Database connection shared across test module.""" + db = Database("test.db") + db.connect() + yield db + db.disconnect() + +@pytest.fixture(scope="class") # Once per test class +def api_client(): + """API client for test class.""" + client = APIClient() + yield client + client.close() + +@pytest.fixture(scope="function") # Once per test (default) +def user(): + """Fresh user for each test.""" + return create_user("test", "test@example.com") +``` + +**Scope Guidelines:** +- `function` (default): Fresh state for each test, slow but safe +- `class`: Share across test class, balance speed and isolation +- `module`: Share across test file, faster but less isolation +- `session`: Share across entire test run, fastest but needs careful cleanup + +**Critical Rule:** Higher scopes must reset state between tests or be read-only! + +### Fixture Factories + +```python +# ❌ WRONG: Creating fixtures for every variation +@pytest.fixture +def user_alice(): + return create_user("alice", "alice@example.com") + +@pytest.fixture +def user_bob(): + return create_user("bob", "bob@example.com") + +@pytest.fixture +def admin_user(): + return create_user("admin", "admin@example.com", is_admin=True) + +# ✅ CORRECT: Use fixture factory pattern +@pytest.fixture +def user_factory(): + """Factory for creating test users.""" + created_users = [] + + def _create_user(name: str, email: str | None = None, **kwargs): + if email is None: + email = f"{name}@example.com" + user = create_user(name, email, **kwargs) + created_users.append(user) + return user + + yield _create_user + + # Cleanup all created users + for user in created_users: + delete_user(user.id) + +# Usage +def test_user_permissions(user_factory): + alice = user_factory("alice") + bob = user_factory("bob") + admin = user_factory("admin", is_admin=True) + + assert not alice.is_admin + assert admin.is_admin +``` + +**Why this matters:** Factories provide flexibility without fixture explosion. Automatic cleanup tracks all created resources. + +### Fixture Composition + +```python +# ✅ CORRECT: Compose fixtures to build complex setups +@pytest.fixture +def database(): + db = Database("test.db") + db.connect() + yield db + db.disconnect() + +@pytest.fixture +def user(database): # Uses database fixture + user = create_user(database, "alice", "alice@example.com") + yield user + delete_user(database, user.id) + +@pytest.fixture +def authenticated_client(user): # Uses user fixture (which uses database) + client = APIClient() + client.authenticate(user.id) + yield client + client.close() + +# Test uses only the highest-level fixture it needs +def test_api_call(authenticated_client): + response = authenticated_client.get("/profile") + assert response.status_code == 200 +``` + +**Why this matters:** Composition creates clear dependency chains. Tests request only what they need, fixtures handle the rest. 
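+
+A related composition tool is `autouse=True`: the fixture runs around every test in its scope without being requested, which is one way to honor the state-reset rule above. A minimal sketch, reusing the `database` fixture from the composition example:
+
+```python
+import pytest
+
+@pytest.fixture(autouse=True)
+def clean_tables(database):
+    """Runs around every test automatically - no test needs to request it."""
+    yield
+    # the higher-scoped connection stays shared; the state does not
+    database.truncate_all_tables()
+```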
+ +### conftest.py + +```python +# File: tests/conftest.py +# Fixtures defined here are available to all tests + +import pytest + +@pytest.fixture(scope="session") +def database(): + """Session-scoped database for all tests.""" + db = Database("test.db") + db.connect() + db.migrate() + yield db + db.disconnect() + +@pytest.fixture +def clean_database(database): + """Reset database state before each test.""" + yield database + database.truncate_all_tables() + +# File: tests/integration/conftest.py +# Fixtures here available only to integration tests + +@pytest.fixture +def api_server(): + """Start API server for integration tests.""" + server = TestServer() + server.start() + yield server + server.stop() +``` + +**conftest.py locations:** +- `tests/conftest.py`: Available to all tests +- `tests/integration/conftest.py`: Available only to tests in integration/ +- Fixtures can reference fixtures from parent conftest.py files + + +## Parametrization + +### Basic Parametrization + +```python +# ❌ WRONG: Repeating tests for different inputs +def test_addition_positive(): + assert add(2, 3) == 5 + +def test_addition_negative(): + assert add(-2, -3) == -5 + +def test_addition_zero(): + assert add(0, 0) == 0 + +def test_addition_mixed(): + assert add(-2, 3) == 1 + +# ✅ CORRECT: Parametrize test +import pytest + +@pytest.mark.parametrize("a,b,expected", [ + (2, 3, 5), + (-2, -3, -5), + (0, 0, 0), + (-2, 3, 1), +]) +def test_addition(a, b, expected): + assert add(a, b) == expected + +# pytest output shows each case: +# test_addition[2-3-5] PASSED +# test_addition[-2--3--5] PASSED +# test_addition[0-0-0] PASSED +# test_addition[-2-3-1] PASSED +``` + +### Parametrize with IDs + +```python +# ✅ CORRECT: Add readable test IDs +@pytest.mark.parametrize("a,b,expected", [ + pytest.param(2, 3, 5, id="positive"), + pytest.param(-2, -3, -5, id="negative"), + pytest.param(0, 0, 0, id="zero"), + pytest.param(-2, 3, 1, id="mixed"), +]) +def test_addition(a, b, expected): + assert add(a, b) == expected + +# Output: +# test_addition[positive] PASSED +# test_addition[negative] PASSED +# test_addition[zero] PASSED +# test_addition[mixed] PASSED +``` + +**Why this matters:** Readable test IDs make failures immediately understandable. Instead of "test_addition[2-3-5]", you see "test_addition[positive]". 
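+
+When the parameters are objects rather than scalars, an `ids=` callable (or list) generates readable IDs without wrapping every case in `pytest.param`. A short sketch (`display_name` is a hypothetical function under test):
+
+```python
+import pytest
+
+USERS = [
+    {"name": "alice", "admin": False},
+    {"name": "root", "admin": True},
+]
+
+@pytest.mark.parametrize("user", USERS, ids=lambda u: u["name"])
+def test_display_name(user):
+    assert display_name(user)
+
+# Output:
+# test_display_name[alice] PASSED
+# test_display_name[root] PASSED
+```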
+ +### Multiple Parametrize + +```python +# ✅ CORRECT: Multiple parametrize creates cartesian product +@pytest.mark.parametrize("operation", [add, subtract, multiply]) +@pytest.mark.parametrize("a,b", [(2, 3), (-2, 3), (0, 0)]) +def test_operations(operation, a, b): + result = operation(a, b) + assert isinstance(result, (int, float)) + +# Creates 3 × 3 = 9 test combinations +``` + +### Parametrize Fixtures + +```python +# ✅ CORRECT: Parametrize fixtures for different configurations +@pytest.fixture(params=["sqlite", "postgres", "mysql"]) +def database(request): + """Test against multiple database backends.""" + db_type = request.param + + if db_type == "sqlite": + db = SQLiteDatabase("test.db") + elif db_type == "postgres": + db = PostgresDatabase("test") + elif db_type == "mysql": + db = MySQLDatabase("test") + + db.connect() + yield db + db.disconnect() + +# All tests using this fixture run against all database types +def test_user_creation(database): + user = create_user(database, "alice", "alice@example.com") + assert user.name == "alice" + +# Runs 3 times: with sqlite, postgres, mysql +``` + +**Why this matters:** Fixture parametrization tests against multiple implementations/configurations without changing test code. + + +## Mocking and Patching + +### When to Mock + +```python +# ❌ WRONG: Mocking business logic (test implementation, not behavior) +def get_user_score(user_id: int) -> int: + user = get_user(user_id) + score = calculate_score(user.actions) + return score + +# Bad test - mocking internal implementation +def test_get_user_score(mocker): + mocker.patch("module.get_user") + mocker.patch("module.calculate_score", return_value=100) + + result = get_user_score(1) + assert result == 100 # Testing mock, not real logic! + +# ✅ CORRECT: Mock external dependencies only +import httpx + +def fetch_user_data(user_id: int) -> dict: + """Fetch user from external API.""" + response = httpx.get(f"https://api.example.com/users/{user_id}") + return response.json() + +# Good test - mocking external API +def test_fetch_user_data(mocker): + mock_response = mocker.Mock() + mock_response.json.return_value = {"id": 1, "name": "alice"} + + mocker.patch("httpx.get", return_value=mock_response) + + result = fetch_user_data(1) + assert result == {"id": 1, "name": "alice"} +``` + +**When to mock:** +- External APIs/services +- Database calls (sometimes - prefer test database) +- File system operations +- Time/date (freezing time for tests) +- Random number generation + +**When NOT to mock:** +- Business logic +- Internal functions +- Simple calculations +- Data transformations + +### pytest-mock Basics + +```python +# Install: pip install pytest-mock + +import pytest + +# ✅ CORRECT: Using mocker fixture +def test_api_call(mocker): + # Mock external HTTP call + mock_get = mocker.patch("requests.get") + mock_get.return_value.json.return_value = {"status": "ok"} + mock_get.return_value.status_code = 200 + + result = fetch_data("https://api.example.com/data") + + # Verify mock was called correctly + mock_get.assert_called_once_with("https://api.example.com/data") + assert result == {"status": "ok"} + +# ✅ CORRECT: Mock return value +def test_database_query(mocker): + mock_db = mocker.patch("module.database") + mock_db.query.return_value = [{"id": 1, "name": "alice"}] + + users = get_all_users() + + assert len(users) == 1 + assert users[0]["name"] == "alice" + +# ✅ CORRECT: Mock side effect (different return per call) +def test_retry_logic(mocker): + mock_api = mocker.patch("module.api_call") + 
mock_api.side_effect = [
+        Exception("Network error"),
+        Exception("Timeout"),
+        {"status": "ok"}  # Succeeds on third try
+    ]
+
+    result = retry_api_call()
+
+    assert result == {"status": "ok"}
+    assert mock_api.call_count == 3
+
+# ✅ CORRECT: Mock exception
+def test_error_handling(mocker):
+    mock_api = mocker.patch("module.api_call")
+    mock_api.side_effect = ConnectionError("Network down")
+
+    with pytest.raises(ConnectionError):
+        fetch_data()
+```
+
+### Patching Strategies
+
+```python
+# ✅ CORRECT: Patch where it's used, not where it's defined
+# File: module.py
+from datetime import datetime
+
+def create_timestamp():
+    return datetime.now()
+
+# ❌ WRONG: Patching in datetime module
+def test_timestamp_wrong(mocker):
+    mocker.patch("datetime.datetime.now")  # Doesn't work!
+    # ...
+
+# ✅ CORRECT: Patch in module where it's imported
+def test_timestamp_correct(mocker):
+    fixed_time = datetime(2025, 1, 1, 12, 0, 0)
+    # datetime is a C type whose attributes can't be patched directly,
+    # so replace the whole name that module sees
+    mock_dt = mocker.patch("module.datetime")
+    mock_dt.now.return_value = fixed_time
+
+    result = create_timestamp()
+    assert result == fixed_time
+
+# ✅ CORRECT: Patch class method
+def test_database_method(mocker):
+    mocker.patch.object(Database, "query", return_value=[])
+
+    db = Database()
+    result = db.query("SELECT * FROM users")
+    assert result == []
+
+# ✅ CORRECT: Patch with context manager (plain unittest.mock here -
+# pytest-mock's mocker.patch does not support `with` usage)
+from unittest.mock import patch
+
+def test_temporary_patch():
+    with patch("module.api_call", return_value={"status": "ok"}):
+        result = fetch_data()
+        assert result["status"] == "ok"
+
+    # Patch automatically removed after context
+```
+
+### Mocking Time
+
+```python
+# ✅ CORRECT: Freeze time for deterministic tests
+def test_expiration(mocker):
+    from datetime import datetime, timedelta
+
+    fixed_time = datetime(2025, 1, 1, 12, 0, 0)
+    mock_dt = mocker.patch("module.datetime")
+    mock_dt.now.return_value = fixed_time
+
+    # Create session that expires in 1 hour
+    session = create_session(expires_in=timedelta(hours=1))
+
+    # Session not expired at creation time
+    assert not session.is_expired()
+
+    # Advance time by 2 hours
+    mock_dt.now.return_value = fixed_time + timedelta(hours=2)
+
+    # Session now expired
+    assert session.is_expired()
+
+# ✅ BETTER: Use freezegun library (pip install freezegun)
+from freezegun import freeze_time
+
+@freeze_time("2025-01-01 12:00:00")
+def test_expiration_freezegun():
+    session = create_session(expires_in=timedelta(hours=1))
+    assert not session.is_expired()
+
+    # Move time forward
+    with freeze_time("2025-01-01 14:00:00"):
+        assert session.is_expired()
+```
+
+### Mocking Anti-Patterns
+
+```python
+# ❌ WRONG: Mock every dependency (brittle test)
+def test_process_user_data_wrong(mocker):
+    mocker.patch("module.validate_user")
+    mocker.patch("module.transform_data")
+    mocker.patch("module.calculate_score")
+    mocker.patch("module.save_result")
+
+    process_user_data({"id": 1})
+    # Test proves nothing - all logic is mocked!
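+    # Any bug in validate/transform/calculate would go unnoticed here -
+    # every code path the test touches is a stub.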
+ +# ✅ CORRECT: Test real logic, mock only external dependencies +def test_process_user_data_correct(mocker): + # Mock only external dependency + mock_save = mocker.patch("module.save_to_database") + + # Test real validation, transformation, calculation + result = process_user_data({"id": 1, "name": "alice"}) + + # Verify real logic ran correctly + assert result["score"] > 0 + mock_save.assert_called_once() + +# ❌ WRONG: Asserting internal implementation details +def test_implementation_details(mocker): + spy = mocker.spy(module, "internal_helper") + + process_data([1, 2, 3]) + + # Brittle - breaks if refactored + assert spy.call_count == 3 + spy.assert_called_with(3) + +# ✅ CORRECT: Assert behavior, not implementation +def test_behavior(mocker): + result = process_data([1, 2, 3]) + + # Test output, not how it was calculated + assert result == [2, 4, 6] + +# ❌ WRONG: Over-specifying mock expectations +def test_over_specified(mocker): + mock_api = mocker.patch("module.api_call") + mock_api.return_value = {"status": "ok"} + + result = fetch_data() + + # Too specific - breaks if parameter order changes + mock_api.assert_called_once_with( + url="https://api.example.com", + method="GET", + headers={"User-Agent": "Test"}, + timeout=30, + retry=3 + ) + +# ✅ CORRECT: Assert only important arguments +def test_appropriate_assertions(mocker): + mock_api = mocker.patch("module.api_call") + mock_api.return_value = {"status": "ok"} + + result = fetch_data() + + # Assert only critical behavior + assert mock_api.called + assert "https://api.example.com" in str(mock_api.call_args) +``` + + +## Coverage + +### pytest-cov Setup + +```bash +# Install +pip install pytest-cov + +# Run with coverage +pytest --cov=mypackage --cov-report=term-missing + +# Generate HTML report +pytest --cov=mypackage --cov-report=html + +# Coverage with branch coverage (recommended) +pytest --cov=mypackage --cov-branch --cov-report=term-missing +``` + +### Configuration + +```toml +# File: pyproject.toml + +[tool.pytest.ini_options] +addopts = [ + "--cov=mypackage", + "--cov-branch", + "--cov-report=term-missing:skip-covered", + "--cov-report=html", + "--cov-fail-under=80", +] + +[tool.coverage.run] +source = ["mypackage"] +branch = true +omit = [ + "*/tests/*", + "*/test_*.py", + "*/__init__.py", +] + +[tool.coverage.report] +precision = 2 +show_missing = true +skip_covered = false +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "raise AssertionError", + "raise NotImplementedError", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", + "@abstractmethod", +] +``` + +### Coverage Targets + +```python +# ❌ WRONG: Chasing 100% coverage +# File: utils.py +def format_user(user: dict) -> str: + if user.get("middle_name"): # Rare edge case + return f"{user['first_name']} {user['middle_name']} {user['last_name']}" + return f"{user['first_name']} {user['last_name']}" + +def __repr__(self): # Debug helper + return f"User({self.name})" + +# Writing tests just for coverage: +def test_format_user_with_middle_name(): # Low-value test + result = format_user({"first_name": "A", "middle_name": "B", "last_name": "C"}) + assert result == "A B C" + +# ✅ CORRECT: Pragmatic coverage with exclusions +# File: utils.py +def format_user(user: dict) -> str: + if user.get("middle_name"): + return f"{user['first_name']} {user['middle_name']} {user['last_name']}" + return f"{user['first_name']} {user['last_name']}" + +def __repr__(self): # pragma: no cover + return f"User({self.name})" + +# Test main path, exclude rare edge cases +def 
 test_format_user():
+    result = format_user({"first_name": "Alice", "last_name": "Smith"})
+    assert result == "Alice Smith"
+```
+
+**Coverage Guidelines:**
+- **80% overall coverage:** Good target for most projects
+- **100% for critical paths:** Payment, auth, security logic
+- **Exclude boilerplate:** `__repr__`, type checking, debug code
+- **Branch coverage:** More valuable than line coverage
+- **Don't game metrics:** Tests should verify behavior, not boost numbers
+
+### Branch Coverage
+
+```python
+# Line coverage: 100%, but a branch is missing!
+def process_payment(amount: float, currency: str) -> bool:
+    if currency != "USD":                          # Line covered
+        amount = convert_to_usd(amount, currency)  # Line covered
+    return charge_usd(amount)                      # Line covered
+
+def test_process_payment():
+    result = process_payment(100.0, "EUR")
+    assert result is True
+# Line coverage: 3/3 = 100% ✓
+# Branch coverage: 1/2 = 50% ✗ (the already-USD path is never taken)
+
+# ✅ CORRECT: Test both branches
+def test_process_payment_eur():
+    assert process_payment(100.0, "EUR") is True
+
+def test_process_payment_usd():
+    assert process_payment(100.0, "USD") is True
+# Line coverage: 3/3 = 100% ✓
+# Branch coverage: 2/2 = 100% ✓
+```
+
+**Why this matters:** Branch coverage catches untested code paths. Line coverage can show 100% while missing edge cases.
+
+
+## Property-Based Testing
+
+### Hypothesis Basics
+
+```python
+# Install: pip install hypothesis
+
+from hypothesis import given, strategies as st
+
+# ❌ WRONG: Only testing specific examples
+def test_reverse_twice():
+    assert reverse(reverse([1, 2, 3])) == [1, 2, 3]
+    assert reverse(reverse([])) == []
+    assert reverse(reverse([1])) == [1]
+
+# ✅ CORRECT: Property-based test
+@given(st.lists(st.integers()))
+def test_reverse_twice_property(lst):
+    """Reversing a list twice returns the original list."""
+    assert reverse(reverse(lst)) == lst
+# Hypothesis generates hundreds of test cases automatically
+
+# ✅ CORRECT: Test mathematical properties
+@given(st.integers(), st.integers())
+def test_addition_commutative(a, b):
+    """Addition is commutative: a + b == b + a"""
+    assert add(a, b) == add(b, a)
+
+@given(st.integers())
+def test_addition_identity(a):
+    """Adding zero is identity: a + 0 == a"""
+    assert add(a, 0) == a
+
+@given(st.lists(st.integers()))
+def test_sort_idempotent(lst):
+    """Sorting twice gives same result as sorting once."""
+    assert sorted(sorted(lst)) == sorted(lst)
+```
+
+### Hypothesis Strategies
+
+```python
+import json
+import math
+
+from hypothesis import given, strategies as st
+
+# ✅ Basic strategies
+@given(st.integers())  # Any integer
+def test_abs_positive(n):
+    assert abs(n) >= 0
+
+@given(st.integers(min_value=0, max_value=100))  # Bounded integers
+def test_percentage(n):
+    assert 0 <= n <= 100
+
+@given(st.floats(allow_nan=False, allow_infinity=False))
+def test_float_calculation(x):
+    result = calculate(x)
+    assert not math.isnan(result)
+
+@given(st.text())  # Any unicode string
+def test_encode_decode(s):
+    assert decode(encode(s)) == s
+
+@given(st.text(alphabet=st.characters(whitelist_categories=("Lu", "Ll"))))
+def test_letters_only(s):  # Only upper/lowercase letters
+    assert s.isalpha() or len(s) == 0
+
+# ✅ Composite strategies
+@given(st.lists(st.integers(), min_size=1, max_size=10))
+def test_list_operations(lst):
+    assert len(lst) >= 1
+    assert len(lst) <= 10
+
+@given(st.dictionaries(keys=st.text(), values=st.integers()))
+def test_dict_operations(d):
+    serialized = 
json.dumps(d)
+    assert json.loads(serialized) == d
+
+# ✅ Custom strategies (st.composite builds new strategies from existing ones)
+@st.composite
+def users(draw):
+    """Generate test user dictionaries."""
+    return {
+        "name": draw(st.text(min_size=1, max_size=50)),
+        "age": draw(st.integers(min_value=0, max_value=120)),
+        "email": draw(st.emails()),
+    }
+
+@given(users())
+def test_user_validation(user):
+    validate_user(user)  # Should not raise
+```
+
+### When to Use Property-Based Testing
+
+```python
+# ✅ Good use cases:
+
+# 1. Round-trip properties (encode/decode, serialize/deserialize)
+@given(st.dictionaries(st.text(), st.integers()))
+def test_json_round_trip(data):
+    assert json.loads(json.dumps(data)) == data
+
+# 2. Invariants (properties that always hold)
+@given(st.lists(st.integers()))
+def test_sorted_is_ordered(lst):
+    sorted_lst = sorted(lst)
+    for i in range(len(sorted_lst) - 1):
+        assert sorted_lst[i] <= sorted_lst[i + 1]
+
+# 3. Comparison with reference implementation
+@given(st.lists(st.integers()))
+def test_custom_sort_matches_builtin(lst):
+    assert custom_sort(lst) == sorted(lst)
+
+# 4. Finding edge cases
+@given(st.text())
+def test_parse_never_crashes(text):
+    # Should handle any input without crashing
+    result = parse(text)
+    assert result is None or isinstance(result, dict)
+
+# ❌ Don't use for:
+# - Testing exact output (use example-based tests)
+# - Complex business logic (hard to express as properties)
+# - External API calls (use mocking with examples)
+```
+
+**Why this matters:** Property-based tests find edge cases humans miss. Hypothesis generates thousands of test cases, including corner cases like empty lists, negative numbers, and unicode edge cases.
+
+
+## Test Architecture
+
+### Test Pyramid
+
+```
+        /\
+       /  \      E2E (few)
+      /----\
+     /      \    Integration (some)
+    /--------\
+   /          \  Unit (many)
+  /------------\
+```
+
+**Unit Tests (70-80%):**
+- Test individual functions/classes in isolation
+- Fast (milliseconds)
+- No external dependencies
+- Use mocks for dependencies
+
+**Integration Tests (15-25%):**
+- Test components working together
+- Slower (seconds)
+- Real database/services when possible
+- Test critical paths
+
+**E2E Tests (5-10%):**
+- Test entire system
+- Slowest (minutes)
+- Full stack: UI → API → Database
+- Test critical user journeys only
+
+### Unit vs Integration vs E2E
+
+```python
+# Unit test: Test function in isolation
+def test_calculate_discount_unit():
+    price = 100.0
+    discount_percent = 20
+
+    result = calculate_discount(price, discount_percent)
+
+    assert result == 80.0
+
+# Integration test: Test components together
+def test_apply_discount_integration(database):
+    # Uses real database
+    product = database.create_product(name="Widget", price=100.0)
+    coupon = database.create_coupon(code="SAVE20", discount_percent=20)
+
+    result = apply_discount_to_product(product.id, coupon.code)
+
+    assert result.final_price == 80.0
+    assert database.get_product(product.id).price == 100.0  # Original unchanged
+
+# E2E test: Test through API
+def test_checkout_with_discount_e2e(api_client, database):
+    # Setup test data
+    api_client.post("/products", json={"name": "Widget", "price": 100.0})
+    api_client.post("/coupons", json={"code": "SAVE20", "discount": 20})
+
+    # User journey
+    api_client.post("/cart/add", json={"product_id": 1, "quantity": 1})
+    api_client.post("/cart/apply-coupon", json={"code": "SAVE20"})
+    response = api_client.post("/checkout")
+
+    assert response.status_code == 200
+    assert response.json()["total"] == 80.0
+```
+
+### Test Organization Strategies
+
+```python
+# Strategy 1: Mirror source structure
+mypackage/
+    users.py
+    auth.py
+    payments.py
+tests/
+    test_users.py
+    test_auth.py
+    test_payments.py
+
+# Strategy 2: Separate by test type
+tests/
+    unit/
+        test_users.py
+        test_auth.py
+    integration/
+        test_user_auth_flow.py
+        test_payment_flow.py
+    e2e/
+        test_checkout.py
+
+# Strategy 3: Feature-based (for larger projects)
+tests/
+    users/
+        test_registration.py
+        test_authentication.py
+        test_profile.py
+    payments/
+        test_checkout.py
+        test_refunds.py
+```
+
+**Recommendation:** Start with Strategy 1 (mirror structure). Move to Strategy 2 when you have many integration/E2E tests. Use Strategy 3 for large projects with complex features.
+
+
+## Flaky Tests
+
+### Identifying Flaky Tests
+
+```bash
+# Run tests multiple times to identify flakiness
+pytest --count=100  # Requires pytest-repeat
+
+# Run tests in random order
+pytest --random-order  # Requires pytest-random-order
+
+# Run tests in parallel (exposes race conditions)
+pytest -n 4  # Requires pytest-xdist
+```
+
+### Common Causes and Fixes
+
+#### 1. Test Order Dependencies
+
+```python
+# ❌ WRONG: Test depends on state from previous test
+class TestUser:
+    user = None
+
+    def test_create_user(self):
+        self.user = create_user("alice")
+        assert self.user.name == "alice"
+
+    def test_update_user(self):
+        # Broken: pytest creates a fresh instance per test, so self.user
+        # never carries over (and order coupling would be fragile anyway)
+        self.user.name = "bob"
+        assert self.user.name == "bob"
+
+# ✅ CORRECT: Each test is independent
+class TestUser:
+    @pytest.fixture
+    def user(self):
+        return create_user("alice")
+
+    def test_create_user(self):
+        user = create_user("alice")
+        assert user.name == "alice"
+
+    def test_update_user(self, user):
+        user.name = "bob"
+        assert user.name == "bob"
+```
+
+#### 2. Time-Dependent Tests
+
+```python
+# ❌ WRONG: Test depends on current time
+def test_expiration_wrong():
+    from datetime import datetime, timedelta
+
+    session = create_session(expires_in=timedelta(seconds=1))
+    time.sleep(1)  # Flaky - might not be exactly 1 second
+
+    assert session.is_expired()
+
+# ✅ CORRECT: Mock time for deterministic tests
+def test_expiration_correct(mocker):
+    from datetime import datetime, timedelta
+
+    start_time = datetime(2025, 1, 1, 12, 0, 0)
+    # datetime is a C type whose attributes can't be patched,
+    # so replace the whole name that module looks up
+    mock_dt = mocker.patch("module.datetime")
+    mock_dt.now.return_value = start_time
+
+    session = create_session(expires_in=timedelta(hours=1))
+    assert not session.is_expired()
+
+    # Advance time
+    mock_dt.now.return_value = start_time + timedelta(hours=2)
+
+    assert session.is_expired()
+```
+
+#### 3. 
Async/Concurrency Issues + +```python +# ❌ WRONG: Race condition with async code +async def test_concurrent_updates_wrong(): + counter = Counter(value=0) + + # These run concurrently, order undefined + await asyncio.gather( + counter.increment(), + counter.increment(), + ) + + # Flaky - might be 1 or 2 depending on timing + assert counter.value == 2 + +# ✅ CORRECT: Test with proper synchronization +async def test_concurrent_updates_correct(): + counter = ThreadSafeCounter(value=0) + + await asyncio.gather( + counter.increment(), + counter.increment(), + ) + + assert counter.value == 2 # ThreadSafeCounter ensures correctness + +# ✅ CORRECT: Test for race conditions explicitly +async def test_detects_race_condition(): + unsafe_counter = Counter(value=0) + + # Run many times to trigger race condition + for _ in range(100): + await asyncio.gather( + unsafe_counter.increment(), + unsafe_counter.increment(), + ) + + # This should fail, proving there's a race condition + # (Or pass if the code is actually thread-safe) +``` + +#### 4. External Dependencies + +```python +# ❌ WRONG: Test depends on external service +def test_fetch_user_data_wrong(): + # Flaky - network issues, rate limits, service downtime + response = requests.get("https://api.example.com/users/1") + assert response.status_code == 200 + +# ✅ CORRECT: Mock external service +def test_fetch_user_data_correct(mocker): + mock_response = mocker.Mock() + mock_response.status_code = 200 + mock_response.json.return_value = {"id": 1, "name": "alice"} + + mocker.patch("requests.get", return_value=mock_response) + + response = fetch_user_data(1) + assert response["name"] == "alice" +``` + +#### 5. Resource Leaks + +```python +# ❌ WRONG: Not cleaning up resources +def test_file_operations_wrong(): + f = open("test.txt", "w") + f.write("test") + # File not closed - subsequent tests might fail + + assert os.path.exists("test.txt") + +# ✅ CORRECT: Always cleanup +def test_file_operations_correct(tmp_path): + test_file = tmp_path / "test.txt" + + with test_file.open("w") as f: + f.write("test") + + assert test_file.exists() + # File automatically closed, tmp_path automatically cleaned up + +# ✅ CORRECT: Use fixtures for cleanup +@pytest.fixture +def test_file(tmp_path): + file_path = tmp_path / "test.txt" + yield file_path + # Cleanup happens automatically via tmp_path +``` + +#### 6. 
Non-Deterministic Data + +```python +# ❌ WRONG: Random or time-based data +def test_user_id_generation_wrong(): + user = create_user("alice") + # Flaky - ID might be random or timestamp-based + assert user.id == 1 + +# ✅ CORRECT: Mock or control randomness +def test_user_id_generation_correct(mocker): + mocker.patch("module.generate_id", return_value="fixed-id-123") + + user = create_user("alice") + assert user.id == "fixed-id-123" + +# ✅ CORRECT: Use fixtures with deterministic data +@pytest.fixture +def fixed_random(): + import random + random.seed(42) + yield random + # Reset seed if needed +``` + +### Debugging Flaky Tests + +```python +# ✅ Strategy 1: Add retry decorator to identify flakiness +import pytest + +@pytest.mark.flaky(reruns=3) # Requires pytest-rerunfailures +def test_potentially_flaky(): + # Test that occasionally fails + result = fetch_data() + assert result is not None + +# ✅ Strategy 2: Add logging to understand failures +import logging + +def test_with_logging(caplog): + caplog.set_level(logging.DEBUG) + + result = complex_operation() + + # Logs captured automatically + assert "Expected step completed" in caplog.text + assert result.success + +# ✅ Strategy 3: Use test markers +@pytest.mark.flaky +def test_known_flaky(): + # Mark test as flaky while investigating + ... + +# Skip flaky tests in CI +pytest -m "not flaky" +``` + + +## CI Integration + +### GitHub Actions Example + +```yaml +# File: .github/workflows/test.yml +name: Tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11", "3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Run tests + run: | + pytest --cov=mypackage --cov-report=xml --cov-report=term-missing + + - name: Upload coverage + uses: codecov/codecov-action@v3 + with: + file: ./coverage.xml + fail_ci_if_error: true +``` + +### Parallel Testing in CI + +```yaml +# Run tests in parallel +- name: Run tests in parallel + run: | + pytest -n auto --dist loadscope + +# Split tests across multiple jobs +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + test-group: [unit, integration, e2e] + + steps: + - name: Run ${{ matrix.test-group }} tests + run: | + pytest tests/${{ matrix.test-group }} +``` + +### Test Configuration for CI + +```toml +# File: pyproject.toml + +[tool.pytest.ini_options] +# CI-friendly settings +addopts = [ + "--strict-markers", # Fail on unknown markers + "--strict-config", # Fail on config errors + "--cov=mypackage", + "--cov-branch", + "--cov-report=term-missing", + "--cov-report=xml", + "--cov-fail-under=80", # Fail if coverage below 80% + "-v", # Verbose output +] + +markers = [ + "slow: marks tests as slow (deselect with '-m \"not slow\"')", + "integration: integration tests", + "e2e: end-to-end tests", + "flaky: known flaky tests", +] + +# Run fast tests first +testpaths = ["tests"] +python_files = ["test_*.py"] +python_classes = ["Test*"] +python_functions = ["test_*"] +``` + +### Environment-Specific Test Behavior + +```python +import os +import pytest + +# ✅ Skip tests in CI that require local resources +@pytest.mark.skipif( + os.getenv("CI") == "true", + reason="Requires local database" +) +def test_local_only(): + ... 
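+    # (GitHub Actions and most other CI providers export CI=true automatically)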

# ✅ Use different fixtures in CI
@pytest.fixture
def database():
    if os.getenv("CI"):
        # Use containerized database in CI
        return DockerDatabase()
    else:
        # Use local database in development
        return LocalDatabase()

# ✅ Stricter timeouts in CI (requires pytest-timeout)
@pytest.mark.timeout(10 if os.getenv("CI") else 30)
def test_with_timeout():
    ...
```


## Advanced Patterns

### Snapshot Testing

```python
# Install: pip install syrupy

def test_api_response_snapshot(snapshot):
    """Test API response matches saved snapshot."""
    response = api.get_user(123)

    # First run: saves snapshot
    # Future runs: compares against snapshot
    assert response == snapshot

# Update snapshots when intentionally changed:
# pytest --snapshot-update
```

### Mutation Testing

```python
# Install: pip install mutmut

# Run mutation testing
# mutmut run

# Mutation testing changes your code and runs tests
# If tests still pass, the mutant survived - your assertions are too weak

# Example:
def is_even(n: int) -> bool:
    return n % 2 == 0

# Bad test:
def test_is_even():
    assert is_even(2) is True  # Passes even if mutant changes 2 to 0

# Good test:
def test_is_even():
    assert is_even(2) is True
    assert is_even(3) is False  # Would catch mutations
    assert is_even(0) is True
```

### Test Fixtures as Contract

```python
# ✅ Pattern: Fixtures define test contracts
@pytest.fixture
def valid_user() -> dict:
    """Fixture provides valid user that passes validation."""
    return {
        "name": "alice",
        "email": "alice@example.com",
        "age": 30,
    }

def test_user_validation_accepts_valid(valid_user):
    """Valid user fixture must pass validation."""
    validate_user(valid_user)  # Should not raise

def test_user_creation(valid_user):
    """Can create user from valid fixture."""
    user = create_user(**valid_user)
    assert user.name == "alice"

# If validation rules change, update fixture once
# All tests using fixture automatically get the update
```


## Decision Trees

### Which Test Type?

```
Unit test if:
  - Testing single function/class
  - No external dependencies (or can mock them)
  - Fast (<10ms)

Integration test if:
  - Testing multiple components
  - Real database/services involved
  - Moderate speed (<1s)

E2E test if:
  - Testing full user journey
  - Multiple systems involved
  - Slow (>1s acceptable)
```

### When to Mock?

```
Mock if:
  - External API/service
  - Slow operation (network, disk I/O)
  - Non-deterministic (time, random)
  - Not the focus of the test

Don't mock if:
  - Business logic under test
  - Fast pure functions
  - Simple data transformations
  - Integration test (testing interaction)
```
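
The "non-deterministic" row often doesn't need a mock at all: injecting the clock (or RNG) as a parameter makes the code deterministic under test. A minimal sketch, assuming a hypothetical `make_token` helper:

```python
from datetime import datetime, timezone

def make_token(now: datetime | None = None) -> str:
    """Build a timestamped token; the clock is injectable for tests."""
    now = now or datetime.now(timezone.utc)
    return f"token-{now:%Y%m%d%H%M%S}"

def test_make_token_is_deterministic():
    # No mocking needed - pass a fixed datetime instead of patching the clock
    fixed = datetime(2025, 1, 1, tzinfo=timezone.utc)
    assert make_token(now=fixed) == "token-20250101000000"
```
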
+ +``` +function (default): + - Different state per test needed + - Cheap to create (<10ms) + +class: + - Tests in class share setup + - Moderate creation cost + +module: + - All tests in file can share + - Expensive setup (database) + - State reset between tests + +session: + - One-time setup for all tests + - Very expensive (>1s) + - Read-only or stateless +``` + + +## Anti-Patterns + +### Testing Implementation Details + +```python +# ❌ WRONG: Testing private methods +class UserService: + def _validate_email(self, email: str) -> bool: + return "@" in email + + def create_user(self, name: str, email: str) -> User: + if not self._validate_email(email): + raise ValueError("Invalid email") + return User(name, email) + +def test_validate_email_wrong(): + service = UserService() + assert service._validate_email("test@example.com") # Testing private method! + +# ✅ CORRECT: Test public interface +def test_create_user_with_invalid_email(): + service = UserService() + with pytest.raises(ValueError, match="Invalid email"): + service.create_user("alice", "not-an-email") +``` + +### Tautological Tests + +```python +# ❌ WRONG: Test that only proves code runs +def test_get_user(): + user = get_user(1) + assert user == get_user(1) # Proves nothing! + +# ✅ CORRECT: Test expected behavior +def test_get_user(): + user = get_user(1) + assert user.id == 1 + assert user.name is not None + assert isinstance(user.email, str) +``` + +### Fragile Selectors + +```python +# ❌ WRONG: Testing exact string matches (fragile) +def test_error_message(): + with pytest.raises(ValueError) as exc: + validate_user({"name": ""}) + + assert str(exc.value) == "Validation error: name must not be empty" + # Breaks if message wording changes slightly + +# ✅ CORRECT: Test meaningful parts +def test_error_message(): + with pytest.raises(ValueError) as exc: + validate_user({"name": ""}) + + error_msg = str(exc.value).lower() + assert "name" in error_msg + assert "empty" in error_msg or "required" in error_msg +``` + +### Slow Tests + +```python +# ❌ WRONG: Sleeping in tests +def test_async_operation(): + start_operation() + time.sleep(5) # Waiting for operation to complete + assert operation_complete() + +# ✅ CORRECT: Poll with timeout +def test_async_operation(): + start_operation() + + timeout = 5 + start = time.time() + while time.time() - start < timeout: + if operation_complete(): + return + time.sleep(0.1) + + pytest.fail("Operation did not complete within timeout") + +# ✅ BETTER: Use async properly or mock +async def test_async_operation(): + await start_operation() + assert await operation_complete() +``` + + +## Integration with Other Skills + +**After using this skill:** +- If tests are slow → See @debugging-and-profiling for profiling tests +- If setting up CI → See @project-structure-and-tooling for CI configuration +- If testing async code → See @async-patterns-and-concurrency for async testing patterns + +**Before using this skill:** +- Set up pytest → Use @project-structure-and-tooling for pytest configuration in pyproject.toml + + +## Quick Reference + +### Essential pytest Commands + +```bash +# Run all tests +pytest + +# Run specific file +pytest tests/test_users.py + +# Run specific test +pytest tests/test_users.py::test_create_user + +# Run tests matching pattern +pytest -k "user and not admin" + +# Run with coverage +pytest --cov=mypackage --cov-report=term-missing + +# Run in parallel +pytest -n auto + +# Verbose output +pytest -v + +# Stop on first failure +pytest -x + +# Show local variables on failure +pytest 

# Run last failed tests
pytest --lf

# Run failed, then all
pytest --ff
```

### pytest Markers

```python
import sys

import pytest

@pytest.mark.skip(reason="Not implemented yet")
def test_future_feature():
    ...

@pytest.mark.skipif(sys.version_info < (3, 12), reason="Requires Python 3.12+")
def test_new_syntax():
    ...

@pytest.mark.xfail(reason="Known bug #123")
def test_buggy_feature():
    ...

@pytest.mark.parametrize("input,expected", [(1, 2), (2, 3)])
def test_increment(input, expected):
    ...

@pytest.mark.slow
def test_expensive_operation():
    ...

# Run: pytest -m "not slow"  # Skip slow tests
```

### Fixture Cheatsheet

```python
@pytest.fixture
def simple():
    return "value"

@pytest.fixture
def with_cleanup():
    resource = setup()
    yield resource
    cleanup(resource)

@pytest.fixture(scope="session")
def expensive():
    return expensive_setup()

@pytest.fixture
def factory():
    items = []
    def _create(**kwargs):
        item = create_item(**kwargs)
        items.append(item)
        return item
    yield _create
    for item in items:
        cleanup(item)

@pytest.fixture(params=["a", "b", "c"])
def parametrized(request):
    return request.param
```

### Coverage Targets

| Coverage Type | Good Target | Critical Code | Acceptable Minimum |
|---------------|-------------|---------------|--------------------|
| Line Coverage | 80% | 100% | 70% |
| Branch Coverage | 75% | 100% | 65% |
| Function Coverage | 90% | 100% | 80% |

**Priority order:**
1. Critical paths (auth, payments, security) → 100%
2. Business logic → 80-90%
3. Utility functions → 70-80%
4. Boilerplate → Can exclude


## Why This Matters

**Tests enable:**
- **Confident refactoring:** Change code knowing tests catch regressions
- **Living documentation:** Tests show how code is meant to be used
- **Design feedback:** Hard-to-test code often indicates design problems
- **Faster debugging:** Tests isolate problems to specific components

**Good tests are:**
- **Fast:** Milliseconds for unit tests, seconds for integration
- **Isolated:** No dependencies between tests
- **Repeatable:** Same result every time
- **Self-checking:** Pass/fail without manual inspection
- **Timely:** Written with or before code (TDD)

**Test smells:**
- Tests slower than the code being tested
- Tests breaking from unrelated changes
- Need to change many tests for one feature change
- Tests that sometimes fail for no reason (flaky)
- Coverage gaps in critical paths

**Testing is not:**
- Proof of correctness (tests can show the presence of bugs, never their absence)
- Replacement for code review
- Substitute for good design
- Way to catch all bugs

**Testing is:**
- Safety net for refactoring
- Documentation of expected behavior
- Quick feedback on code quality
- Regression prevention