Initial commit

2025-11-29 18:49:17 +08:00
commit 7e037098e3
5 changed files with 1314 additions and 0 deletions
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -0,0 +1,12 @@
+{
+  "name": "engineer-expertise-extractor",
+  "description": "Research and extract an engineer's coding style, patterns, and best practices from their GitHub contributions. Creates structured knowledge base for replicating their expertise.",
+  "version": "0.0.0-2025.11.28",
+  "author": {
+    "name": "James Rochabrun",
+    "email": "jamesrochabrun@gmail.com"
+  },
+  "skills": [
+    "./skills/engineer-expertise-extractor"
+  ]
+}
--- a/README.md
+++ b/README.md
@@ -0,0 +1,3 @@
+# engineer-expertise-extractor
+
+Research and extract an engineer's coding style, patterns, and best practices from their GitHub contributions. Creates structured knowledge base for replicating their expertise.
--- a/plugin.lock.json
+++ b/plugin.lock.json
@@ -0,0 +1,48 @@
+{
+  "$schema": "internal://schemas/plugin.lock.v1.json",
+  "pluginId": "gh:jamesrochabrun/skills:engineer-expertise-extractor",
+  "normalized": {
+    "repo": null,
+    "ref": "refs/tags/v20251128.0",
+    "commit": "e1dc9f46a4e54755eb600ba5700333189314c16e",
+    "treeHash": "139f221c3d9b2cff87f4e1e23a2cfb0ae12c0fe294380a4962cf33ae213b605b",
+    "generatedAt": "2025-11-28T10:17:47.269090Z",
+    "toolVersion": "publish_plugins.py@0.2.0"
+  },
+  "origin": {
+    "remote": "git@github.com:zhongweili/42plugin-data.git",
+    "branch": "master",
+    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
+    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
+  },
+  "manifest": {
+    "name": "engineer-expertise-extractor",
+    "description": "Research and extract an engineer's coding style, patterns, and best practices from their GitHub contributions. Creates structured knowledge base for replicating their expertise."
+  },
+  "content": {
+    "files": [
+      {
+        "path": "README.md",
+        "sha256": "de3fadb15ae28e93c2efecce5a06f656992b3a90f01b1124972737c2cc93443e"
+      },
+      {
+        "path": ".claude-plugin/plugin.json",
+        "sha256": "04e70b25fc0c5f7fc0c295534f24f47b36672e2e150995772eb361697a1969a2"
+      },
+      {
+        "path": "skills/engineer-expertise-extractor/SKILL.md",
+        "sha256": "1fa0e2fcc60ac60e7b4db7b1e93aba93624ad8fd34ba25601b29f149d384d2ff"
+      },
+      {
+        "path": "skills/engineer-expertise-extractor/scripts/extract_engineer.sh",
+        "sha256": "3fd76ef39495e703e11478102693accc0d8180db767482215dd48f8639b88d52"
+      }
+    ],
+    "dirSha256": "139f221c3d9b2cff87f4e1e23a2cfb0ae12c0fe294380a4962cf33ae213b605b"
+  },
+  "security": {
+    "scannedAt": null,
+    "scannerVersion": null,
+    "flags": []
+  }
+}
--- a/skills/engineer-expertise-extractor/SKILL.md
+++ b/skills/engineer-expertise-extractor/SKILL.md
@@ -0,0 +1,638 @@
+---
+name: engineer-expertise-extractor
+description: Research and extract an engineer's coding style, patterns, and best practices from their GitHub contributions. Creates structured knowledge base for replicating their expertise.
+---
+
+# Engineer Expertise Extractor
+
+Extract and document an engineer's coding expertise by analyzing their GitHub contributions, creating a structured knowledge base that captures their coding style, patterns, best practices, and architectural decisions.
+
+## What This Skill Does
+
+Researches an engineer's work to create a "digital mentor" by:
+- **Analyzing Pull Requests** - Extract code patterns, review style, decisions
+- **Extracting Coding Style** - Document their preferences and conventions
+- **Identifying Patterns** - Common solutions and approaches they use
+- **Capturing Best Practices** - Their quality standards and guidelines
+- **Organizing Examples** - Real code samples from their work
+- **Documenting Decisions** - Architectural choices and reasoning
+
+## Why This Matters
+
+**Knowledge Preservation:**
+- Capture expert knowledge before they leave
+- Document tribal knowledge
+- Create mentorship materials
+- Onboard new engineers faster
+
+**Consistency:**
+- Align team coding standards
+- Replicate expert approaches
+- Maintain code quality
+- Scale expertise across team
+
+**Learning:**
+- Learn from senior engineers
+- Understand decision-making
+- See real-world patterns
+- Improve code quality
+
+## How It Works
+
+### 1. Research Phase
+Using GitHub CLI (`gh`), the skill:
+- Fetches engineer's pull requests
+- Analyzes code changes
+- Reviews their comments and feedback
+- Extracts patterns and conventions
+- Identifies their expertise areas
+
+### 2. Analysis Phase
+Categorizes findings into:
+- **Coding Style** - Formatting, naming, structure
+- **Patterns** - Common solutions and approaches
+- **Best Practices** - Quality guidelines
+- **Architecture** - Design decisions
+- **Testing** - Testing approaches
+- **Code Review** - Feedback patterns
+- **Documentation** - Doc style and practices
+
+### 3. Organization Phase
+Creates structured folders:
+```
+engineer_profiles/
+└── [engineer_name]/
+    ├── README.md (overview)
+    ├── coding_style/
+    │   ├── languages/
+    │   ├── naming_conventions.md
+    │   ├── code_structure.md
+    │   └── formatting_preferences.md
+    ├── patterns/
+    │   ├── common_solutions.md
+    │   ├── design_patterns.md
+    │   └── code_examples/
+    ├── best_practices/
+    │   ├── code_quality.md
+    │   ├── testing_approach.md
+    │   ├── performance.md
+    │   └── security.md
+    ├── architecture/
+    │   ├── design_decisions.md
+    │   ├── tech_choices.md
+    │   └── trade_offs.md
+    ├── code_review/
+    │   ├── feedback_style.md
+    │   ├── common_suggestions.md
+    │   └── review_examples.md
+    └── examples/
+        ├── by_language/
+        ├── by_pattern/
+        └── notable_prs/
+```
+
+## Output Structure
+
+### Engineer Profile README
+
+**Contains:**
+- Engineer overview
+- Areas of expertise
+- Languages and technologies
+- Key contributions
+- Coding philosophy
+- How to use this profile
+
+### Coding Style Documentation
+
+**Captures:**
+- Naming conventions (variables, functions, classes)
+- Code structure preferences
+- File organization
+- Comment style
+- Formatting preferences
+- Language-specific idioms
+
+**Example:**
+```markdown
+# Coding Style: [Engineer Name]
+
+## Naming Conventions
+
+### Variables
+- Use descriptive names: `userAuthentication` not `ua`
+- Boolean variables: `isActive`, `hasPermission`, `canEdit`
+- Collections: plural names `users`, `items`, `transactions`
+
+### Functions
+- Verb-first: `getUserById`, `validateInput`, `calculateTotal`
+- Pure functions preferred
+- Single responsibility
+
+### Classes
+- PascalCase: `UserService`, `PaymentProcessor`
+- Interface prefix: `IUserRepository`
+- Concrete implementations: `MongoUserRepository`
+
+## Code Structure
+
+### File Organization
+- One class per file
+- Related functions grouped together
+- Tests alongside implementation
+- Clear separation of concerns
+
+### Function Length
+- Max 20-30 lines preferred
+- Extract helper functions
+- Single level of abstraction
+```
+
+### Patterns Documentation
+
+**Captures:**
+- Recurring solutions
+- Design patterns used
+- Architectural patterns
+- Problem-solving approaches
+
+**Example:**
+```markdown
+# Common Patterns: [Engineer Name]
+
+## Dependency Injection
+
+Used consistently across services:
+
+\`\`\`typescript
+// Pattern: Constructor injection
+class UserService {
+  constructor(
+    private readonly userRepo: IUserRepository,
+    private readonly logger: ILogger
+  ) {}
+}
+\`\`\`
+
+**Why:** Testability, loose coupling, clear dependencies
+
+## Error Handling
+
+Consistent error handling approach:
+
+\`\`\`typescript
+// Pattern: Custom error types + global handler
+class ValidationError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = 'ValidationError';
+  }
+}
+
+// Usage
+if (!isValid(input)) {
+  throw new ValidationError('Invalid input format');
+}
+\`\`\`
+
+**Why:** Type-safe errors, centralized handling, clear debugging
+```
+
+### Best Practices Documentation
+
+**Captures:**
+- Quality standards
+- Testing approaches
+- Performance guidelines
+- Security practices
+- Documentation standards
+
+**Example:**
+```markdown
+# Best Practices: [Engineer Name]
+
+## Testing
+
+### Unit Test Structure
+- AAA pattern (Arrange, Act, Assert)
+- One assertion per test preferred
+- Test names describe behavior
+- Mock external dependencies
+
+\`\`\`typescript
+describe('UserService', () => {
+  describe('createUser', () => {
+    it('should create user with valid data', async () => {
+      // Arrange
+      const userData = { email: 'test@example.com', name: 'Test' };
+      const mockRepo = createMockRepository();
+
+      // Act
+      const result = await userService.createUser(userData);
+
+      // Assert
+      expect(result.id).toBeDefined();
+      expect(result.email).toBe(userData.email);
+    });
+  });
+});
+\`\`\`
+
+### Test Coverage
+- Aim for 80%+ coverage
+- 100% coverage for critical paths
+- Integration tests for APIs
+- E2E tests for user flows
+
+## Code Review Standards
+
+### What to Check
+- [ ] Tests included and passing
+- [ ] No console.logs remaining
+- [ ] Error handling present
+- [ ] Comments explain "why" not "what"
+- [ ] No hardcoded values
+- [ ] Security considerations addressed
+```
+
+### Architecture Documentation
+
+**Captures:**
+- Design decisions
+- Technology choices
+- Trade-offs made
+- System design approaches
+
+**Example:**
+```markdown
+# Architectural Decisions: [Engineer Name]
+
+## Decision: Microservices vs Monolith
+
+**Context:** Scaling user service
+**Decision:** Start monolith, extract services when needed
+**Reasoning:**
+- Team size: 5 engineers
+- Product stage: MVP
+- Premature optimization risk
+- Easier debugging and deployment
+
+**Trade-offs:**
+- Monolith pros: Simpler, faster development
+- Monolith cons: Harder to scale later
+- Decision: Optimize for current needs, refactor when hitting limits
+
+## Decision: REST vs GraphQL
+
+**Context:** API design for mobile app
+**Decision:** REST with versioning
+**Reasoning:**
+- Team familiar with REST
+- Simple use cases
+- Caching easier
+- Over-fetching not a problem yet
+
+**When to reconsider:** If frontend needs complex queries
+```
+
+### Code Review Documentation
+
+**Captures:**
+- Feedback patterns
+- Review approach
+- Common suggestions
+- Communication style
+
+**Example:**
+```markdown
+# Code Review Style: [Engineer Name]
+
+## Review Approach
+
+### Priority Order
+1. Security vulnerabilities
+2. Logic errors
+3. Test coverage
+4. Code structure
+5. Naming and style
+
+### Feedback Style
+- Specific and constructive
+- Explains "why" behind suggestions
+- Provides examples
+- Asks questions to understand reasoning
+
+### Common Suggestions
+
+**Security:**
+- "Consider input validation here"
+- "This query is vulnerable to SQL injection"
+- "Should we rate-limit this endpoint?"
+
+**Performance:**
+- "This N+1 query could be optimized with a join"
+- "Consider caching this expensive operation"
+- "Memoize this pure function"
+
+**Testing:**
+- "Can we add a test for the error case?"
+- "What happens if the API returns null?"
+- "Let's test the boundary conditions"
+
+**Code Quality:**
+- "Can we extract this into a helper function?"
+- "This function is doing too many things"
+- "Consider a more descriptive variable name"
+```
+
+## Using This Skill
+
+### Extract Engineer Profile
+
+```bash
+./scripts/extract_engineer.sh
+```
+
+**Interactive workflow:**
+1. Enter GitHub username
+2. Select repository scope (all/specific org)
+3. Choose analysis depth (last N PRs)
+4. Choose what to extract (coding style, common patterns, best practices, code review style, architectural decisions)
+5. Extract and organize findings
+
+**Output:** Structured profile in `engineer_profiles/[username]/`
+
+### Analyze Specific Repository
+
+```bash
+./scripts/analyze_repo.sh [repo-url] [engineer-username]
+```
+
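+Focuses analysis on specific repository contributions. **Note:** `analyze_repo.sh` is not bundled with this release of the skill (only `extract_engineer.sh` is); treat this command as planned functionality.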
+
+### Update Existing Profile
+
+```bash
+./scripts/update_profile.sh [engineer-username]
+```
+
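+Adds new PRs and updates the existing profile. **Note:** `update_profile.sh` is not bundled with this release of the skill (only `extract_engineer.sh` is); treat this command as planned functionality.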
+
+## Research Sources
+
+### GitHub CLI Queries
+
+**Pull Requests:**
+```bash
+gh pr list --author [username] --limit 100 --state all
+gh pr view [pr-number] --json title,body,files,reviews,comments
+```
+
+**Code Changes:**
+```bash
+gh pr diff [pr-number]
+gh api repos/{owner}/{repo}/pulls/[pr-number]/files
+```
+
+**Reviews:**
+```bash
+gh pr view [pr-number] --comments
+gh api repos/{owner}/{repo}/pulls/[pr-number]/reviews
+```
+
+**Commits:**
+```bash
+gh search commits --author [username]
+```
+
+### Analysis Techniques
+
+**Pattern Recognition:**
+- Identify recurring code structures
+- Extract common solutions
+- Detect naming patterns
+- Find architectural choices
+
+**Style Extraction:**
+- Analyze formatting consistency
+- Extract naming conventions
+- Identify comment patterns
+- Detect structural preferences
+
+**Best Practice Identification:**
+- Look for testing patterns
+- Find error handling approaches
+- Identify security practices
+- Extract performance optimizations
+
+## Use Cases
+
+### 1. Onboarding New Engineers
+
+**Problem:** New engineer needs to learn team standards
+**Solution:** Provide senior engineer's profile as reference
+
+**Benefits:**
+- Real examples from codebase
+- Understand team conventions
+- See decision-making process
+- Learn best practices
+
+### 2. Code Review Training
+
+**Problem:** Teaching good code review skills
+**Solution:** Study experienced reviewer's feedback patterns
+
+**Benefits:**
+- Learn what to look for
+- Understand feedback style
+- See common issues
+- Improve review quality
+
+### 3. Knowledge Transfer
+
+**Problem:** Senior engineer leaving, knowledge lost
+**Solution:** Extract their expertise before departure
+
+**Benefits:**
+- Preserve tribal knowledge
+- Document decisions
+- Maintain code quality
+- Reduce bus factor
+
+### 4. Establishing Team Standards
+
+**Problem:** Inconsistent coding styles across team
+**Solution:** Extract patterns from best engineers, create standards
+
+**Benefits:**
+- Evidence-based standards
+- Real-world examples
+- Buy-in from team
+- Consistent codebase
+
+### 5. AI Agent Training
+
+**Problem:** Agent needs to code like specific engineer
+**Solution:** Provide extracted profile to agent
+
+**Benefits:**
+- Match expert's style
+- Follow their patterns
+- Apply their best practices
+- Maintain consistency
+
+## Profile Usage by Agents
+
+When an agent has access to an engineer profile, it can:
+
+**Code Generation:**
+- Follow extracted naming conventions
+- Use identified patterns
+- Apply documented best practices
+- Match architectural style
+
+**Code Review:**
+- Provide feedback in engineer's style
+- Check for common issues they'd catch
+- Apply their quality standards
+- Match their priorities
+
+**Problem Solving:**
+- Use their common solutions
+- Follow their architectural approach
+- Apply their design patterns
+- Consider their trade-offs
+
+**Example Agent Prompt:**
+```
+"Using the profile at engineer_profiles/senior_dev/, write a user service
+following their coding style, patterns, and best practices. Pay special
+attention to their error handling approach and testing standards."
+```
+
+## Best Practices
+
+### Research Ethics
+
+**DO:**
+- ✅ Get permission before extracting
+- ✅ Focus on public contributions
+- ✅ Respect privacy
+- ✅ Use for learning and improvement
+
+**DON'T:**
+- ❌ Extract without permission
+- ❌ Share profiles externally
+- ❌ Include sensitive information
+- ❌ Use for performance reviews
+
+### Profile Maintenance
+
+**Regular Updates:**
+- Refresh every quarter
+- Add new significant PRs
+- Update with latest patterns
+- Archive outdated practices
+
+**Quality Control:**
+- Verify extracted patterns
+- Review examples for relevance
+- Update documentation
+- Remove deprecated practices
+
+### Effective Usage
+
+**For Learning:**
+- Study patterns with context
+- Understand reasoning behind choices
+- Practice applying techniques
+- Ask questions when unclear
+
+**For Replication:**
+- Start with style guide
+- Reference patterns for similar problems
+- Adapt to current context
+- Don't blindly copy
+
+## Limitations
+
+**What This Extracts:**
+- ✅ Coding style and conventions
+- ✅ Common patterns and approaches
+- ✅ Best practices and guidelines
+- ✅ Architectural decisions
+- ✅ Review feedback patterns
+
+**What This Doesn't Capture:**
+- ❌ Real-time problem-solving process
+- ❌ Verbal communication style
+- ❌ Meeting discussions
+- ❌ Design phase thinking
+- ❌ Interpersonal mentoring
+
+## Future Enhancements
+
+**Potential additions:**
+- Slack message analysis (communication style)
+- Design doc extraction (design thinking)
+- Meeting notes analysis (decision process)
+- Video analysis (pair programming sessions)
+- Code metrics tracking (evolution over time)
+
+## Example Output
+
+```
+engineer_profiles/
+└── senior_dev/
+    ├── README.md
+    │   # Senior Dev - Staff Engineer
+    │   Expertise: TypeScript, Node.js, System Design
+    │   Focus: API design, performance optimization
+    │
+    ├── coding_style/
+    │   ├── typescript_style.md
+    │   ├── naming_conventions.md
+    │   └── code_structure.md
+    │
+    ├── patterns/
+    │   ├── dependency_injection.md
+    │   ├── error_handling.md
+    │   └── examples/
+    │       ├── service_pattern.ts
+    │       └── repository_pattern.ts
+    │
+    ├── best_practices/
+    │   ├── testing_strategy.md
+    │   ├── code_quality.md
+    │   └── performance.md
+    │
+    ├── architecture/
+    │   ├── api_design.md
+    │   ├── database_design.md
+    │   └── scaling_approach.md
+    │
+    ├── code_review/
+    │   ├── feedback_examples.md
+    │   └── review_checklist.md
+    │
+    └── examples/
+        └── notable_prs/
+            ├── pr_1234_auth_refactor.md
+            └── pr_5678_performance_fix.md
+```
+
+## Summary
+
+This skill transforms an engineer's GitHub contributions into a structured, reusable knowledge base. It captures their expertise in a format that:
+
+- **Humans can learn from** - Clear documentation with examples
+- **Agents can replicate** - Structured patterns and guidelines
+- **Teams can adopt** - Evidence-based best practices
+- **Organizations can preserve** - Knowledge that survives turnover
+
+**The goal:** Make expertise scalable, learnable, and replicable.
+
+---
+
+**"The best way to learn is from those who have already mastered it."**
--- a/skills/engineer-expertise-extractor/scripts/extract_engineer.sh
+++ b/skills/engineer-expertise-extractor/scripts/extract_engineer.sh
@@ -0,0 +1,613 @@
+#!/bin/bash
+
+# Engineer Expertise Extractor
+# Research and document an engineer's coding style, patterns, and best practices
+
+set -e
+
+# Colors
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+MAGENTA='\033[0;35m'
+CYAN='\033[0;36m'
+NC='\033[0m'
+
+echo -e "${BLUE}╔══════════════════════════════════════════════════╗${NC}"
+echo -e "${BLUE}║      Engineer Expertise Extractor                ║${NC}"
+echo -e "${BLUE}╚══════════════════════════════════════════════════╝${NC}"
+echo ""
+echo -e "${YELLOW}Extract coding style, patterns, and best practices from GitHub${NC}"
+echo ""
+
+# Check for gh CLI
+if ! command -v gh &> /dev/null; then
+    echo -e "${RED}Error: GitHub CLI (gh) is not installed${NC}"
+    echo "Install from: https://cli.github.com/"
+    exit 1
+fi
+
+# Check authentication
+if ! gh auth status &> /dev/null; then
+    echo -e "${RED}Error: Not authenticated with GitHub CLI${NC}"
+    echo "Run: gh auth login"
+    exit 1
+fi
+
+# Helper function
+prompt_input() {
+    local prompt_text="$1"
+    local var_name="$2"
+    local required="$3"
+
+    while true; do
+        echo -e "${CYAN}${prompt_text}${NC}"
+        read -r input
+
+        if [ -n "$input" ]; then
+            eval "$var_name=\"$input\""
+            break
+        elif [ "$required" != "true" ]; then
+            eval "$var_name=\"\""
+            break
+        else
+            echo -e "${RED}This field is required.${NC}"
+        fi
+    done
+}
+
+# Step 1: Engineer Info
+echo -e "${MAGENTA}━━━ Step 1: Engineer Information ━━━${NC}"
+echo ""
+
+prompt_input "GitHub username to research:" ENGINEER_USERNAME true
+
+# Verify user exists
+echo -e "${BLUE}Verifying GitHub user...${NC}"
+if ! gh api users/$ENGINEER_USERNAME > /dev/null 2>&1; then
+    echo -e "${RED}Error: GitHub user '$ENGINEER_USERNAME' not found${NC}"
+    exit 1
+fi
+
+# Get user info
+USER_INFO=$(gh api users/$ENGINEER_USERNAME)
+USER_NAME=$(echo "$USER_INFO" | grep '"name"' | cut -d'"' -f4)
+USER_BIO=$(echo "$USER_INFO" | grep '"bio"' | cut -d'"' -f4)
+
+echo -e "${GREEN}✓ Found: $USER_NAME${NC}"
+[ -n "$USER_BIO" ] && echo -e "  Bio: $USER_BIO"
+echo ""
+
+# Step 2: Scope
+echo -e "${MAGENTA}━━━ Step 2: Research Scope ━━━${NC}"
+echo ""
+
+prompt_input "Organization to focus on (or 'all'):" ORG_FILTER false
+ORG_FILTER=${ORG_FILTER:-all}
+
+echo ""
+echo "How many PRs to analyze?"
+echo "1) Recent 20 (quick scan)"
+echo "2) Recent 50 (good coverage)"
+echo "3) Recent 100 (comprehensive)"
+echo "4) Custom"
+echo ""
+
+prompt_input "Select (1-4):" PR_COUNT_NUM true
+
+case $PR_COUNT_NUM in
+    1) PR_LIMIT=20 ;;
+    2) PR_LIMIT=50 ;;
+    3) PR_LIMIT=100 ;;
+    4)
+        # PR_LIMIT is handed to `gh --limit`, so keep asking until the reply
+        # is a positive whole number.
+        while true; do
+            prompt_input "Enter custom number:" PR_LIMIT true
+            if [[ "$PR_LIMIT" =~ ^[0-9]+$ ]] && [ "$PR_LIMIT" -gt 0 ]; then
+                # Force base 10 so leading zeros ("010") cannot be read as octal.
+                PR_LIMIT=$((10#$PR_LIMIT))
+                break
+            fi
+            echo -e "${RED}Please enter a positive whole number.${NC}"
+        done
+        ;;
+    *) PR_LIMIT=50 ;;
+esac
+
+# Step 3: Focus Areas
+echo ""
+echo -e "${MAGENTA}━━━ Step 3: Focus Areas ━━━${NC}"
+echo ""
+
+echo "What to extract? (y/n for each)"
+echo ""
+
+# Map a free-form reply to exactly "y" or "n". A blank reply means yes, and
+# y / Y / yes (any case) are accepted; everything else counts as no. Checks on
+# these flags compare against the literal "y".
+normalize_yes_no() {
+    case "$1" in
+        ''|[Yy]|[Yy][Ee][Ss]) echo "y" ;;
+        *) echo "n" ;;
+    esac
+}
+
+prompt_input "Extract coding style? (y/n):" EXTRACT_STYLE false
+EXTRACT_STYLE=$(normalize_yes_no "$EXTRACT_STYLE")
+
+prompt_input "Extract common patterns? (y/n):" EXTRACT_PATTERNS false
+EXTRACT_PATTERNS=$(normalize_yes_no "$EXTRACT_PATTERNS")
+
+prompt_input "Extract best practices? (y/n):" EXTRACT_PRACTICES false
+EXTRACT_PRACTICES=$(normalize_yes_no "$EXTRACT_PRACTICES")
+
+prompt_input "Extract code review style? (y/n):" EXTRACT_REVIEWS false
+EXTRACT_REVIEWS=$(normalize_yes_no "$EXTRACT_REVIEWS")
+
+prompt_input "Extract architectural decisions? (y/n):" EXTRACT_ARCH false
+EXTRACT_ARCH=$(normalize_yes_no "$EXTRACT_ARCH")
+
+# Lay out the profile tree under engineer_profiles/<username>/.
+OUTPUT_DIR="engineer_profiles/${ENGINEER_USERNAME}"
+mkdir -p "$OUTPUT_DIR"
+for profile_subdir in \
+    coding_style \
+    patterns \
+    best_practices \
+    architecture \
+    code_review \
+    examples/notable_prs \
+    raw_data
+do
+    mkdir -p "$OUTPUT_DIR/$profile_subdir"
+done
+
+echo ""
+echo -e "${BLUE}━━━ Starting Extraction ━━━${NC}"
+echo ""
+
+# Step 4: Fetch Pull Requests
+echo -e "${YELLOW}Fetching pull requests...${NC}"
+
+# Build search query
+SEARCH_QUERY="is:pr author:$ENGINEER_USERNAME"
+if [ "$ORG_FILTER" != "all" ]; then
+    SEARCH_QUERY="$SEARCH_QUERY org:$ORG_FILTER"
+fi
+
+# Fetch PRs
+echo "Query: $SEARCH_QUERY (limit: $PR_LIMIT)"
+gh search prs "$SEARCH_QUERY" --limit $PR_LIMIT --json number,title,repository,createdAt,state,url > "$OUTPUT_DIR/raw_data/prs.json"
+
+PR_COUNT=$(cat "$OUTPUT_DIR/raw_data/prs.json" | grep -c '"number"' || echo "0")
+echo -e "${GREEN}✓ Found $PR_COUNT pull requests${NC}"
+echo ""
+
+if [ "$PR_COUNT" -eq 0 ]; then
+    echo -e "${RED}No PRs found. Exiting.${NC}"
+    exit 1
+fi
+
+# Step 5: Analyze PRs
+echo -e "${YELLOW}Analyzing pull requests...${NC}"
+echo "This may take a while..."
+echo ""
+
+# Create PR analysis file
+PR_ANALYSIS_FILE="$OUTPUT_DIR/raw_data/pr_analysis.md"
+echo "# Pull Request Analysis: $ENGINEER_USERNAME" > "$PR_ANALYSIS_FILE"
+echo "" >> "$PR_ANALYSIS_FILE"
+echo "Total PRs analyzed: $PR_COUNT" >> "$PR_ANALYSIS_FILE"
+echo "Generated: $(date)" >> "$PR_ANALYSIS_FILE"
+echo "" >> "$PR_ANALYSIS_FILE"
+
+# Analyze top N PRs in detail (limit to avoid rate limiting)
+DETAILED_ANALYSIS_LIMIT=20
+if [ "$PR_COUNT" -lt "$DETAILED_ANALYSIS_LIMIT" ]; then
+    DETAILED_ANALYSIS_LIMIT=$PR_COUNT
+fi
+
+echo "Performing detailed analysis on $DETAILED_ANALYSIS_LIMIT PRs..."
+
+# Extract PR numbers and iterate
+cat "$OUTPUT_DIR/raw_data/prs.json" | grep '"number"' | head -$DETAILED_ANALYSIS_LIMIT | while read -r line; do
+    PR_NUMBER=$(echo "$line" | grep -o '[0-9]*' | head -1)
+    REPO=$(cat "$OUTPUT_DIR/raw_data/prs.json" | grep -B5 "\"number\": $PR_NUMBER" | grep '"nameWithOwner"' | cut -d'"' -f4 | head -1)
+
+    if [ -n "$PR_NUMBER" ] && [ -n "$REPO" ]; then
+        echo "  Analyzing PR #$PR_NUMBER in $REPO..."
+
+        # Fetch PR details
+        gh pr view $PR_NUMBER --repo $REPO --json title,body,files,comments > "$OUTPUT_DIR/raw_data/pr_${PR_NUMBER}.json" 2>/dev/null || continue
+
+        # Extract to analysis file
+        TITLE=$(cat "$OUTPUT_DIR/raw_data/pr_${PR_NUMBER}.json" | grep '"title"' | cut -d'"' -f4)
+        echo "## PR #$PR_NUMBER: $TITLE" >> "$PR_ANALYSIS_FILE"
+        echo "**Repository:** $REPO" >> "$PR_ANALYSIS_FILE"
+        echo "**URL:** https://github.com/$REPO/pull/$PR_NUMBER" >> "$PR_ANALYSIS_FILE"
+        echo "" >> "$PR_ANALYSIS_FILE"
+
+        # Get file changes
+        FILES_CHANGED=$(cat "$OUTPUT_DIR/raw_data/pr_${PR_NUMBER}.json" | grep '"path"' | wc -l)
+        echo "**Files Changed:** $FILES_CHANGED" >> "$PR_ANALYSIS_FILE"
+
+        # List languages (from file extensions)
+        cat "$OUTPUT_DIR/raw_data/pr_${PR_NUMBER}.json" | grep '"path"' | grep -o '\.[a-z]*"' | sort | uniq | head -5 >> "$PR_ANALYSIS_FILE"
+        echo "" >> "$PR_ANALYSIS_FILE"
+
+        sleep 1  # Rate limiting
+    fi
+done
+
+echo -e "${GREEN}✓ Detailed analysis complete${NC}"
+echo ""
+
+# Step 6: Extract Code Review Comments
+# When the user opted in, write a placeholder review-patterns document; no
+# review comments are fetched here.
+if [ "$EXTRACT_REVIEWS" = "y" ]; then
+    echo -e "${YELLOW}Extracting code review patterns...${NC}"
+
+    REVIEW_FILE="$OUTPUT_DIR/code_review/review_patterns.md"
+    cat > "$REVIEW_FILE" << EOF
+# Code Review Patterns: $ENGINEER_USERNAME
+
+Extracted from $PR_COUNT pull requests
+
+## Common Review Comments
+
+[To be populated with actual review comments from PRs where they are reviewer]
+
+## Review Focus Areas
+- Code quality
+- Testing coverage
+- Performance considerations
+- Security practices
+
+EOF
+
+    echo -e "${GREEN}✓ Review patterns documented${NC}"
+fi
+
+# Step 7: Create Profile README
+echo ""
+echo -e "${YELLOW}Creating profile documentation...${NC}"
+
+README_FILE="$OUTPUT_DIR/README.md"
+cat > "$README_FILE" << EOF
+# Engineer Profile: $ENGINEER_USERNAME
+
+**Name:** ${USER_NAME:-$ENGINEER_USERNAME}
+${USER_BIO:+**Bio:** $USER_BIO}
+**GitHub:** https://github.com/$ENGINEER_USERNAME
+**Profile Created:** $(date +%Y-%m-%d)
+**PRs Analyzed:** $PR_COUNT
+
+---
+
+## Overview
+
+This profile contains extracted coding expertise from analyzing $ENGINEER_USERNAME's GitHub contributions.
+
+## Contents
+
+### 📝 Coding Style
+Location: \`coding_style/\`
+
+Documents coding conventions, naming patterns, and formatting preferences.
+
+**Key files:**
+- \`naming_conventions.md\` - Variable, function, class naming
+- \`code_structure.md\` - File organization and structure
+- \`formatting_preferences.md\` - Code formatting style
+
+### 🔧 Patterns
+Location: \`patterns/\`
+
+Common solutions, design patterns, and recurring approaches.
+
+**Key files:**
+- \`common_solutions.md\` - Frequently used solutions
+- \`design_patterns.md\` - Applied design patterns
+- \`examples/\` - Code examples
+
+### ✅ Best Practices
+Location: \`best_practices/\`
+
+Quality standards, testing approaches, and guidelines.
+
+**Key files:**
+- \`code_quality.md\` - Quality standards
+- \`testing_approach.md\` - Testing strategy
+- \`performance.md\` - Performance practices
+- \`security.md\` - Security considerations
+
+### 🏗️ Architecture
+Location: \`architecture/\`
+
+Design decisions, technology choices, and system design.
+
+**Key files:**
+- \`design_decisions.md\` - Architectural choices
+- \`tech_choices.md\` - Technology selections
+- \`trade_offs.md\` - Decision trade-offs
+
+### 👀 Code Review
+Location: \`code_review/\`
+
+Code review style, common feedback, and review approach.
+
+**Key files:**
+- \`feedback_style.md\` - How they provide feedback
+- \`common_suggestions.md\` - Recurring suggestions
+- \`review_checklist.md\` - What they look for
+
+### 📚 Examples
+Location: \`examples/\`
+
+Real code examples from notable pull requests.
+
+**Contents:**
+- \`notable_prs/\` - Significant PRs and their patterns
+
+---
+
+## Using This Profile
+
+### For Learning
+1. Start with \`coding_style/\` to understand conventions
+2. Review \`patterns/\` for common solutions
+3. Study \`best_practices/\` for quality standards
+4. Read \`examples/\` for real-world applications
+
+### For AI Agents
+Provide this profile as context when asking agents to:
+- Write code matching this engineer's style
+- Review code using their standards
+- Apply their patterns and practices
+- Make architectural decisions in their approach
+
+**Example prompt:**
+\`\`\`
+Using the engineer profile at engineer_profiles/$ENGINEER_USERNAME/,
+write a user authentication service following their coding style,
+patterns, and best practices.
+\`\`\`
+
+### For Team Alignment
+- Use as reference for team coding standards
+- Share patterns for consistency
+- Adopt best practices across team
+- Train new engineers with real examples
+
+---
+
+## Expertise Areas
+
+Based on analyzed PRs:
+
+EOF
+
+# Extract languages from PR data
+echo "### Languages" >> "$README_FILE"
+cat "$OUTPUT_DIR/raw_data/prs.json" | grep '"path"' | grep -o '\.[a-z]*"' | sed 's/"//g' | sort | uniq -c | sort -rn | head -10 | while read -r count ext; do
+    echo "- $ext files ($count occurrences)" >> "$README_FILE"
+done
+
+echo "" >> "$README_FILE"
+echo "### Repositories Contributed To" >> "$README_FILE"
+cat "$OUTPUT_DIR/raw_data/prs.json" | grep '"nameWithOwner"' | cut -d'"' -f4 | sort | uniq | head -10 | while read -r repo; do
+    echo "- $repo" >> "$README_FILE"
+done
+
+cat >> "$README_FILE" << EOF
+
+---
+
+## Data Sources
+
+- **Pull Requests:** $PR_COUNT PRs analyzed
+- **Time Range:** Most recent contributions
+- **Scope:** ${ORG_FILTER:-All repositories}
+- **Analysis Date:** $(date +%Y-%m-%d)
+
+---
+
+## Maintenance
+
+### Updating This Profile
+
+\`\`\`bash
+./scripts/update_profile.sh $ENGINEER_USERNAME
+\`\`\`
+
+### Adding Custom Documentation
+
+Feel free to enhance this profile with:
+- Additional examples
+- Team-specific practices
+- Personal notes and observations
+- Meeting discussions and decisions
+
+---
+
+## Notes
+
+This profile is automatically generated from GitHub contributions.
+It captures public coding patterns and should be used as a learning
+resource and reference, not as a rigid rulebook.
+
+**Privacy:** Only public GitHub contributions are analyzed.
+**Accuracy:** Patterns are inferred from code; always verify with engineer.
+**Currency:** Update regularly as coding practices evolve.
+
+EOF
+
+echo -e "${GREEN}✓ Profile README created${NC}"
+
+# Step 8: Create Template Files
+echo ""
+echo -e "${YELLOW}Creating template structure...${NC}"
+
+# Coding Style Templates
+cat > "$OUTPUT_DIR/coding_style/naming_conventions.md" << 'EOF'
+# Naming Conventions
+
+Based on analysis of pull requests.
+
+## Variables
+
+### General Rules
+- Descriptive names preferred
+- Avoid abbreviations
+- Use camelCase (or language convention)
+
+### Examples
+```
+// From PR analysis
+const userAuthentication = ...  // not ua
+const isActive = ...             // boolean prefix
+const totalAmount = ...          // clear purpose
+```
+
+## Functions
+
+### General Rules
+- Verb-first naming
+- Single responsibility
+- Descriptive of action
+
+### Examples
+```
+// From PR analysis
+getUserById(id)
+validateInput(data)
+calculateTotal(items)
+```
+
+## Classes
+
+### General Rules
+- PascalCase
+- Noun-based
+- Clear purpose
+
+### Examples
+```
+// From PR analysis
+UserService
+PaymentProcessor
+AuthenticationManager
+```
+
+---
+
+**Note:** Fill in with specific patterns found in engineer's code.
+EOF
+
+cat > "$OUTPUT_DIR/patterns/common_solutions.md" << 'EOF'
+# Common Solutions
+
+Recurring patterns and solutions found in PRs.
+
+## Pattern 1: [To be extracted]
+
+**Problem:** [What problem does this solve]
+
+**Solution:**
+```
+[Code example from PRs]
+```
+
+**Why:** [Reasoning from PR discussions]
+
+**When to use:** [Applicable scenarios]
+
+---
+
+## Pattern 2: [To be extracted]
+
+[Continue documenting patterns found in analysis]
+
+---
+
+**Note:** Populate with actual patterns from PR analysis.
+EOF
+
+cat > "$OUTPUT_DIR/best_practices/testing_approach.md" << 'EOF'
+# Testing Approach
+
+Testing strategies and patterns extracted from PRs.
+
+## Test Structure
+
+[Document testing patterns from analyzed PRs]
+
+## Coverage Standards
+
+[Extract coverage expectations from PRs]
+
+## Test Types
+
+### Unit Tests
+[Patterns found]
+
+### Integration Tests
+[Patterns found]
+
+### E2E Tests
+[Patterns found]
+
+---
+
+**Note:** Fill in based on test files in analyzed PRs.
+EOF
+
+cat > "$OUTPUT_DIR/architecture/design_decisions.md" << 'EOF'
+# Design Decisions
+
+Architectural decisions extracted from PRs and discussions.
+
+## Decision Template
+
+### Decision: [Name]
+
+**Context:** [What problem or need]
+
+**Decision:** [What was decided]
+
+**Reasoning:** [Why this approach]
+
+**Alternatives Considered:** [Other options]
+
+**Trade-offs:** [Pros and cons]
+
+**Outcome:** [Results]
+
+---
+
+## Decisions Extracted from PRs
+
+[To be populated with decisions found in PR descriptions and discussions]
+
+---
+
+**Note:** Extract from PR descriptions, discussions, and code comments.
+EOF
+
+echo -e "${GREEN}✓ Template files created${NC}"
+
+# Step 9: Generate Summary
+# Final report: where the profile was written, what was analyzed, and the
+# suggested follow-up steps. Colored lines go through `echo -e`; plain runs of
+# text are emitted with here-documents.
+echo ""
+echo -e "${BLUE}╔════════════════════════════════════════════════╗${NC}"
+echo -e "${BLUE}║          Extraction Complete!                  ║${NC}"
+echo -e "${BLUE}╚════════════════════════════════════════════════╝${NC}"
+echo ""
+echo -e "${GREEN}Profile created at: ${BLUE}$OUTPUT_DIR${NC}"
+echo ""
+echo -e "${YELLOW}━━━ Summary ━━━${NC}"
+cat << EOF
+Engineer: $ENGINEER_USERNAME
+PRs Analyzed: $PR_COUNT
+Detailed Analysis: $DETAILED_ANALYSIS_LIMIT PRs
+
+EOF
+echo -e "${YELLOW}━━━ Created Structure ━━━${NC}"
+cat << 'EOF'
+├── README.md (profile overview)
+├── coding_style/ (conventions and preferences)
+├── patterns/ (common solutions)
+├── best_practices/ (quality standards)
+├── architecture/ (design decisions)
+├── code_review/ (feedback patterns)
+├── examples/ (code samples)
+└── raw_data/ (source PR data)
+
+EOF
+echo -e "${CYAN}━━━ Next Steps ━━━${NC}"
+echo ""
+echo "1. Review the generated profile:"
+echo -e "   ${BLUE}cat $OUTPUT_DIR/README.md${NC}"
+echo ""
+echo "2. Review PR analysis:"
+echo -e "   ${BLUE}cat $OUTPUT_DIR/raw_data/pr_analysis.md${NC}"
+echo ""
+cat << 'EOF'
+3. Enhance documentation with specific findings:
+   - Add code examples to patterns/
+   - Document specific conventions in coding_style/
+   - Extract best practices from notable PRs
+   - Add architectural decisions from PR discussions
+
+4. Use with AI agents:
+EOF
+echo -e "   ${BLUE}\"Using engineer_profiles/$ENGINEER_USERNAME/, write code matching their style\"${NC}"
+echo ""
+echo -e "${YELLOW}💡 Tip: Manually review PRs and add specific examples to strengthen profile${NC}"
+echo ""