From 7e037098e347c2b8bd0f56f939d41f14a7e46a57 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sat, 29 Nov 2025 18:49:17 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 12 + README.md | 3 + plugin.lock.json | 48 ++ skills/engineer-expertise-extractor/SKILL.md | 638 ++++++++++++++++++ .../scripts/extract_engineer.sh | 613 +++++++++++++++++ 5 files changed, 1314 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 plugin.lock.json create mode 100644 skills/engineer-expertise-extractor/SKILL.md create mode 100755 skills/engineer-expertise-extractor/scripts/extract_engineer.sh diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..b1ec6fc --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "engineer-expertise-extractor", + "description": "Research and extract an engineer's coding style, patterns, and best practices from their GitHub contributions. Creates structured knowledge base for replicating their expertise.", + "version": "0.0.0-2025.11.28", + "author": { + "name": "James Rochabrun", + "email": "jamesrochabrun@gmail.com" + }, + "skills": [ + "./skills/engineer-expertise-extractor" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..a5c8694 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# engineer-expertise-extractor + +Research and extract an engineer's coding style, patterns, and best practices from their GitHub contributions. Creates structured knowledge base for replicating their expertise. 
diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..5fed8fd --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,48 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:jamesrochabrun/skills:engineer-expertise-extractor", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "e1dc9f46a4e54755eb600ba5700333189314c16e", + "treeHash": "139f221c3d9b2cff87f4e1e23a2cfb0ae12c0fe294380a4962cf33ae213b605b", + "generatedAt": "2025-11-28T10:17:47.269090Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "engineer-expertise-extractor", + "description": "Research and extract an engineer's coding style, patterns, and best practices from their GitHub contributions. Creates structured knowledge base for replicating their expertise." 
+ }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "de3fadb15ae28e93c2efecce5a06f656992b3a90f01b1124972737c2cc93443e" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "04e70b25fc0c5f7fc0c295534f24f47b36672e2e150995772eb361697a1969a2" + }, + { + "path": "skills/engineer-expertise-extractor/SKILL.md", + "sha256": "1fa0e2fcc60ac60e7b4db7b1e93aba93624ad8fd34ba25601b29f149d384d2ff" + }, + { + "path": "skills/engineer-expertise-extractor/scripts/extract_engineer.sh", + "sha256": "3fd76ef39495e703e11478102693accc0d8180db767482215dd48f8639b88d52" + } + ], + "dirSha256": "139f221c3d9b2cff87f4e1e23a2cfb0ae12c0fe294380a4962cf33ae213b605b" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/engineer-expertise-extractor/SKILL.md b/skills/engineer-expertise-extractor/SKILL.md new file mode 100644 index 0000000..5f39fc6 --- /dev/null +++ b/skills/engineer-expertise-extractor/SKILL.md @@ -0,0 +1,638 @@ +--- +name: engineer-expertise-extractor +description: Research and extract an engineer's coding style, patterns, and best practices from their GitHub contributions. Creates structured knowledge base for replicating their expertise. +--- + +# Engineer Expertise Extractor + +Extract and document an engineer's coding expertise by analyzing their GitHub contributions, creating a structured knowledge base that captures their coding style, patterns, best practices, and architectural decisions. 
+ +## What This Skill Does + +Researches an engineer's work to create a "digital mentor" by: +- **Analyzing Pull Requests** - Extract code patterns, review style, decisions +- **Extracting Coding Style** - Document their preferences and conventions +- **Identifying Patterns** - Common solutions and approaches they use +- **Capturing Best Practices** - Their quality standards and guidelines +- **Organizing Examples** - Real code samples from their work +- **Documenting Decisions** - Architectural choices and reasoning + +## Why This Matters + +**Knowledge Preservation:** +- Capture expert knowledge before they leave +- Document tribal knowledge +- Create mentorship materials +- Onboard new engineers faster + +**Consistency:** +- Align team coding standards +- Replicate expert approaches +- Maintain code quality +- Scale expertise across team + +**Learning:** +- Learn from senior engineers +- Understand decision-making +- See real-world patterns +- Improve code quality + +## How It Works + +### 1. Research Phase +Using GitHub CLI (`gh`), the skill: +- Fetches engineer's pull requests +- Analyzes code changes +- Reviews their comments and feedback +- Extracts patterns and conventions +- Identifies their expertise areas + +### 2. Analysis Phase +Categorizes findings into: +- **Coding Style** - Formatting, naming, structure +- **Patterns** - Common solutions and approaches +- **Best Practices** - Quality guidelines +- **Architecture** - Design decisions +- **Testing** - Testing approaches +- **Code Review** - Feedback patterns +- **Documentation** - Doc style and practices + +### 3. 
Organization Phase +Creates structured folders: +``` +engineer_profiles/ +└── [engineer_name]/ + ├── README.md (overview) + ├── coding_style/ + │ ├── languages/ + │ ├── naming_conventions.md + │ ├── code_structure.md + │ └── formatting_preferences.md + ├── patterns/ + │ ├── common_solutions.md + │ ├── design_patterns.md + │ └── code_examples/ + ├── best_practices/ + │ ├── code_quality.md + │ ├── testing_approach.md + │ ├── performance.md + │ └── security.md + ├── architecture/ + │ ├── design_decisions.md + │ ├── tech_choices.md + │ └── trade_offs.md + ├── code_review/ + │ ├── feedback_style.md + │ ├── common_suggestions.md + │ └── review_examples.md + └── examples/ + ├── by_language/ + ├── by_pattern/ + └── notable_prs/ +``` + +## Output Structure + +### Engineer Profile README + +**Contains:** +- Engineer overview +- Areas of expertise +- Languages and technologies +- Key contributions +- Coding philosophy +- How to use this profile + +### Coding Style Documentation + +**Captures:** +- Naming conventions (variables, functions, classes) +- Code structure preferences +- File organization +- Comment style +- Formatting preferences +- Language-specific idioms + +**Example:** +```markdown +# Coding Style: [Engineer Name] + +## Naming Conventions + +### Variables +- Use descriptive names: `userAuthentication` not `ua` +- Boolean variables: `isActive`, `hasPermission`, `canEdit` +- Collections: plural names `users`, `items`, `transactions` + +### Functions +- Verb-first: `getUserById`, `validateInput`, `calculateTotal` +- Pure functions preferred +- Single responsibility + +### Classes +- PascalCase: `UserService`, `PaymentProcessor` +- Interface prefix: `IUserRepository` +- Concrete implementations: `MongoUserRepository` + +## Code Structure + +### File Organization +- One class per file +- Related functions grouped together +- Tests alongside implementation +- Clear separation of concerns + +### Function Length +- Max 20-30 lines preferred +- Extract helper functions 
+- Single level of abstraction +``` + +### Patterns Documentation + +**Captures:** +- Recurring solutions +- Design patterns used +- Architectural patterns +- Problem-solving approaches + +**Example:** +```markdown +# Common Patterns: [Engineer Name] + +## Dependency Injection + +Used consistently across services: + +\`\`\`typescript +// Pattern: Constructor injection +class UserService { + constructor( + private readonly userRepo: IUserRepository, + private readonly logger: ILogger + ) {} +} +\`\`\` + +**Why:** Testability, loose coupling, clear dependencies + +## Error Handling + +Consistent error handling approach: + +\`\`\`typescript +// Pattern: Custom error types + global handler +class ValidationError extends Error { + constructor(message: string) { + super(message); + this.name = 'ValidationError'; + } +} + +// Usage +if (!isValid(input)) { + throw new ValidationError('Invalid input format'); +} +\`\`\` + +**Why:** Type-safe errors, centralized handling, clear debugging +``` + +### Best Practices Documentation + +**Captures:** +- Quality standards +- Testing approaches +- Performance guidelines +- Security practices +- Documentation standards + +**Example:** +```markdown +# Best Practices: [Engineer Name] + +## Testing + +### Unit Test Structure +- AAA pattern (Arrange, Act, Assert) +- One assertion per test preferred +- Test names describe behavior +- Mock external dependencies + +\`\`\`typescript +describe('UserService', () => { + describe('createUser', () => { + it('should create user with valid data', async () => { + // Arrange + const userData = { email: 'test@example.com', name: 'Test' }; + const mockRepo = createMockRepository(); + + // Act + const result = await userService.createUser(userData); + + // Assert + expect(result.id).toBeDefined(); + expect(result.email).toBe(userData.email); + }); + }); +}); +\`\`\` + +### Test Coverage +- Aim for 80%+ coverage +- 100% coverage for critical paths +- Integration tests for APIs +- E2E tests for user flows 
+ +## Code Review Standards + +### What to Check +- [ ] Tests included and passing +- [ ] No console.logs remaining +- [ ] Error handling present +- [ ] Comments explain "why" not "what" +- [ ] No hardcoded values +- [ ] Security considerations addressed +``` + +### Architecture Documentation + +**Captures:** +- Design decisions +- Technology choices +- Trade-offs made +- System design approaches + +**Example:** +```markdown +# Architectural Decisions: [Engineer Name] + +## Decision: Microservices vs Monolith + +**Context:** Scaling user service +**Decision:** Start monolith, extract services when needed +**Reasoning:** +- Team size: 5 engineers +- Product stage: MVP +- Premature optimization risk +- Easier debugging and deployment + +**Trade-offs:** +- Monolith pros: Simpler, faster development +- Monolith cons: Harder to scale later +- Decision: Optimize for current needs, refactor when hitting limits + +## Decision: REST vs GraphQL + +**Context:** API design for mobile app +**Decision:** REST with versioning +**Reasoning:** +- Team familiar with REST +- Simple use cases +- Caching easier +- Over-fetching not a problem yet + +**When to reconsider:** If frontend needs complex queries +``` + +### Code Review Documentation + +**Captures:** +- Feedback patterns +- Review approach +- Common suggestions +- Communication style + +**Example:** +```markdown +# Code Review Style: [Engineer Name] + +## Review Approach + +### Priority Order +1. Security vulnerabilities +2. Logic errors +3. Test coverage +4. Code structure +5. Naming and style + +### Feedback Style +- Specific and constructive +- Explains "why" behind suggestions +- Provides examples +- Asks questions to understand reasoning + +### Common Suggestions + +**Security:** +- "Consider input validation here" +- "This query is vulnerable to SQL injection" +- "Should we rate-limit this endpoint?" 
+ +**Performance:** +- "This N+1 query could be optimized with a join" +- "Consider caching this expensive operation" +- "Memoize this pure function" + +**Testing:** +- "Can we add a test for the error case?" +- "What happens if the API returns null?" +- "Let's test the boundary conditions" + +**Code Quality:** +- "Can we extract this into a helper function?" +- "This function is doing too many things" +- "Consider a more descriptive variable name" +``` + +## Using This Skill + +### Extract Engineer Profile + +```bash +./scripts/extract_engineer.sh +``` + +**Interactive workflow:** +1. Enter GitHub username +2. Select repository scope (all/specific org) +3. Choose analysis depth (last N PRs) +4. Choose what to extract (coding style, patterns, best practices, code review style, architectural decisions) +5. Extract and organize findings + +**Output:** Structured profile in `engineer_profiles/[username]/` + +### Analyze Specific Repository + +```bash +./scripts/analyze_repo.sh [repo-url] [engineer-username] +``` + +Focuses analysis on specific repository contributions. + +**Note:** `analyze_repo.sh` is not yet included with this skill. Until it is, run `extract_engineer.sh` and narrow the scope to the organization that owns the repository. + +### Update Existing Profile + +```bash +./scripts/update_profile.sh [engineer-username] +``` + +Adds new PRs and updates existing profile. + +**Note:** `update_profile.sh` is not yet included with this skill. Until it is, re-run `extract_engineer.sh` for the same username to regenerate the profile, and back up any manual edits first.
+ +## Research Sources + +### GitHub CLI Queries + +**Pull Requests:** +```bash +gh pr list --author [username] --limit 100 --state all +gh pr view [pr-number] --json title,body,files,reviews,comments +``` + +**Code Changes:** +```bash +gh pr diff [pr-number] +gh api repos/{owner}/{repo}/pulls/[pr-number]/files +``` + +**Reviews:** +```bash +gh pr view [pr-number] --comments +gh api repos/{owner}/{repo}/pulls/[pr-number]/reviews +``` + +**Commits:** +```bash +gh search commits --author [username] +``` + +### Analysis Techniques + +**Pattern Recognition:** +- Identify recurring code structures +- Extract common solutions +- Detect naming patterns +- Find architectural choices + +**Style Extraction:** +- Analyze formatting consistency +- Extract naming conventions +- Identify comment patterns +- Detect structural preferences + +**Best Practice Identification:** +- Look for testing patterns +- Find error handling approaches +- Identify security practices +- Extract performance optimizations + +## Use Cases + +### 1. Onboarding New Engineers + +**Problem:** New engineer needs to learn team standards +**Solution:** Provide senior engineer's profile as reference + +**Benefits:** +- Real examples from codebase +- Understand team conventions +- See decision-making process +- Learn best practices + +### 2. Code Review Training + +**Problem:** Teaching good code review skills +**Solution:** Study experienced reviewer's feedback patterns + +**Benefits:** +- Learn what to look for +- Understand feedback style +- See common issues +- Improve review quality + +### 3. Knowledge Transfer + +**Problem:** Senior engineer leaving, knowledge lost +**Solution:** Extract their expertise before departure + +**Benefits:** +- Preserve tribal knowledge +- Document decisions +- Maintain code quality +- Reduce bus factor + +### 4. 
Establishing Team Standards + +**Problem:** Inconsistent coding styles across team +**Solution:** Extract patterns from best engineers, create standards + +**Benefits:** +- Evidence-based standards +- Real-world examples +- Buy-in from team +- Consistent codebase + +### 5. AI Agent Training + +**Problem:** Agent needs to code like specific engineer +**Solution:** Provide extracted profile to agent + +**Benefits:** +- Match expert's style +- Follow their patterns +- Apply their best practices +- Maintain consistency + +## Profile Usage by Agents + +When an agent has access to an engineer profile, it can: + +**Code Generation:** +- Follow extracted naming conventions +- Use identified patterns +- Apply documented best practices +- Match architectural style + +**Code Review:** +- Provide feedback in engineer's style +- Check for common issues they'd catch +- Apply their quality standards +- Match their priorities + +**Problem Solving:** +- Use their common solutions +- Follow their architectural approach +- Apply their design patterns +- Consider their trade-offs + +**Example Agent Prompt:** +``` +"Using the profile at engineer_profiles/senior_dev/, write a user service +following their coding style, patterns, and best practices. Pay special +attention to their error handling approach and testing standards." 
+``` + +## Best Practices + +### Research Ethics + +**DO:** +- ✅ Get permission before extracting +- ✅ Focus on public contributions +- ✅ Respect privacy +- ✅ Use for learning and improvement + +**DON'T:** +- ❌ Extract without permission +- ❌ Share profiles externally +- ❌ Include sensitive information +- ❌ Use for performance reviews + +### Profile Maintenance + +**Regular Updates:** +- Refresh every quarter +- Add new significant PRs +- Update with latest patterns +- Archive outdated practices + +**Quality Control:** +- Verify extracted patterns +- Review examples for relevance +- Update documentation +- Remove deprecated practices + +### Effective Usage + +**For Learning:** +- Study patterns with context +- Understand reasoning behind choices +- Practice applying techniques +- Ask questions when unclear + +**For Replication:** +- Start with style guide +- Reference patterns for similar problems +- Adapt to current context +- Don't blindly copy + +## Limitations + +**What This Extracts:** +- ✅ Coding style and conventions +- ✅ Common patterns and approaches +- ✅ Best practices and guidelines +- ✅ Architectural decisions +- ✅ Review feedback patterns + +**What This Doesn't Capture:** +- ❌ Real-time problem-solving process +- ❌ Verbal communication style +- ❌ Meeting discussions +- ❌ Design phase thinking +- ❌ Interpersonal mentoring + +## Future Enhancements + +**Potential additions:** +- Slack message analysis (communication style) +- Design doc extraction (design thinking) +- Meeting notes analysis (decision process) +- Video analysis (pair programming sessions) +- Code metrics tracking (evolution over time) + +## Example Output + +``` +engineer_profiles/ +└── senior_dev/ + ├── README.md + │ # Senior Dev - Staff Engineer + │ Expertise: TypeScript, Node.js, System Design + │ Focus: API design, performance optimization + │ + ├── coding_style/ + │ ├── typescript_style.md + │ ├── naming_conventions.md + │ └── code_structure.md + │ + ├── patterns/ + │ ├── 
dependency_injection.md + │ ├── error_handling.md + │ └── examples/ + │ ├── service_pattern.ts + │ └── repository_pattern.ts + │ + ├── best_practices/ + │ ├── testing_strategy.md + │ ├── code_quality.md + │ └── performance.md + │ + ├── architecture/ + │ ├── api_design.md + │ ├── database_design.md + │ └── scaling_approach.md + │ + ├── code_review/ + │ ├── feedback_examples.md + │ └── review_checklist.md + │ + └── examples/ + └── notable_prs/ + ├── pr_1234_auth_refactor.md + └── pr_5678_performance_fix.md +``` + +## Summary + +This skill transforms an engineer's GitHub contributions into a structured, reusable knowledge base. It captures their expertise in a format that: + +- **Humans can learn from** - Clear documentation with examples +- **Agents can replicate** - Structured patterns and guidelines +- **Teams can adopt** - Evidence-based best practices +- **Organizations can preserve** - Knowledge that survives turnover + +**The goal:** Make expertise scalable, learnable, and replicable. 
+ +--- + +**"The best way to learn is from those who have already mastered it."** diff --git a/skills/engineer-expertise-extractor/scripts/extract_engineer.sh b/skills/engineer-expertise-extractor/scripts/extract_engineer.sh new file mode 100755 index 0000000..49d386f --- /dev/null +++ b/skills/engineer-expertise-extractor/scripts/extract_engineer.sh @@ -0,0 +1,613 @@
#!/bin/bash

# Engineer Expertise Extractor
# Research and document an engineer's coding style, patterns, and best practices
#
# Usage: extract_engineer.sh [github-username]
#   The username is prompted for when it is not given on the command line.
#
# Requires the GitHub CLI (gh), installed and authenticated. All JSON is read
# through gh's built-in --jq filter: gh prints compact single-line JSON when
# its output is captured, so line-oriented grep/cut parsing cannot be used.
#
# Output: engineer_profiles/<username>/ (README, templates, raw PR data)

set -e

# Colors
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
MAGENTA='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m'

echo -e "${BLUE}╔══════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║           Engineer Expertise Extractor           ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════╝${NC}"
echo ""
echo -e "${YELLOW}Extract coding style, patterns, and best practices from GitHub${NC}"
echo ""

# Check for gh CLI
if ! command -v gh &> /dev/null; then
    echo -e "${RED}Error: GitHub CLI (gh) is not installed${NC}"
    echo "Install from: https://cli.github.com/"
    exit 1
fi

# Check authentication
if ! gh auth status &> /dev/null; then
    echo -e "${RED}Error: Not authenticated with GitHub CLI${NC}"
    echo "Run: gh auth login"
    exit 1
fi

# ---------------------------------------------------------------------------
# Helper functions
# ---------------------------------------------------------------------------

# Prompt on stdout, read one line from stdin and store it in a named variable.
#   $1 - prompt text
#   $2 - name of the variable that receives the answer
#   $3 - "true" to keep asking until a non-empty answer is given
# Exits with status 1 when a required answer cannot be read (stdin closed).
prompt_input() {
    local prompt_text="$1"
    local var_name="$2"
    local required="$3"
    local input=""

    while true; do
        echo -e "${CYAN}${prompt_text}${NC}"

        # read fails at end of input; without this guard a required prompt
        # would either loop forever or abort silently under `set -e`.
        if ! read -r input && [ -z "$input" ] && [ "$required" = "true" ]; then
            echo -e "${RED}Error: no input available for a required field.${NC}" >&2
            exit 1
        fi

        if [ -n "$input" ]; then
            # printf -v stores the text verbatim. The previous eval-based
            # assignment executed whatever quotes, $ or backticks were typed.
            printf -v "$var_name" '%s' "$input"
            break
        elif [ "$required" != "true" ]; then
            printf -v "$var_name" '%s' ""
            break
        else
            echo -e "${RED}This field is required.${NC}"
        fi
    done
}

# Succeeds when the given answer means "yes".
is_yes() {
    case "$1" in
        y|Y|yes|Yes|YES) return 0 ;;
        *) return 1 ;;
    esac
}

# Write stdin to the file named by $1 unless that file already exists.
# Templates are meant to be filled in by hand, so a re-run must not wipe them.
write_template() {
    local target="$1"

    if [ -e "$target" ]; then
        echo "  Keeping existing $target"
        cat > /dev/null   # drain the here-document
        return 0
    fi
    cat > "$target"
}

# Step 1: Engineer Info
echo -e "${MAGENTA}━━━ Step 1: Engineer Information ━━━${NC}"
echo ""

ENGINEER_USERNAME="${1:-}"
if [ -z "$ENGINEER_USERNAME" ]; then
    prompt_input "GitHub username to research:" ENGINEER_USERNAME true
fi

# The username becomes part of an API path and of the output directory:
# refuse values that could be read as a gh flag or escape engineer_profiles/.
case "$ENGINEER_USERNAME" in
    -*|*/*|*..*|*[[:space:]]*)
        echo -e "${RED}Error: '$ENGINEER_USERNAME' is not a valid GitHub username${NC}"
        exit 1
        ;;
esac

# Verify user exists and read name and bio in the same request.
# Line 1 of the result is the display name, line 2 the bio (line breaks folded).
echo -e "${BLUE}Verifying GitHub user...${NC}"
if ! USER_INFO=$(gh api "users/$ENGINEER_USERNAME" \
        --jq '(.name // ""), ((.bio // "") | gsub("[\r\n]+"; " "))' 2>/dev/null); then
    echo -e "${RED}Error: GitHub user '$ENGINEER_USERNAME' not found${NC}"
    exit 1
fi

USER_NAME=$(printf '%s\n' "$USER_INFO" | sed -n '1p')
USER_BIO=$(printf '%s\n' "$USER_INFO" | sed -n '2p')

echo -e "${GREEN}✓ Found: ${USER_NAME:-$ENGINEER_USERNAME}${NC}"
if [ -n "$USER_BIO" ]; then
    printf '  Bio: %s\n' "$USER_BIO"
fi
echo ""

# Step 2: Scope
echo -e "${MAGENTA}━━━ Step 2: Research Scope ━━━${NC}"
echo ""

prompt_input "Organization to focus on (or 'all'):" ORG_FILTER false
ORG_FILTER=${ORG_FILTER:-all}

echo ""
echo "How many PRs to analyze?"
echo "1) Recent 20 (quick scan)"
echo "2) Recent 50 (good coverage)"
echo "3) Recent 100 (comprehensive)"
echo "4) Custom"
echo ""

prompt_input "Select (1-4):" PR_COUNT_NUM true

case $PR_COUNT_NUM in
    1) PR_LIMIT=20 ;;
    2) PR_LIMIT=50 ;;
    3) PR_LIMIT=100 ;;
    4) prompt_input "Enter custom number:" PR_LIMIT true ;;
    *) PR_LIMIT=50 ;;
esac

# A custom limit is free text: make sure it is a positive integer that
# gh search accepts (its --limit tops out at 1000).
if ! [[ "$PR_LIMIT" =~ ^[1-9][0-9]*$ ]]; then
    echo -e "${YELLOW}'$PR_LIMIT' is not a positive number; using 50.${NC}"
    PR_LIMIT=50
elif [ "$PR_LIMIT" -gt 1000 ]; then
    echo -e "${YELLOW}Limit capped at 1000 (maximum supported by gh search).${NC}"
    PR_LIMIT=1000
fi

# Step 3: Focus Areas
echo ""
echo -e "${MAGENTA}━━━ Step 3: Focus Areas ━━━${NC}"
echo ""

echo "What to extract? (y/n for each)"
echo ""

prompt_input "Extract coding style? (y/n):" EXTRACT_STYLE false
EXTRACT_STYLE=${EXTRACT_STYLE:-y}

prompt_input "Extract common patterns? (y/n):" EXTRACT_PATTERNS false
EXTRACT_PATTERNS=${EXTRACT_PATTERNS:-y}

prompt_input "Extract best practices? (y/n):" EXTRACT_PRACTICES false
EXTRACT_PRACTICES=${EXTRACT_PRACTICES:-y}

prompt_input "Extract code review style? (y/n):" EXTRACT_REVIEWS false
EXTRACT_REVIEWS=${EXTRACT_REVIEWS:-y}

prompt_input "Extract architectural decisions? (y/n):" EXTRACT_ARCH false
EXTRACT_ARCH=${EXTRACT_ARCH:-y}

# Create output directory
OUTPUT_DIR="engineer_profiles/${ENGINEER_USERNAME}"
mkdir -p "$OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR/coding_style"
mkdir -p "$OUTPUT_DIR/patterns"
mkdir -p "$OUTPUT_DIR/best_practices"
mkdir -p "$OUTPUT_DIR/architecture"
mkdir -p "$OUTPUT_DIR/code_review"
mkdir -p "$OUTPUT_DIR/examples/notable_prs"
mkdir -p "$OUTPUT_DIR/raw_data"

echo ""
echo -e "${BLUE}━━━ Starting Extraction ━━━${NC}"
echo ""

# Step 4: Fetch Pull Requests
echo -e "${YELLOW}Fetching pull requests...${NC}"

# Build the search. Filters are passed as flags: a multi-word query given to
# `gh search` as one argument is treated as a single quoted keyword and
# matches nothing.
SEARCH_QUERY="is:pr author:$ENGINEER_USERNAME"
SEARCH_ARGS=(--author "$ENGINEER_USERNAME" --limit "$PR_LIMIT")
if [ "$ORG_FILTER" != "all" ]; then
    SEARCH_QUERY="$SEARCH_QUERY org:$ORG_FILTER"
    SEARCH_ARGS+=(--owner "$ORG_FILTER")
    SCOPE_LABEL="Organization: $ORG_FILTER"
else
    SCOPE_LABEL="All repositories"
fi

# Fetch PRs
echo "Query: $SEARCH_QUERY (limit: $PR_LIMIT)"
gh search prs "${SEARCH_ARGS[@]}" \
    --json number,title,repository,createdAt,state,url > "$OUTPUT_DIR/raw_data/prs.json"

# Same search as "<number><TAB><owner/repo>" lines: one PR per line, which is
# what the counting and the analysis loop below rely on.
PR_LIST_FILE="$OUTPUT_DIR/raw_data/pr_list.tsv"
gh search prs "${SEARCH_ARGS[@]}" \
    --json number,repository \
    --jq '.[] | [.number, .repository.nameWithOwner] | @tsv' > "$PR_LIST_FILE"

PR_COUNT=$(wc -l < "$PR_LIST_FILE" | tr -d '[:space:]')
echo -e "${GREEN}✓ Found $PR_COUNT pull requests${NC}"
echo ""

if [ "$PR_COUNT" -eq 0 ]; then
    echo -e "${RED}No PRs found. Exiting.${NC}"
    exit 1
fi

# Step 5: Analyze PRs
echo -e "${YELLOW}Analyzing pull requests...${NC}"
echo "This may take a while..."
echo ""

# Create PR analysis file
PR_ANALYSIS_FILE="$OUTPUT_DIR/raw_data/pr_analysis.md"
{
    echo "# Pull Request Analysis: $ENGINEER_USERNAME"
    echo ""
    echo "Total PRs analyzed: $PR_COUNT"
    echo "Generated: $(date)"
    echo ""
} > "$PR_ANALYSIS_FILE"

# File extensions seen across the analyzed PRs (feeds the README's Languages section)
EXTENSIONS_FILE="$OUTPUT_DIR/raw_data/file_extensions.txt"
: > "$EXTENSIONS_FILE"

# Analyze top N PRs in detail (limit to avoid rate limiting)
DETAILED_ANALYSIS_LIMIT=20
if [ "$PR_COUNT" -lt "$DETAILED_ANALYSIS_LIMIT" ]; then
    DETAILED_ANALYSIS_LIMIT=$PR_COUNT
fi

echo "Performing detailed analysis on $DETAILED_ANALYSIS_LIMIT PRs..."

# Iterate over the first N (number, repository) pairs
while IFS=$'\t' read -r PR_NUMBER REPO; do
    if [ -z "$PR_NUMBER" ] || [ -z "$REPO" ]; then
        continue
    fi

    echo "  Analyzing PR #$PR_NUMBER in $REPO..."

    # PR numbers are only unique per repository, so the repository is part of
    # the file name. gh reads /dev/null so it cannot consume the loop's input.
    PR_FILE="$OUTPUT_DIR/raw_data/pr_${REPO//\//_}_${PR_NUMBER}.json"
    if ! gh pr view "$PR_NUMBER" --repo "$REPO" \
            --json title,body,files,comments > "$PR_FILE" 2>/dev/null < /dev/null; then
        rm -f "$PR_FILE"
        continue
    fi

    # Summary lines: 1 = title, 2 = number of files, 3+ = changed file paths
    PR_SUMMARY=$(gh pr view "$PR_NUMBER" --repo "$REPO" --json title,files \
        --jq '.title, (.files | length), (.files[].path)' 2>/dev/null < /dev/null) || continue

    TITLE=$(printf '%s\n' "$PR_SUMMARY" | sed -n '1p')
    FILES_CHANGED=$(printf '%s\n' "$PR_SUMMARY" | sed -n '2p')

    # Keep the trailing ".ext" of each path; paths without an extension drop out
    PR_EXTENSIONS=$(printf '%s\n' "$PR_SUMMARY" | sed '1,2d' \
        | sed -n 's/.*\(\.[A-Za-z0-9]\{1,\}\)$/\1/p')
    if [ -n "$PR_EXTENSIONS" ]; then
        printf '%s\n' "$PR_EXTENSIONS" >> "$EXTENSIONS_FILE"
    fi

    # Extract to analysis file
    {
        echo "## PR #$PR_NUMBER: $TITLE"
        echo "**Repository:** $REPO"
        echo "**URL:** https://github.com/$REPO/pull/$PR_NUMBER"
        echo ""
        echo "**Files Changed:** $FILES_CHANGED"
        if [ -n "$PR_EXTENSIONS" ]; then
            # List languages (from file extensions)
            echo "**File Types:** $(printf '%s\n' "$PR_EXTENSIONS" | sort -u | head -5 | tr '\n' ' ')"
        fi
        echo ""
    } >> "$PR_ANALYSIS_FILE"

    sleep 1  # Rate limiting
done < <(head -n "$DETAILED_ANALYSIS_LIMIT" "$PR_LIST_FILE")

echo -e "${GREEN}✓ Detailed analysis complete${NC}"
echo ""

# Step 6: Extract Code Review Comments
if is_yes "$EXTRACT_REVIEWS"; then
    echo -e "${YELLOW}Extracting code review patterns...${NC}"

    REVIEW_FILE="$OUTPUT_DIR/code_review/review_patterns.md"
    {
        echo "# Code Review Patterns: $ENGINEER_USERNAME"
        echo ""
        echo "Extracted from $PR_COUNT pull requests"
        echo ""
        echo "## Common Review Comments"
        echo ""
        echo "[To be populated with actual review comments from PRs where they are reviewer]"
        echo ""
        echo "## Review Focus Areas"
        echo "- Code quality"
        echo "- Testing coverage"
        echo "- Performance considerations"
        echo "- Security practices"
        echo ""
    } > "$REVIEW_FILE"

    echo -e "${GREEN}✓ Review patterns documented${NC}"
fi

# Step 7: Create Profile README
echo ""
echo -e "${YELLOW}Creating profile documentation...${NC}"

README_FILE="$OUTPUT_DIR/README.md"
cat > "$README_FILE" << EOF
# Engineer Profile: $ENGINEER_USERNAME

**Name:** ${USER_NAME:-$ENGINEER_USERNAME}
${USER_BIO:+**Bio:** $USER_BIO}
**GitHub:** https://github.com/$ENGINEER_USERNAME
**Profile Created:** $(date +%Y-%m-%d)
**PRs Analyzed:** $PR_COUNT

---

## Overview

This profile contains extracted coding expertise from analyzing $ENGINEER_USERNAME's GitHub contributions.

## Contents

### 📝 Coding Style
Location: \`coding_style/\`

Documents coding conventions, naming patterns, and formatting preferences.

**Key files:**
- \`naming_conventions.md\` - Variable, function, class naming
- \`code_structure.md\` - File organization and structure
- \`formatting_preferences.md\` - Code formatting style

### 🔧 Patterns
Location: \`patterns/\`

Common solutions, design patterns, and recurring approaches.

**Key files:**
- \`common_solutions.md\` - Frequently used solutions
- \`design_patterns.md\` - Applied design patterns
- \`examples/\` - Code examples

### ✅ Best Practices
Location: \`best_practices/\`

Quality standards, testing approaches, and guidelines.

**Key files:**
- \`code_quality.md\` - Quality standards
- \`testing_approach.md\` - Testing strategy
- \`performance.md\` - Performance practices
- \`security.md\` - Security considerations

### 🏗️ Architecture
Location: \`architecture/\`

Design decisions, technology choices, and system design.

**Key files:**
- \`design_decisions.md\` - Architectural choices
- \`tech_choices.md\` - Technology selections
- \`trade_offs.md\` - Decision trade-offs

### 👀 Code Review
Location: \`code_review/\`

Code review style, common feedback, and review approach.

**Key files:**
- \`feedback_style.md\` - How they provide feedback
- \`common_suggestions.md\` - Recurring suggestions
- \`review_checklist.md\` - What they look for

### 📚 Examples
Location: \`examples/\`

Real code examples from notable pull requests.

**Contents:**
- \`notable_prs/\` - Significant PRs and their patterns

---

## Using This Profile

### For Learning
1. Start with \`coding_style/\` to understand conventions
2. Review \`patterns/\` for common solutions
3. Study \`best_practices/\` for quality standards
4. Read \`examples/\` for real-world applications

### For AI Agents
Provide this profile as context when asking agents to:
- Write code matching this engineer's style
- Review code using their standards
- Apply their patterns and practices
- Make architectural decisions in their approach

**Example prompt:**
\`\`\`
Using the engineer profile at engineer_profiles/$ENGINEER_USERNAME/,
write a user authentication service following their coding style,
patterns, and best practices.
\`\`\`

### For Team Alignment
- Use as reference for team coding standards
- Share patterns for consistency
- Adopt best practices across team
- Train new engineers with real examples

---

## Expertise Areas

Based on analyzed PRs:

EOF

# Extract languages from the file paths of the PRs analyzed in detail
echo "### Languages" >> "$README_FILE"
if [ -s "$EXTENSIONS_FILE" ]; then
    sort "$EXTENSIONS_FILE" | uniq -c | sort -rn | head -10 | while read -r count ext; do
        echo "- $ext files ($count occurrences)" >> "$README_FILE"
    done
else
    echo "- No file data was collected from the analyzed PRs" >> "$README_FILE"
fi

echo "" >> "$README_FILE"
echo "### Repositories Contributed To" >> "$README_FILE"
cut -f2 "$PR_LIST_FILE" | sort -u | head -10 | while read -r repo; do
    echo "- $repo" >> "$README_FILE"
done

cat >> "$README_FILE" << EOF

---

## Data Sources

- **Pull Requests:** $PR_COUNT PRs analyzed
- **Time Range:** Most recent contributions
- **Scope:** $SCOPE_LABEL
- **Analysis Date:** $(date +%Y-%m-%d)

---

## Maintenance

### Updating This Profile

Re-run the extractor for the same username. This README and the raw data are
regenerated; template files that already exist are left untouched.

\`\`\`bash
./scripts/extract_engineer.sh $ENGINEER_USERNAME
\`\`\`

### Adding Custom Documentation

Feel free to enhance this profile with:
- Additional examples
- Team-specific practices
- Personal notes and observations
- Meeting discussions and decisions

---

## Notes

This profile is automatically generated from GitHub contributions.
It captures coding patterns and should be used as a learning
resource and reference, not as a rigid rulebook.

**Privacy:** The search covers every repository visible to the authenticated gh user, which can include private ones; review the profile before sharing it.
**Accuracy:** Patterns are inferred from code; always verify with engineer.
**Currency:** Update regularly as coding practices evolve.

EOF

echo -e "${GREEN}✓ Profile README created${NC}"

# Step 8: Create Template Files (only for the focus areas selected in Step 3)
echo ""
echo -e "${YELLOW}Creating template structure...${NC}"

# Coding Style Templates
if is_yes "$EXTRACT_STYLE"; then
write_template "$OUTPUT_DIR/coding_style/naming_conventions.md" << 'EOF'
# Naming Conventions

Based on analysis of pull requests.

## Variables

### General Rules
- Descriptive names preferred
- Avoid abbreviations
- Use camelCase (or language convention)

### Examples
```
// From PR analysis
const userAuthentication = ...  // not ua
const isActive = ...            // boolean prefix
const totalAmount = ...         // clear purpose
```

## Functions

### General Rules
- Verb-first naming
- Single responsibility
- Descriptive of action

### Examples
```
// From PR analysis
getUserById(id)
validateInput(data)
calculateTotal(items)
```

## Classes

### General Rules
- PascalCase
- Noun-based
- Clear purpose

### Examples
```
// From PR analysis
UserService
PaymentProcessor
AuthenticationManager
```

---

**Note:** Fill in with specific patterns found in engineer's code.
EOF
fi

if is_yes "$EXTRACT_PATTERNS"; then
write_template "$OUTPUT_DIR/patterns/common_solutions.md" << 'EOF'
# Common Solutions

Recurring patterns and solutions found in PRs.

## Pattern 1: [To be extracted]

**Problem:** [What problem does this solve]

**Solution:**
```
[Code example from PRs]
```

**Why:** [Reasoning from PR discussions]

**When to use:** [Applicable scenarios]

---

## Pattern 2: [To be extracted]

[Continue documenting patterns found in analysis]

---

**Note:** Populate with actual patterns from PR analysis.
EOF
fi

if is_yes "$EXTRACT_PRACTICES"; then
write_template "$OUTPUT_DIR/best_practices/testing_approach.md" << 'EOF'
# Testing Approach

Testing strategies and patterns extracted from PRs.

## Test Structure

[Document testing patterns from analyzed PRs]

## Coverage Standards

[Extract coverage expectations from PRs]

## Test Types

### Unit Tests
[Patterns found]

### Integration Tests
[Patterns found]

### E2E Tests
[Patterns found]

---

**Note:** Fill in based on test files in analyzed PRs.
EOF
fi

if is_yes "$EXTRACT_ARCH"; then
write_template "$OUTPUT_DIR/architecture/design_decisions.md" << 'EOF'
# Design Decisions

Architectural decisions extracted from PRs and discussions.

## Decision Template

### Decision: [Name]

**Context:** [What problem or need]

**Decision:** [What was decided]

**Reasoning:** [Why this approach]

**Alternatives Considered:** [Other options]

**Trade-offs:** [Pros and cons]

**Outcome:** [Results]

---

## Decisions Extracted from PRs

[To be populated with decisions found in PR descriptions and discussions]

---

**Note:** Extract from PR descriptions, discussions, and code comments.
EOF
fi

echo -e "${GREEN}✓ Template files created${NC}"

# Step 9: Generate Summary
echo ""
echo -e "${BLUE}╔════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║              Extraction Complete!              ║${NC}"
echo -e "${BLUE}╚════════════════════════════════════════════════╝${NC}"
echo ""
echo -e "${GREEN}Profile created at: ${BLUE}$OUTPUT_DIR${NC}"
echo ""
echo -e "${YELLOW}━━━ Summary ━━━${NC}"
echo "Engineer: $ENGINEER_USERNAME"
echo "PRs Analyzed: $PR_COUNT"
echo "Detailed Analysis: $DETAILED_ANALYSIS_LIMIT PRs"
echo ""
echo -e "${YELLOW}━━━ Created Structure ━━━${NC}"
echo "├── README.md (profile overview)"
echo "├── coding_style/ (conventions and preferences)"
echo "├── patterns/ (common solutions)"
echo "├── best_practices/ (quality standards)"
echo "├── architecture/ (design decisions)"
echo "├── code_review/ (feedback patterns)"
echo "├── examples/ (code samples)"
echo "└── raw_data/ (source PR data)"
echo ""
echo -e "${CYAN}━━━ Next Steps ━━━${NC}"
echo ""
echo "1. Review the generated profile:"
echo -e "   ${BLUE}cat $OUTPUT_DIR/README.md${NC}"
echo ""
echo "2. Review PR analysis:"
echo -e "   ${BLUE}cat $OUTPUT_DIR/raw_data/pr_analysis.md${NC}"
echo ""
echo "3. Enhance documentation with specific findings:"
echo "   - Add code examples to patterns/"
echo "   - Document specific conventions in coding_style/"
echo "   - Extract best practices from notable PRs"
echo "   - Add architectural decisions from PR discussions"
echo ""
echo "4. Use with AI agents:"
echo -e "   ${BLUE}\"Using engineer_profiles/$ENGINEER_USERNAME/, write code matching their style\"${NC}"
echo ""
echo -e "${YELLOW}💡 Tip: Manually review PRs and add specific examples to strengthen profile${NC}"
echo ""