gh-nickth3man-claude-market/skills/pocketflow/assets/examples/05_structured_output.py

"""
PocketFlow Cookbook Example: Structured Output (Resume Parser)

Difficulty: ☆☆☆ Dummy Level
Source: https://github.com/The-Pocket/PocketFlow/tree/main/cookbook/pocketflow-structured-output

Description:
Extract structured data from resumes using YAML prompting.
Demonstrates:
- Structured LLM output with YAML
- Schema validation with assertions
- Retry logic for parsing errors
- Index-based skill matching
"""

import yaml
from pocketflow import Node, Flow
# from utils import call_llm  # You need to implement this


class ResumeParserNode(Node):
    """Parse resume text into structured YAML format.

    Reads ``resume_text`` (required) and ``target_skills`` (optional) from
    the shared store and writes the validated result to
    ``shared["structured_data"]``.
    """

    # Opening Markdown fences to look for, most specific first.
    _FENCE_OPENERS = ("```yaml", "```yml", "```")

    def prep(self, shared):
        """Collect the inputs ``exec`` needs from the shared store.

        Raises:
            KeyError: if ``shared`` has no ``"resume_text"`` entry.
        """
        return {
            "resume_text": shared["resume_text"],
            "target_skills": shared.get("target_skills", [])
        }

    def exec(self, prep_res):
        """Extract structured data from resume.

        Builds the prompt, obtains a response (currently a hard-coded
        placeholder), parses the YAML payload and validates its schema.

        Returns:
            The parsed mapping with ``name``, ``email``, ``experience`` and
            ``skill_indexes`` keys.

        Raises:
            AssertionError: if the parsed YAML does not match the schema.
            yaml.YAMLError: if the payload is not valid YAML.
        """
        resume_text = prep_res["resume_text"]
        target_skills = prep_res["target_skills"]

        # Create skill list with indexes for prompt
        skill_list_for_prompt = "\n".join(
            f"{i}: {skill}" for i, skill in enumerate(target_skills)
        )

        # NOTE: `prompt` is only consumed once the call_llm line below is
        # enabled; until then the placeholder response is used instead.
        prompt = f"""
Analyze the resume below. Output ONLY the requested information in YAML format.

**Resume:**
```
{resume_text}
```

**Target Skills (use these indexes):**
```
{skill_list_for_prompt}
```

**YAML Output Requirements:**
- Extract `name` (string)
- Extract `email` (string)
- Extract `experience` (list of objects with `title` and `company`)
- Extract `skill_indexes` (list of integers found from the Target Skills list)
- **Add a YAML comment (`#`) explaining the source BEFORE each field**

Generate the YAML output now:
"""

        # Get LLM response
        # response = call_llm(prompt)

        # Placeholder response
        response = """
```yaml
# Extracted from header
name: John Smith

# Found in contact section
email: john.smith@email.com

# Work history section
experience:
  - title: Senior Developer
    company: Tech Corp
  - title: Software Engineer
    company: StartupXYZ

# Skills matching target list
skill_indexes: [0, 2, 5]  # Team leadership, Project management, Python
```
"""

        # Parse YAML from response
        yaml_str = self._extract_yaml_block(response)
        structured_result = yaml.safe_load(yaml_str)

        # Validate structure
        self._validate_schema(structured_result)

        return structured_result

    @classmethod
    def _extract_yaml_block(cls, response):
        """Return the YAML payload of *response*, unwrapping a code fence.

        Falls back to the whole (stripped) response when no fence is present,
        so an unfenced reply is still handed to the YAML parser instead of
        failing with an opaque IndexError.
        """
        for opener in cls._FENCE_OPENERS:
            _, found, remainder = response.partition(opener)
            if found:
                # Everything up to the closing fence (or the end, if the
                # closing fence is missing).
                return remainder.partition("```")[0].strip()
        return response.strip()

    @staticmethod
    def _require(condition, message):
        """Raise ``AssertionError(message)`` unless *condition* is true.

        Used in place of the ``assert`` statement so that validation still
        runs when Python is started with ``-O`` (which strips asserts).
        """
        if not condition:
            raise AssertionError(message)

    @classmethod
    def _validate_schema(cls, structured_result):
        """Check that *structured_result* has the fields the prompt requested.

        Raises:
            AssertionError: on the first schema violation found.
        """
        cls._require(structured_result is not None, "Parsed YAML is None")
        # A scalar or list would make the `in` checks below meaningless.
        cls._require(isinstance(structured_result, dict),
                     "Parsed YAML is not a mapping")

        for key in ("name", "email", "experience"):
            cls._require(key in structured_result, f"Missing '{key}'")
        cls._require(isinstance(structured_result["experience"], list),
                     "'experience' is not a list")

        cls._require("skill_indexes" in structured_result,
                     "Missing 'skill_indexes'")
        skill_indexes = structured_result["skill_indexes"]
        # An empty YAML value (None) is tolerated and means "no matches".
        if skill_indexes is not None:
            cls._require(isinstance(skill_indexes, list),
                         "'skill_indexes' is not a list")
            for index in skill_indexes:
                # bool is a subclass of int; reject it explicitly.
                cls._require(
                    isinstance(index, int) and not isinstance(index, bool),
                    f"Skill index {index!r} is not an integer",
                )

    def post(self, shared, prep_res, exec_res):
        """Store and display structured data.

        Returns:
            The action string ``"default"``.
        """
        shared["structured_data"] = exec_res

        print("\n=== STRUCTURED RESUME DATA ===\n")
        print(yaml.dump(exec_res, sort_keys=False, allow_unicode=True,
                       default_flow_style=None))
        print("\n✅ Extracted resume information.\n")

        return "default"


# Example usage
def run_parser():
    """Run the resume parser demo end to end.

    Builds a shared store from a sample resume and a list of target skills,
    runs a single-node flow over it, then prints every matched target skill
    whose index falls inside the target list.
    """

    # Skills the parser is asked to look for; list position is the index
    target_skills = [
        "Team leadership & management",
        "CRM software",
        "Project management",
        "Public speaking",
        "Microsoft Office",
        "Python",
        "Data Analysis"
    ]

    # Resume text fed to the parser
    sample_resume = """
    JOHN SMITH
    Email: john.smith@email.com | Phone: (555) 123-4567

    EXPERIENCE
    Senior Developer - Tech Corp (2020-Present)
    - Led team of 5 developers
    - Built scalable Python applications
    - Managed multiple projects simultaneously

    Software Engineer - StartupXYZ (2018-2020)
    - Developed web applications
    - Collaborated with cross-functional teams
    - Presented technical solutions to stakeholders

    SKILLS
    - Team Leadership & Management
    - Python, JavaScript, SQL
    - Project Management
    - Public Speaking
    - CRM Software
    - Data Analysis
    """

    # Shared store handed to the flow
    shared = {"resume_text": sample_resume, "target_skills": target_skills}

    # Build a single-node flow and execute it
    flow = Flow(start=ResumeParserNode(max_retries=3, wait=10))
    flow.run(shared)

    # Nothing to report unless the node stored a result
    if "structured_data" not in shared:
        return

    matched = shared["structured_data"].get("skill_indexes", [])
    if not matched:
        return

    print("\n--- Matched Target Skills ---")
    skill_count = len(target_skills)
    for idx in matched:
        # Skip indexes that do not point into the target list
        if not (0 <= idx < skill_count):
            continue
        print(f"✓ {target_skills[idx]} (Index: {idx})")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_parser()