# Files
# 2025-11-30 08:44:08 +08:00
#
# 176 lines
# 4.8 KiB
# Python

"""
PocketFlow Cookbook Example: Structured Output (Resume Parser)
Difficulty: ☆☆☆ Dummy Level
Source: https://github.com/The-Pocket/PocketFlow/tree/main/cookbook/pocketflow-structured-output
Description:
Extract structured data from resumes using YAML prompting.
Demonstrates:
- Structured LLM output with YAML
- Schema validation with assertions
- Retry logic for parsing errors
- Index-based skill matching
"""
import yaml
from pocketflow import Node, Flow
# from utils import call_llm # You need to implement this
class ResumeParserNode(Node):
    """Parse resume text into a structured dict via a YAML-formatted reply.

    Reads ``resume_text`` (required) and ``target_skills`` (optional) from the
    shared store, and writes the validated result to
    ``shared["structured_data"]``.
    """

    def prep(self, shared):
        """Collect the inputs ``exec`` needs from the shared store.

        Returns:
            A dict with ``"resume_text"`` and ``"target_skills"`` (defaults to
            an empty list when the shared store has no target skills).

        Raises:
            KeyError: If ``shared`` has no ``"resume_text"`` entry.
        """
        return {
            "resume_text": shared["resume_text"],
            "target_skills": shared.get("target_skills", []),
        }

    def exec(self, prep_res):
        """Build the extraction prompt, parse the YAML reply and validate it.

        Returns:
            The parsed YAML mapping containing at least ``name``, ``email``,
            ``experience`` (a list) and ``skill_indexes``.

        Raises:
            IndexError: If the reply contains no fenced yaml block.
            yaml.YAMLError: If the fenced block is not valid YAML.
            AssertionError: If the parsed document fails schema validation.

        NOTE(review): failures here are presumably what the node's
        ``max_retries`` / ``wait`` settings retry on -- confirm against the
        PocketFlow ``Node`` documentation.
        """
        resume_text = prep_res["resume_text"]
        target_skills = prep_res["target_skills"]

        # Number the target skills so the model can answer with compact
        # integer indexes instead of free-form skill names.
        skill_list_for_prompt = "\n".join(
            f"{i}: {skill}" for i, skill in enumerate(target_skills)
        )

        # The prompt body stays at column 0 on purpose: any indentation inside
        # the literal would be sent to the model verbatim.
        prompt = f"""
Analyze the resume below. Output ONLY the requested information in YAML format.
**Resume:**
```
{resume_text}
```
**Target Skills (use these indexes):**
```
{skill_list_for_prompt}
```
**YAML Output Requirements:**
- Extract `name` (string)
- Extract `email` (string)
- Extract `experience` (list of objects with `title` and `company`)
- Extract `skill_indexes` (list of integers found from the Target Skills list)
- **Add a YAML comment (`#`) explaining the source BEFORE each field**
Generate the YAML output now:
"""
        # Get LLM response
        # response = call_llm(prompt)
        # Placeholder response, used until ``call_llm`` is implemented (so
        # ``prompt`` is currently built but not sent anywhere). The nested
        # ``experience`` items must keep their indentation: YAML nesting is
        # indentation-sensitive and the block fails to parse without it.
        response = """
```yaml
# Extracted from header
name: John Smith
# Found in contact section
email: john.smith@email.com
# Work history section
experience:
  - title: Senior Developer
    company: Tech Corp
  - title: Software Engineer
    company: StartupXYZ
# Skills matching target list
skill_indexes: [0, 2, 5] # Team leadership, Project management, Python
```
"""
        yaml_str = self._extract_yaml_block(response)
        structured_result = yaml.safe_load(yaml_str)
        self._validate(structured_result)
        return structured_result

    @staticmethod
    def _extract_yaml_block(response):
        """Return the stripped text of the first fenced yaml block in *response*."""
        _, fence, after_fence = response.partition("```yaml")
        if not fence:
            # Same exception type the original subscript-based extraction
            # raised, but with a message that says what went wrong.
            raise IndexError("No ```yaml fenced block found in LLM response")
        return after_fence.partition("```")[0].strip()

    @staticmethod
    def _validate(structured_result):
        """Raise ``AssertionError`` unless *structured_result* has the expected schema."""
        # Explicit raises instead of ``assert`` statements: asserts are
        # removed under ``python -O``, which would silently disable
        # validation. The exception type and messages are unchanged.
        if structured_result is None:
            raise AssertionError("Parsed YAML is None")
        if not isinstance(structured_result, dict):
            raise AssertionError("Parsed YAML is not a mapping")
        for field in ("name", "email", "experience"):
            if field not in structured_result:
                raise AssertionError(f"Missing '{field}'")
        if not isinstance(structured_result["experience"], list):
            raise AssertionError("'experience' is not a list")
        if "skill_indexes" not in structured_result:
            raise AssertionError("Missing 'skill_indexes'")

    def post(self, shared, prep_res, exec_res):
        """Store the parsed data in the shared store and print it as YAML.

        Returns:
            The action string ``"default"``.
        """
        shared["structured_data"] = exec_res
        print("\n=== STRUCTURED RESUME DATA ===\n")
        print(
            yaml.dump(
                exec_res,
                sort_keys=False,
                allow_unicode=True,
                default_flow_style=None,
            )
        )
        print("\n✅ Extracted resume information.\n")
        return "default"
# Example usage
def run_parser():
    """Run the resume-parser demo and print the matched target skills.

    Builds a sample resume and a target-skill list, runs a single-node flow
    over them, then prints every target skill whose index appears in the
    parsed ``skill_indexes``.
    """
    # Sample resume text (kept at column 0 so no stray indentation ends up
    # inside the text handed to the parser).
    sample_resume = """
JOHN SMITH
Email: john.smith@email.com | Phone: (555) 123-4567
EXPERIENCE
Senior Developer - Tech Corp (2020-Present)
- Led team of 5 developers
- Built scalable Python applications
- Managed multiple projects simultaneously
Software Engineer - StartupXYZ (2018-2020)
- Developed web applications
- Collaborated with cross-functional teams
- Presented technical solutions to stakeholders
SKILLS
- Team Leadership & Management
- Python, JavaScript, SQL
- Project Management
- Public Speaking
- CRM Software
- Data Analysis
"""
    # Target skills to match; a skill's position in this list is the index
    # the parser is asked to report.
    target_skills = [
        "Team leadership & management",
        "CRM software",
        "Project management",
        "Public speaking",
        "Microsoft Office",
        "Python",
        "Data Analysis",
    ]

    # Prepare shared store
    shared = {
        "resume_text": sample_resume,
        "target_skills": target_skills,
    }

    # Create and run flow
    parser_node = ResumeParserNode(max_retries=3, wait=10)
    flow = Flow(start=parser_node)
    flow.run(shared)

    # Display matched skills
    if "structured_data" not in shared:
        return
    found_indexes = shared["structured_data"].get("skill_indexes", [])
    # The node only checks that ``skill_indexes`` exists, not its type, so
    # guard against a non-list value before iterating.
    if not found_indexes or not isinstance(found_indexes, list):
        return

    print("\n--- Matched Target Skills ---")
    for index in found_indexes:
        # Skip non-integer entries (e.g. "2") the same way out-of-range
        # indexes are skipped, instead of failing on the comparison.
        if isinstance(index, int) and 0 <= index < len(target_skills):
            print(f"{target_skills[index]} (Index: {index})")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    run_parser()