176 lines
4.8 KiB
Python
176 lines
4.8 KiB
Python
"""
|
|
PocketFlow Cookbook Example: Structured Output (Resume Parser)
|
|
|
|
Difficulty: ☆☆☆ Dummy Level
|
|
Source: https://github.com/The-Pocket/PocketFlow/tree/main/cookbook/pocketflow-structured-output
|
|
|
|
Description:
|
|
Extract structured data from resumes using YAML prompting.
|
|
Demonstrates:
|
|
- Structured LLM output with YAML
|
|
- Schema validation with assertions
|
|
- Retry logic for parsing errors
|
|
- Index-based skill matching
|
|
"""
|
|
|
|
import yaml
|
|
from pocketflow import Node, Flow
|
|
# from utils import call_llm # You need to implement this
|
|
|
|
|
|
class ResumeParserNode(Node):
    """Parse resume text into structured data via a YAML-formatted reply.

    Reads ``resume_text`` (required) and ``target_skills`` (optional) from
    the shared store, builds an extraction prompt, parses the fenced YAML
    reply, validates its shape, and stores the result under
    ``shared["structured_data"]``.
    """

    def prep(self, shared):
        """Collect the inputs ``exec`` needs from the shared store.

        Args:
            shared: mapping holding ``"resume_text"`` and, optionally,
                ``"target_skills"`` (defaults to an empty list).

        Returns:
            dict with ``"resume_text"`` and ``"target_skills"`` keys.

        Raises:
            KeyError: if ``shared`` has no ``"resume_text"`` entry.
        """
        return {
            "resume_text": shared["resume_text"],
            "target_skills": shared.get("target_skills", []),
        }

    @staticmethod
    def _validate_resume_data(data):
        """Check that the parsed YAML has the shape the prompt asked for.

        Failures raise ``AssertionError`` explicitly rather than through
        ``assert`` statements, so the checks still run when Python is
        started with ``-O`` (which strips ``assert``).

        Args:
            data: the object returned by ``yaml.safe_load``.

        Raises:
            AssertionError: if ``data`` is missing, is not a mapping, lacks
                a required key, or has a wrongly typed ``experience`` /
                ``skill_indexes`` value.
        """

        def require(condition, message):
            if not condition:
                raise AssertionError(message)

        require(data is not None, "Parsed YAML is None")
        # A bare scalar or list is valid YAML but not the mapping we need.
        require(isinstance(data, dict), "Parsed YAML is not a mapping")
        require("name" in data, "Missing 'name'")
        require("email" in data, "Missing 'email'")
        require("experience" in data, "Missing 'experience'")
        require(
            isinstance(data.get("experience"), list),
            "'experience' is not a list",
        )
        require("skill_indexes" in data, "Missing 'skill_indexes'")

        # An empty YAML value parses to None; keep accepting it as "no
        # matches". Anything else must be a list of plain integers (bool is
        # an int subclass, so it is excluded explicitly).
        skill_indexes = data["skill_indexes"]
        require(
            skill_indexes is None
            or (
                isinstance(skill_indexes, list)
                and all(
                    isinstance(item, int) and not isinstance(item, bool)
                    for item in skill_indexes
                )
            ),
            "'skill_indexes' is not a list of integers",
        )

    def exec(self, prep_res):
        """Build the extraction prompt, parse the YAML reply, validate it.

        The LLM call is currently commented out and a hard-coded placeholder
        reply is parsed instead, so ``prompt`` is built but not sent.

        Args:
            prep_res: dict with ``"resume_text"`` and ``"target_skills"``,
                as returned by ``prep``.

        Returns:
            The parsed YAML mapping, containing at least ``name``,
            ``email``, ``experience`` and ``skill_indexes``.

        Raises:
            IndexError: if the reply contains no yaml-fenced block.
            AssertionError: if the parsed data fails validation.
            Errors raised by ``yaml.safe_load`` propagate unchanged.
        """
        resume_text = prep_res["resume_text"]
        target_skills = prep_res["target_skills"]

        # Number the skills so the model can answer with compact indexes
        # instead of free-text skill names.
        skill_list_for_prompt = "\n".join(
            f"{i}: {skill}" for i, skill in enumerate(target_skills)
        )

        prompt = f"""
Analyze the resume below. Output ONLY the requested information in YAML format.

**Resume:**
```
{resume_text}
```

**Target Skills (use these indexes):**
```
{skill_list_for_prompt}
```

**YAML Output Requirements:**
- Extract `name` (string)
- Extract `email` (string)
- Extract `experience` (list of objects with `title` and `company`)
- Extract `skill_indexes` (list of integers found from the Target Skills list)
- **Add a YAML comment (`#`) explaining the source BEFORE each field**

Generate the YAML output now:
"""

        # Get LLM response
        # response = call_llm(prompt)

        # Placeholder response
        response = """
```yaml
# Extracted from header
name: John Smith

# Found in contact section
email: john.smith@email.com

# Work history section
experience:
  - title: Senior Developer
    company: Tech Corp
  - title: Software Engineer
    company: StartupXYZ

# Skills matching target list
skill_indexes: [0, 2, 5] # Team leadership, Project management, Python
```
"""

        # Take the text between the opening yaml fence and the next fence.
        # A reply without a yaml fence raises IndexError here.
        # NOTE(review): presumably such errors are what the node's
        # max_retries/wait settings react to - confirm against PocketFlow.
        yaml_str = response.split("```yaml")[1].split("```")[0].strip()
        structured_result = yaml.safe_load(yaml_str)

        self._validate_resume_data(structured_result)

        return structured_result

    def post(self, shared, prep_res, exec_res):
        """Store the parsed data in the shared store and print it.

        Args:
            shared: mapping that receives ``"structured_data"``.
            prep_res: unused.
            exec_res: the validated mapping returned by ``exec``.

        Returns:
            The string ``"default"``.
        """
        shared["structured_data"] = exec_res

        print("\n=== STRUCTURED RESUME DATA ===\n")
        print(yaml.dump(exec_res, sort_keys=False, allow_unicode=True,
                        default_flow_style=None))
        print("\n✅ Extracted resume information.\n")

        return "default"
|
|
|
|
|
|
# Example usage
|
|
def run_parser():
    """Run the resume parser demo end to end.

    Builds a sample resume and a list of target skills, runs a one-node
    flow over them, then prints every target skill whose index appears in
    the parsed ``skill_indexes``.
    """
    # Sample resume text
    sample_resume = """
JOHN SMITH
Email: john.smith@email.com | Phone: (555) 123-4567

EXPERIENCE
Senior Developer - Tech Corp (2020-Present)
- Led team of 5 developers
- Built scalable Python applications
- Managed multiple projects simultaneously

Software Engineer - StartupXYZ (2018-2020)
- Developed web applications
- Collaborated with cross-functional teams
- Presented technical solutions to stakeholders

SKILLS
- Team Leadership & Management
- Python, JavaScript, SQL
- Project Management
- Public Speaking
- CRM Software
- Data Analysis
"""

    # Target skills to match; the parser refers to them by list position.
    target_skills = [
        "Team leadership & management",
        "CRM software",
        "Project management",
        "Public speaking",
        "Microsoft Office",
        "Python",
        "Data Analysis",
    ]

    # Prepare shared store
    shared = {
        "resume_text": sample_resume,
        "target_skills": target_skills,
    }

    # Create and run flow
    parser_node = ResumeParserNode(max_retries=3, wait=10)
    flow = Flow(start=parser_node)
    flow.run(shared)

    # Display matched skills
    if "structured_data" in shared:
        found_indexes = shared["structured_data"].get("skill_indexes", [])
        # The indexes originate from model output, so a non-sequence value
        # is treated as "nothing matched" instead of failing on iteration.
        if not isinstance(found_indexes, (list, tuple)):
            found_indexes = []
        if found_indexes:
            print("\n--- Matched Target Skills ---")
            for index in found_indexes:
                # Skip entries that are not plain integers (bool is an int
                # subclass) so the range comparison cannot raise TypeError;
                # out-of-range indexes are ignored.
                if not isinstance(index, int) or isinstance(index, bool):
                    continue
                if 0 <= index < len(target_skills):
                    print(f"✓ {target_skills[index]} (Index: {index})")
|
|
|
|
|
|
# Run the demo only when this file is executed as a script, so importing the
# module does not trigger the flow.
if __name__ == "__main__":
    run_parser()
|