Initial commit
This commit is contained in:
@@ -0,0 +1,138 @@
|
||||
"""
|
||||
Contact Information Extraction Example
|
||||
|
||||
Extracts structured contact information from unstructured text (emails, messages, etc.)
|
||||
using JSON outputs mode with Pydantic schema validation.
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field, EmailStr
|
||||
from typing import Optional, List
|
||||
from anthropic import Anthropic
|
||||
import os
|
||||
|
||||
# Initialize client
|
||||
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
|
||||
|
||||
|
||||
# Define schema with Pydantic
|
||||
class ContactInfo(BaseModel):
|
||||
"""Structured contact information extracted from text."""
|
||||
|
||||
name: str = Field(description="Full name of the contact person")
|
||||
email: EmailStr = Field(description="Email address")
|
||||
phone: Optional[str] = Field(
|
||||
None, description="Phone number in any format"
|
||||
)
|
||||
company: Optional[str] = Field(
|
||||
None, description="Company or organization name"
|
||||
)
|
||||
plan_interest: Optional[str] = Field(
|
||||
None, description="Product plan or tier they're interested in"
|
||||
)
|
||||
demo_requested: bool = Field(
|
||||
False, description="Whether they requested a product demo"
|
||||
)
|
||||
tags: List[str] = Field(
|
||||
default_factory=list,
|
||||
description="Relevant tags or categories"
|
||||
)
|
||||
|
||||
|
||||
def extract_contact(text: str) -> Optional[ContactInfo]:
|
||||
"""
|
||||
Extract contact information from unstructured text.
|
||||
|
||||
Args:
|
||||
text: Unstructured text containing contact information
|
||||
|
||||
Returns:
|
||||
ContactInfo object with extracted data, or None if request refused
|
||||
"""
|
||||
try:
|
||||
response = client.beta.messages.parse(
|
||||
model="claude-sonnet-4-5",
|
||||
max_tokens=1024,
|
||||
betas=["structured-outputs-2025-11-13"],
|
||||
messages=[{
|
||||
"role": "user",
|
||||
"content": f"Extract contact information from the following text:\n\n{text}"
|
||||
}],
|
||||
output_format=ContactInfo,
|
||||
)
|
||||
|
||||
# Handle different stop reasons
|
||||
if response.stop_reason == "refusal":
|
||||
print(f"⚠️ Request refused for safety reasons")
|
||||
return None
|
||||
|
||||
if response.stop_reason == "max_tokens":
|
||||
print(f"⚠️ Response truncated - increase max_tokens")
|
||||
return None
|
||||
|
||||
# Return validated contact info
|
||||
return response.parsed_output
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error extracting contact: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def main():
|
||||
"""Run contact extraction examples."""
|
||||
|
||||
examples = [
|
||||
# Example 1: Complete contact info
|
||||
"""
|
||||
Hi, I'm John Smith from Acme Corp. You can reach me at john.smith@acme.com
|
||||
or call me at (555) 123-4567. I'm interested in your Enterprise plan and
|
||||
would love to schedule a demo next week.
|
||||
""",
|
||||
|
||||
# Example 2: Minimal info
|
||||
"""
|
||||
Contact: jane.doe@example.com
|
||||
""",
|
||||
|
||||
# Example 3: Informal message
|
||||
"""
|
||||
Hey! Bob here. Email me at bob@startup.io if you want to chat about
|
||||
the Pro plan. Thanks!
|
||||
""",
|
||||
|
||||
# Example 4: Multiple contacts (extracts first/primary)
|
||||
"""
|
||||
From: alice@company.com
|
||||
CC: support@company.com
|
||||
|
||||
Hi, I'm Alice Johnson, VP of Engineering at TechCo.
|
||||
We're evaluating your platform for our team of 50 developers.
|
||||
""",
|
||||
]
|
||||
|
||||
print("=" * 70)
|
||||
print("Contact Extraction Examples")
|
||||
print("=" * 70)
|
||||
|
||||
for i, text in enumerate(examples, 1):
|
||||
print(f"\n📧 Example {i}:")
|
||||
print(f"Input: {text.strip()[:100]}...")
|
||||
|
||||
contact = extract_contact(text)
|
||||
|
||||
if contact:
|
||||
print(f"\n✅ Extracted Contact:")
|
||||
print(f" Name: {contact.name}")
|
||||
print(f" Email: {contact.email}")
|
||||
print(f" Phone: {contact.phone or 'N/A'}")
|
||||
print(f" Company: {contact.company or 'N/A'}")
|
||||
print(f" Plan Interest: {contact.plan_interest or 'N/A'}")
|
||||
print(f" Demo Requested: {contact.demo_requested}")
|
||||
print(f" Tags: {', '.join(contact.tags) if contact.tags else 'None'}")
|
||||
else:
|
||||
print(f"\n❌ No contact extracted")
|
||||
|
||||
print("-" * 70)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,160 @@
|
||||
"""
|
||||
Invoice Data Extraction Example
|
||||
|
||||
Extracts structured invoice data from text using JSON outputs with nested schemas.
|
||||
Demonstrates handling complex nested structures (line items, tax breakdown).
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List
|
||||
from decimal import Decimal
|
||||
from anthropic import Anthropic
|
||||
import os
|
||||
|
||||
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
|
||||
|
||||
|
||||
# Nested schema for line items
|
||||
class LineItem(BaseModel):
|
||||
"""Individual line item on an invoice."""
|
||||
description: str = Field(description="Item description")
|
||||
quantity: int = Field(description="Quantity ordered")
|
||||
unit_price: float = Field(description="Price per unit in USD")
|
||||
total: float = Field(description="Total for this line (quantity * unit_price)")
|
||||
|
||||
|
||||
class Invoice(BaseModel):
|
||||
"""Complete invoice structure."""
|
||||
invoice_number: str = Field(description="Invoice ID (format: INV-XXXXX)")
|
||||
date: str = Field(description="Invoice date in YYYY-MM-DD format")
|
||||
due_date: str = Field(description="Payment due date in YYYY-MM-DD format")
|
||||
customer_name: str = Field(description="Customer or company name")
|
||||
customer_email: str = Field(description="Customer email address")
|
||||
|
||||
line_items: List[LineItem] = Field(
|
||||
description="List of items on the invoice"
|
||||
)
|
||||
|
||||
subtotal: float = Field(description="Subtotal before tax in USD")
|
||||
tax_rate: float = Field(description="Tax rate as decimal (e.g., 0.08 for 8%)")
|
||||
tax_amount: float = Field(description="Tax amount in USD")
|
||||
total_amount: float = Field(description="Final total amount in USD")
|
||||
|
||||
notes: str = Field(
|
||||
default="",
|
||||
description="Additional notes or payment instructions"
|
||||
)
|
||||
|
||||
|
||||
def extract_invoice(invoice_text: str) -> Optional[Invoice]:
|
||||
"""Extract structured invoice data."""
|
||||
try:
|
||||
response = client.beta.messages.parse(
|
||||
model="claude-sonnet-4-5",
|
||||
max_tokens=2048, # Higher for complex nested structures
|
||||
betas=["structured-outputs-2025-11-13"],
|
||||
messages=[{
|
||||
"role": "user",
|
||||
"content": f"Extract all invoice data from:\n\n{invoice_text}"
|
||||
}],
|
||||
output_format=Invoice,
|
||||
)
|
||||
|
||||
if response.stop_reason != "end_turn":
|
||||
print(f"⚠️ Unexpected stop reason: {response.stop_reason}")
|
||||
return None
|
||||
|
||||
return response.parsed_output
|
||||
|
||||
except Exception as e:
|
||||
print(f"❌ Error: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def main():
|
||||
"""Run invoice extraction example."""
|
||||
|
||||
invoice_text = """
|
||||
INVOICE
|
||||
|
||||
Invoice Number: INV-2024-00123
|
||||
Date: 2024-01-15
|
||||
Due Date: 2024-02-15
|
||||
|
||||
Bill To:
|
||||
Acme Corporation
|
||||
John Smith, CFO
|
||||
john.smith@acme.com
|
||||
|
||||
ITEMS:
|
||||
1. Cloud Hosting - Pro Plan (x3 servers)
|
||||
Quantity: 3
|
||||
Unit Price: $299.00
|
||||
Total: $897.00
|
||||
|
||||
2. Database Storage - 500GB
|
||||
Quantity: 500
|
||||
Unit Price: $0.50
|
||||
Total: $250.00
|
||||
|
||||
3. API Calls - Premium Tier
|
||||
Quantity: 1,000,000
|
||||
Unit Price: $0.001
|
||||
Total: $1,000.00
|
||||
|
||||
4. Support - Enterprise Level
|
||||
Quantity: 1
|
||||
Unit Price: $500.00
|
||||
Total: $500.00
|
||||
|
||||
Subtotal: $2,647.00
|
||||
Tax (8.5%): $224.99
|
||||
TOTAL: $2,871.99
|
||||
|
||||
Payment Terms: Net 30
|
||||
Please remit payment to accounts@cloudprovider.com
|
||||
"""
|
||||
|
||||
print("=" * 70)
|
||||
print("Invoice Extraction Example")
|
||||
print("=" * 70)
|
||||
|
||||
invoice = extract_invoice(invoice_text)
|
||||
|
||||
if invoice:
|
||||
print(f"\n✅ Invoice Extracted Successfully\n")
|
||||
print(f"Invoice #: {invoice.invoice_number}")
|
||||
print(f"Customer: {invoice.customer_name} ({invoice.customer_email})")
|
||||
print(f"Date: {invoice.date}")
|
||||
print(f"Due: {invoice.due_date}")
|
||||
print(f"\nLine Items:")
|
||||
|
||||
for i, item in enumerate(invoice.line_items, 1):
|
||||
print(f" {i}. {item.description}")
|
||||
print(f" Qty: {item.quantity} × ${item.unit_price:.2f} = ${item.total:.2f}")
|
||||
|
||||
print(f"\nSubtotal: ${invoice.subtotal:.2f}")
|
||||
print(f"Tax ({invoice.tax_rate * 100:.1f}%): ${invoice.tax_amount:.2f}")
|
||||
print(f"TOTAL: ${invoice.total_amount:.2f}")
|
||||
|
||||
if invoice.notes:
|
||||
print(f"\nNotes: {invoice.notes}")
|
||||
|
||||
# Validation checks
|
||||
print(f"\n🔍 Validation:")
|
||||
calculated_subtotal = sum(item.total for item in invoice.line_items)
|
||||
print(f" Subtotal matches: {abs(calculated_subtotal - invoice.subtotal) < 0.01}")
|
||||
|
||||
calculated_tax = invoice.subtotal * invoice.tax_rate
|
||||
print(f" Tax calculation matches: {abs(calculated_tax - invoice.tax_amount) < 0.01}")
|
||||
|
||||
calculated_total = invoice.subtotal + invoice.tax_amount
|
||||
print(f" Total matches: {abs(calculated_total - invoice.total_amount) < 0.01}")
|
||||
|
||||
else:
|
||||
print("❌ Failed to extract invoice")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from typing import Optional
|
||||
main()
|
||||
Reference in New Issue
Block a user