Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:16:40 +08:00
commit f125e90b9f
370 changed files with 67769 additions and 0 deletions

View File

@@ -0,0 +1,138 @@
"""
Contact Information Extraction Example
Extracts structured contact information from unstructured text (emails, messages, etc.)
using JSON outputs mode with Pydantic schema validation.
"""
from pydantic import BaseModel, Field, EmailStr
from typing import Optional, List
from anthropic import Anthropic
import os
# Initialize client
# The API key is looked up in the ANTHROPIC_API_KEY environment variable;
# os.environ.get() yields None when the variable is not set.
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
# Define schema with Pydantic
class ContactInfo(BaseModel):
    """Structured contact information extracted from text."""

    # Required fields: no default is declared, so validation fails without them.
    name: str = Field(description="Full name of the contact person")
    email: EmailStr = Field(description="Email address")

    # Optional free-text details; each falls back to None when absent.
    phone: Optional[str] = Field(
        default=None,
        description="Phone number in any format",
    )
    company: Optional[str] = Field(
        default=None,
        description="Company or organization name",
    )
    plan_interest: Optional[str] = Field(
        default=None,
        description="Product plan or tier they're interested in",
    )

    # Flags and labels; default_factory gives every instance its own list.
    demo_requested: bool = Field(
        default=False,
        description="Whether they requested a product demo",
    )
    tags: List[str] = Field(
        default_factory=list,
        description="Relevant tags or categories",
    )
def extract_contact(text: str) -> Optional[ContactInfo]:
    """
    Pull a single ContactInfo record out of free-form text.

    Args:
        text: Unstructured text containing contact information

    Returns:
        The parsed ContactInfo, or None when the response stopped because of
        a refusal or because it hit the max_tokens limit.

    Raises:
        Exception: any error raised while calling the API is printed and
            then re-raised unchanged.
    """
    # Stop reasons that mean there is no usable output, with the warning to show.
    unusable_stop_reasons = {
        "refusal": "⚠️ Request refused for safety reasons",
        "max_tokens": "⚠️ Response truncated - increase max_tokens",
    }
    try:
        prompt = f"Extract contact information from the following text:\n\n{text}"
        response = client.beta.messages.parse(
            model="claude-sonnet-4-5",
            max_tokens=1024,
            betas=["structured-outputs-2025-11-13"],
            messages=[{"role": "user", "content": prompt}],
            output_format=ContactInfo,
        )
        warning = unusable_stop_reasons.get(response.stop_reason)
        if warning is not None:
            print(warning)
            return None
        # Any other stop reason: hand back the validated model instance.
        return response.parsed_output
    except Exception as exc:
        print(f"❌ Error extracting contact: {exc}")
        raise
def main():
    """Run contact extraction examples.

    Sends each sample text through extract_contact() and prints either the
    extracted fields (with 'N/A' / 'None' placeholders for empty values) or
    a failure line. Exceptions raised by extract_contact() propagate.
    """
    # Sample texts are kept flush-left so the literals carry no extra
    # indentation into the prompt.
    examples = [
        # Example 1: Complete contact info
        """
Hi, I'm John Smith from Acme Corp. You can reach me at john.smith@acme.com
or call me at (555) 123-4567. I'm interested in your Enterprise plan and
would love to schedule a demo next week.
""",
        # Example 2: Minimal info
        """
Contact: jane.doe@example.com
""",
        # Example 3: Informal message
        """
Hey! Bob here. Email me at bob@startup.io if you want to chat about
the Pro plan. Thanks!
""",
        # Example 4: Multiple contacts (extracts first/primary)
        """
From: alice@company.com
CC: support@company.com
Hi, I'm Alice Johnson, VP of Engineering at TechCo.
We're evaluating your platform for our team of 50 developers.
""",
    ]
    preview_limit = 100

    print("=" * 70)
    print("Contact Extraction Examples")
    print("=" * 70)
    for i, text in enumerate(examples, 1):
        stripped = text.strip()
        # Show an ellipsis only when the preview really was cut short;
        # short inputs are printed in full without a misleading "...".
        ellipsis = "..." if len(stripped) > preview_limit else ""
        print(f"\n📧 Example {i}:")
        print(f"Input: {stripped[:preview_limit]}{ellipsis}")
        contact = extract_contact(text)
        if contact is not None:
            print("\n✅ Extracted Contact:")
            print(f" Name: {contact.name}")
            print(f" Email: {contact.email}")
            print(f" Phone: {contact.phone or 'N/A'}")
            print(f" Company: {contact.company or 'N/A'}")
            print(f" Plan Interest: {contact.plan_interest or 'N/A'}")
            print(f" Demo Requested: {contact.demo_requested}")
            print(f" Tags: {', '.join(contact.tags) if contact.tags else 'None'}")
        else:
            print("\n❌ No contact extracted")
        print("-" * 70)
# Run the examples only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,160 @@
"""
Invoice Data Extraction Example
Extracts structured invoice data from text using JSON outputs with nested schemas.
Demonstrates handling complex nested structures (line items, tax breakdown).
"""
import os
from decimal import Decimal
from typing import List, Optional

from anthropic import Anthropic
from pydantic import BaseModel, Field
# Shared API client. The key is looked up in the ANTHROPIC_API_KEY environment
# variable; os.environ.get() yields None when the variable is not set.
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
# Nested schema for line items
class LineItem(BaseModel):
    """Individual line item on an invoice."""
    # All four fields are required: none of them declares a default.
    description: str = Field(description="Item description")
    quantity: int = Field(description="Quantity ordered")
    unit_price: float = Field(description="Price per unit in USD")
    # The model itself does not enforce the quantity * unit_price relation;
    # it is only stated in the field description.
    total: float = Field(description="Total for this line (quantity * unit_price)")
class Invoice(BaseModel):
    """Complete invoice structure."""

    # Identification and dates. Dates are plain strings; the expected format
    # is stated only in the field descriptions.
    invoice_number: str = Field(description="Invoice ID (format: INV-XXXXX)")
    date: str = Field(description="Invoice date in YYYY-MM-DD format")
    due_date: str = Field(description="Payment due date in YYYY-MM-DD format")

    # Customer details.
    customer_name: str = Field(description="Customer or company name")
    customer_email: str = Field(description="Customer email address")

    # Nested list of LineItem models.
    line_items: List[LineItem] = Field(description="List of items on the invoice")

    # Monetary summary.
    # NOTE(review): amounts are floats while Decimal is imported but unused in
    # this file - confirm whether exact decimal arithmetic was intended.
    subtotal: float = Field(description="Subtotal before tax in USD")
    tax_rate: float = Field(description="Tax rate as decimal (e.g., 0.08 for 8%)")
    tax_amount: float = Field(description="Tax amount in USD")
    total_amount: float = Field(description="Final total amount in USD")

    # The only field with a default: an empty string when there are no notes.
    notes: str = Field(
        "",
        description="Additional notes or payment instructions",
    )
def extract_invoice(invoice_text: str) -> Optional[Invoice]:
    """Extract structured invoice data from free-form invoice text.

    Args:
        invoice_text: Raw invoice text to parse.

    Returns:
        The parsed Invoice, or None when the response stopped for any reason
        other than "end_turn" (refusal, truncation, or anything unexpected).

    Raises:
        Exception: any error raised while calling the API is printed and
            then re-raised unchanged.
    """
    # The return annotation is evaluated when this def statement runs, so
    # Optional must already be imported at module level by that point.
    try:
        response = client.beta.messages.parse(
            model="claude-sonnet-4-5",
            max_tokens=2048,  # Higher for complex nested structures
            betas=["structured-outputs-2025-11-13"],
            messages=[{
                "role": "user",
                "content": f"Extract all invoice data from:\n\n{invoice_text}"
            }],
            output_format=Invoice,
        )
        stop_reason = response.stop_reason
        # Report the two well-understood failure modes with actionable text,
        # matching the contact-extraction example.
        if stop_reason == "refusal":
            print("⚠️ Request refused for safety reasons")
            return None
        if stop_reason == "max_tokens":
            print("⚠️ Response truncated - increase max_tokens")
            return None
        # Anything else that is not a normal completion is still rejected.
        if stop_reason != "end_turn":
            print(f"⚠️ Unexpected stop reason: {stop_reason}")
            return None
        return response.parsed_output
    except Exception as e:
        print(f"❌ Error: {e}")
        raise
def main():
    """Run invoice extraction example.

    Feeds one sample invoice through extract_invoice(), prints the extracted
    fields, then prints three consistency checks (subtotal, tax, total), each
    compared with an absolute tolerance of 0.01.
    """
    # Kept flush-left so the literal carries no extra indentation.
    sample_invoice = """
INVOICE
Invoice Number: INV-2024-00123
Date: 2024-01-15
Due Date: 2024-02-15
Bill To:
Acme Corporation
John Smith, CFO
john.smith@acme.com
ITEMS:
1. Cloud Hosting - Pro Plan (x3 servers)
Quantity: 3
Unit Price: $299.00
Total: $897.00
2. Database Storage - 500GB
Quantity: 500
Unit Price: $0.50
Total: $250.00
3. API Calls - Premium Tier
Quantity: 1,000,000
Unit Price: $0.001
Total: $1,000.00
4. Support - Enterprise Level
Quantity: 1
Unit Price: $500.00
Total: $500.00
Subtotal: $2,647.00
Tax (8.5%): $224.99
TOTAL: $2,871.99
Payment Terms: Net 30
Please remit payment to accounts@cloudprovider.com
"""

    def within_a_cent(expected, actual):
        # Same tolerance for all three checks below.
        return abs(expected - actual) < 0.01

    banner = "=" * 70
    print(banner)
    print("Invoice Extraction Example")
    print(banner)

    invoice = extract_invoice(sample_invoice)
    if not invoice:
        print("❌ Failed to extract invoice")
        return

    print("\n✅ Invoice Extracted Successfully\n")
    print(f"Invoice #: {invoice.invoice_number}")
    print(f"Customer: {invoice.customer_name} ({invoice.customer_email})")
    print(f"Date: {invoice.date}")
    print(f"Due: {invoice.due_date}")

    print("\nLine Items:")
    for position, entry in enumerate(invoice.line_items, 1):
        print(f" {position}. {entry.description}")
        print(f" Qty: {entry.quantity} × ${entry.unit_price:.2f} = ${entry.total:.2f}")

    print(f"\nSubtotal: ${invoice.subtotal:.2f}")
    print(f"Tax ({invoice.tax_rate * 100:.1f}%): ${invoice.tax_amount:.2f}")
    print(f"TOTAL: ${invoice.total_amount:.2f}")
    if invoice.notes:
        print(f"\nNotes: {invoice.notes}")

    # Validation checks
    print("\n🔍 Validation:")
    line_item_sum = sum(entry.total for entry in invoice.line_items)
    print(f" Subtotal matches: {within_a_cent(line_item_sum, invoice.subtotal)}")
    expected_tax = invoice.subtotal * invoice.tax_rate
    print(f" Tax calculation matches: {within_a_cent(expected_tax, invoice.tax_amount)}")
    expected_total = invoice.subtotal + invoice.tax_amount
    print(f" Total matches: {within_a_cent(expected_total, invoice.total_amount)}")
# Run the example only when this file is executed as a script.
# typing.Optional is imported with the other module-level imports: the
# extract_invoice signature needs it when the function is defined, which
# happens before this guard is reached.
if __name__ == "__main__":
    main()