Files
2025-11-29 18:16:46 +08:00

161 lines
4.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Invoice Data Extraction Example
Extracts structured invoice data from text using JSON outputs with nested schemas.
Demonstrates handling complex nested structures (line items, tax breakdown).
"""
from pydantic import BaseModel, Field
from typing import List
from decimal import Decimal
from anthropic import Anthropic
import os
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
# Nested schema for line items
class LineItem(BaseModel):
"""Individual line item on an invoice."""
description: str = Field(description="Item description")
quantity: int = Field(description="Quantity ordered")
unit_price: float = Field(description="Price per unit in USD")
total: float = Field(description="Total for this line (quantity * unit_price)")
class Invoice(BaseModel):
"""Complete invoice structure."""
invoice_number: str = Field(description="Invoice ID (format: INV-XXXXX)")
date: str = Field(description="Invoice date in YYYY-MM-DD format")
due_date: str = Field(description="Payment due date in YYYY-MM-DD format")
customer_name: str = Field(description="Customer or company name")
customer_email: str = Field(description="Customer email address")
line_items: List[LineItem] = Field(
description="List of items on the invoice"
)
subtotal: float = Field(description="Subtotal before tax in USD")
tax_rate: float = Field(description="Tax rate as decimal (e.g., 0.08 for 8%)")
tax_amount: float = Field(description="Tax amount in USD")
total_amount: float = Field(description="Final total amount in USD")
notes: str = Field(
default="",
description="Additional notes or payment instructions"
)
def extract_invoice(invoice_text: str) -> Optional[Invoice]:
"""Extract structured invoice data."""
try:
response = client.beta.messages.parse(
model="claude-sonnet-4-5",
max_tokens=2048, # Higher for complex nested structures
betas=["structured-outputs-2025-11-13"],
messages=[{
"role": "user",
"content": f"Extract all invoice data from:\n\n{invoice_text}"
}],
output_format=Invoice,
)
if response.stop_reason != "end_turn":
print(f"⚠️ Unexpected stop reason: {response.stop_reason}")
return None
return response.parsed_output
except Exception as e:
print(f"❌ Error: {e}")
raise
def main():
"""Run invoice extraction example."""
invoice_text = """
INVOICE
Invoice Number: INV-2024-00123
Date: 2024-01-15
Due Date: 2024-02-15
Bill To:
Acme Corporation
John Smith, CFO
john.smith@acme.com
ITEMS:
1. Cloud Hosting - Pro Plan (x3 servers)
Quantity: 3
Unit Price: $299.00
Total: $897.00
2. Database Storage - 500GB
Quantity: 500
Unit Price: $0.50
Total: $250.00
3. API Calls - Premium Tier
Quantity: 1,000,000
Unit Price: $0.001
Total: $1,000.00
4. Support - Enterprise Level
Quantity: 1
Unit Price: $500.00
Total: $500.00
Subtotal: $2,647.00
Tax (8.5%): $224.99
TOTAL: $2,871.99
Payment Terms: Net 30
Please remit payment to accounts@cloudprovider.com
"""
print("=" * 70)
print("Invoice Extraction Example")
print("=" * 70)
invoice = extract_invoice(invoice_text)
if invoice:
print(f"\n✅ Invoice Extracted Successfully\n")
print(f"Invoice #: {invoice.invoice_number}")
print(f"Customer: {invoice.customer_name} ({invoice.customer_email})")
print(f"Date: {invoice.date}")
print(f"Due: {invoice.due_date}")
print(f"\nLine Items:")
for i, item in enumerate(invoice.line_items, 1):
print(f" {i}. {item.description}")
print(f" Qty: {item.quantity} × ${item.unit_price:.2f} = ${item.total:.2f}")
print(f"\nSubtotal: ${invoice.subtotal:.2f}")
print(f"Tax ({invoice.tax_rate * 100:.1f}%): ${invoice.tax_amount:.2f}")
print(f"TOTAL: ${invoice.total_amount:.2f}")
if invoice.notes:
print(f"\nNotes: {invoice.notes}")
# Validation checks
print(f"\n🔍 Validation:")
calculated_subtotal = sum(item.total for item in invoice.line_items)
print(f" Subtotal matches: {abs(calculated_subtotal - invoice.subtotal) < 0.01}")
calculated_tax = invoice.subtotal * invoice.tax_rate
print(f" Tax calculation matches: {abs(calculated_tax - invoice.tax_amount) < 0.01}")
calculated_total = invoice.subtotal + invoice.tax_amount
print(f" Total matches: {abs(calculated_total - invoice.total_amount) < 0.01}")
else:
print("❌ Failed to extract invoice")
if __name__ == "__main__":
from typing import Optional
main()