161 lines
4.8 KiB
Python
161 lines
4.8 KiB
Python
"""
|
||
Invoice Data Extraction Example
|
||
|
||
Extracts structured invoice data from text using JSON outputs with nested schemas.
|
||
Demonstrates handling complex nested structures (line items, tax breakdown).
|
||
"""
|
||
|
||
from pydantic import BaseModel, Field
|
||
from typing import List
|
||
from decimal import Decimal
|
||
from anthropic import Anthropic
|
||
import os
|
||
|
||
client = Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
|
||
|
||
|
||
# Nested schema for line items
|
||
class LineItem(BaseModel):
|
||
"""Individual line item on an invoice."""
|
||
description: str = Field(description="Item description")
|
||
quantity: int = Field(description="Quantity ordered")
|
||
unit_price: float = Field(description="Price per unit in USD")
|
||
total: float = Field(description="Total for this line (quantity * unit_price)")
|
||
|
||
|
||
class Invoice(BaseModel):
|
||
"""Complete invoice structure."""
|
||
invoice_number: str = Field(description="Invoice ID (format: INV-XXXXX)")
|
||
date: str = Field(description="Invoice date in YYYY-MM-DD format")
|
||
due_date: str = Field(description="Payment due date in YYYY-MM-DD format")
|
||
customer_name: str = Field(description="Customer or company name")
|
||
customer_email: str = Field(description="Customer email address")
|
||
|
||
line_items: List[LineItem] = Field(
|
||
description="List of items on the invoice"
|
||
)
|
||
|
||
subtotal: float = Field(description="Subtotal before tax in USD")
|
||
tax_rate: float = Field(description="Tax rate as decimal (e.g., 0.08 for 8%)")
|
||
tax_amount: float = Field(description="Tax amount in USD")
|
||
total_amount: float = Field(description="Final total amount in USD")
|
||
|
||
notes: str = Field(
|
||
default="",
|
||
description="Additional notes or payment instructions"
|
||
)
|
||
|
||
|
||
def extract_invoice(invoice_text: str) -> Optional[Invoice]:
|
||
"""Extract structured invoice data."""
|
||
try:
|
||
response = client.beta.messages.parse(
|
||
model="claude-sonnet-4-5",
|
||
max_tokens=2048, # Higher for complex nested structures
|
||
betas=["structured-outputs-2025-11-13"],
|
||
messages=[{
|
||
"role": "user",
|
||
"content": f"Extract all invoice data from:\n\n{invoice_text}"
|
||
}],
|
||
output_format=Invoice,
|
||
)
|
||
|
||
if response.stop_reason != "end_turn":
|
||
print(f"⚠️ Unexpected stop reason: {response.stop_reason}")
|
||
return None
|
||
|
||
return response.parsed_output
|
||
|
||
except Exception as e:
|
||
print(f"❌ Error: {e}")
|
||
raise
|
||
|
||
|
||
def main():
|
||
"""Run invoice extraction example."""
|
||
|
||
invoice_text = """
|
||
INVOICE
|
||
|
||
Invoice Number: INV-2024-00123
|
||
Date: 2024-01-15
|
||
Due Date: 2024-02-15
|
||
|
||
Bill To:
|
||
Acme Corporation
|
||
John Smith, CFO
|
||
john.smith@acme.com
|
||
|
||
ITEMS:
|
||
1. Cloud Hosting - Pro Plan (x3 servers)
|
||
Quantity: 3
|
||
Unit Price: $299.00
|
||
Total: $897.00
|
||
|
||
2. Database Storage - 500GB
|
||
Quantity: 500
|
||
Unit Price: $0.50
|
||
Total: $250.00
|
||
|
||
3. API Calls - Premium Tier
|
||
Quantity: 1,000,000
|
||
Unit Price: $0.001
|
||
Total: $1,000.00
|
||
|
||
4. Support - Enterprise Level
|
||
Quantity: 1
|
||
Unit Price: $500.00
|
||
Total: $500.00
|
||
|
||
Subtotal: $2,647.00
|
||
Tax (8.5%): $224.99
|
||
TOTAL: $2,871.99
|
||
|
||
Payment Terms: Net 30
|
||
Please remit payment to accounts@cloudprovider.com
|
||
"""
|
||
|
||
print("=" * 70)
|
||
print("Invoice Extraction Example")
|
||
print("=" * 70)
|
||
|
||
invoice = extract_invoice(invoice_text)
|
||
|
||
if invoice:
|
||
print(f"\n✅ Invoice Extracted Successfully\n")
|
||
print(f"Invoice #: {invoice.invoice_number}")
|
||
print(f"Customer: {invoice.customer_name} ({invoice.customer_email})")
|
||
print(f"Date: {invoice.date}")
|
||
print(f"Due: {invoice.due_date}")
|
||
print(f"\nLine Items:")
|
||
|
||
for i, item in enumerate(invoice.line_items, 1):
|
||
print(f" {i}. {item.description}")
|
||
print(f" Qty: {item.quantity} × ${item.unit_price:.2f} = ${item.total:.2f}")
|
||
|
||
print(f"\nSubtotal: ${invoice.subtotal:.2f}")
|
||
print(f"Tax ({invoice.tax_rate * 100:.1f}%): ${invoice.tax_amount:.2f}")
|
||
print(f"TOTAL: ${invoice.total_amount:.2f}")
|
||
|
||
if invoice.notes:
|
||
print(f"\nNotes: {invoice.notes}")
|
||
|
||
# Validation checks
|
||
print(f"\n🔍 Validation:")
|
||
calculated_subtotal = sum(item.total for item in invoice.line_items)
|
||
print(f" Subtotal matches: {abs(calculated_subtotal - invoice.subtotal) < 0.01}")
|
||
|
||
calculated_tax = invoice.subtotal * invoice.tax_rate
|
||
print(f" Tax calculation matches: {abs(calculated_tax - invoice.tax_amount) < 0.01}")
|
||
|
||
calculated_total = invoice.subtotal + invoice.tax_amount
|
||
print(f" Total matches: {abs(calculated_total - invoice.total_amount) < 0.01}")
|
||
|
||
else:
|
||
print("❌ Failed to extract invoice")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
from typing import Optional
|
||
main()
|