Initial commit
This commit is contained in:
323
skills/consultant/scripts/file_handler.py
Normal file
323
skills/consultant/scripts/file_handler.py
Normal file
@@ -0,0 +1,323 @@
|
||||
"""
|
||||
File handling for consultant CLI.
|
||||
Categorizes and processes files: images, office documents, and text files.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import mimetypes
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from markitdown import MarkItDown
|
||||
|
||||
|
||||
class FileCategory(Enum):
|
||||
"""Categories of files the CLI can handle"""
|
||||
|
||||
IMAGE = "image"
|
||||
OFFICE = "office"
|
||||
TEXT = "text"
|
||||
|
||||
|
||||
@dataclass
|
||||
class ProcessedFile:
|
||||
"""Result of successfully processing a file"""
|
||||
|
||||
path: str
|
||||
category: FileCategory
|
||||
content: str = "" # For text/office: the text content
|
||||
base64_data: str = "" # For images: base64 encoded data
|
||||
mime_type: str = "" # For images: the MIME type
|
||||
|
||||
|
||||
@dataclass
|
||||
class FileError:
|
||||
"""Error details for a file that failed processing"""
|
||||
|
||||
path: str
|
||||
reason: str
|
||||
|
||||
|
||||
# File extension constants
|
||||
IMAGE_EXTENSIONS = frozenset({".png", ".jpg", ".jpeg", ".gif", ".webp"})
|
||||
OFFICE_EXTENSIONS = frozenset({".xls", ".xlsx", ".docx", ".pptx"})
|
||||
|
||||
# Size limits
|
||||
MAX_IMAGE_SIZE_BYTES = 20 * 1024 * 1024 # 20MB
|
||||
|
||||
|
||||
class FileHandler:
|
||||
"""Main file processing coordinator"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._markitdown = MarkItDown()
|
||||
|
||||
def process_files(
|
||||
self, file_paths: list[str]
|
||||
) -> tuple[list[ProcessedFile], list[FileError]]:
|
||||
"""
|
||||
Process a list of file paths and return categorized results.
|
||||
|
||||
Returns:
|
||||
Tuple of (successfully processed files, errors)
|
||||
"""
|
||||
processed: list[ProcessedFile] = []
|
||||
errors: list[FileError] = []
|
||||
|
||||
for file_path in file_paths:
|
||||
path = Path(file_path)
|
||||
|
||||
# Validate file exists
|
||||
if not path.exists():
|
||||
errors.append(FileError(path=str(path), reason="File not found"))
|
||||
continue
|
||||
|
||||
if not path.is_file():
|
||||
errors.append(FileError(path=str(path), reason="Not a file"))
|
||||
continue
|
||||
|
||||
# Categorize and process
|
||||
category = self._categorize(path)
|
||||
|
||||
if category == FileCategory.IMAGE:
|
||||
result = self._process_image(path)
|
||||
elif category == FileCategory.OFFICE:
|
||||
result = self._process_office(path)
|
||||
else: # FileCategory.TEXT
|
||||
result = self._process_text(path)
|
||||
|
||||
if isinstance(result, FileError):
|
||||
errors.append(result)
|
||||
else:
|
||||
processed.append(result)
|
||||
|
||||
return processed, errors
|
||||
|
||||
def _categorize(self, path: Path) -> FileCategory:
|
||||
"""Determine the category of a file based on extension"""
|
||||
suffix = path.suffix.lower()
|
||||
|
||||
if suffix in IMAGE_EXTENSIONS:
|
||||
return FileCategory.IMAGE
|
||||
|
||||
if suffix in OFFICE_EXTENSIONS:
|
||||
return FileCategory.OFFICE
|
||||
|
||||
# Default: assume text, will validate during processing
|
||||
return FileCategory.TEXT
|
||||
|
||||
def _process_image(self, path: Path) -> ProcessedFile | FileError:
|
||||
"""Process an image file: validate size and encode to base64"""
|
||||
try:
|
||||
# Read binary content
|
||||
data = path.read_bytes()
|
||||
|
||||
# Check size limit
|
||||
if len(data) > MAX_IMAGE_SIZE_BYTES:
|
||||
size_mb = len(data) / (1024 * 1024)
|
||||
max_mb = MAX_IMAGE_SIZE_BYTES / (1024 * 1024)
|
||||
return FileError(
|
||||
path=str(path),
|
||||
reason=f"Image too large: {size_mb:.1f}MB (max {max_mb:.0f}MB)",
|
||||
)
|
||||
|
||||
# Encode to base64
|
||||
base64_data = base64.b64encode(data).decode("utf-8")
|
||||
|
||||
# Determine MIME type
|
||||
mime_type, _ = mimetypes.guess_type(str(path))
|
||||
if not mime_type:
|
||||
# Fallback based on extension
|
||||
ext = path.suffix.lower()
|
||||
mime_map = {
|
||||
".png": "image/png",
|
||||
".jpg": "image/jpeg",
|
||||
".jpeg": "image/jpeg",
|
||||
".gif": "image/gif",
|
||||
".webp": "image/webp",
|
||||
}
|
||||
mime_type = mime_map.get(ext, "application/octet-stream")
|
||||
|
||||
return ProcessedFile(
|
||||
path=str(path),
|
||||
category=FileCategory.IMAGE,
|
||||
base64_data=base64_data,
|
||||
mime_type=mime_type,
|
||||
)
|
||||
except Exception as e:
|
||||
return FileError(path=str(path), reason=f"Failed to process image: {e}")
|
||||
|
||||
def _process_office(self, path: Path) -> ProcessedFile | FileError:
|
||||
"""Process an office document using markitdown"""
|
||||
try:
|
||||
result = self._markitdown.convert(str(path))
|
||||
content = result.text_content
|
||||
|
||||
if not content or not content.strip():
|
||||
return FileError(
|
||||
path=str(path), reason="markitdown returned empty content"
|
||||
)
|
||||
|
||||
return ProcessedFile(
|
||||
path=str(path),
|
||||
category=FileCategory.OFFICE,
|
||||
content=content,
|
||||
)
|
||||
except Exception as e:
|
||||
return FileError(
|
||||
path=str(path), reason=f"markitdown conversion failed: {e}"
|
||||
)
|
||||
|
||||
def _process_text(self, path: Path) -> ProcessedFile | FileError:
|
||||
"""Process a text file: attempt UTF-8 decode"""
|
||||
try:
|
||||
content = path.read_text(encoding="utf-8")
|
||||
|
||||
# Check for empty or whitespace-only files
|
||||
if not content or not content.strip():
|
||||
return FileError(
|
||||
path=str(path),
|
||||
reason="File is empty or contains only whitespace",
|
||||
)
|
||||
|
||||
return ProcessedFile(
|
||||
path=str(path),
|
||||
category=FileCategory.TEXT,
|
||||
content=content,
|
||||
)
|
||||
except UnicodeDecodeError as e:
|
||||
return FileError(
|
||||
path=str(path),
|
||||
reason=f"Not a valid UTF-8 text file: {e}",
|
||||
)
|
||||
except Exception as e:
|
||||
return FileError(path=str(path), reason=f"Failed to read file: {e}")
|
||||
|
||||
|
||||
def validate_vision_support(model: str, has_images: bool) -> None:
|
||||
"""
|
||||
Validate that the model supports vision if images are present.
|
||||
Exits with code 2 if validation fails.
|
||||
"""
|
||||
if not has_images:
|
||||
return
|
||||
|
||||
from litellm import supports_vision
|
||||
|
||||
if not supports_vision(model=model):
|
||||
print(
|
||||
f"\nERROR: Model '{model}' does not support vision/images.\n",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
"Image files were provided but the selected model cannot process them.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print("\nSuggestions:", file=sys.stderr)
|
||||
print(" 1. Use a vision-capable model:", file=sys.stderr)
|
||||
print(" - gpt-5.1, gpt-5-vision (OpenAI)", file=sys.stderr)
|
||||
print(
|
||||
" - claude-sonnet-4-5, claude-opus-4 (Anthropic)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
print(
|
||||
" - gemini/gemini-2.5-flash, gemini/gemini-3-pro-preview (Google)", file=sys.stderr
|
||||
)
|
||||
print(" 2. Remove image files from the request", file=sys.stderr)
|
||||
print(" 3. Convert images to text descriptions first\n", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
|
||||
def build_prompt_with_references(prompt: str, files: list[ProcessedFile]) -> str:
|
||||
"""
|
||||
Build the text portion of the prompt with Reference Files section.
|
||||
Does NOT include images (those go in the multimodal array).
|
||||
|
||||
Args:
|
||||
prompt: The user's original prompt
|
||||
files: List of successfully processed files
|
||||
|
||||
Returns:
|
||||
The full prompt with reference files section appended
|
||||
"""
|
||||
# Filter to text and office files only (images handled separately)
|
||||
text_content_files = [
|
||||
f for f in files if f.category in (FileCategory.TEXT, FileCategory.OFFICE)
|
||||
]
|
||||
|
||||
# Also get image files for the note
|
||||
image_files = [f for f in files if f.category == FileCategory.IMAGE]
|
||||
|
||||
if not text_content_files and not image_files:
|
||||
return prompt
|
||||
|
||||
parts = [prompt]
|
||||
|
||||
# Add reference files section if there are text/office files
|
||||
if text_content_files:
|
||||
parts.append("\n\n" + "=" * 80)
|
||||
parts.append("\n\n## Reference Files\n")
|
||||
|
||||
for file in text_content_files:
|
||||
parts.append(f"\n### {file.path}\n")
|
||||
parts.append(f"```\n{file.content}\n```\n")
|
||||
|
||||
# Add note about images if present
|
||||
if image_files:
|
||||
parts.append("\n\n" + "-" * 40)
|
||||
parts.append(
|
||||
f"\n*Note: {len(image_files)} image(s) attached for visual analysis.*\n"
|
||||
)
|
||||
for img in image_files:
|
||||
parts.append(f"- {img.path}\n")
|
||||
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
def build_multimodal_content(
|
||||
text_prompt: str, files: list[ProcessedFile]
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Build multimodal content array for LLM APIs.
|
||||
|
||||
Uses the standard OpenAI Chat Completions format which is widely supported.
|
||||
Response strategies will convert to API-specific formats as needed.
|
||||
|
||||
Format:
|
||||
- Text: {"type": "text", "text": "..."}
|
||||
- Image: {"type": "image_url", "image_url": {"url": "data:...", "detail": "auto"}}
|
||||
|
||||
Args:
|
||||
text_prompt: The text portion of the prompt (with reference files)
|
||||
files: List of successfully processed files
|
||||
|
||||
Returns:
|
||||
Multimodal content array
|
||||
"""
|
||||
content: list[dict[str, Any]] = []
|
||||
|
||||
# Text content
|
||||
content.append({"type": "text", "text": text_prompt})
|
||||
|
||||
# Images with base64 data URLs
|
||||
for f in files:
|
||||
if f.category == FileCategory.IMAGE:
|
||||
content.append(
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": f"data:{f.mime_type};base64,{f.base64_data}",
|
||||
"detail": "auto",
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
return content
|
||||
|
||||
|
||||
def has_images(files: list[ProcessedFile]) -> bool:
|
||||
"""Check if any processed files are images"""
|
||||
return any(f.category == FileCategory.IMAGE for f in files)
|
||||
Reference in New Issue
Block a user