Initial commit
This commit is contained in:
235
hooks/handlers/transcript_parser.rb
Executable file
235
hooks/handlers/transcript_parser.rb
Executable file
@@ -0,0 +1,235 @@
|
||||
#!/usr/bin/env ruby
|
||||
# frozen_string_literal: true

require 'json'
require 'time'
|
||||
|
||||
# TranscriptParser - Defensive JSON parsing utilities for Claude Code transcripts
|
||||
#
|
||||
# PURPOSE: Provide safe, robust parsing of Claude Code transcript JSON data
|
||||
# HANDLES: Variable content formats (String vs Array), malformed data, type validation
|
||||
#
|
||||
# KEY FEATURES:
|
||||
# - Defensive type checking before processing
|
||||
# - Graceful degradation for unexpected formats
|
||||
# - Comprehensive error handling with meaningful messages
|
||||
# - Ruby best practices for type validation
|
||||
# - Extensible for future transcript format changes
|
||||
|
||||
module TranscriptParser
  # Custom errors for specific parsing scenarios
  class TranscriptParseError < StandardError; end
  class MalformedTranscriptError < TranscriptParseError; end
  class UnsupportedContentFormatError < TranscriptParseError; end
  class MissingRequiredFieldError < TranscriptParseError; end

  # Roles passed through unchanged by extract_role; anything else becomes 'unknown'.
  VALID_ROLES = %w[user assistant system].freeze

  # Keys probed, in order of preference, when content is a Hash.
  HASH_CONTENT_KEYS = %w[text content message body data value].freeze

  # Optional top-level fields copied into the result when present (result key => JSON key).
  OPTIONAL_FIELDS = {
    tool_name: 'toolName',
    tool_input: 'toolInput',
    tool_response: 'toolResponse'
  }.freeze

  # Maximum number of characters of an unparseable line kept in :raw_line.
  RAW_LINE_PREVIEW_LENGTH = 100

  # Main entry point for parsing transcript JSON data
  #
  # In non-strict mode this method never raises: any failure (including +data+
  # not being a Hash) yields a fallback entry carrying :parse_error.
  #
  # @param data [Hash] The parsed JSON data from transcript
  # @param strict [Boolean] Whether to raise errors on malformed data (default: false)
  # @return [Hash] Normalized data structure with validated content
  # @raise [TranscriptParseError] In strict mode, when the entry cannot be parsed
  def self.parse_transcript_entry(data, strict: false)
    validate_transcript_structure!(data)

    result = {
      request_id: extract_request_id(data),
      timestamp: extract_timestamp(data),
      message: parse_message(data['message'], strict: strict)
    }

    # Add optional fields if present
    OPTIONAL_FIELDS.each do |result_key, json_key|
      result[result_key] = data[json_key] if data.key?(json_key)
    end

    result
  rescue StandardError => e
    raise TranscriptParseError, "Failed to parse transcript entry: #{e.message}" if strict

    fallback_entry(data, e)
  end

  # Extract and normalize message content with defensive type handling
  #
  # @param message [Hash] The message object from transcript data
  # @param strict [Boolean] Whether to raise errors on unsupported formats
  # @return [Hash] Normalized message with :role, :content, :content_type and :raw_content
  # @raise [MalformedTranscriptError] If message is not a Hash
  # @raise [MissingRequiredFieldError] If 'role' or 'content' is missing
  def self.parse_message(message, strict: false)
    validate_message_structure!(message)

    content = extract_content(message['content'], strict: strict)

    {
      role: extract_role(message),
      content: content[:text],
      content_type: content[:type],
      raw_content: message['content'] # Preserve original for debugging
    }
  end

  # Safely extract text content from variable format content field
  #
  # @param content_data [String, Array, Hash, nil, Object] The content field from message
  # @param strict [Boolean] Whether to raise errors on unsupported formats
  # @return [Hash] Hash with :text and :type keys
  # @raise [UnsupportedContentFormatError] In strict mode, for any other content class
  def self.extract_content(content_data, strict: false)
    case content_data
    when String
      { text: content_data, type: 'string' }
    when Array
      { text: extract_content_from_array(content_data), type: 'array' }
    when Hash
      { text: extract_content_from_hash(content_data), type: 'hash' }
    when nil
      { text: '', type: 'nil' }
    else
      if strict
        raise UnsupportedContentFormatError,
              "Unsupported content format: #{content_data.class} - #{content_data.inspect}"
      end

      { text: content_data.to_s, type: 'converted' }
    end
  end

  # Extract text from array-based content (structured message format)
  #
  # Hash blocks are reduced to their text (see text_from_block), Strings are
  # used as-is, anything else is converted with #to_s. Blocks that yield no
  # text are dropped; the remaining pieces are concatenated without separator.
  #
  # @param content_array [Array] Array of content blocks
  # @return [String] Concatenated text content ('' when not given an Array)
  def self.extract_content_from_array(content_array)
    return '' unless content_array.is_a?(Array)

    pieces = content_array.filter_map do |block|
      case block
      when Hash then text_from_block(block)
      when String then block
      else block.to_s
      end
    end

    pieces.join
  end

  # Extract text from hash-based content
  #
  # @param content_hash [Hash] Hash containing content data
  # @return [String] First String found under a known key (string or symbol
  #   form), otherwise the Hash's #to_s representation for debugging
  def self.extract_content_from_hash(content_hash)
    return '' unless content_hash.is_a?(Hash)

    # Try common content keys in order of preference
    HASH_CONTENT_KEYS.each do |key|
      [key, key.to_sym].each do |candidate|
        value = content_hash[candidate]
        return value if value.is_a?(String)
      end
    end

    # Fallback: convert whole hash to string for debugging
    content_hash.to_s
  end

  # Validate basic transcript entry structure
  #
  # @param data [Hash] The transcript entry data
  # @raise [MalformedTranscriptError] If data is not a Hash
  # @raise [MissingRequiredFieldError] If the 'message' field is missing
  # @return [void]
  def self.validate_transcript_structure!(data)
    raise MalformedTranscriptError, 'Data must be a Hash' unless data.is_a?(Hash)
    raise MissingRequiredFieldError, "Missing required 'message' field" unless data.key?('message')
  end

  # Validate message structure
  #
  # @param message [Hash] The message object
  # @raise [MalformedTranscriptError] If message is not a Hash
  # @raise [MissingRequiredFieldError] If 'role' or 'content' is missing
  # @return [void]
  def self.validate_message_structure!(message)
    raise MalformedTranscriptError, 'Message must be a Hash' unless message.is_a?(Hash)
    raise MissingRequiredFieldError, "Missing required 'role' field in message" unless message.key?('role')
    raise MissingRequiredFieldError, "Missing required 'content' field in message" unless message.key?('content')
  end

  # Extract request ID with fallback
  #
  # @param data [Hash] The transcript entry data
  # @return [String] Request ID or generated fallback ("req_<current time>")
  def self.extract_request_id(data)
    data['requestId'] || data['request_id'] || "req_#{Time.now.to_f}"
  end

  # Extract timestamp with fallback
  #
  # Numeric values are returned unchanged. String values are parsed as
  # ISO-8601 and converted to epoch seconds instead of being discarded.
  # Anything else (or an unparseable String) falls back to the current time.
  #
  # @param data [Hash] The transcript entry data
  # @return [Numeric] Timestamp or current time
  def self.extract_timestamp(data)
    raw = data['timestamp'] || data['created_at']

    case raw
    when Numeric then raw
    when String then parse_time_string(raw) || Time.now.to_f
    else Time.now.to_f
    end
  end

  # Extract role with validation
  #
  # @param message [Hash] The message object
  # @return [String] The role (user, assistant, system) or 'unknown'
  def self.extract_role(message)
    role = message['role'] || message[:role]
    VALID_ROLES.include?(role) ? role : 'unknown'
  end

  # Batch process multiple transcript entries with error isolation
  #
  # nil and blank lines are skipped. In non-strict mode a line that is not
  # valid JSON produces an error entry with :line_number (1-based),
  # :parse_error, :raw_line (truncated preview) and a placeholder :message.
  #
  # @param lines [Array<String>] Array of JSON strings from transcript file
  # @param strict [Boolean] Whether to raise errors on malformed entries
  # @return [Array<Hash>] Array of parsed entries, with errors isolated
  # @raise [TranscriptParseError] In strict mode, on the first line that fails
  def self.parse_transcript_lines(lines, strict: false)
    results = []

    lines.each_with_index do |line, index|
      next if line.nil?

      stripped = line.strip
      next if stripped.empty?

      line_number = index + 1

      begin
        results << parse_transcript_entry(JSON.parse(stripped), strict: strict)
      rescue JSON::ParserError => e
        raise TranscriptParseError, "JSON parse error on line #{line_number}: #{e.message}" if strict

        results << line_error_entry(line_number, stripped, "JSON parse error: #{e.message}", 'json_error')
      rescue StandardError => e
        raise TranscriptParseError, "Parse error on line #{line_number}: #{e.message}" if strict

        results << line_error_entry(line_number, stripped, "Unexpected error: #{e.message}", 'parse_error')
      end
    end

    results
  end

  # Helper method to safely read and parse transcript file
  #
  # @param file_path [String] Path to transcript file
  # @param strict [Boolean] Whether to raise errors on malformed data
  # @return [Array<Hash>] Array of parsed transcript entries
  # @raise [TranscriptParseError] If file_path is not an existing regular file
  def self.parse_transcript_file(file_path, strict: false)
    # File.file? (rather than File.exist?) also rejects directories up front.
    raise TranscriptParseError, "Transcript file not found: #{file_path}" unless File.file?(file_path)

    parse_transcript_lines(File.readlines(file_path, chomp: true), strict: strict)
  end

  # Build the non-strict fallback entry; tolerates +data+ not being a Hash.
  def self.fallback_entry(data, error)
    source = data.is_a?(Hash) ? data : {}

    {
      request_id: source['requestId'] || 'unknown',
      timestamp: source['timestamp'] || Time.now.to_f,
      message: { role: 'unknown', content: '', type: 'fallback' },
      parse_error: error.message
    }
  end

  # Convert an ISO-8601 string to epoch seconds; nil when it cannot be parsed.
  def self.parse_time_string(value)
    Time.iso8601(value).to_f
  rescue ArgumentError
    nil
  end

  # Reduce one Hash content block to its text; nil means "drop this block".
  def self.text_from_block(block)
    # Skip tool_use entries as they don't contain displayable text
    return if block['type'] == 'tool_use' || block[:type] == 'tool_use'

    block['text'] || block[:text] ||
      nested_block_text(block['content'] || block[:content]) ||
      first_string_value(block)
  end

  # Text held under a block's 'content' key, which may itself be a String,
  # an Array of blocks, or a Hash with a 'text' entry. Hash#dig cannot be used
  # here because it raises TypeError when the intermediate value is a String.
  def self.nested_block_text(nested)
    case nested
    when String then nested
    when Array then extract_content_from_array(nested)
    when Hash then nested['text'] || nested[:text]
    end
  end

  # Last-resort heuristic: first non-empty String value in the block, ignoring
  # the 'type' discriminator so the block's type name is never emitted as text.
  def self.first_string_value(block)
    pair = block.find do |key, value|
      key.to_s != 'type' && value.is_a?(String) && !value.empty?
    end
    pair&.last
  end

  # Build the error entry recorded for a line that could not be processed.
  def self.line_error_entry(line_number, stripped_line, error_message, type)
    {
      line_number: line_number,
      parse_error: error_message,
      raw_line: truncate_line(stripped_line),
      message: { role: 'error', content: '', type: type }
    }
  end

  # Shorten text to the preview length, appending '...' only when text was cut.
  def self.truncate_line(text)
    return text if text.length <= RAW_LINE_PREVIEW_LENGTH

    "#{text[0, RAW_LINE_PREVIEW_LENGTH]}..."
  end

  private_class_method :fallback_entry, :parse_time_string, :text_from_block,
                       :nested_block_text, :first_string_value,
                       :line_error_entry, :truncate_line
end
|
||||
Reference in New Issue
Block a user