#!/usr/bin/env -S uv run --script
# /// script
# requires-python = "~=3.12"
# dependencies = [
#   "orjson",
#   "rich",
#   "typer",
#   "httpx",
# ]
# ///
"""Convert a Claude Code JSONL session transcript into a Markdown report.

The report contains a statistics header (models, token usage, estimated cost,
timing) followed by the user/assistant messages. Everything from the most
recent ``/share`` command onwards is excluded from the report.
"""

from __future__ import annotations

import sys
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, NotRequired, TypedDict

import httpx  # pyright: ignore[reportMissingImports]
import orjson  # pyright: ignore[reportMissingImports]
import typer  # pyright: ignore[reportMissingImports]
from rich.console import Console  # pyright: ignore[reportMissingImports]
from rich.markdown import Markdown  # pyright: ignore[reportMissingImports]

app = typer.Typer()
console = Console()

LITELLM_PRICING_URL = (
    "https://raw.githubusercontent.com/BerriAI/litellm/refs/heads/main/model_prices_and_context_window.json"
)

# Transcript record types that are kept when loading the JSONL file.
_MESSAGE_TYPES = ("user", "assistant", "compact_marker")
# Substrings that identify the share command inside a user message.
_SHARE_COMMANDS = ("/share", "/cc-plus:share")
# Maximum number of characters shown for one tool parameter value.
_PARAM_PREVIEW_LIMIT = 100
# Number of characters of the report echoed to the terminal after saving.
_TERMINAL_PREVIEW_CHARS = 1000


class ModelPricing(TypedDict):
    """Pricing entry for one model as read from the LiteLLM pricing JSON."""

    input_cost_per_token: NotRequired[float]
    output_cost_per_token: NotRequired[float]
    cache_creation_input_token_cost: NotRequired[float]
    cache_read_input_token_cost: NotRequired[float]
    litellm_provider: str
    mode: str


@dataclass
class _SessionStats:
    """Aggregated token, cost and timing figures for one transcript."""

    total_cost: float = 0.0
    input_tokens: int = 0
    output_tokens: int = 0
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0
    models_used: dict[str, int] = field(default_factory=dict)
    session_seconds: float = 0.0
    llm_seconds: float = 0.0

    @property
    def total_tokens(self) -> int:
        """Sum of all four token categories."""
        return self.input_tokens + self.output_tokens + self.cache_creation_tokens + self.cache_read_tokens


def escape_xml_tags(text: str) -> str:
    """Escape XML/HTML tags so they display in markdown."""
    return text.replace("<", r"\<").replace(">", r"\>")


def parse_timestamp(ts: str) -> str:
    """Format an ISO-8601 timestamp in the local timezone.

    Returns ``ts`` unchanged when it cannot be parsed.
    """
    try:
        dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except (ValueError, AttributeError):
        return ts
    return dt.astimezone().strftime("%Y-%m-%d %H:%M:%S %Z")


def parse_timestamp_to_datetime(ts: str) -> datetime | None:
    """Parse an ISO-8601 timestamp, returning ``None`` when it is invalid."""
    try:
        return datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except (ValueError, AttributeError):
        return None


def format_duration(seconds: float) -> str:
    """Render a duration as ``Ns``, ``Nm Ns`` or ``Nh Nm[ Ns]``.

    The value is rounded to whole seconds first so that the unit boundaries
    are consistent (59.6 seconds is shown as ``1m 0s`` rather than ``60s``)
    and a sub-second remainder never produces a trailing ``0s``.
    """
    total = round(seconds)
    if total < 60:
        return f"{total}s"
    minutes, secs = divmod(total, 60)
    if total < 3600:
        return f"{minutes}m {secs}s"
    hours, minutes = divmod(minutes, 60)
    if secs > 0:
        return f"{hours}h {minutes}m {secs}s"
    return f"{hours}h {minutes}m"


def _truncate(text: str, limit: int = _PARAM_PREVIEW_LIMIT) -> str:
    """Cut ``text`` to ``limit`` characters, appending ``...`` when shortened."""
    return f"{text[:limit]}..." if len(text) > limit else text


def format_tool_parameters(params: dict[str, Any]) -> str:
    """Render tool-call parameters as one ``**key**: `value``` paragraph each.

    Strings and JSON-encoded lists/dicts are truncated to 100 characters;
    other values are shown via ``str()`` without truncation.
    """
    if not params:
        return "_No parameters_"

    lines: list[str] = []
    for key, value in params.items():
        if isinstance(value, str):
            shown = _truncate(value)
        elif isinstance(value, (list, dict)):
            shown = _truncate(orjson.dumps(value, option=orjson.OPT_INDENT_2).decode())
        else:
            shown = value
        lines.append(f"**{key}**: `{shown}`")
    return "\n\n".join(lines)


def format_tool_result(result: dict[str, Any]) -> str:
    """Render a ``tool_result`` item as a fenced code block.

    Errors are prefixed with an error marker; non-string content is shown as
    indented JSON.
    """
    content = result.get("content", "")
    if result.get("is_error", False):
        return f"❌ **Error**:\n```\n{content}\n```"
    if isinstance(content, str):
        return f"```\n{content}\n```"
    return f"```json\n{orjson.dumps(content, option=orjson.OPT_INDENT_2).decode()}\n```"


def build_tool_map(messages: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Map each ``tool_use_id`` to its ``tool_result`` item from user messages."""
    tool_map: dict[str, dict[str, Any]] = {}
    for msg in messages:
        if msg.get("type") != "user":
            continue
        content = (msg.get("message") or {}).get("content", [])
        if not isinstance(content, list):
            continue
        for item in content:
            if not isinstance(item, dict) or item.get("type") != "tool_result":
                continue
            tool_use_id = item.get("tool_use_id")
            if tool_use_id:
                tool_map[tool_use_id] = item
    return tool_map


def fetch_pricing_data() -> dict[str, ModelPricing]:
    """Download the LiteLLM pricing table, dropping its ``sample_spec`` entry.

    Raises:
        httpx.HTTPError: If the request fails or returns an error status.
        orjson.JSONDecodeError: If the response body is not valid JSON.
    """
    with httpx.Client(timeout=30.0) as client:
        response = client.get(LITELLM_PRICING_URL)
        response.raise_for_status()
        data = orjson.loads(response.content)
    data.pop("sample_spec", None)
    return data


def _mentions_share(text: Any) -> bool:
    """Return True when ``text`` is a string containing a share command."""
    return isinstance(text, str) and any(command in text for command in _SHARE_COMMANDS)


def find_last_share_command_index(messages: list[dict[str, Any]]) -> int | None:
    """Return the index of the last user message mentioning the share command.

    Both plain-string content and the ``text`` field of list content items
    are inspected. Returns ``None`` when no user message matches.
    """
    for index in range(len(messages) - 1, -1, -1):
        msg = messages[index]
        if msg.get("type") != "user":
            continue
        content = (msg.get("message") or {}).get("content", [])
        if isinstance(content, str):
            if _mentions_share(content):
                return index
        elif isinstance(content, list):
            if any(isinstance(item, dict) and _mentions_share(item.get("text", "")) for item in content):
                return index
    return None


def calculate_message_cost(
    usage: dict[str, Any], model: str, pricing_data: dict[str, ModelPricing]
) -> tuple[float, dict[str, int]]:
    """Compute the cost and token breakdown of one assistant message.

    Args:
        usage: The ``usage`` mapping of the message (token counts).
        model: Model name used as the key into ``pricing_data``.
        pricing_data: Pricing table keyed by model name.

    Returns:
        ``(cost, breakdown)`` where ``breakdown`` has the keys ``input``,
        ``output``, ``cache_creation`` and ``cache_read``. The breakdown is
        returned even when the model has no pricing entry (cost is then
        ``0.0``), so token statistics do not depend on pricing availability.
    """
    # ``or 0`` also covers keys that are present but null.
    token_breakdown = {
        "input": usage.get("input_tokens") or 0,
        "output": usage.get("output_tokens") or 0,
        "cache_creation": usage.get("cache_creation_input_tokens") or 0,
        "cache_read": usage.get("cache_read_input_tokens") or 0,
    }

    pricing = pricing_data.get(model)
    if not pricing:
        return 0.0, token_breakdown

    total_cost = (
        token_breakdown["input"] * (pricing.get("input_cost_per_token") or 0.0)
        + token_breakdown["output"] * (pricing.get("output_cost_per_token") or 0.0)
        + token_breakdown["cache_creation"] * (pricing.get("cache_creation_input_token_cost") or 0.0)
        + token_breakdown["cache_read"] * (pricing.get("cache_read_input_token_cost") or 0.0)
    )
    return total_cost, token_breakdown


def _load_messages(transcript_path: Path) -> list[dict[str, Any]]:
    """Read the JSONL transcript, keeping user/assistant/compact_marker records.

    Blank lines, lines that are not valid JSON and JSON values that are not
    objects are skipped.
    """
    messages: list[dict[str, Any]] = []
    with transcript_path.open("rb") as f:
        for line in f:
            if not line.strip():
                continue
            try:
                data = orjson.loads(line)
            except orjson.JSONDecodeError:
                continue
            if isinstance(data, dict) and data.get("type") in _MESSAGE_TYPES:
                messages.append(data)
    return messages


def _collect_statistics(messages: list[dict[str, Any]], pricing_data: dict[str, ModelPricing]) -> _SessionStats:
    """Accumulate token, cost and timing statistics over ``messages``."""
    stats = _SessionStats()
    first_timestamp: datetime | None = None
    last_timestamp: datetime | None = None
    last_user_timestamp: datetime | None = None
    llm_started = False

    for msg in messages:
        msg_type = msg.get("type")
        if msg_type == "compact_marker":
            continue

        timestamp_dt = parse_timestamp_to_datetime(msg.get("timestamp", ""))
        if timestamp_dt is not None:
            if first_timestamp is None:
                first_timestamp = timestamp_dt
            last_timestamp = timestamp_dt

        if msg_type == "user":
            last_user_timestamp = timestamp_dt
            llm_started = False
        elif msg_type == "assistant":
            # Only the gap between a user message and the first assistant
            # message that follows it counts as LLM time.
            if last_user_timestamp is not None and timestamp_dt is not None and not llm_started:
                stats.llm_seconds += (timestamp_dt - last_user_timestamp).total_seconds()
                llm_started = True

            message_data = msg.get("message") or {}
            usage = message_data.get("usage")
            if usage:
                model = message_data.get("model", "unknown")
                stats.models_used[model] = stats.models_used.get(model, 0) + 1
                cost, breakdown = calculate_message_cost(usage, model, pricing_data)
                stats.total_cost += cost
                stats.input_tokens += breakdown.get("input", 0)
                stats.output_tokens += breakdown.get("output", 0)
                stats.cache_creation_tokens += breakdown.get("cache_creation", 0)
                stats.cache_read_tokens += breakdown.get("cache_read", 0)

    if first_timestamp is not None and last_timestamp is not None:
        stats.session_seconds = (last_timestamp - first_timestamp).total_seconds()
    return stats


def _render_statistics(messages: list[dict[str, Any]], stats: _SessionStats) -> list[str]:
    """Build the report header: session info, token usage, cost and timeline."""
    models = ", ".join(f"{model} ({count})" for model, count in stats.models_used.items())
    lines = [
        "# 🤖 Claude Code Session Transcript",
        "",
        f"**Session ID**: `{messages[0].get('sessionId', 'unknown')}`",
        f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Messages**: {len(messages)}",
        "",
        "## 📊 Session Statistics",
        "",
        f"**Models Used**: {models}",
        "",
        "### Token Usage",
        "",
        f"- **Input Tokens**: {stats.input_tokens:,}",
        f"- **Output Tokens**: {stats.output_tokens:,}",
        f"- **Cache Creation**: {stats.cache_creation_tokens:,}",
        f"- **Cache Read**: {stats.cache_read_tokens:,}",
        f"- **Total Tokens**: {stats.total_tokens:,}",
        "",
        "### 💰 Cost Estimate",
        "",
        f"- **Total Cost**: ${stats.total_cost:.6f}",
    ]

    if stats.total_tokens > 0 and stats.cache_read_tokens > 0:
        cache_hit_rate = (stats.cache_read_tokens / stats.total_tokens) * 100
        lines.append(f"- **Cache Hit Rate**: {cache_hit_rate:.2f}%")

    if stats.total_cost > 0:
        assistant_count = sum(1 for m in messages if m.get("type") == "assistant")
        if assistant_count > 0:
            lines.append(f"- **Average Cost per Message**: ${stats.total_cost / assistant_count:.6f}")

    if stats.session_seconds > 0:
        lines.extend(["", "### ⏱️ Session Timeline", ""])
        lines.append(f"- **Total Session Time**: {format_duration(stats.session_seconds)}")
        lines.append(f"- **LLM Active Time**: {format_duration(stats.llm_seconds)}")
        wait_time = stats.session_seconds - stats.llm_seconds
        if wait_time > 0:
            lines.append(f"- **Wait Time**: {format_duration(wait_time)}")
        utilization = (stats.llm_seconds / stats.session_seconds) * 100
        lines.append(f"- **LLM Utilization**: {utilization:.1f}%")

    lines.extend(["", "---", ""])
    return lines


def _blockquote(text: str, marker: str = ">") -> list[str]:
    """Prefix every line of ``text`` with ``marker`` (bare marker for empty lines)."""
    return [f"{marker} {line}" if line else marker for line in text.split("\n")]


def _render_user_message(index: int, timestamp: str, content: Any) -> list[str]:
    """Render a user message; returns no lines when it carries no text."""
    if isinstance(content, str):
        texts = [content]
    elif isinstance(content, list):
        texts = [
            item.get("text") or ""
            for item in content
            if isinstance(item, dict) and item.get("type") == "text"
        ]
        if not texts:
            # e.g. a message that only carries tool results
            return []
    else:
        return []

    lines = [f"## 💬 User #{index}", f"**Time**: {timestamp}", ""]
    for text in texts:
        lines.extend(_blockquote(escape_xml_tags(text)))
        lines.append("")
    return lines


def _render_tool_use(item: dict[str, Any]) -> list[str]:
    """Render one ``tool_use`` content item (tool name plus its parameters)."""
    tool_name = item.get("name", "unknown")
    tool_input = item.get("input") or {}
    if tool_name == "Task":
        tool_display = f"🚀 Subagent: {tool_input.get('subagent_type', '')}"
    else:
        tool_display = f"🔧 Tool: {tool_name}"
    # NOTE(review): in the reviewed source the two "\n" delimiter literals
    # below contained only a raw line break; they look like HTML wrapper tags
    # lost in extraction - confirm against the repository before merging.
    return [
        "\n",
        tool_display,
        "",
        format_tool_parameters(tool_input),
        "",
        "\n",
        "",
    ]


def _render_assistant_message(index: int, timestamp: str, content: Any) -> list[str]:
    """Render an assistant message: text, thinking and tool-use items."""
    lines = [f"## 🤖 Assistant #{index}", f"**Time**: {timestamp}", ""]
    if isinstance(content, str):
        lines.extend([content, ""])
        return lines
    if not isinstance(content, list):
        return lines

    for item in content:
        if not isinstance(item, dict):
            continue
        item_type = item.get("type")
        if item_type == "text":
            text = item.get("text") or ""
            if text.strip():
                lines.extend(_blockquote(text))
                lines.append("")
        elif item_type == "thinking":
            thinking = item.get("thinking") or ""
            if thinking.strip():
                lines.append("> ")
                lines.append(">> 🧠 Thinking")
                lines.extend(_blockquote(thinking, marker=">>"))
                lines.append(">")
        elif item_type == "tool_use":
            lines.extend(_render_tool_use(item))
    return lines


def _render_messages(messages: list[dict[str, Any]]) -> list[str]:
    """Render every message, separating rendered messages with ``---``."""
    lines: list[str] = []
    for i, msg in enumerate(messages, 1):
        msg_type = msg.get("type")
        if msg_type == "compact_marker":
            lines.extend(["---", "", "## 📦 [COMPACTED]", "", "---", ""])
            continue

        timestamp = parse_timestamp(msg.get("timestamp", ""))
        message_data = msg.get("message") or {}
        role = message_data.get("role", msg_type)
        content = message_data.get("content", [])

        if role == "user":
            if msg.get("isMeta", False):
                # Meta messages are omitted entirely, separator included.
                continue
            lines.extend(_render_user_message(i, timestamp, content))
        else:
            lines.extend(_render_assistant_message(i, timestamp, content))
        lines.extend(["---", ""])
    return lines


def convert_transcript_to_markdown(
    transcript_path: Path, output_path: Path | None = None, *, preview: bool = False
) -> None:
    """Convert a JSONL transcript to Markdown and save or preview it.

    Args:
        transcript_path: Path to the JSONL transcript.
        output_path: Destination file; defaults to ``transcript_path`` with a
            ``.md`` suffix. Ignored when ``preview`` is true.
        preview: When true, nothing is written to disk and the whole report
            is printed to the terminal instead of a truncated excerpt.

    Raises:
        SystemExit: With status 1 when the transcript does not exist, and
            with status 0 when there is nothing to export.
    """
    if not transcript_path.exists():
        console.print(f"[red]Error: Transcript file not found: {transcript_path}[/red]")
        sys.exit(1)

    messages = _load_messages(transcript_path)
    if not messages:
        console.print("[yellow]No messages found in transcript[/yellow]")
        sys.exit(0)

    last_share_index = find_last_share_command_index(messages)
    if last_share_index is not None:
        messages = messages[:last_share_index]
        console.print(
            f"[yellow]📍 Truncating before /share command (excluded message #{last_share_index + 1})[/yellow]"
        )
        if not messages:
            # The share command was the first message; the header below
            # indexes messages[0], so stop here instead of crashing.
            console.print("[yellow]No messages before /share command; nothing to export[/yellow]")
            sys.exit(0)

    console.print("[cyan]Fetching pricing data...[/cyan]")
    try:
        pricing_data = fetch_pricing_data()
        console.print("[green]✓ Pricing data loaded[/green]")
    except Exception as e:  # best effort: the report is still useful without costs
        console.print(f"[yellow]⚠ Could not fetch pricing data: {e}[/yellow]")
        pricing_data = {}

    stats = _collect_statistics(messages, pricing_data)
    md_lines = _render_statistics(messages, stats)
    md_lines.extend(_render_messages(messages))

    markdown_content = "\n".join(md_lines)
    # Collapse back-to-back separators left by messages that rendered nothing.
    while "\n---\n\n---\n" in markdown_content:
        markdown_content = markdown_content.replace("\n---\n\n---\n", "\n---\n")

    if not preview:
        destination = output_path if output_path else transcript_path.with_suffix(".md")
        destination.write_text(markdown_content, encoding="utf-8")
        console.print(f"[green]✅ Markdown saved to: {destination}[/green]")

    console.print(f"\n[bold green]💰 Total Session Cost: ${stats.total_cost:.6f}[/bold green]")
    console.print("\n[cyan]Preview:[/cyan]")
    if preview or len(markdown_content) <= _TERMINAL_PREVIEW_CHARS:
        console.print(Markdown(markdown_content))
    else:
        console.print(Markdown(markdown_content[:_TERMINAL_PREVIEW_CHARS] + "\n\n... (truncated)"))


@app.command()
def main(
    transcript: Path = typer.Argument(..., help="Path to transcript JSONL file"),
    output: Path | None = typer.Option(None, "--output", "-o", help="Output markdown file path"),
    preview: bool = typer.Option(False, "--preview", "-p", help="Preview in terminal instead of saving"),
) -> None:
    """Convert a Claude Code transcript to Markdown."""
    if preview:
        console.print("[yellow]Preview mode - content will not be saved[/yellow]\n")
    convert_transcript_to_markdown(transcript, output, preview=preview)


if __name__ == "__main__":
    app()