Initial commit
This commit is contained in:
496
skills/share-session/scripts/transcript_to_markdown.py
Executable file
496
skills/share-session/scripts/transcript_to_markdown.py
Executable file
@@ -0,0 +1,496 @@
|
||||
#!/usr/bin/env -S uv run --script
|
||||
# /// script
|
||||
# requires-python = "~=3.12"
|
||||
# dependencies = [
|
||||
# "orjson",
|
||||
# "rich",
|
||||
# "typer",
|
||||
# "httpx",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, NotRequired, TypedDict
|
||||
|
||||
import httpx # pyright: ignore[reportMissingImports]
|
||||
import orjson # pyright: ignore[reportMissingImports]
|
||||
import typer # pyright: ignore[reportMissingImports]
|
||||
from rich.console import Console # pyright: ignore[reportMissingImports]
|
||||
from rich.markdown import Markdown # pyright: ignore[reportMissingImports]
|
||||
|
||||
# Typer application object; the CLI command below registers itself on it.
app = typer.Typer()

# Shared Rich console used for status messages and the terminal preview.
console = Console()

# Raw JSON pricing table published in the BerriAI/litellm repository.
LITELLM_PRICING_URL = (
    "https://raw.githubusercontent.com/BerriAI/litellm/refs/heads/main/model_prices_and_context_window.json"
)
|
||||
|
||||
|
||||
class ModelPricing(TypedDict):
|
||||
input_cost_per_token: NotRequired[float]
|
||||
output_cost_per_token: NotRequired[float]
|
||||
cache_creation_input_token_cost: NotRequired[float]
|
||||
cache_read_input_token_cost: NotRequired[float]
|
||||
litellm_provider: str
|
||||
mode: str
|
||||
|
||||
|
||||
def escape_xml_tags(text: str) -> str:
    """Backslash-escape angle brackets so XML/HTML tags show up literally in markdown."""
    angle_escapes = str.maketrans({"<": r"\<", ">": r"\>"})
    return text.translate(angle_escapes)
|
||||
|
||||
|
||||
def parse_timestamp(ts: str) -> str:
    """Format an ISO-8601 timestamp in the local timezone for display.

    A trailing ``Z`` is rewritten to ``+00:00`` before parsing. When the
    value cannot be parsed (or is not a string at all) it is returned
    unchanged so the caller can still show whatever was recorded.
    """
    try:
        return (
            datetime.fromisoformat(ts.replace("Z", "+00:00"))
            .astimezone()
            .strftime("%Y-%m-%d %H:%M:%S %Z")
        )
    except (ValueError, AttributeError):
        return ts
|
||||
|
||||
|
||||
def parse_timestamp_to_datetime(ts: str) -> datetime | None:
    """Parse an ISO-8601 timestamp into a ``datetime``, or ``None`` if it is unusable.

    A trailing ``Z`` is rewritten to ``+00:00`` before parsing. Values that
    are malformed or not strings yield ``None`` instead of raising.
    """
    parsed: datetime | None
    try:
        parsed = datetime.fromisoformat(ts.replace("Z", "+00:00"))
    except (ValueError, AttributeError):
        parsed = None
    return parsed
|
||||
|
||||
|
||||
def format_duration(seconds: float) -> str:
    """Render a duration in seconds as a compact ``Xh Ym Zs`` string.

    The value is rounded to the nearest whole second once, up front, and
    only then split into units. Rounding per branch would let 59.7 seconds
    show as ``"60s"``, and a fractional remainder in the hour range would
    show a spurious ``"0s"``.

    Args:
        seconds: Duration in seconds; may be fractional.

    Returns:
        ``"Ns"`` below one minute, ``"Mm Ss"`` below one hour, otherwise
        ``"Hh Mm"`` followed by ``" Ss"`` only when the seconds component
        is non-zero.
    """
    whole_seconds = round(seconds)
    if whole_seconds < 60:
        return f"{whole_seconds}s"

    minutes, secs = divmod(whole_seconds, 60)
    if minutes < 60:
        return f"{minutes}m {secs}s"

    hours, minutes = divmod(minutes, 60)
    if secs:
        return f"{hours}h {minutes}m {secs}s"
    return f"{hours}h {minutes}m"
|
||||
|
||||
|
||||
def format_tool_parameters(params: dict[str, Any]) -> str:
    """Render tool-call parameters as markdown, one ``**name**: `value``` entry each.

    Strings are shown as-is and lists/dicts as indented JSON; both are cut
    to 100 characters followed by ``...`` when longer. Any other value is
    shown through its normal string form without truncation. Entries are
    separated by a blank line. An empty mapping yields ``_No parameters_``.
    """
    if not params:
        return "_No parameters_"

    def render_value(value: Any) -> str:
        if isinstance(value, str):
            shown = value
        elif isinstance(value, (list, dict)):
            shown = orjson.dumps(value, option=orjson.OPT_INDENT_2).decode()
        else:
            # Scalars such as numbers, booleans and None are never truncated.
            return f"`{value}`"
        return f"`{shown[:100]}...`" if len(shown) > 100 else f"`{shown}`"

    return "\n\n".join(f"**{name}**: {render_value(value)}" for name, value in params.items())
|
||||
|
||||
|
||||
def format_tool_result(result: dict[str, Any]) -> str:
    """Render a tool result record as a fenced markdown code block.

    Results flagged with ``is_error`` are prefixed with an error banner and
    their content is interpolated as-is. Otherwise string content goes into
    a plain fence and any other content is pretty-printed into a ``json``
    fence. Missing content is treated as an empty string.
    """
    payload = result.get("content", "")

    if result.get("is_error", False):
        return f"❌ **Error**:\n```\n{payload}\n```"

    if isinstance(payload, str):
        return f"```\n{payload}\n```"

    pretty = orjson.dumps(payload, option=orjson.OPT_INDENT_2).decode()
    return f"```json\n{pretty}\n```"
|
||||
|
||||
|
||||
def build_tool_map(messages: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
    """Index ``tool_result`` content items by their ``tool_use_id``.

    Only messages of type ``user`` whose content is a list are scanned.
    Items without a ``tool_use_id`` are ignored, and a later result with
    the same id replaces an earlier one.
    """
    results_by_id: dict[str, dict[str, Any]] = {}

    for entry in messages:
        if entry.get("type") != "user":
            continue

        blocks = entry.get("message", {}).get("content", [])
        if not isinstance(blocks, list):
            continue

        for block in blocks:
            if not isinstance(block, dict) or block.get("type") != "tool_result":
                continue
            call_id = block.get("tool_use_id")
            if call_id:
                results_by_id[call_id] = block

    return results_by_id
|
||||
|
||||
|
||||
def fetch_pricing_data() -> dict[str, ModelPricing]:
    """Download and decode the pricing table from ``LITELLM_PRICING_URL``.

    The request uses a 30 second timeout. HTTP error statuses and transport
    failures propagate as exceptions from ``httpx``.
    """
    with httpx.Client(timeout=30.0) as http:
        reply = http.get(LITELLM_PRICING_URL)
        reply.raise_for_status()
        table = orjson.loads(reply.content)

    # NOTE(review): "sample_spec" is presumably a documentation/template entry
    # rather than a real model - confirm against the upstream file.
    if "sample_spec" in table:
        del table["sample_spec"]
    return table
|
||||
|
||||
|
||||
def find_last_share_command_index(messages: list[dict[str, Any]]) -> int | None:
    """Return the index of the last user message that invokes the share command.

    Messages are scanned from the end. A user message matches when its
    string content, or the ``text`` of any dict item in its list content,
    contains the ``/share`` or ``/cc-plus:share`` command-name tag.
    Returns ``None`` when no message matches.
    """
    share_markers = (
        "<command-name>/share</command-name>",
        "<command-name>/cc-plus:share</command-name>",
    )

    def mentions_share(blob: Any) -> bool:
        return any(marker in blob for marker in share_markers)

    for position in reversed(range(len(messages))):
        entry = messages[position]

        # Compact markers and assistant turns can never carry the command.
        if entry.get("type") != "user":
            continue

        body = entry.get("message", {}).get("content", [])

        if isinstance(body, str):
            if mentions_share(body):
                return position
        elif isinstance(body, list):
            for part in body:
                if isinstance(part, dict) and mentions_share(part.get("text", "")):
                    return position

    return None
|
||||
|
||||
|
||||
def calculate_message_cost(
    usage: dict[str, Any], model: str, pricing_data: dict[str, ModelPricing]
) -> tuple[float, dict[str, int]]:
    """Compute the cost and token breakdown for one message's usage record.

    The token breakdown is always returned, even when ``model`` has no
    pricing entry. Token counts do not depend on pricing, so a missing
    entry (or a failed pricing download) must not zero out token totals.

    Args:
        usage: Usage record with optional ``input_tokens``, ``output_tokens``,
            ``cache_creation_input_tokens`` and ``cache_read_input_tokens``.
        model: Model name used as the key into ``pricing_data``.
        pricing_data: Pricing table keyed by model name.

    Returns:
        ``(total_cost, token_breakdown)`` where ``token_breakdown`` has the
        keys ``input``, ``output``, ``cache_creation`` and ``cache_read``.
        ``total_cost`` is ``0.0`` when the model has no pricing entry.
    """

    def token_count(key: str) -> int:
        # ``or 0`` also covers keys that are present but null.
        return usage.get(key) or 0

    token_breakdown = {
        "input": token_count("input_tokens"),
        "output": token_count("output_tokens"),
        "cache_creation": token_count("cache_creation_input_tokens"),
        "cache_read": token_count("cache_read_input_tokens"),
    }

    pricing = pricing_data.get(model)
    if not pricing:
        return 0.0, token_breakdown

    # Missing or null rates are treated as free.
    input_rate = pricing.get("input_cost_per_token") or 0.0
    output_rate = pricing.get("output_cost_per_token") or 0.0
    cache_creation_rate = pricing.get("cache_creation_input_token_cost") or 0.0
    cache_read_rate = pricing.get("cache_read_input_token_cost") or 0.0

    total_cost = (
        token_breakdown["input"] * input_rate
        + token_breakdown["output"] * output_rate
        + token_breakdown["cache_creation"] * cache_creation_rate
        + token_breakdown["cache_read"] * cache_read_rate
    )

    return total_cost, token_breakdown
|
||||
|
||||
|
||||
def convert_transcript_to_markdown(transcript_path: Path, output_path: Path | None = None) -> None:
|
||||
if not transcript_path.exists():
|
||||
console.print(f"[red]Error: Transcript file not found: {transcript_path}[/red]")
|
||||
sys.exit(1)
|
||||
|
||||
messages: list[dict[str, Any]] = []
|
||||
|
||||
with transcript_path.open("rb") as f:
|
||||
for line in f:
|
||||
if line.strip():
|
||||
try:
|
||||
data = orjson.loads(line)
|
||||
if data.get("type") in ("user", "assistant", "compact_marker"):
|
||||
messages.append(data)
|
||||
except orjson.JSONDecodeError:
|
||||
continue
|
||||
|
||||
if not messages:
|
||||
console.print("[yellow]No messages found in transcript[/yellow]")
|
||||
sys.exit(0)
|
||||
|
||||
last_share_index = find_last_share_command_index(messages)
|
||||
if last_share_index is not None:
|
||||
messages = messages[:last_share_index]
|
||||
console.print(
|
||||
f"[yellow]📍 Truncating before /share command (excluded message #{last_share_index + 1})[/yellow]"
|
||||
)
|
||||
|
||||
build_tool_map(messages)
|
||||
|
||||
console.print("[cyan]Fetching pricing data...[/cyan]")
|
||||
try:
|
||||
pricing_data = fetch_pricing_data()
|
||||
console.print("[green]✓ Pricing data loaded[/green]")
|
||||
except Exception as e:
|
||||
console.print(f"[yellow]⚠ Could not fetch pricing data: {e}[/yellow]")
|
||||
pricing_data = {}
|
||||
|
||||
total_cost = 0.0
|
||||
total_input_tokens = 0
|
||||
total_output_tokens = 0
|
||||
total_cache_creation_tokens = 0
|
||||
total_cache_read_tokens = 0
|
||||
models_used: dict[str, int] = {}
|
||||
|
||||
first_timestamp: datetime | None = None
|
||||
last_timestamp: datetime | None = None
|
||||
last_user_timestamp: datetime | None = None
|
||||
llm_time_seconds = 0.0
|
||||
llm_started = False
|
||||
|
||||
for msg in messages:
|
||||
msg_type = msg.get("type")
|
||||
if msg_type == "compact_marker":
|
||||
continue
|
||||
|
||||
timestamp_str = msg.get("timestamp", "")
|
||||
timestamp_dt = parse_timestamp_to_datetime(timestamp_str)
|
||||
|
||||
if timestamp_dt:
|
||||
if first_timestamp is None:
|
||||
first_timestamp = timestamp_dt
|
||||
last_timestamp = timestamp_dt
|
||||
|
||||
if msg_type == "user":
|
||||
last_user_timestamp = timestamp_dt
|
||||
llm_started = False
|
||||
elif msg_type == "assistant":
|
||||
if last_user_timestamp and timestamp_dt and not llm_started:
|
||||
llm_duration = (timestamp_dt - last_user_timestamp).total_seconds()
|
||||
llm_time_seconds += llm_duration
|
||||
llm_started = True
|
||||
|
||||
message_data = msg.get("message", {})
|
||||
usage = message_data.get("usage")
|
||||
if usage:
|
||||
model = message_data.get("model", "unknown")
|
||||
models_used[model] = models_used.get(model, 0) + 1
|
||||
|
||||
cost, breakdown = calculate_message_cost(usage, model, pricing_data)
|
||||
total_cost += cost
|
||||
total_input_tokens += breakdown.get("input", 0)
|
||||
total_output_tokens += breakdown.get("output", 0)
|
||||
total_cache_creation_tokens += breakdown.get("cache_creation", 0)
|
||||
total_cache_read_tokens += breakdown.get("cache_read", 0)
|
||||
|
||||
total_tokens = total_input_tokens + total_output_tokens + total_cache_creation_tokens + total_cache_read_tokens
|
||||
|
||||
total_session_time = 0.0
|
||||
if first_timestamp and last_timestamp:
|
||||
total_session_time = (last_timestamp - first_timestamp).total_seconds()
|
||||
|
||||
md_lines = [
|
||||
"# 🤖 Claude Code Session Transcript",
|
||||
"",
|
||||
f"**Session ID**: `{messages[0].get('sessionId', 'unknown')}`",
|
||||
f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
|
||||
f"**Messages**: {len(messages)}",
|
||||
"",
|
||||
"## 📊 Session Statistics",
|
||||
"",
|
||||
f"**Models Used**: {', '.join(f'{model} ({count})' for model, count in models_used.items())}",
|
||||
"",
|
||||
"### Token Usage",
|
||||
"",
|
||||
f"- **Input Tokens**: {total_input_tokens:,}",
|
||||
f"- **Output Tokens**: {total_output_tokens:,}",
|
||||
f"- **Cache Creation**: {total_cache_creation_tokens:,}",
|
||||
f"- **Cache Read**: {total_cache_read_tokens:,}",
|
||||
f"- **Total Tokens**: {total_tokens:,}",
|
||||
"",
|
||||
"### 💰 Cost Estimate",
|
||||
"",
|
||||
f"- **Total Cost**: ${total_cost:.6f}",
|
||||
]
|
||||
|
||||
if total_tokens > 0 and total_cache_read_tokens > 0:
|
||||
cache_hit_rate = (total_cache_read_tokens / total_tokens) * 100
|
||||
md_lines.append(f"- **Cache Hit Rate**: {cache_hit_rate:.2f}%")
|
||||
|
||||
if total_cost > 0:
|
||||
assistant_count = len([m for m in messages if m.get("type") == "assistant"])
|
||||
if assistant_count > 0:
|
||||
avg_cost_per_msg = total_cost / assistant_count
|
||||
md_lines.append(f"- **Average Cost per Message**: ${avg_cost_per_msg:.6f}")
|
||||
|
||||
if total_session_time > 0:
|
||||
md_lines.extend(["", "### ⏱️ Session Timeline", ""])
|
||||
md_lines.append(f"- **Total Session Time**: {format_duration(total_session_time)}")
|
||||
md_lines.append(f"- **LLM Active Time**: {format_duration(llm_time_seconds)}")
|
||||
|
||||
wait_time = total_session_time - llm_time_seconds
|
||||
if wait_time > 0:
|
||||
md_lines.append(f"- **Wait Time**: {format_duration(wait_time)}")
|
||||
|
||||
if total_session_time > 0:
|
||||
utilization = (llm_time_seconds / total_session_time) * 100
|
||||
md_lines.append(f"- **LLM Utilization**: {utilization:.1f}%")
|
||||
|
||||
md_lines.extend(["", "---", ""])
|
||||
|
||||
for i, msg in enumerate(messages, 1):
|
||||
msg_type = msg.get("type")
|
||||
|
||||
if msg_type == "compact_marker":
|
||||
md_lines.extend(["---", "", "## 📦 [COMPACTED]", "", "---", ""])
|
||||
continue
|
||||
|
||||
timestamp = parse_timestamp(msg.get("timestamp", ""))
|
||||
message_data = msg.get("message", {})
|
||||
role = message_data.get("role", msg_type)
|
||||
|
||||
if role == "user":
|
||||
is_meta = msg.get("isMeta", False)
|
||||
content_items = message_data.get("content", [])
|
||||
|
||||
if is_meta:
|
||||
continue
|
||||
else:
|
||||
match content_items:
|
||||
case str():
|
||||
escaped_content = escape_xml_tags(content_items)
|
||||
quoted_lines = [f"> {line}" if line else ">" for line in escaped_content.split("\n")]
|
||||
md_lines.extend(
|
||||
[
|
||||
f"## 💬 User #{i}",
|
||||
f"**Time**: {timestamp}",
|
||||
"",
|
||||
]
|
||||
)
|
||||
md_lines.extend(quoted_lines)
|
||||
md_lines.append("")
|
||||
case list():
|
||||
text_items = [
|
||||
item.get("text", "")
|
||||
for item in content_items
|
||||
if isinstance(item, dict) and item.get("type") == "text"
|
||||
]
|
||||
if text_items:
|
||||
md_lines.extend(
|
||||
[
|
||||
f"## 💬 User #{i}",
|
||||
f"**Time**: {timestamp}",
|
||||
"",
|
||||
]
|
||||
)
|
||||
for text in text_items:
|
||||
escaped_text = escape_xml_tags(text)
|
||||
quoted_lines = [f"> {line}" if line else ">" for line in escaped_text.split("\n")]
|
||||
md_lines.extend(quoted_lines)
|
||||
md_lines.append("")
|
||||
|
||||
else:
|
||||
md_lines.extend(
|
||||
[
|
||||
f"## 🤖 Assistant #{i}",
|
||||
f"**Time**: {timestamp}",
|
||||
"",
|
||||
]
|
||||
)
|
||||
|
||||
content = message_data.get("content", [])
|
||||
match content:
|
||||
case str():
|
||||
md_lines.extend([content, ""])
|
||||
case list():
|
||||
for item in content:
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
|
||||
item_type = item.get("type")
|
||||
|
||||
match item_type:
|
||||
case "text":
|
||||
text = item.get("text", "")
|
||||
if text.strip():
|
||||
quoted_lines = [f"> {line}" if line else ">" for line in text.split("\n")]
|
||||
md_lines.extend(quoted_lines)
|
||||
md_lines.append("")
|
||||
|
||||
case "thinking":
|
||||
thinking = item.get("thinking", "")
|
||||
if thinking.strip():
|
||||
md_lines.append("> ")
|
||||
md_lines.append(">> 🧠 Thinking")
|
||||
thinking_lines = [f">> {line}" if line else ">>" for line in thinking.split("\n")]
|
||||
md_lines.extend(thinking_lines)
|
||||
md_lines.append(">")
|
||||
|
||||
case "tool_use":
|
||||
tool_name = item.get("name", "unknown")
|
||||
tool_input = item.get("input", {})
|
||||
|
||||
is_subagent = tool_name == "Task"
|
||||
subagent_type = tool_input.get("subagent_type", "") if is_subagent else ""
|
||||
|
||||
if is_subagent:
|
||||
tool_display = f"🚀 Subagent: {subagent_type}"
|
||||
else:
|
||||
tool_display = f"🔧 Tool: {tool_name}"
|
||||
|
||||
md_lines.extend(
|
||||
[
|
||||
"<details>",
|
||||
f"<summary>{tool_display}</summary>",
|
||||
"",
|
||||
format_tool_parameters(tool_input),
|
||||
"",
|
||||
"</details>",
|
||||
"",
|
||||
]
|
||||
)
|
||||
|
||||
md_lines.extend(["---", ""])
|
||||
|
||||
markdown_content = "\n".join(md_lines)
|
||||
|
||||
while "\n---\n\n---\n" in markdown_content:
|
||||
markdown_content = markdown_content.replace("\n---\n\n---\n", "\n---\n")
|
||||
|
||||
if output_path:
|
||||
output_path.write_text(markdown_content, encoding="utf-8")
|
||||
console.print(f"[green]✅ Markdown saved to: {output_path}[/green]")
|
||||
else:
|
||||
default_output = transcript_path.with_suffix(".md")
|
||||
default_output.write_text(markdown_content, encoding="utf-8")
|
||||
console.print(f"[green]✅ Markdown saved to: {default_output}[/green]")
|
||||
|
||||
console.print(f"\n[bold green]💰 Total Session Cost: ${total_cost:.6f}[/bold green]")
|
||||
console.print("\n[cyan]Preview:[/cyan]")
|
||||
console.print(Markdown(markdown_content[:1000] + "\n\n... (truncated)"))
|
||||
|
||||
|
||||
@app.command()
def main(
    transcript: Path = typer.Argument(..., help="Path to transcript JSONL file"),
    output: Path | None = typer.Option(None, "--output", "-o", help="Output markdown file path"),
    preview: bool = typer.Option(False, "--preview", "-p", help="Preview in terminal instead of saving"),
) -> None:
    """Convert a session transcript (JSONL) into a markdown report."""
    if not preview:
        convert_transcript_to_markdown(transcript, output)
        return

    console.print("[yellow]Preview mode - content will not be saved[/yellow]\n")

    # The converter always writes its result to a file. To honour the promise
    # printed above, preview mode points it at a scratch directory that is
    # removed afterwards, so neither --output nor the default location next to
    # the transcript is touched. Cleanup also runs if the converter exits early.
    import tempfile

    with tempfile.TemporaryDirectory() as scratch_dir:
        convert_transcript_to_markdown(transcript, Path(scratch_dir) / "preview.md")
|
||||
|
||||
|
||||
# Run the Typer application only when this file is executed as a script.
if __name__ == "__main__":
    app()
|
||||
Reference in New Issue
Block a user