Initial commit

2025-11-29 18:14:49 +08:00
commit 3c7c9f9b10
10 changed files with 1950 additions and 0 deletions
--- a/skills/share-session/scripts/transcript_to_markdown.py
+++ b/skills/share-session/scripts/transcript_to_markdown.py
@@ -0,0 +1,496 @@
+#!/usr/bin/env -S uv run --script
+# /// script
+# requires-python = "~=3.12"
+# dependencies = [
+#     "orjson",
+#     "rich",
+#     "typer",
+#     "httpx",
+# ]
+# ///
+
+from __future__ import annotations
+
+import sys
+from datetime import datetime
+from pathlib import Path
+from typing import Any, NotRequired, TypedDict
+
+import httpx  # pyright: ignore[reportMissingImports]
+import orjson  # pyright: ignore[reportMissingImports]
+import typer  # pyright: ignore[reportMissingImports]
+from rich.console import Console  # pyright: ignore[reportMissingImports]
+from rich.markdown import Markdown  # pyright: ignore[reportMissingImports]
+
+app = typer.Typer()
+console = Console()
+
+LITELLM_PRICING_URL = (
+    "https://raw.githubusercontent.com/BerriAI/litellm/refs/heads/main/model_prices_and_context_window.json"
+)
+
+
+class ModelPricing(TypedDict):
+    input_cost_per_token: NotRequired[float]
+    output_cost_per_token: NotRequired[float]
+    cache_creation_input_token_cost: NotRequired[float]
+    cache_read_input_token_cost: NotRequired[float]
+    litellm_provider: str
+    mode: str
+
+
+def escape_xml_tags(text: str) -> str:
+    """Escape XML/HTML tags so they display in markdown."""
+    return text.replace("<", r"\<").replace(">", r"\>")
+
+
+def parse_timestamp(ts: str) -> str:
+    try:
+        dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
+        local_dt = dt.astimezone()
+        return local_dt.strftime("%Y-%m-%d %H:%M:%S %Z")
+    except (ValueError, AttributeError):
+        return ts
+
+
+def parse_timestamp_to_datetime(ts: str) -> datetime | None:
+    try:
+        return datetime.fromisoformat(ts.replace("Z", "+00:00"))
+    except (ValueError, AttributeError):
+        return None
+
+
+def format_duration(seconds: float) -> str:
+    if seconds < 60:
+        return f"{seconds:.0f}s"
+    elif seconds < 3600:
+        minutes = seconds // 60
+        secs = seconds % 60
+        return f"{int(minutes)}m {int(secs)}s"
+    else:
+        hours = seconds // 3600
+        minutes = (seconds % 3600) // 60
+        secs = seconds % 60
+        if secs > 0:
+            return f"{int(hours)}h {int(minutes)}m {int(secs)}s"
+        else:
+            return f"{int(hours)}h {int(minutes)}m"
+
+
+def format_tool_parameters(params: dict[str, Any]) -> str:
+    if not params:
+        return "_No parameters_"
+
+    lines = []
+    for key, value in params.items():
+        match value:
+            case str():
+                if len(value) > 100:
+                    lines.append(f"**{key}**: `{value[:100]}...`")
+                else:
+                    lines.append(f"**{key}**: `{value}`")
+            case list() | dict():
+                value_json = orjson.dumps(value, option=orjson.OPT_INDENT_2).decode()
+                if len(value_json) > 100:
+                    lines.append(f"**{key}**: `{value_json[:100]}...`")
+                else:
+                    lines.append(f"**{key}**: `{value_json}`")
+            case _:
+                lines.append(f"**{key}**: `{value}`")
+    return "\n\n".join(lines)
+
+
+def format_tool_result(result: dict[str, Any]) -> str:
+    content = result.get("content", "")
+    is_error = result.get("is_error", False)
+
+    if is_error:
+        return f"❌ **Error**:\n```\n{content}\n```"
+
+    match content:
+        case str():
+            return f"```\n{content}\n```"
+        case _:
+            return f"```json\n{orjson.dumps(content, option=orjson.OPT_INDENT_2).decode()}\n```"
+
+
+def build_tool_map(messages: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
+    tool_map: dict[str, dict[str, Any]] = {}
+
+    for msg in messages:
+        if msg.get("type") == "user":
+            content = msg.get("message", {}).get("content", [])
+            if isinstance(content, list):
+                for item in content:
+                    if isinstance(item, dict) and item.get("type") == "tool_result":
+                        tool_use_id = item.get("tool_use_id")
+                        if tool_use_id:
+                            tool_map[tool_use_id] = item
+
+    return tool_map
+
+
+def fetch_pricing_data() -> dict[str, ModelPricing]:
+    with httpx.Client(timeout=30.0) as client:
+        response = client.get(LITELLM_PRICING_URL)
+        response.raise_for_status()
+        data = orjson.loads(response.content)
+        if "sample_spec" in data:
+            del data["sample_spec"]
+        return data
+
+
+def find_last_share_command_index(messages: list[dict[str, Any]]) -> int | None:
+    for i in range(len(messages) - 1, -1, -1):
+        msg = messages[i]
+
+        if msg.get("type") == "compact_marker":
+            continue
+
+        if msg.get("type") != "user":
+            continue
+
+        message_data = msg.get("message", {})
+        content = message_data.get("content", [])
+
+        match content:
+            case str():
+                if (
+                    "<command-name>/share</command-name>" in content
+                    or "<command-name>/cc-plus:share</command-name>" in content
+                ):
+                    return i
+            case list():
+                for item in content:
+                    if isinstance(item, dict):
+                        text = item.get("text", "")
+                        if (
+                            "<command-name>/share</command-name>" in text
+                            or "<command-name>/cc-plus:share</command-name>" in text
+                        ):
+                            return i
+
+    return None
+
+
+def calculate_message_cost(
+    usage: dict[str, Any], model: str, pricing_data: dict[str, ModelPricing]
+) -> tuple[float, dict[str, int]]:
+    pricing = pricing_data.get(model)
+    if not pricing:
+        return 0.0, {}
+
+    input_tokens = usage.get("input_tokens", 0)
+    output_tokens = usage.get("output_tokens", 0)
+    cache_creation_tokens = usage.get("cache_creation_input_tokens", 0)
+    cache_read_tokens = usage.get("cache_read_input_tokens", 0)
+
+    input_rate = pricing.get("input_cost_per_token", 0.0)
+    output_rate = pricing.get("output_cost_per_token", 0.0)
+    cache_creation_rate = pricing.get("cache_creation_input_token_cost", 0.0)
+    cache_read_rate = pricing.get("cache_read_input_token_cost", 0.0)
+
+    total_cost = (
+        input_tokens * input_rate
+        + output_tokens * output_rate
+        + cache_creation_tokens * cache_creation_rate
+        + cache_read_tokens * cache_read_rate
+    )
+
+    token_breakdown = {
+        "input": input_tokens,
+        "output": output_tokens,
+        "cache_creation": cache_creation_tokens,
+        "cache_read": cache_read_tokens,
+    }
+
+    return total_cost, token_breakdown
+
+
+def convert_transcript_to_markdown(transcript_path: Path, output_path: Path | None = None) -> None:
+    if not transcript_path.exists():
+        console.print(f"[red]Error: Transcript file not found: {transcript_path}[/red]")
+        sys.exit(1)
+
+    messages: list[dict[str, Any]] = []
+
+    with transcript_path.open("rb") as f:
+        for line in f:
+            if line.strip():
+                try:
+                    data = orjson.loads(line)
+                    if data.get("type") in ("user", "assistant", "compact_marker"):
+                        messages.append(data)
+                except orjson.JSONDecodeError:
+                    continue
+
+    if not messages:
+        console.print("[yellow]No messages found in transcript[/yellow]")
+        sys.exit(0)
+
+    last_share_index = find_last_share_command_index(messages)
+    if last_share_index is not None:
+        messages = messages[:last_share_index]
+        console.print(
+            f"[yellow]📍 Truncating before /share command (excluded message #{last_share_index + 1})[/yellow]"
+        )
+
+    build_tool_map(messages)
+
+    console.print("[cyan]Fetching pricing data...[/cyan]")
+    try:
+        pricing_data = fetch_pricing_data()
+        console.print("[green]✓ Pricing data loaded[/green]")
+    except Exception as e:
+        console.print(f"[yellow]⚠ Could not fetch pricing data: {e}[/yellow]")
+        pricing_data = {}
+
+    total_cost = 0.0
+    total_input_tokens = 0
+    total_output_tokens = 0
+    total_cache_creation_tokens = 0
+    total_cache_read_tokens = 0
+    models_used: dict[str, int] = {}
+
+    first_timestamp: datetime | None = None
+    last_timestamp: datetime | None = None
+    last_user_timestamp: datetime | None = None
+    llm_time_seconds = 0.0
+    llm_started = False
+
+    for msg in messages:
+        msg_type = msg.get("type")
+        if msg_type == "compact_marker":
+            continue
+
+        timestamp_str = msg.get("timestamp", "")
+        timestamp_dt = parse_timestamp_to_datetime(timestamp_str)
+
+        if timestamp_dt:
+            if first_timestamp is None:
+                first_timestamp = timestamp_dt
+            last_timestamp = timestamp_dt
+
+        if msg_type == "user":
+            last_user_timestamp = timestamp_dt
+            llm_started = False
+        elif msg_type == "assistant":
+            if last_user_timestamp and timestamp_dt and not llm_started:
+                llm_duration = (timestamp_dt - last_user_timestamp).total_seconds()
+                llm_time_seconds += llm_duration
+                llm_started = True
+
+            message_data = msg.get("message", {})
+            usage = message_data.get("usage")
+            if usage:
+                model = message_data.get("model", "unknown")
+                models_used[model] = models_used.get(model, 0) + 1
+
+                cost, breakdown = calculate_message_cost(usage, model, pricing_data)
+                total_cost += cost
+                total_input_tokens += breakdown.get("input", 0)
+                total_output_tokens += breakdown.get("output", 0)
+                total_cache_creation_tokens += breakdown.get("cache_creation", 0)
+                total_cache_read_tokens += breakdown.get("cache_read", 0)
+
+    total_tokens = total_input_tokens + total_output_tokens + total_cache_creation_tokens + total_cache_read_tokens
+
+    total_session_time = 0.0
+    if first_timestamp and last_timestamp:
+        total_session_time = (last_timestamp - first_timestamp).total_seconds()
+
+    md_lines = [
+        "# 🤖 Claude Code Session Transcript",
+        "",
+        f"**Session ID**: `{messages[0].get('sessionId', 'unknown')}`",
+        f"**Generated**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
+        f"**Messages**: {len(messages)}",
+        "",
+        "## 📊 Session Statistics",
+        "",
+        f"**Models Used**: {', '.join(f'{model} ({count})' for model, count in models_used.items())}",
+        "",
+        "### Token Usage",
+        "",
+        f"- **Input Tokens**: {total_input_tokens:,}",
+        f"- **Output Tokens**: {total_output_tokens:,}",
+        f"- **Cache Creation**: {total_cache_creation_tokens:,}",
+        f"- **Cache Read**: {total_cache_read_tokens:,}",
+        f"- **Total Tokens**: {total_tokens:,}",
+        "",
+        "### 💰 Cost Estimate",
+        "",
+        f"- **Total Cost**: ${total_cost:.6f}",
+    ]
+
+    if total_tokens > 0 and total_cache_read_tokens > 0:
+        cache_hit_rate = (total_cache_read_tokens / total_tokens) * 100
+        md_lines.append(f"- **Cache Hit Rate**: {cache_hit_rate:.2f}%")
+
+    if total_cost > 0:
+        assistant_count = len([m for m in messages if m.get("type") == "assistant"])
+        if assistant_count > 0:
+            avg_cost_per_msg = total_cost / assistant_count
+            md_lines.append(f"- **Average Cost per Message**: ${avg_cost_per_msg:.6f}")
+
+    if total_session_time > 0:
+        md_lines.extend(["", "### ⏱️ Session Timeline", ""])
+        md_lines.append(f"- **Total Session Time**: {format_duration(total_session_time)}")
+        md_lines.append(f"- **LLM Active Time**: {format_duration(llm_time_seconds)}")
+
+        wait_time = total_session_time - llm_time_seconds
+        if wait_time > 0:
+            md_lines.append(f"- **Wait Time**: {format_duration(wait_time)}")
+
+        if total_session_time > 0:
+            utilization = (llm_time_seconds / total_session_time) * 100
+            md_lines.append(f"- **LLM Utilization**: {utilization:.1f}%")
+
+    md_lines.extend(["", "---", ""])
+
+    for i, msg in enumerate(messages, 1):
+        msg_type = msg.get("type")
+
+        if msg_type == "compact_marker":
+            md_lines.extend(["---", "", "## 📦 [COMPACTED]", "", "---", ""])
+            continue
+
+        timestamp = parse_timestamp(msg.get("timestamp", ""))
+        message_data = msg.get("message", {})
+        role = message_data.get("role", msg_type)
+
+        if role == "user":
+            is_meta = msg.get("isMeta", False)
+            content_items = message_data.get("content", [])
+
+            if is_meta:
+                continue
+            else:
+                match content_items:
+                    case str():
+                        escaped_content = escape_xml_tags(content_items)
+                        quoted_lines = [f"> {line}" if line else ">" for line in escaped_content.split("\n")]
+                        md_lines.extend(
+                            [
+                                f"## 💬 User #{i}",
+                                f"**Time**: {timestamp}",
+                                "",
+                            ]
+                        )
+                        md_lines.extend(quoted_lines)
+                        md_lines.append("")
+                    case list():
+                        text_items = [
+                            item.get("text", "")
+                            for item in content_items
+                            if isinstance(item, dict) and item.get("type") == "text"
+                        ]
+                        if text_items:
+                            md_lines.extend(
+                                [
+                                    f"## 💬 User #{i}",
+                                    f"**Time**: {timestamp}",
+                                    "",
+                                ]
+                            )
+                            for text in text_items:
+                                escaped_text = escape_xml_tags(text)
+                                quoted_lines = [f"> {line}" if line else ">" for line in escaped_text.split("\n")]
+                                md_lines.extend(quoted_lines)
+                            md_lines.append("")
+
+        else:
+            md_lines.extend(
+                [
+                    f"## 🤖 Assistant #{i}",
+                    f"**Time**: {timestamp}",
+                    "",
+                ]
+            )
+
+            content = message_data.get("content", [])
+            match content:
+                case str():
+                    md_lines.extend([content, ""])
+                case list():
+                    for item in content:
+                        if not isinstance(item, dict):
+                            continue
+
+                        item_type = item.get("type")
+
+                        match item_type:
+                            case "text":
+                                text = item.get("text", "")
+                                if text.strip():
+                                    quoted_lines = [f"> {line}" if line else ">" for line in text.split("\n")]
+                                    md_lines.extend(quoted_lines)
+                                    md_lines.append("")
+
+                            case "thinking":
+                                thinking = item.get("thinking", "")
+                                if thinking.strip():
+                                    md_lines.append("> ")
+                                    md_lines.append(">> 🧠 Thinking")
+                                    thinking_lines = [f">> {line}" if line else ">>" for line in thinking.split("\n")]
+                                    md_lines.extend(thinking_lines)
+                                    md_lines.append(">")
+
+                            case "tool_use":
+                                tool_name = item.get("name", "unknown")
+                                tool_input = item.get("input", {})
+
+                                is_subagent = tool_name == "Task"
+                                subagent_type = tool_input.get("subagent_type", "") if is_subagent else ""
+
+                                if is_subagent:
+                                    tool_display = f"🚀 Subagent: {subagent_type}"
+                                else:
+                                    tool_display = f"🔧 Tool: {tool_name}"
+
+                                md_lines.extend(
+                                    [
+                                        "<details>",
+                                        f"<summary>{tool_display}</summary>",
+                                        "",
+                                        format_tool_parameters(tool_input),
+                                        "",
+                                        "</details>",
+                                        "",
+                                    ]
+                                )
+
+        md_lines.extend(["---", ""])
+
+    markdown_content = "\n".join(md_lines)
+
+    while "\n---\n\n---\n" in markdown_content:
+        markdown_content = markdown_content.replace("\n---\n\n---\n", "\n---\n")
+
+    if output_path:
+        output_path.write_text(markdown_content, encoding="utf-8")
+        console.print(f"[green]✅ Markdown saved to: {output_path}[/green]")
+    else:
+        default_output = transcript_path.with_suffix(".md")
+        default_output.write_text(markdown_content, encoding="utf-8")
+        console.print(f"[green]✅ Markdown saved to: {default_output}[/green]")
+
+    console.print(f"\n[bold green]💰 Total Session Cost: ${total_cost:.6f}[/bold green]")
+    console.print("\n[cyan]Preview:[/cyan]")
+    console.print(Markdown(markdown_content[:1000] + "\n\n... (truncated)"))
+
+
+@app.command()
+def main(
+    transcript: Path = typer.Argument(..., help="Path to transcript JSONL file"),
+    output: Path | None = typer.Option(None, "--output", "-o", help="Output markdown file path"),
+    preview: bool = typer.Option(False, "--preview", "-p", help="Preview in terminal instead of saving"),
+) -> None:
+    if preview:
+        console.print("[yellow]Preview mode - content will not be saved[/yellow]\n")
+
+    convert_transcript_to_markdown(transcript, output)
+
+
+if __name__ == "__main__":
+    app()