#!/usr/bin/env python3
"""
Insight Generator for Claude Code Insights

Analyzes conversation patterns and generates insight reports with
visualizations, metrics, and actionable recommendations.
"""

import sqlite3
import json
import sys
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from datetime import datetime, timedelta
from collections import Counter, defaultdict

import click

try:
    from jinja2 import Template, Environment, FileSystemLoader
except ImportError:
    print("Error: jinja2 not installed. Run: pip install jinja2", file=sys.stderr)
    # sys.exit rather than the site-provided exit(), which is not guaranteed
    # to exist (e.g. under `python -S`).
    sys.exit(1)


def _day_after_if_date_only(value: str) -> Optional[str]:
    """Return the ISO date following *value* when *value* is a bare date.

    Returns ``None`` when *value* is not exactly ``YYYY-MM-DD`` (for example
    when it already carries a time component), so the caller can fall back to
    an inclusive comparison against the value as given.
    """
    if len(value) != 10:
        return None
    try:
        day = datetime.strptime(value, "%Y-%m-%d").date()
        return (day + timedelta(days=1)).isoformat()
    except (ValueError, OverflowError):
        return None


class PatternDetector:
    """Detects patterns in conversation data

    Runs aggregate queries against the SQLite database at ``db_path``. The
    queries read the ``conversations``, ``file_interactions`` and
    ``tool_usage`` tables.
    """

    def __init__(self, db_path: Path, verbose: bool = False):
        """Open the database and enable access to columns by name."""
        self.db_path = db_path
        self.verbose = verbose
        self.conn = sqlite3.connect(str(db_path))
        self.conn.row_factory = sqlite3.Row

    def _log(self, message: str):
        """Log if verbose mode is enabled"""
        if self.verbose:
            print(f"[{datetime.now().strftime('%H:%M:%S')}] {message}")

    def get_date_range_filter(self, date_from: Optional[str] = None,
                              date_to: Optional[str] = None,
                              column: str = "timestamp") -> Tuple[str, List]:
        """Build date range SQL filter

        Args:
            date_from: Inclusive lower bound, or ``None`` for no lower bound.
            date_to: Inclusive upper bound, or ``None`` for no upper bound.
                A bare ``YYYY-MM-DD`` value covers that whole day.
            column: Column expression to filter on. It is interpolated into
                the SQL text, so it must be a trusted identifier written in
                code (e.g. ``"c.timestamp"``), never user input.

        Returns:
            ``(where_clause, params)``; the clause is ``"1=1"`` when no bound
            is given, so it can always follow ``WHERE``.
        """
        conditions = []
        params = []

        if date_from:
            conditions.append(f"{column} >= ?")
            params.append(date_from)

        if date_to:
            # The comparison is textual. "<= '2024-01-15'" would reject
            # '2024-01-15T10:00:00', silently dropping the final day, so a
            # date-only bound becomes an exclusive bound on the next day.
            # NOTE(review): assumes stored timestamps are ISO-8601 text.
            next_day = _day_after_if_date_only(date_to)
            if next_day is not None:
                conditions.append(f"{column} < ?")
                params.append(next_day)
            else:
                conditions.append(f"{column} <= ?")
                params.append(date_to)

        where_clause = " AND ".join(conditions) if conditions else "1=1"
        return where_clause, params

    def get_overview_metrics(self, date_from: Optional[str] = None,
                             date_to: Optional[str] = None) -> Dict[str, Any]:
        """Get high-level overview metrics

        Returns a dict of conversation/message totals, the average message
        count per conversation (rounded to one decimal), the earliest and
        latest timestamps, and the number of distinct active days. Numeric
        aggregates that are NULL (no matching rows) are reported as 0.
        """
        where_clause, params = self.get_date_range_filter(date_from, date_to)

        cursor = self.conn.execute(f"""
            SELECT
                COUNT(*) as total_conversations,
                SUM(message_count) as total_messages,
                SUM(user_messages) as total_user_messages,
                SUM(assistant_messages) as total_assistant_messages,
                AVG(message_count) as avg_messages_per_conversation,
                MIN(timestamp) as earliest_conversation,
                MAX(timestamp) as latest_conversation,
                COUNT(DISTINCT DATE(timestamp)) as active_days
            FROM conversations
            WHERE {where_clause}
        """, params)

        row = cursor.fetchone()

        return {
            'total_conversations': row['total_conversations'] or 0,
            'total_messages': row['total_messages'] or 0,
            'total_user_messages': row['total_user_messages'] or 0,
            'total_assistant_messages': row['total_assistant_messages'] or 0,
            'avg_messages_per_conversation': round(row['avg_messages_per_conversation'] or 0, 1),
            'earliest_conversation': row['earliest_conversation'],
            'latest_conversation': row['latest_conversation'],
            'active_days': row['active_days'] or 0
        }

    def get_file_hotspots(self, date_from: Optional[str] = None,
                          date_to: Optional[str] = None,
                          limit: int = 20) -> List[Dict[str, Any]]:
        """Get the files touched in the most conversations

        Returns up to ``limit`` entries ordered by distinct conversation
        count, each with per-type (read/write/edit) interaction counts and
        their sum as ``total_interactions``.
        """
        # Qualify the column: the query joins two tables, and an unqualified
        # name is ambiguous if file_interactions also has a timestamp column.
        where_clause, params = self.get_date_range_filter(
            date_from, date_to, column="c.timestamp"
        )

        cursor = self.conn.execute(f"""
            SELECT
                fi.file_path,
                COUNT(DISTINCT fi.conversation_id) as conversation_count,
                SUM(CASE WHEN fi.interaction_type = 'read' THEN 1 ELSE 0 END) as read_count,
                SUM(CASE WHEN fi.interaction_type = 'write' THEN 1 ELSE 0 END) as write_count,
                SUM(CASE WHEN fi.interaction_type = 'edit' THEN 1 ELSE 0 END) as edit_count
            FROM file_interactions fi
            JOIN conversations c ON fi.conversation_id = c.id
            WHERE {where_clause}
            GROUP BY fi.file_path
            ORDER BY conversation_count DESC
            LIMIT ?
        """, params + [limit])

        return [
            {
                'file_path': row['file_path'],
                'conversation_count': row['conversation_count'],
                'read_count': row['read_count'],
                'write_count': row['write_count'],
                'edit_count': row['edit_count'],
                'total_interactions': row['read_count'] + row['write_count'] + row['edit_count']
            }
            for row in cursor.fetchall()
        ]

    def get_tool_usage(self, date_from: Optional[str] = None,
                       date_to: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get tool usage statistics

        Returns one entry per tool, ordered by total uses (descending), with
        the number of distinct conversations it appeared in.
        """
        # Qualified for the same reason as in get_file_hotspots (JOIN).
        where_clause, params = self.get_date_range_filter(
            date_from, date_to, column="c.timestamp"
        )

        cursor = self.conn.execute(f"""
            SELECT
                tu.tool_name,
                COUNT(DISTINCT tu.conversation_id) as conversation_count,
                SUM(tu.usage_count) as total_uses
            FROM tool_usage tu
            JOIN conversations c ON tu.conversation_id = c.id
            WHERE {where_clause}
            GROUP BY tu.tool_name
            ORDER BY total_uses DESC
        """, params)

        return [
            {
                'tool_name': row['tool_name'],
                'conversation_count': row['conversation_count'],
                # SUM() is NULL when every usage_count in the group is NULL;
                # report 0 so callers can do arithmetic on the value.
                'total_uses': row['total_uses'] or 0
            }
            for row in cursor.fetchall()
        ]

    def get_topic_clusters(self, date_from: Optional[str] = None,
                           date_to: Optional[str] = None) -> List[Dict[str, Any]]:
        """Get most common topics

        Decodes the JSON stored in ``conversations.topics`` and returns the
        20 most frequent topics as ``{'topic': ..., 'count': ...}`` dicts.
        Rows whose ``topics`` value is not valid JSON are skipped.
        """
        where_clause, params = self.get_date_range_filter(date_from, date_to)

        cursor = self.conn.execute(f"""
            SELECT topics FROM conversations
            WHERE {where_clause} AND topics IS NOT NULL
        """, params)

        topic_counter = Counter()
        for row in cursor.fetchall():
            try:
                topics = json.loads(row['topics'])
            except (TypeError, ValueError):
                # One corrupt row should not abort the whole report.
                self._log("Skipping conversation with malformed topics JSON")
                continue

            if isinstance(topics, str):
                # Counter.update() on a bare string would count characters.
                topics = [topics]
            elif not isinstance(topics, (list, dict)):
                continue
            topic_counter.update(topics)

        return [
            {'topic': topic, 'count': count}
            for topic, count in topic_counter.most_common(20)
        ]

    def get_activity_timeline(self, date_from: Optional[str] = None,
                              date_to: Optional[str] = None) -> Dict[str, int]:
        """Get conversation count by date"""
        where_clause, params = self.get_date_range_filter(date_from, date_to)

        cursor = self.conn.execute(f"""
            SELECT DATE(timestamp) as date, COUNT(*) as count
            FROM conversations
            WHERE {where_clause}
            GROUP BY DATE(timestamp)
            ORDER BY date
        """, params)

        return {row['date']: row['count'] for row in cursor.fetchall()}

    def get_hourly_distribution(self, date_from: Optional[str] = None,
                                date_to: Optional[str] = None) -> Dict[int, int]:
        """Get conversation distribution by hour of day"""
        where_clause, params = self.get_date_range_filter(date_from, date_to)

        cursor = self.conn.execute(f"""
            SELECT
                CAST(strftime('%H', timestamp) AS INTEGER) as hour,
                COUNT(*) as count
            FROM conversations
            WHERE {where_clause}
            GROUP BY hour
            ORDER BY hour
        """, params)

        return {row['hour']: row['count'] for row in cursor.fetchall()}

    def get_weekday_distribution(self, date_from: Optional[str] = None,
                                 date_to: Optional[str] = None) -> Dict[str, int]:
        """Get conversation distribution by day of week

        Returns a dict with all seven weekday names, Monday first; days with
        no conversations map to 0.
        """
        where_clause, params = self.get_date_range_filter(date_from, date_to)

        cursor = self.conn.execute(f"""
            SELECT
                CASE CAST(strftime('%w', timestamp) AS INTEGER)
                    WHEN 0 THEN 'Sunday'
                    WHEN 1 THEN 'Monday'
                    WHEN 2 THEN 'Tuesday'
                    WHEN 3 THEN 'Wednesday'
                    WHEN 4 THEN 'Thursday'
                    WHEN 5 THEN 'Friday'
                    WHEN 6 THEN 'Saturday'
                END as weekday,
                COUNT(*) as count
            FROM conversations
            WHERE {where_clause}
            GROUP BY weekday
        """, params)

        weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                         'Friday', 'Saturday', 'Sunday']
        result = {day: 0 for day in weekday_order}
        for row in cursor.fetchall():
            # strftime() yields NULL for a timestamp SQLite cannot parse;
            # keep that group out of the result instead of adding a None key.
            if row['weekday'] is not None:
                result[row['weekday']] = row['count']

        return result

    def close(self):
        """Close database connection"""
        if self.conn:
            self.conn.close()


class InsightGenerator:
    """Generates insight reports from pattern data

    Owns a ``PatternDetector`` for the given database and renders its results
    as Markdown text. Call ``close()`` when done to release the connection.
    """

    def __init__(self, db_path: Path, templates_dir: Path, verbose: bool = False):
        """Create the detector and, if ``templates_dir`` exists, a Jinja2 env."""
        self.db_path = db_path
        self.templates_dir = templates_dir
        self.verbose = verbose
        self.detector = PatternDetector(db_path, verbose=verbose)

        # Setup Jinja2 environment
        if templates_dir.exists():
            self.jinja_env = Environment(loader=FileSystemLoader(str(templates_dir)))
        else:
            self.jinja_env = None

    def _log(self, message: str):
        """Log if verbose mode is enabled"""
        if self.verbose:
            print(f"[{datetime.now().strftime('%H:%M:%S')}] {message}")

    def _create_ascii_bar_chart(self, data: Dict[str, int], max_width: int = 50) -> str:
        """Create ASCII bar chart

        Renders one line per entry as ``label bar value``; the largest value
        gets a bar of ``max_width`` blocks and the rest scale linearly.
        Returns ``"No data"`` for an empty mapping.
        """
        if not data:
            return "No data"

        max_value = max(data.values())
        lines = []

        for label, value in data.items():
            bar_length = int((value / max_value) * max_width) if max_value > 0 else 0
            bar = "█" * bar_length
            # str() so a non-string label (e.g. None from an unparsable
            # timestamp) cannot break the width format spec.
            lines.append(f"{str(label):15} {bar} {value}")

        return "\n".join(lines)

    def _create_sparkline(self, values: List[int]) -> str:
        """Create sparkline chart

        Maps each value onto one of eight block characters, scaled between
        the minimum and maximum of ``values``. Returns ``""`` for no values
        and a run of the lowest block when all values are equal.
        """
        if not values:
            return ""

        chars = "▁▂▃▄▅▆▇█"
        min_val = min(values)
        max_val = max(values)

        if max_val == min_val:
            return chars[0] * len(values)

        span = max_val - min_val
        top_index = len(chars) - 1
        return "".join(
            chars[int((v - min_val) / span * top_index)] for v in values
        )

    def generate_weekly_report(self, date_from: Optional[str] = None,
                               date_to: Optional[str] = None) -> str:
        """Generate weekly activity report

        When ``date_from`` / ``date_to`` are omitted they default to seven
        days ago and today respectively. Returns the report as Markdown.
        """
        self._log("Generating weekly report...")

        # Auto-calculate date range if not provided
        now = datetime.now()
        if not date_from:
            date_from = (now - timedelta(days=7)).date().isoformat()
        if not date_to:
            date_to = now.date().isoformat()

        # Gather data
        overview = self.detector.get_overview_metrics(date_from, date_to)
        file_hotspots = self.detector.get_file_hotspots(date_from, date_to, limit=10)
        tool_usage = self.detector.get_tool_usage(date_from, date_to)
        topics = self.detector.get_topic_clusters(date_from, date_to)
        timeline = self.detector.get_activity_timeline(date_from, date_to)
        weekday_dist = self.detector.get_weekday_distribution(date_from, date_to)

        # Build report
        report_lines = [
            "# Weekly Insights Report",
            f"**Period:** {date_from} to {date_to}",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}",
            "",
            "## Overview",
            f"- **Total Conversations:** {overview['total_conversations']}",
            f"- **Active Days:** {overview['active_days']}",
            f"- **Total Messages:** {overview['total_messages']}",
            f"- **Avg Messages/Conversation:** {overview['avg_messages_per_conversation']}",
            "",
            "## Activity Timeline",
            "```",
            self._create_ascii_bar_chart(timeline, max_width=40),
            "```",
            "",
            "## Weekday Distribution",
            "```",
            self._create_ascii_bar_chart(weekday_dist, max_width=40),
            "```",
            ""
        ]

        if file_hotspots:
            report_lines.extend([
                "## File Hotspots (Top 10)",
                ""
            ])
            for i, hotspot in enumerate(file_hotspots, 1):
                # One flame per three conversations (rounded up), max three.
                heat = "🔥" * min(3, (hotspot['conversation_count'] + 2) // 3)
                report_lines.append(
                    f"{i}. {heat} **{hotspot['file_path']}** "
                    f"({hotspot['conversation_count']} conversations, "
                    f"R:{hotspot['read_count']} W:{hotspot['write_count']} E:{hotspot['edit_count']})"
                )
            report_lines.append("")

        if tool_usage:
            tool_dict = {t['tool_name']: t['total_uses'] for t in tool_usage[:10]}
            report_lines.extend([
                "## Tool Usage",
                "",
                "```",
                self._create_ascii_bar_chart(tool_dict, max_width=40),
                "```",
                ""
            ])

        if topics:
            topic_dict = {t['topic']: t['count'] for t in topics[:15]}
            report_lines.extend([
                "## Top Topics",
                "",
                "```",
                self._create_ascii_bar_chart(topic_dict, max_width=40),
                "```",
                ""
            ])

        # Insights and recommendations
        report_lines.extend([
            "## Insights & Recommendations",
            ""
        ])

        # Collected separately so "nothing found" is an explicit check rather
        # than an inference from the last line of the report being blank.
        insights = []

        # File hotspot insights
        if file_hotspots and file_hotspots[0]['conversation_count'] >= 5:
            top_file = file_hotspots[0]
            insights.append(
                f"- 🔥 **High Activity File:** `{top_file['file_path']}` was modified in "
                f"{top_file['conversation_count']} conversations. Consider reviewing for refactoring opportunities."
            )

        # Topic insights
        if topics and topics[0]['count'] >= 3:
            top_topic = topics[0]
            insights.append(
                f"- 📌 **Trending Topic:** '{top_topic['topic']}' appeared in {top_topic['count']} conversations. "
                f"This might warrant documentation or team knowledge sharing."
            )

        # Activity pattern insights
        if overview['active_days'] < 3:
            insights.append(
                f"- 📅 **Low Activity:** Only {overview['active_days']} active days this week. "
                f"Consider scheduling regular development sessions."
            )

        if not insights:
            insights.append("- No significant patterns detected this period.")

        report_lines.extend(insights)
        return "\n".join(report_lines)

    def generate_file_heatmap_report(self, date_from: Optional[str] = None,
                                     date_to: Optional[str] = None) -> str:
        """Generate detailed file interaction heatmap

        Lists up to 50 files with their conversation and read/write/edit
        counts. Returns the report as Markdown.
        """
        self._log("Generating file heatmap report...")

        file_hotspots = self.detector.get_file_hotspots(date_from, date_to, limit=50)

        report_lines = [
            "# File Interaction Heatmap",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}",
            "",
            "## File Hotspots",
            ""
        ]

        if not file_hotspots:
            report_lines.append("No file interactions found in the specified period.")
            return "\n".join(report_lines)

        for i, hotspot in enumerate(file_hotspots, 1):
            # One flame per two conversations (rounded up), max five.
            heat_level = min(5, (hotspot['conversation_count'] + 1) // 2)
            heat_emoji = "🔥" * heat_level

            report_lines.extend([
                f"### {i}. {heat_emoji} {hotspot['file_path']}",
                f"- **Conversations:** {hotspot['conversation_count']}",
                f"- **Reads:** {hotspot['read_count']}",
                f"- **Writes:** {hotspot['write_count']}",
                f"- **Edits:** {hotspot['edit_count']}",
                f"- **Total Interactions:** {hotspot['total_interactions']}",
                ""
            ])

        return "\n".join(report_lines)

    def generate_tool_usage_report(self, date_from: Optional[str] = None,
                                   date_to: Optional[str] = None) -> str:
        """Generate tool usage analytics report

        Lists every tool with its total uses, conversation count and share
        of all tool uses. Returns the report as Markdown.
        """
        self._log("Generating tool usage report...")

        tool_usage = self.detector.get_tool_usage(date_from, date_to)

        report_lines = [
            "# Tool Usage Analytics",
            f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M')}",
            "",
            "## Tool Statistics",
            ""
        ]

        if not tool_usage:
            report_lines.append("No tool usage data found.")
            return "\n".join(report_lines)

        total_uses = sum(t['total_uses'] for t in tool_usage)

        for i, tool in enumerate(tool_usage, 1):
            percentage = (tool['total_uses'] / total_uses * 100) if total_uses > 0 else 0
            report_lines.extend([
                f"### {i}. {tool['tool_name']}",
                f"- **Total Uses:** {tool['total_uses']}",
                f"- **Used in Conversations:** {tool['conversation_count']}",
                f"- **Percentage of Total:** {percentage:.1f}%",
                ""
            ])

        return "\n".join(report_lines)

    def close(self):
        """Close connections"""
        self.detector.close()


@click.command()
@click.argument('report_type', type=click.Choice(['weekly', 'file-heatmap', 'tool-usage', 'custom']))
@click.option('--db-path', type=click.Path(), default='.claude/skills/cc-insights/.processed/conversations.db',
              help='SQLite database path')
@click.option('--templates-dir', type=click.Path(), default='.claude/skills/cc-insights/templates',
              help='Templates directory')
@click.option('--date-from', type=str, help='Start date (ISO format)')
@click.option('--date-to', type=str, help='End date (ISO format)')
@click.option('--output', type=click.Path(), help='Save to file (default: stdout)')
@click.option('--verbose', is_flag=True, help='Show detailed logs')
def main(report_type: str, db_path: str, templates_dir: str, date_from: Optional[str],
         date_to: Optional[str], output: Optional[str], verbose: bool):
    """Generate insight reports from conversation data

    \b
    Report types:
      weekly       - Weekly activity summary with metrics
      file-heatmap - File modification heatmap
      tool-usage   - Tool usage analytics
      custom       - Custom report from template
    """
    # NOTE: the docstring above is the --help text; the lone "\b" line tells
    # click not to rewrap the table that follows it.
    db_path = Path(db_path)
    templates_dir = Path(templates_dir)

    if not db_path.exists():
        # Errors go to stderr so they never mix with a report piped from stdout.
        print(f"Error: Database not found at {db_path}", file=sys.stderr)
        sys.exit(1)

    generator = InsightGenerator(db_path, templates_dir, verbose=verbose)

    try:
        # Generate report based on type
        if report_type == 'weekly':
            report = generator.generate_weekly_report(date_from, date_to)
        elif report_type == 'file-heatmap':
            report = generator.generate_file_heatmap_report(date_from, date_to)
        elif report_type == 'tool-usage':
            report = generator.generate_tool_usage_report(date_from, date_to)
        else:
            print("Custom templates not yet implemented", file=sys.stderr)
            sys.exit(1)

        # Output report
        if output:
            # Explicit UTF-8: the report contains block and emoji characters
            # that the platform default encoding may be unable to represent.
            Path(output).write_text(report, encoding="utf-8")
            print(f"✓ Report saved to {output}")
        else:
            print(report)

    finally:
        # Runs on normal completion and on sys.exit() alike.
        generator.close()


if __name__ == '__main__':
    main()