Initial commit

skills/cc-insights/scripts/search-conversations.py · 384 additions · executable file

@@ -0,0 +1,384 @@
#!/usr/bin/env python3
"""
Search Interface for Claude Code Insights

Provides unified search across conversations using semantic (RAG) and keyword search.
Supports filtering by dates, files, and output formatting.
"""

import sqlite3
import json
import sys
from pathlib import Path
from typing import List, Dict, Any, Optional
from datetime import datetime
import click

try:
    from rag_indexer import RAGIndexer
except ImportError:
    print("Error: Cannot import rag_indexer. Ensure it's in the same directory.")
    sys.exit(1)


class ConversationSearch:
    """Unified search interface for conversations"""

    def __init__(self, db_path: Path, embeddings_dir: Path, verbose: bool = False):
        self.db_path = db_path
        self.embeddings_dir = embeddings_dir
        self.verbose = verbose

        # Initialize RAG indexer for semantic search
        self.indexer = RAGIndexer(db_path, embeddings_dir, verbose=verbose)

        # Separate SQLite connection for metadata queries
        self.conn = sqlite3.connect(str(db_path))
        self.conn.row_factory = sqlite3.Row

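    # Note: RAGIndexer is assumed to manage its own ChromaDB client (and
    # possibly its own SQLite handle), which is why close() further down
    # tears down both the indexer and this metadata connection.
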
    def _log(self, message: str):
        """Log if verbose mode is enabled"""
        if self.verbose:
            print(f"[{datetime.now().strftime('%H:%M:%S')}] {message}")

    def _get_conversation_details(self, conversation_id: str) -> Optional[Dict[str, Any]]:
        """Get full conversation details from SQLite"""
        cursor = self.conn.execute("""
            SELECT * FROM conversations WHERE id = ?
        """, (conversation_id,))

        row = cursor.fetchone()
        if not row:
            return None

        return {
            'id': row['id'],
            'timestamp': row['timestamp'],
            'message_count': row['message_count'],
            'user_messages': row['user_messages'],
            'assistant_messages': row['assistant_messages'],
            'files_read': json.loads(row['files_read']) if row['files_read'] else [],
            'files_written': json.loads(row['files_written']) if row['files_written'] else [],
            'files_edited': json.loads(row['files_edited']) if row['files_edited'] else [],
            'tools_used': json.loads(row['tools_used']) if row['tools_used'] else [],
            'topics': json.loads(row['topics']) if row['topics'] else [],
            'first_user_message': row['first_user_message'],
            'last_assistant_message': row['last_assistant_message']
        }

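    # Assumed shape of the `conversations` table (created elsewhere,
    # presumably by conversation-processor.py). This sketch is inferred from
    # the columns read above, not taken from authoritative DDL:
    #
    #   CREATE TABLE conversations (
    #       id TEXT PRIMARY KEY,
    #       timestamp TEXT,               -- ISO 8601 string
    #       message_count INTEGER,
    #       user_messages INTEGER,
    #       assistant_messages INTEGER,
    #       files_read TEXT,              -- JSON-encoded list
    #       files_written TEXT,           -- JSON-encoded list
    #       files_edited TEXT,            -- JSON-encoded list
    #       tools_used TEXT,              -- JSON-encoded list
    #       topics TEXT,                  -- JSON-encoded list
    #       first_user_message TEXT,
    #       last_assistant_message TEXT
    #   );
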
    def semantic_search(
        self,
        query: str,
        limit: int = 10,
        date_from: Optional[str] = None,
        date_to: Optional[str] = None,
        file_pattern: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Perform RAG-based semantic search"""
        self._log(f"Semantic search: '{query}'")

        # TODO: Add ChromaDB filters for dates/files when supported
        results = self.indexer.search(query, n_results=limit * 2)  # Get extra for filtering

        # Enrich with full conversation details
        enriched_results = []
        for result in results:
            details = self._get_conversation_details(result['id'])
            if details:
                # Apply post-search filters
                if date_from and details['timestamp'] < date_from:
                    continue
                if date_to and details['timestamp'] > date_to:
                    continue
                if file_pattern:
                    all_files = details['files_read'] + details['files_written'] + details['files_edited']
                    if not any(file_pattern in f for f in all_files):
                        continue

                enriched_results.append({
                    **result,
                    **details
                })

            if len(enriched_results) >= limit:
                break

        return enriched_results

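    # The raw hits returned by RAGIndexer.search() are assumed to be dicts
    # carrying at least an 'id' key (used for the lookup above) and, when
    # available, a 'similarity' score that format_results() displays.
    # Spreading **details after **result lets the SQLite metadata win on any
    # key collision.
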
    def keyword_search(
        self,
        query: str,
        limit: int = 10,
        date_from: Optional[str] = None,
        date_to: Optional[str] = None,
        file_pattern: Optional[str] = None
    ) -> List[Dict[str, Any]]:
        """Perform SQL-based keyword search"""
        self._log(f"Keyword search: '{query}'")

        # Build SQL query
        conditions = [
            "(first_user_message LIKE ? OR last_assistant_message LIKE ? OR topics LIKE ?)"
        ]
        params = [f"%{query}%", f"%{query}%", f"%{query}%"]

        if date_from:
            conditions.append("timestamp >= ?")
            params.append(date_from)

        if date_to:
            conditions.append("timestamp <= ?")
            params.append(date_to)

        if file_pattern:
            conditions.append(
                "(files_read LIKE ? OR files_written LIKE ? OR files_edited LIKE ?)"
            )
            params.extend([f"%{file_pattern}%"] * 3)

        where_clause = " AND ".join(conditions)

        cursor = self.conn.execute(f"""
            SELECT * FROM conversations
            WHERE {where_clause}
            ORDER BY timestamp DESC
            LIMIT ?
        """, params + [limit])

        results = []
        for row in cursor.fetchall():
            results.append({
                'id': row['id'],
                'timestamp': row['timestamp'],
                'message_count': row['message_count'],
                'user_messages': row['user_messages'],
                'assistant_messages': row['assistant_messages'],
                'files_read': json.loads(row['files_read']) if row['files_read'] else [],
                'files_written': json.loads(row['files_written']) if row['files_written'] else [],
                'files_edited': json.loads(row['files_edited']) if row['files_edited'] else [],
                'tools_used': json.loads(row['tools_used']) if row['tools_used'] else [],
                'topics': json.loads(row['topics']) if row['topics'] else [],
                'first_user_message': row['first_user_message'],
                'last_assistant_message': row['last_assistant_message']
            })

        return results

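    # For example, keyword_search("auth", date_from="2025-10-01") should
    # produce roughly:
    #
    #   SELECT * FROM conversations
    #   WHERE (first_user_message LIKE ? OR last_assistant_message LIKE ? OR topics LIKE ?)
    #     AND timestamp >= ?
    #   ORDER BY timestamp DESC
    #   LIMIT ?
    #
    # with params ['%auth%', '%auth%', '%auth%', '2025-10-01', 10]. Only the
    # column conditions are interpolated into the f-string; every
    # user-supplied value stays bound as a parameter, avoiding SQL injection.
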
    def search_by_file(self, file_pattern: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Find all conversations that touched specific files"""
        self._log(f"File search: '{file_pattern}'")

        cursor = self.conn.execute("""
            SELECT DISTINCT c.*
            FROM conversations c
            JOIN file_interactions fi ON c.id = fi.conversation_id
            WHERE fi.file_path LIKE ?
            ORDER BY c.timestamp DESC
            LIMIT ?
        """, (f"%{file_pattern}%", limit))

        results = []
        for row in cursor.fetchall():
            results.append({
                'id': row['id'],
                'timestamp': row['timestamp'],
                'message_count': row['message_count'],
                'files_read': json.loads(row['files_read']) if row['files_read'] else [],
                'files_written': json.loads(row['files_written']) if row['files_written'] else [],
                'files_edited': json.loads(row['files_edited']) if row['files_edited'] else [],
                'tools_used': json.loads(row['tools_used']) if row['tools_used'] else [],
                'topics': json.loads(row['topics']) if row['topics'] else [],
                'first_user_message': row['first_user_message']
            })

        return results

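    # The join above assumes a `file_interactions` table maintained by the
    # processor, with at least (conversation_id, file_path) columns; an
    # analogous `tool_usage` table with (conversation_id, tool_name) backs
    # search_by_tool() below.
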
    def search_by_tool(self, tool_name: str, limit: int = 10) -> List[Dict[str, Any]]:
        """Find conversations using specific tools"""
        self._log(f"Tool search: '{tool_name}'")

        cursor = self.conn.execute("""
            SELECT DISTINCT c.*
            FROM conversations c
            JOIN tool_usage tu ON c.id = tu.conversation_id
            WHERE tu.tool_name LIKE ?
            ORDER BY c.timestamp DESC
            LIMIT ?
        """, (f"%{tool_name}%", limit))

        results = []
        for row in cursor.fetchall():
            results.append({
                'id': row['id'],
                'timestamp': row['timestamp'],
                'message_count': row['message_count'],
                'tools_used': json.loads(row['tools_used']) if row['tools_used'] else [],
                'topics': json.loads(row['topics']) if row['topics'] else [],
                'first_user_message': row['first_user_message']
            })

        return results

    def format_results(self, results: List[Dict[str, Any]], format: str = 'text') -> str:
        """Format search results"""
        if format == 'json':
            return json.dumps(results, indent=2)

        elif format == 'markdown':
            output = [f"# Search Results ({len(results)} found)\n"]

            for i, result in enumerate(results, 1):
                timestamp = datetime.fromisoformat(result['timestamp']).strftime('%b %d, %Y %H:%M')
                similarity = f"[Similarity: {result['similarity']:.3f}] " if 'similarity' in result else ""

                output.append(f"## {i}. {similarity}{result['id']}")
                output.append(f"**Date:** {timestamp}")
                output.append(f"**Messages:** {result.get('message_count', 'N/A')}")

                if result.get('topics'):
                    output.append(f"**Topics:** {', '.join(result['topics'])}")

                all_files = (result.get('files_read', []) +
                             result.get('files_written', []) +
                             result.get('files_edited', []))
                if all_files:
                    output.append(f"**Files:** {', '.join(all_files[:5])}")
                    if len(all_files) > 5:
                        output.append(f"  _(and {len(all_files) - 5} more)_")

                if result.get('tools_used'):
                    output.append(f"**Tools:** {', '.join(result['tools_used'][:5])}")

                if result.get('first_user_message'):
                    msg = result['first_user_message'][:200]
                    output.append(f"\n**Snippet:** {msg}...")

                output.append("")

            return "\n".join(output)

        else:  # text format
            output = [f"\nFound {len(results)} conversations:\n"]

            for i, result in enumerate(results, 1):
                timestamp = datetime.fromisoformat(result['timestamp']).strftime('%b %d, %Y %H:%M')
                similarity = f"[Similarity: {result['similarity']:.3f}] " if 'similarity' in result else ""

                output.append(f"{i}. {similarity}{result['id']}")
                output.append(f"   Date: {timestamp}")
                output.append(f"   Messages: {result.get('message_count', 'N/A')}")

                if result.get('topics'):
                    output.append(f"   Topics: {', '.join(result['topics'][:3])}")

                all_files = (result.get('files_read', []) +
                             result.get('files_written', []) +
                             result.get('files_edited', []))
                if all_files:
                    output.append(f"   Files: {', '.join(all_files[:3])}")

                if result.get('first_user_message'):
                    msg = result['first_user_message'][:150].replace('\n', ' ')
                    output.append(f"   Preview: {msg}...")

                output.append("")

            return "\n".join(output)

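    # Illustrative text-format entry (values are made up; the similarity
    # prefix appears only on semantic-search hits):
    #
    #   1. [Similarity: 0.812] abc123-def456
    #      Date: Oct 05, 2025 14:30
    #      Messages: 42
    #      Topics: auth, testing, refactoring
    #      Files: src/auth.py, tests/test_auth.py
    #      Preview: how do I fix the login redirect loop...
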
    def close(self):
        """Close connections"""
        self.indexer.close()
        if self.conn:
            self.conn.close()


@click.command()
@click.argument('query', required=False)
@click.option('--db-path', type=click.Path(), default='.claude/skills/cc-insights/.processed/conversations.db',
              help='SQLite database path')
@click.option('--embeddings-dir', type=click.Path(), default='.claude/skills/cc-insights/.processed/embeddings',
              help='ChromaDB embeddings directory')
@click.option('--semantic/--keyword', default=True, help='Use semantic (RAG) or keyword search')
@click.option('--file', type=str, help='Filter by file pattern')
@click.option('--tool', type=str, help='Search by tool name')
@click.option('--date-from', type=str, help='Start date (ISO format)')
@click.option('--date-to', type=str, help='End date (ISO format)')
@click.option('--limit', default=10, help='Maximum results')
@click.option('--format', type=click.Choice(['text', 'json', 'markdown']), default='text', help='Output format')
@click.option('--verbose', is_flag=True, help='Show detailed logs')
def main(query: Optional[str], db_path: str, embeddings_dir: str, semantic: bool, file: Optional[str],
         tool: Optional[str], date_from: Optional[str], date_to: Optional[str], limit: int, format: str, verbose: bool):
"""Search Claude Code conversations
|
||||
|
||||
Examples:
|
||||
|
||||
# Semantic search
|
||||
python search-conversations.py "authentication bugs"
|
||||
|
||||
# Keyword search
|
||||
python search-conversations.py "React optimization" --keyword
|
||||
|
||||
# Filter by file
|
||||
python search-conversations.py "testing" --file "src/components"
|
||||
|
||||
# Search by tool
|
||||
python search-conversations.py --tool "Write"
|
||||
|
||||
# Date range
|
||||
python search-conversations.py "refactoring" --date-from 2025-10-01
|
||||
|
||||
# JSON output
|
||||
python search-conversations.py "deployment" --format json
|
||||
"""
|
||||
    db_path = Path(db_path)
    embeddings_dir = Path(embeddings_dir)

    if not db_path.exists():
        print(f"Error: Database not found at {db_path}")
        print("Run conversation-processor.py first")
        sys.exit(1)

    searcher = ConversationSearch(db_path, embeddings_dir, verbose=verbose)

    try:
        results = []

        if tool:
            # Search by tool
            results = searcher.search_by_tool(tool, limit=limit)

        elif query:
            # Text search. Checked before --file so that a query combined
            # with --file filters search results by file (as documented
            # above); otherwise file_pattern could never be non-None here.
            if semantic:
                results = searcher.semantic_search(
                    query,
                    limit=limit,
                    date_from=date_from,
                    date_to=date_to,
                    file_pattern=file
                )
            else:
                results = searcher.keyword_search(
                    query,
                    limit=limit,
                    date_from=date_from,
                    date_to=date_to,
                    file_pattern=file
                )

        elif file:
            # File-only search
            results = searcher.search_by_file(file, limit=limit)

        else:
            print("Error: Provide a query, --file, or --tool option")
            sys.exit(1)

        # Format and output
        output = searcher.format_results(results, format=format)
        print(output)

    finally:
        searcher.close()


if __name__ == '__main__':
    main()