Initial commit
This commit is contained in:
169
skills/obsidian-manager/scripts/search-readwise.py
Normal file
169
skills/obsidian-manager/scripts/search-readwise.py
Normal file
@@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env python3
|
||||
# /// script
|
||||
# dependencies = ["python-frontmatter", "pyyaml"]
|
||||
# ///
|
||||
"""
|
||||
Search Readwise highlights by topic, author, or category.
|
||||
|
||||
Usage:
|
||||
uv run search-readwise.py "query" [--author "name"] [--category articles|books|podcasts|tweets]
|
||||
|
||||
Arguments:
|
||||
query - Search term or phrase
|
||||
--author - Filter by author name
|
||||
--category - Filter by category (articles, books, podcasts, tweets)
|
||||
|
||||
Examples:
|
||||
uv run search-readwise.py "second brain"
|
||||
uv run search-readwise.py "AI" --author "Andrej Karpathy"
|
||||
uv run search-readwise.py "habits" --category books
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
import frontmatter
|
||||
|
||||
VAULT_PATH = Path.home() / "Library/Mobile Documents/iCloud~md~obsidian/Documents/Personal_Notes"
|
||||
READWISE_PATH = VAULT_PATH / "Readwise"
|
||||
|
||||
|
||||
def search_readwise(query: str, author: str = "", category: str = "", limit: int = 15) -> list[dict]:
|
||||
"""Search Readwise highlights."""
|
||||
results = []
|
||||
|
||||
# Determine search paths based on category
|
||||
if category:
|
||||
category_map = {
|
||||
"articles": "Articles",
|
||||
"books": "Books",
|
||||
"podcasts": "Podcasts",
|
||||
"tweets": "Tweets"
|
||||
}
|
||||
search_paths = [READWISE_PATH / category_map.get(category.lower(), category)]
|
||||
else:
|
||||
search_paths = [
|
||||
READWISE_PATH / "Articles",
|
||||
READWISE_PATH / "Books",
|
||||
READWISE_PATH / "Podcasts",
|
||||
READWISE_PATH / "Tweets"
|
||||
]
|
||||
|
||||
pattern = re.compile(re.escape(query), re.IGNORECASE)
|
||||
author_pattern = re.compile(re.escape(author), re.IGNORECASE) if author else None
|
||||
|
||||
for search_path in search_paths:
|
||||
if not search_path.exists():
|
||||
continue
|
||||
|
||||
for md_file in search_path.rglob("*.md"):
|
||||
try:
|
||||
post = frontmatter.load(md_file)
|
||||
content = post.content
|
||||
metadata = post.metadata
|
||||
|
||||
# Filter by author if specified
|
||||
if author_pattern:
|
||||
file_author = metadata.get("Author", "")
|
||||
if not author_pattern.search(file_author):
|
||||
continue
|
||||
|
||||
# Search in content
|
||||
matches = list(pattern.finditer(content))
|
||||
if not matches:
|
||||
continue
|
||||
|
||||
# Extract highlights (lines starting with - or >)
|
||||
highlights = []
|
||||
for line in content.split('\n'):
|
||||
line = line.strip()
|
||||
if (line.startswith('-') or line.startswith('>')) and pattern.search(line):
|
||||
highlights.append(line[:200])
|
||||
|
||||
# Get context around first match if no highlight found
|
||||
if not highlights and matches:
|
||||
first_match = matches[0]
|
||||
start = max(0, first_match.start() - 50)
|
||||
end = min(len(content), first_match.end() + 150)
|
||||
excerpt = content[start:end].strip()
|
||||
excerpt = re.sub(r'\s+', ' ', excerpt)
|
||||
highlights = [excerpt]
|
||||
|
||||
results.append({
|
||||
"file": str(md_file.relative_to(VAULT_PATH)),
|
||||
"title": metadata.get("Full Title", md_file.stem),
|
||||
"author": metadata.get("Author", "Unknown"),
|
||||
"category": search_path.name,
|
||||
"matches": len(matches),
|
||||
"highlights": highlights[:3] # Top 3 matching highlights
|
||||
})
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
continue
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
# Sort by number of matches
|
||||
results.sort(key=lambda x: x["matches"], reverse=True)
|
||||
return results[:limit]
|
||||
|
||||
|
||||
def main():
|
||||
if len(sys.argv) < 2:
|
||||
print("Usage: uv run search-readwise.py \"query\" [--author \"name\"] [--category type]")
|
||||
sys.exit(1)
|
||||
|
||||
query = sys.argv[1]
|
||||
author = ""
|
||||
category = ""
|
||||
|
||||
# Parse arguments
|
||||
i = 2
|
||||
while i < len(sys.argv):
|
||||
if sys.argv[i] == "--author" and i + 1 < len(sys.argv):
|
||||
author = sys.argv[i + 1]
|
||||
i += 2
|
||||
elif sys.argv[i] == "--category" and i + 1 < len(sys.argv):
|
||||
category = sys.argv[i + 1]
|
||||
i += 2
|
||||
else:
|
||||
i += 1
|
||||
|
||||
filters = []
|
||||
if author:
|
||||
filters.append(f"author: {author}")
|
||||
if category:
|
||||
filters.append(f"category: {category}")
|
||||
filter_str = f" ({', '.join(filters)})" if filters else ""
|
||||
|
||||
print(f"Searching Readwise for '{query}'{filter_str}...")
|
||||
results = search_readwise(query, author, category)
|
||||
|
||||
if not results:
|
||||
print(f"\nNo Readwise highlights found for '{query}'")
|
||||
return
|
||||
|
||||
print(f"\nFound {len(results)} sources:\n")
|
||||
|
||||
for i, result in enumerate(results, 1):
|
||||
print(f"{i}. **{result['title']}**")
|
||||
print(f" Author: {result['author']} | Category: {result['category']} | {result['matches']} matches")
|
||||
print(f" File: {result['file']}")
|
||||
if result['highlights']:
|
||||
print(f" Highlights:")
|
||||
for h in result['highlights']:
|
||||
print(f" - {h[:150]}{'...' if len(h) > 150 else ''}")
|
||||
print()
|
||||
|
||||
# Output JSON for programmatic use
|
||||
print(f"\n{json.dumps({'results': results, 'total': len(results)})}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user