Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:58:28 +08:00
commit e60768ac8e
10 changed files with 1020 additions and 0 deletions

View File

@@ -0,0 +1,150 @@
---
name: use-goodreads-export
description: Search and query Goodreads library from CSV export. Use when the user asks about books, TBR (to-be-read), reading lists, book searches, or mentions Goodreads. Also use for queries about book ratings, authors, reading status, or library statistics.
---
You are helping the user query their Goodreads library from a CSV
export. Use the script here! DO NOT write your own script
## CSV Location
The Goodreads export CSV is a file typically called:
```
goodreads_library_export.csv
```
You can prompt the user for its location if you can't find it.
## Python Library
A Python library is available at `__SKILL_DIR__/scripts/goodreads_lib.py` that provides:
### Classes
**GoodreadsBook** - Represents a single book with properties:
- `title`, `author`, `series`, `series_index`
- `my_rating` (1-5), `average_rating` (Goodreads rating)
- `num_pages`, `date_read`, `date_added`
- `exclusive_shelf` (e.g., "to-read", "currently-reading")
- `bookshelves` (custom shelves)
- `is_read`, `is_tbr`, `is_currently_reading` (properties)
- `has_shelf(shelf_name)` - Check if on specific shelf
**GoodreadsLibrary** - Main query interface:
```python
from goodreads_lib import GoodreadsLibrary
lib = GoodreadsLibrary() # Loads from default CSV path
# Query methods:
lib.get_read_books(limit=15, sort_by_date=True) # Get read books
lib.get_tbr_books() # Get to-be-read list
lib.get_books_by_shelf('mental-health') # Get books on shelf
lib.get_books_read_in_period(30) # Books read in last 30 days
lib.get_books_read_in_year(2024) # Books read in year
lib.get_books_added_in_period(30) # Recently added books
lib.get_series_books('The Carls') # Books in series
lib.get_all_series() # All series with books
lib.get_incomplete_series() # Series partially read
lib.get_author_stats() # Author statistics
lib.get_rating_distribution() # Rating distribution
lib.query(lambda book: book.num_pages < 300) # Custom queries
```
## Usage Instructions
When the user asks about their Goodreads library:
1. **Determine the query type**: TBR list, read books, statistics, series info, etc.
2. **Write a Python script** using the library:
```python
#!/usr/bin/env python3
import sys
sys.path.insert(0, '__SKILL_DIR__/scripts')
from goodreads_lib import GoodreadsLibrary
lib = GoodreadsLibrary()
# Your query logic here
```
3. **Use the Bash tool** to run your script
4. **Format results** nicely for the user
## Common Query Patterns
### TBR List
```python
tbr = lib.get_tbr_books()
for book in tbr[:10]:
print(f"- {book.title} by {book.author}")
```
### Recent Reads
```python
recent = lib.get_read_books(limit=15)
for book in recent:
print(f"- {book.title} by {book.author} ({book.date_read.strftime('%Y-%m-%d')})")
```
### Books on Specific Shelf
```python
books = lib.get_books_by_shelf('favorites')
for book in books:
print(f"- {book.title} by {book.author} (⭐ {book.my_rating}/5)")
```
### Series Analysis
```python
incomplete = lib.get_incomplete_series()
for series_name, info in incomplete.items():
print(f"{series_name}: {info['read_count']}/{info['total_count']} read")
if info['next_book']:
print(f" Next: {info['next_book'].title}")
```
### Reading Statistics
```python
books_2024 = lib.get_books_read_in_year(2024)
pages_2024 = sum(b.num_pages or 0 for b in books_2024)
print(f"Books read in 2024: {len(books_2024)} ({pages_2024:,} pages)")
```
### Highly Rated Unread Books
```python
tbr = lib.get_tbr_books()
highly_rated = sorted(
[b for b in tbr if b.average_rating and b.average_rating >= 4.0],
key=lambda b: b.average_rating,
reverse=True
)
for book in highly_rated[:10]:
print(f"- {book.title} by {book.author} ({book.average_rating:.2f}⭐)")
```
## Important Notes
- The CSV is read-only - no modifications to the Goodreads library
- Series information is parsed from book titles (e.g., "Title (Series, #1)")
- Date Read determines if a book has been read
- Exclusive Shelf contains values like "to-read", "currently-reading", "mental-health", "favorites"
- Users may have custom shelves in the Bookshelves field
- Handle missing data gracefully (not all books have all fields)
- Always use proper Python error handling when accessing optional fields
## Troubleshooting
If you get import errors, ensure the script includes:
```python
import sys
sys.path.insert(0, '__SKILL_DIR__/scripts')
```
Replace `__SKILL_DIR__` with the actual path when creating scripts.
Important! You have a very serious bug, where you don't know how to find
the python scripts added by a skill. You must look in the "scripts"
folder of where this SKILL.md is located!!

View File

@@ -0,0 +1,243 @@
#!/usr/bin/env python3
"""Library for parsing and querying Goodreads CSV exports."""
import csv
import os
from datetime import datetime, timedelta
from pathlib import Path
from typing import List, Dict, Optional, Callable
import re
class GoodreadsBook:
"""Represents a book from Goodreads CSV export."""
def __init__(self, row: Dict[str, str]):
self.book_id = row.get('Book Id', '')
self.title = row.get('Title', '')
self.author = row.get('Author', '')
self.author_lf = row.get('Author l-f', '')
self.additional_authors = row.get('Additional Authors', '')
self.isbn = self._clean_isbn(row.get('ISBN', ''))
self.isbn13 = self._clean_isbn(row.get('ISBN13', ''))
self.my_rating = self._parse_int(row.get('My Rating', ''))
self.average_rating = self._parse_float(row.get('Average Rating', ''))
self.publisher = row.get('Publisher', '')
self.binding = row.get('Binding', '')
self.num_pages = self._parse_int(row.get('Number of Pages', ''))
self.year_published = self._parse_int(row.get('Year Published', ''))
self.original_publication_year = self._parse_int(row.get('Original Publication Year', ''))
self.date_read = self._parse_date(row.get('Date Read', ''))
self.date_added = self._parse_date(row.get('Date Added', ''))
self.bookshelves = row.get('Bookshelves', '')
self.bookshelves_with_positions = row.get('Bookshelves with positions', '')
self.exclusive_shelf = row.get('Exclusive Shelf', '')
self.my_review = row.get('My Review', '')
self.spoiler = row.get('Spoiler', '')
self.private_notes = row.get('Private Notes', '')
self.read_count = self._parse_int(row.get('Read Count', ''))
self.owned_copies = self._parse_int(row.get('Owned Copies', ''))
# Parse series information from title
self.series, self.series_index = self._parse_series()
def _clean_isbn(self, isbn: str) -> str:
"""Remove Excel formatting from ISBN."""
if isbn.startswith('="') and isbn.endswith('"'):
return isbn[2:-1]
return isbn
def _parse_int(self, value: str) -> Optional[int]:
"""Parse integer value, return None if empty or invalid."""
if not value or value == '':
return None
try:
return int(value)
except ValueError:
return None
def _parse_float(self, value: str) -> Optional[float]:
"""Parse float value, return None if empty or invalid."""
if not value or value == '':
return None
try:
return float(value)
except ValueError:
return None
def _parse_date(self, value: str) -> Optional[datetime]:
"""Parse date in YYYY/MM/DD format."""
if not value or value == '':
return None
try:
return datetime.strptime(value, '%Y/%m/%d')
except ValueError:
return None
def _parse_series(self) -> tuple[Optional[str], Optional[float]]:
"""Extract series name and number from title.
Examples:
- "An Absolutely Remarkable Thing (The Carls, #1)" -> ("The Carls", 1.0)
- "The Three-Body Problem (Remembrance of Earth's Past, #1)" -> ("Remembrance of Earth's Past", 1.0)
"""
# Match pattern: (Series Name, #Number)
match = re.search(r'\(([^,]+),\s*#([\d.]+)\)$', self.title)
if match:
series_name = match.group(1).strip()
try:
series_index = float(match.group(2))
return series_name, series_index
except ValueError:
return series_name, None
return None, None
@property
def is_read(self) -> bool:
"""Check if book has been read."""
return self.date_read is not None
@property
def is_tbr(self) -> bool:
"""Check if book is in to-be-read list."""
return 'to-read' in self.exclusive_shelf
@property
def is_currently_reading(self) -> bool:
"""Check if currently reading."""
return 'currently-reading' in self.exclusive_shelf
def has_shelf(self, shelf_name: str) -> bool:
"""Check if book is on a specific shelf."""
return shelf_name in self.bookshelves or shelf_name in self.exclusive_shelf
def __repr__(self):
return f"<GoodreadsBook: {self.title} by {self.author}>"
class GoodreadsLibrary:
"""Main class for querying Goodreads library from CSV."""
def __init__(self, csv_path: Optional[str] = None):
"""Initialize library from CSV file.
Args:
csv_path: Path to goodreads_library_export.csv
Defaults to ~/Drive/Claude/books/goodreads_library_export.csv
"""
if csv_path is None:
csv_path = os.path.expanduser('~/Drive/Claude/books/goodreads_library_export.csv')
self.csv_path = csv_path
self.books: List[GoodreadsBook] = []
self._load_books()
def _load_books(self):
"""Load books from CSV file."""
with open(self.csv_path, 'r', encoding='utf-8') as f:
reader = csv.DictReader(f)
for row in reader:
self.books.append(GoodreadsBook(row))
def query(self, filter_func: Callable[[GoodreadsBook], bool]) -> List[GoodreadsBook]:
"""Query books with a custom filter function."""
return [book for book in self.books if filter_func(book)]
def get_read_books(self, limit: Optional[int] = None,
sort_by_date: bool = True) -> List[GoodreadsBook]:
"""Get all read books, optionally sorted by date read."""
books = [book for book in self.books if book.is_read]
if sort_by_date:
books.sort(key=lambda b: b.date_read or datetime.min, reverse=True)
if limit:
books = books[:limit]
return books
def get_tbr_books(self) -> List[GoodreadsBook]:
"""Get all to-be-read books."""
return [book for book in self.books if book.is_tbr]
def get_books_by_shelf(self, shelf_name: str) -> List[GoodreadsBook]:
"""Get all books on a specific shelf."""
return [book for book in self.books if book.has_shelf(shelf_name)]
def get_books_read_in_period(self, days: int) -> List[GoodreadsBook]:
"""Get books read in the last N days."""
cutoff = datetime.now() - timedelta(days=days)
return [book for book in self.books
if book.date_read and book.date_read >= cutoff]
def get_books_read_in_year(self, year: int) -> List[GoodreadsBook]:
"""Get books read in a specific year."""
return [book for book in self.books
if book.date_read and book.date_read.year == year]
def get_books_added_in_period(self, days: int) -> List[GoodreadsBook]:
"""Get books added to library in the last N days."""
cutoff = datetime.now() - timedelta(days=days)
return [book for book in self.books
if book.date_added and book.date_added >= cutoff]
def get_series_books(self, series_name: str) -> List[GoodreadsBook]:
"""Get all books in a series, sorted by series index."""
books = [book for book in self.books if book.series == series_name]
books.sort(key=lambda b: b.series_index or 0)
return books
def get_all_series(self) -> Dict[str, List[GoodreadsBook]]:
"""Get all series with their books."""
series_dict = {}
for book in self.books:
if book.series:
if book.series not in series_dict:
series_dict[book.series] = []
series_dict[book.series].append(book)
# Sort books within each series
for series in series_dict:
series_dict[series].sort(key=lambda b: b.series_index or 0)
return series_dict
def get_incomplete_series(self) -> Dict[str, Dict]:
"""Get series where at least one book is read but not all."""
all_series = self.get_all_series()
incomplete = {}
for series_name, books in all_series.items():
read_count = sum(1 for b in books if b.is_read)
total_count = len(books)
if read_count > 0 and read_count < total_count:
# Find next unread book
next_unread = None
for book in books:
if not book.is_read:
next_unread = book
break
incomplete[series_name] = {
'books': books,
'read_count': read_count,
'total_count': total_count,
'next_book': next_unread
}
return incomplete
def get_author_stats(self) -> List[tuple[str, int]]:
"""Get author statistics (author, book count) sorted by count."""
author_counts = {}
for book in self.books:
if book.is_read:
author_counts[book.author] = author_counts.get(book.author, 0) + 1
return sorted(author_counts.items(), key=lambda x: x[1], reverse=True)
def get_rating_distribution(self) -> Dict[int, int]:
"""Get distribution of user ratings."""
dist = {1: 0, 2: 0, 3: 0, 4: 0, 5: 0}
for book in self.books:
if book.is_read and book.my_rating:
dist[book.my_rating] = dist.get(book.my_rating, 0) + 1
return dist