Initial commit

Zhongwei Li
2025-11-30 09:06:02 +08:00
commit 02cab85880
53 changed files with 12367 additions and 0 deletions


@@ -0,0 +1,219 @@
#!/usr/bin/env python3
"""PocketBase data export helper with admin auth, pagination, filters, and NDJSON support."""

import argparse
import json
from datetime import datetime, timezone
from getpass import getpass
from pathlib import Path
from typing import Dict, Iterable, List, Optional

import requests

DEFAULT_BATCH_SIZE = 200
REQUEST_TIMEOUT = 30


def authenticate(base_url: str, email: Optional[str], password: Optional[str]) -> Dict[str, str]:
    if not email:
        return {}
    if not password:
        password = getpass(prompt="Admin password: ")
    response = requests.post(
        f"{base_url}/api/admins/auth-with-password",
        json={"identity": email, "password": password},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    token = response.json().get("token")
    if not token:
        raise RuntimeError("Authentication response missing token")
    return {"Authorization": f"Bearer {token}"}


def list_collections(base_url: str, headers: Dict[str, str]) -> List[Dict]:
    collections: List[Dict] = []
    page = 1
    while True:
        response = requests.get(
            f"{base_url}/api/collections",
            params={"page": page, "perPage": 200},
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        payload = response.json()
        items = payload.get("items", [])
        collections.extend(items)
        total = payload.get("totalItems", len(collections))
        if page * 200 >= total or not items:
            break
        page += 1
    return collections


def filter_collections(
    collections: Iterable[Dict],
    include: Optional[List[str]],
    exclude: Optional[List[str]],
    include_system: bool,
) -> List[Dict]:
    include_set = {name.strip() for name in include or [] if name.strip()}
    exclude_set = {name.strip() for name in exclude or [] if name.strip()}
    filtered: List[Dict] = []
    for collection in collections:
        name = collection.get("name")
        if not name:
            continue
        if include_set and name not in include_set:
            continue
        if name in exclude_set:
            continue
        if not include_system and collection.get("system"):
            continue
        filtered.append(collection)
    filtered.sort(key=lambda c: c.get("name", ""))
    return filtered


def export_collection(
    base_url: str,
    collection: Dict,
    headers: Dict[str, str],
    output_dir: Path,
    batch_size: int,
    fmt: str,
) -> int:
    name = collection["name"]
    output_dir.mkdir(parents=True, exist_ok=True)
    total_written = 0
    file_ext = "ndjson" if fmt == "ndjson" else "json"
    output_path = output_dir / f"{name}.{file_ext}"
    records_url = f"{base_url}/api/collections/{name}/records"
    with output_path.open("w", encoding="utf-8") as handle:
        page = 1
        aggregated: List[Dict] = []
        while True:
            response = requests.get(
                records_url,
                params={"page": page, "perPage": batch_size},
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            payload = response.json()
            items = payload.get("items", [])
            if not items:
                break
            if fmt == "ndjson":
                for item in items:
                    handle.write(json.dumps(item, ensure_ascii=False))
                    handle.write("\n")
            else:
                aggregated.extend(items)
            total_written += len(items)
            total_items = payload.get("totalItems")
            if total_items and total_written >= total_items:
                break
            page += 1
        if fmt == "json":
            json.dump(
                {
                    "collection": name,
                    # Stamp the actual export time; the collection's "updated"
                    # field describes the schema, not this dump.
                    "exportedAt": datetime.now(timezone.utc).isoformat(),
                    "items": aggregated,
                },
                handle,
                ensure_ascii=False,
                indent=2,
            )
    return total_written


def build_manifest(output_dir: Path, manifest: List[Dict]):
    if not manifest:
        return
    (output_dir / "manifest.json").write_text(
        json.dumps(manifest, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Export PocketBase collections")
    parser.add_argument("base_url", help="PocketBase base URL, e.g. http://127.0.0.1:8090")
    parser.add_argument(
        "output_dir",
        nargs="?",
        default="pocketbase_export",
        help="Directory to write exported files",
    )
    parser.add_argument("--email", help="Admin email for authentication")
    parser.add_argument("--password", help="Admin password (omit to prompt)")
    parser.add_argument(
        "--collections",
        help="Comma-separated collection names to export",
    )
    parser.add_argument(
        "--exclude",
        help="Comma-separated collection names to skip",
    )
    parser.add_argument(
        "--include-system",
        action="store_true",
        help="Include system collections (default: skip)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=DEFAULT_BATCH_SIZE,
        help="Records per request (default: 200)",
    )
    parser.add_argument(
        "--format",
        choices=["json", "ndjson"],
        default="json",
        help="Output format per collection",
    )
    return parser.parse_args()


def main():
    args = parse_args()
    base_url = args.base_url.rstrip("/")
    output_dir = Path(args.output_dir)
    headers = authenticate(base_url, args.email, args.password)
    collections = list_collections(base_url, headers)
    include = args.collections.split(",") if args.collections else None
    exclude = args.exclude.split(",") if args.exclude else None
    filtered = filter_collections(collections, include, exclude, args.include_system)
    if not filtered:
        raise RuntimeError("No collections selected for export")
    manifest: List[Dict] = []
    for collection in filtered:
        name = collection["name"]
        count = export_collection(
            base_url,
            collection,
            headers,
            output_dir,
            max(args.batch_size, 1),
            args.format,
        )
        manifest.append({"collection": name, "records": count})
        print(f"Exported {name}: {count} records")
    build_manifest(output_dir, manifest)
    print(f"Completed export to {output_dir.resolve()}")


if __name__ == "__main__":
    main()
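
Example invocations for the export helper, for reference; the commit page does not show file names, so pb_export.py below is an assumed name:

# Export every non-system collection as pretty-printed JSON
python3 pb_export.py http://127.0.0.1:8090 ./backup --email admin@example.com

# Export two collections as NDJSON with larger request pages
python3 pb_export.py http://127.0.0.1:8090 ./backup --email admin@example.com \
    --collections posts,users --format ndjson --batch-size 500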


@@ -0,0 +1,339 @@
#!/usr/bin/env python3
"""PocketBase data import helper with admin auth, batching, optional upsert, and dry-run."""

import argparse
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from getpass import getpass
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Optional, Tuple

import requests

REQUEST_TIMEOUT = 30
DEFAULT_BATCH_SIZE = 100
DROP_KEYS = {"id", "created", "updated", "@collectionId", "@collectionName", "@expand"}


def authenticate(base_url: str, email: Optional[str], password: Optional[str]) -> Dict[str, str]:
    if not email:
        return {}
    if not password:
        password = getpass(prompt="Admin password: ")
    response = requests.post(
        f"{base_url}/api/admins/auth-with-password",
        json={"identity": email, "password": password},
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    token = response.json().get("token")
    if not token:
        raise RuntimeError("Authentication response missing token")
    return {"Authorization": f"Bearer {token}"}


def list_collections(base_url: str, headers: Dict[str, str]) -> Dict[str, Dict]:
    collections: Dict[str, Dict] = {}
    page = 1
    while True:
        response = requests.get(
            f"{base_url}/api/collections",
            params={"page": page, "perPage": 200},
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        payload = response.json()
        items = payload.get("items", [])
        for item in items:
            if item.get("name"):
                collections[item["name"]] = item
        total = payload.get("totalItems", len(collections))
        if page * 200 >= total or not items:
            break
        page += 1
    return collections


def chunked(iterable: Iterable[Dict], size: int) -> Iterator[List[Dict]]:
    chunk: List[Dict] = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) >= size:
            yield chunk
            chunk = []
    if chunk:
        yield chunk


def iter_ndjson(file_path: Path) -> Iterator[Dict]:
    with file_path.open("r", encoding="utf-8") as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            yield json.loads(line)


def load_json_records(file_path: Path) -> Tuple[List[Dict], Optional[str]]:
    with file_path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    if isinstance(payload, dict):
        return payload.get("items", []), payload.get("collection")
    if isinstance(payload, list):
        return payload, None
    raise ValueError(f"Unsupported JSON structure in {file_path}")


def clean_record(record: Dict) -> Dict:
    return {k: v for k, v in record.items() if k not in DROP_KEYS}


def prepend_items(items: Iterable[Dict], iterator: Iterator[Dict]) -> Iterator[Dict]:
    for item in items:
        yield item
    for item in iterator:
        yield item


def build_filter(field: str, value) -> str:
    if value is None:
        return f"{field} = null"
    if isinstance(value, bool):
        return f"{field} = {str(value).lower()}"
    if isinstance(value, (int, float)):
        return f"{field} = {value}"
    escaped = str(value).replace("\"", r"\"")
    return f'{field} = "{escaped}"'
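

# Illustrative build_filter outputs (example values, not taken from the commit):
#   build_filter("email", "a@b.com") -> 'email = "a@b.com"'
#   build_filter("active", True)     -> 'active = true'
#   build_filter("age", None)        -> 'age = null'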


def request_with_retry(
    session: requests.Session,
    method: str,
    url: str,
    *,
    retries: int = 3,
    backoff: float = 1.0,
    **kwargs,
) -> requests.Response:
    last_response: Optional[requests.Response] = None
    for attempt in range(retries):
        response = session.request(method, url, timeout=REQUEST_TIMEOUT, **kwargs)
        status = response.status_code
        # Back off exponentially on rate-limit/unavailable responses.
        if status in {429, 503} and attempt < retries - 1:
            time.sleep(backoff)
            backoff = min(backoff * 2, 8)
            last_response = response
            continue
        if status >= 400:
            response.raise_for_status()
        return response
    assert last_response is not None
    last_response.raise_for_status()


def find_existing(
    base_url: str,
    collection: str,
    field: str,
    value,
    headers: Dict[str, str],
) -> Optional[Dict]:
    session = requests.Session()
    try:
        response = request_with_retry(
            session,
            "get",
            f"{base_url}/api/collections/{collection}/records",
            headers=headers,
            params={
                "page": 1,
                "perPage": 1,
                "filter": build_filter(field, value),
                "skipTotal": 1,
            },
        )
        items = response.json().get("items", [])
        if items:
            return items[0]
        return None
    finally:
        session.close()


def process_record(
    base_url: str,
    collection: str,
    record: Dict,
    headers: Dict[str, str],
    upsert_field: Optional[str],
    dry_run: bool,
) -> Tuple[bool, Optional[str]]:
    data = clean_record(record)
    if dry_run:
        return True, None
    session = requests.Session()
    try:
        url = f"{base_url}/api/collections/{collection}/records"
        if upsert_field and upsert_field in record:
            existing = find_existing(base_url, collection, upsert_field, record.get(upsert_field), headers)
            if existing:
                record_id = existing.get("id")
                if record_id:
                    response = request_with_retry(
                        session,
                        "patch",
                        f"{url}/{record_id}",
                        headers=headers,
                        json=data,
                    )
                    return response.ok, None
        response = request_with_retry(
            session,
            "post",
            url,
            headers=headers,
            json=data,
        )
        return response.status_code in {200, 201}, None
    except requests.HTTPError as exc:
        return False, f"HTTP {exc.response.status_code}: {exc.response.text[:200]}"
    except Exception as exc:  # noqa: BLE001
        return False, str(exc)
    finally:
        session.close()


def parse_upsert(args: argparse.Namespace) -> Dict[str, str]:
    mapping: Dict[str, str] = {}
    for item in args.upsert or []:
        if "=" not in item:
            raise ValueError(f"Invalid upsert mapping '{item}'. Use collection=field or *=field")
        collection, field = item.split("=", 1)
        mapping[collection.strip()] = field.strip()
    return mapping


def infer_collection(file_path: Path, first_record: Optional[Dict]) -> str:
    if first_record and first_record.get("@collectionName"):
        return first_record["@collectionName"]
    return file_path.stem


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Import PocketBase data dumps")
    parser.add_argument("base_url", help="PocketBase base URL, e.g. http://127.0.0.1:8090")
    parser.add_argument("input_path", help="Directory or file with export data")
    parser.add_argument("--email", help="Admin email for authentication")
    parser.add_argument("--password", help="Admin password (omit to prompt)")
    parser.add_argument("--collections", help="Comma-separated collections to include")
    parser.add_argument("--exclude", help="Comma-separated collections to skip")
    parser.add_argument("--upsert", action="append", help="collection=field mapping (use *=field for default)")
    parser.add_argument("--batch-size", type=int, default=DEFAULT_BATCH_SIZE, help="Records per batch")
    parser.add_argument("--concurrency", type=int, default=4, help="Concurrent workers per batch")
    parser.add_argument("--throttle", type=float, default=0.0, help="Seconds to sleep between batches")
    parser.add_argument("--dry-run", action="store_true", help="Parse files without writing to PocketBase")
    parser.add_argument("--skip-missing", action="store_true", help="Skip files whose collections do not exist")
    return parser.parse_args()


def main():
    args = parse_args()
    base_url = args.base_url.rstrip("/")
    input_path = Path(args.input_path)
    if not input_path.exists():
        raise SystemExit(f"Input path {input_path} does not exist")
    headers = authenticate(base_url, args.email, args.password)
    collections = list_collections(base_url, headers)
    include = {c.strip() for c in args.collections.split(",")} if args.collections else None
    exclude = {c.strip() for c in args.exclude.split(",")} if args.exclude else set()
    upsert_map = parse_upsert(args)
    if input_path.is_file():
        files = [input_path]
    else:
        files = sorted(
            p for p in input_path.iterdir() if p.is_file() and p.suffix.lower() in {".json", ".ndjson"}
        )
    if not files:
        raise SystemExit("No data files found")
    for file_path in files:
        if file_path.stem == "manifest":
            continue
        if file_path.suffix.lower() == ".ndjson":
            # Peek at the first record so the collection can be inferred,
            # then stitch it back onto the stream.
            iterator = iter_ndjson(file_path)
            peeked: List[Dict] = []
            try:
                first_record = next(iterator)
                peeked.append(first_record)
            except StopIteration:
                print(f"Skipping {file_path.name}: no records")
                continue
            source_iter = prepend_items(peeked, iterator)
            meta_collection = None
        else:
            records, meta_collection = load_json_records(file_path)
            if not records:
                print(f"Skipping {file_path.name}: no records")
                continue
            first_record = records[0]
            source_iter = iter(records)
        collection = meta_collection or infer_collection(file_path, first_record)
        if include and collection not in include:
            continue
        if collection in exclude:
            continue
        if collection not in collections:
            if args.skip_missing:
                print(f"Skipping {file_path.name}: collection '{collection}' not found")
                continue
            raise SystemExit(f"Collection '{collection}' not found in PocketBase")
        print(f"Importing {file_path.name} -> {collection}")
        total = success = 0
        failures: List[str] = []
        field = upsert_map.get(collection, upsert_map.get("*"))
        for batch in chunked(source_iter, max(args.batch_size, 1)):
            workers = max(args.concurrency, 1)
            if workers == 1:
                for record in batch:
                    ok, error = process_record(base_url, collection, record, headers, field, args.dry_run)
                    total += 1
                    success += int(ok)
                    if not ok and error:
                        failures.append(error)
            else:
                with ThreadPoolExecutor(max_workers=workers) as executor:
                    futures = {
                        executor.submit(
                            process_record,
                            base_url,
                            collection,
                            record,
                            headers,
                            field,
                            args.dry_run,
                        ): record
                        for record in batch
                    }
                    for future in as_completed(futures):
                        ok, error = future.result()
                        total += 1
                        success += int(ok)
                        if not ok and error:
                            failures.append(error)
            if args.throttle > 0:
                time.sleep(args.throttle)
        print(f"  {success}/{total} records processed")
        if failures:
            print(f"  {len(failures)} failures (showing up to 3):")
            for message in failures[:3]:
                print(f"    - {message}")


if __name__ == "__main__":
    main()
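
Likewise, a couple of invocation sketches for the importer; pb_import.py and the field names (slug, email) are assumptions:

# Dry-run first: parse and count records without writing anything
python3 pb_import.py http://127.0.0.1:8090 ./backup --email admin@example.com --dry-run

# Upsert posts on slug, every other collection on email, with more workers
python3 pb_import.py http://127.0.0.1:8090 ./backup --email admin@example.com \
    --upsert posts=slug --upsert '*=email' --concurrency 8 --throttle 0.5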


@@ -0,0 +1,60 @@
#!/bin/bash
# PocketBase Docker Setup Script
# Quickly spin up a PocketBase instance with Docker
set -e
echo "🚀 Setting up PocketBase with Docker..."
echo "========================================"
# Configuration
CONTAINER_NAME="pocketbase"
PORT="8090"
DATA_DIR="./pb_data"
# Check if Docker is installed
if ! command -v docker &> /dev/null; then
    echo "❌ Docker is not installed. Please install Docker first."
    echo "   Visit: https://docs.docker.com/get-docker/"
    exit 1
fi
# Stop and remove existing container if it exists
# Match the container name exactly; a bare grep would also match names
# that merely contain "pocketbase".
if docker ps -a --format '{{.Names}}' | grep -qx "$CONTAINER_NAME"; then
    echo "⚠️  Stopping existing PocketBase container..."
    docker stop "$CONTAINER_NAME" > /dev/null 2>&1
    docker rm "$CONTAINER_NAME" > /dev/null 2>&1
fi
# Create data directory
echo "📁 Creating data directory: $DATA_DIR"
mkdir -p "$DATA_DIR"
# Start new container
echo "🐳 Starting PocketBase container..."
docker run -d \
    --name "$CONTAINER_NAME" \
    -p "$PORT:8090" \
    -v "$DATA_DIR:/pb/pb_data" \
    ghcr.io/pocketbase/pocketbase:latest serve --http=0.0.0.0:8090
echo "========================================"
echo "✅ PocketBase is starting up!"
echo ""
echo "🌐 Admin UI: http://localhost:$PORT/_/"
echo "📖 API Docs: http://localhost:$PORT/api/docs"
echo "📁 Data directory: $DATA_DIR"
echo ""
echo "To view logs: docker logs -f $CONTAINER_NAME"
echo "To stop: docker stop $CONTAINER_NAME"
echo ""
echo "⏳ Waiting for PocketBase to be ready..."
sleep 3
# Check if container is running
if docker ps --format '{{.Names}}' | grep -qx "$CONTAINER_NAME"; then
    echo "✅ PocketBase is running successfully!"
else
    echo "❌ Something went wrong. Check logs with: docker logs $CONTAINER_NAME"
    exit 1
fi
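
Once the script reports success, the instance can also be probed from the shell via PocketBase's health endpoint:

# Should return a 200 JSON payload once the server is ready
curl -s http://localhost:8090/api/health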