456 lines
13 KiB
Python
456 lines
13 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Batch process multiple repositories using Repomix.
|
|
|
|
This script processes multiple repositories (local or remote) using the repomix CLI tool.
|
|
Supports configuration through environment variables loaded from multiple .env file locations.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import json
|
|
from pathlib import Path
|
|
from typing import List, Dict, Optional, Tuple
|
|
from dataclasses import dataclass
|
|
import argparse
|
|
|
|
|
|
@dataclass
|
|
class RepomixConfig:
|
|
"""Configuration for repomix execution."""
|
|
style: str = "xml"
|
|
output_dir: str = "repomix-output"
|
|
remove_comments: bool = False
|
|
include_pattern: Optional[str] = None
|
|
ignore_pattern: Optional[str] = None
|
|
no_security_check: bool = False
|
|
verbose: bool = False
|
|
|
|
|
|
class EnvLoader:
|
|
"""Load environment variables from multiple .env file locations."""
|
|
|
|
@staticmethod
|
|
def load_env_files() -> Dict[str, str]:
|
|
"""
|
|
Load environment variables from .env files in order of precedence.
|
|
|
|
Order: process.env > skill/.env > skills/.env > .claude/.env
|
|
|
|
Returns:
|
|
Dictionary of environment variables
|
|
"""
|
|
env_vars = {}
|
|
script_dir = Path(__file__).parent.resolve()
|
|
|
|
# Define search paths in reverse order (lowest to highest priority)
|
|
search_paths = [
|
|
script_dir.parent.parent.parent / ".env", # .claude/.env
|
|
script_dir.parent.parent / ".env", # skills/.env
|
|
script_dir.parent / ".env", # skill/.env (repomix/.env)
|
|
]
|
|
|
|
# Load from files (lower priority first)
|
|
for env_path in search_paths:
|
|
if env_path.exists():
|
|
env_vars.update(EnvLoader._parse_env_file(env_path))
|
|
|
|
# Override with process environment (highest priority)
|
|
env_vars.update(os.environ)
|
|
|
|
return env_vars
|
|
|
|
@staticmethod
|
|
def _parse_env_file(path: Path) -> Dict[str, str]:
|
|
"""
|
|
Parse a .env file and return key-value pairs.
|
|
|
|
Args:
|
|
path: Path to .env file
|
|
|
|
Returns:
|
|
Dictionary of environment variables
|
|
"""
|
|
env_vars = {}
|
|
try:
|
|
with open(path, 'r', encoding='utf-8') as f:
|
|
for line in f:
|
|
line = line.strip()
|
|
# Skip comments and empty lines
|
|
if not line or line.startswith('#'):
|
|
continue
|
|
# Parse KEY=VALUE
|
|
if '=' in line:
|
|
key, value = line.split('=', 1)
|
|
key = key.strip()
|
|
value = value.strip()
|
|
# Remove quotes if present
|
|
if value.startswith('"') and value.endswith('"'):
|
|
value = value[1:-1]
|
|
elif value.startswith("'") and value.endswith("'"):
|
|
value = value[1:-1]
|
|
env_vars[key] = value
|
|
except Exception as e:
|
|
print(f"Warning: Failed to parse {path}: {e}", file=sys.stderr)
|
|
|
|
return env_vars
|
|
|
|
|
|
class RepomixBatchProcessor:
|
|
"""Process multiple repositories with repomix."""
|
|
|
|
def __init__(self, config: RepomixConfig):
|
|
"""
|
|
Initialize batch processor.
|
|
|
|
Args:
|
|
config: Repomix configuration
|
|
"""
|
|
self.config = config
|
|
self.env_vars = EnvLoader.load_env_files()
|
|
|
|
def check_repomix_installed(self) -> bool:
|
|
"""
|
|
Check if repomix is installed and accessible.
|
|
|
|
Returns:
|
|
True if repomix is installed, False otherwise
|
|
"""
|
|
try:
|
|
result = subprocess.run(
|
|
["repomix", "--version"],
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=5,
|
|
env=self.env_vars
|
|
)
|
|
return result.returncode == 0
|
|
except (subprocess.SubprocessError, FileNotFoundError):
|
|
return False
|
|
|
|
def process_repository(
|
|
self,
|
|
repo_path: str,
|
|
output_name: Optional[str] = None,
|
|
is_remote: bool = False
|
|
) -> Tuple[bool, str]:
|
|
"""
|
|
Process a single repository with repomix.
|
|
|
|
Args:
|
|
repo_path: Path to local repository or remote repository URL
|
|
output_name: Custom output filename (optional)
|
|
is_remote: Whether repo_path is a remote URL
|
|
|
|
Returns:
|
|
Tuple of (success, message)
|
|
"""
|
|
# Create output directory if it doesn't exist
|
|
output_dir = Path(self.config.output_dir)
|
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
# Determine output filename
|
|
if output_name:
|
|
output_file = output_dir / output_name
|
|
else:
|
|
if is_remote:
|
|
# Extract repo name from URL
|
|
repo_name = repo_path.rstrip('/').split('/')[-1]
|
|
else:
|
|
repo_name = Path(repo_path).name
|
|
|
|
extension = self._get_extension(self.config.style)
|
|
output_file = output_dir / f"{repo_name}-output.{extension}"
|
|
|
|
# Build repomix command
|
|
cmd = self._build_command(repo_path, output_file, is_remote)
|
|
|
|
if self.config.verbose:
|
|
print(f"Executing: {' '.join(cmd)}")
|
|
|
|
try:
|
|
result = subprocess.run(
|
|
cmd,
|
|
capture_output=True,
|
|
text=True,
|
|
timeout=300, # 5 minute timeout
|
|
env=self.env_vars
|
|
)
|
|
|
|
if result.returncode == 0:
|
|
return True, f"Successfully processed {repo_path} -> {output_file}"
|
|
else:
|
|
error_msg = result.stderr or result.stdout or "Unknown error"
|
|
return False, f"Failed to process {repo_path}: {error_msg}"
|
|
|
|
except subprocess.TimeoutExpired:
|
|
return False, f"Timeout processing {repo_path} (exceeded 5 minutes)"
|
|
except Exception as e:
|
|
return False, f"Error processing {repo_path}: {str(e)}"
|
|
|
|
def _build_command(
|
|
self,
|
|
repo_path: str,
|
|
output_file: Path,
|
|
is_remote: bool
|
|
) -> List[str]:
|
|
"""
|
|
Build repomix command with configuration options.
|
|
|
|
Args:
|
|
repo_path: Path to repository
|
|
output_file: Output file path
|
|
is_remote: Whether this is a remote repository
|
|
|
|
Returns:
|
|
Command as list of strings
|
|
"""
|
|
cmd = ["npx" if is_remote else "repomix"]
|
|
|
|
if is_remote:
|
|
cmd.extend(["repomix", "--remote", repo_path])
|
|
else:
|
|
cmd.append(repo_path)
|
|
|
|
# Add configuration options
|
|
cmd.extend(["--style", self.config.style])
|
|
cmd.extend(["-o", str(output_file)])
|
|
|
|
if self.config.remove_comments:
|
|
cmd.append("--remove-comments")
|
|
|
|
if self.config.include_pattern:
|
|
cmd.extend(["--include", self.config.include_pattern])
|
|
|
|
if self.config.ignore_pattern:
|
|
cmd.extend(["-i", self.config.ignore_pattern])
|
|
|
|
if self.config.no_security_check:
|
|
cmd.append("--no-security-check")
|
|
|
|
if self.config.verbose:
|
|
cmd.append("--verbose")
|
|
|
|
return cmd
|
|
|
|
@staticmethod
|
|
def _get_extension(style: str) -> str:
|
|
"""
|
|
Get file extension for output style.
|
|
|
|
Args:
|
|
style: Output style (xml, markdown, json, plain)
|
|
|
|
Returns:
|
|
File extension
|
|
"""
|
|
extensions = {
|
|
"xml": "xml",
|
|
"markdown": "md",
|
|
"json": "json",
|
|
"plain": "txt"
|
|
}
|
|
return extensions.get(style, "xml")
|
|
|
|
def process_batch(
|
|
self,
|
|
repositories: List[Dict[str, str]]
|
|
) -> Dict[str, List[str]]:
|
|
"""
|
|
Process multiple repositories.
|
|
|
|
Args:
|
|
repositories: List of repository configurations
|
|
Each dict should contain:
|
|
- 'path': Repository path or URL
|
|
- 'output': Optional output filename
|
|
- 'remote': Optional boolean for remote repos
|
|
|
|
Returns:
|
|
Dictionary with 'success' and 'failed' lists
|
|
"""
|
|
results = {"success": [], "failed": []}
|
|
|
|
for repo in repositories:
|
|
repo_path = repo.get("path")
|
|
if not repo_path:
|
|
results["failed"].append("Missing 'path' in repository config")
|
|
continue
|
|
|
|
output_name = repo.get("output")
|
|
is_remote = repo.get("remote", False)
|
|
|
|
success, message = self.process_repository(
|
|
repo_path,
|
|
output_name,
|
|
is_remote
|
|
)
|
|
|
|
if success:
|
|
results["success"].append(message)
|
|
else:
|
|
results["failed"].append(message)
|
|
|
|
print(message)
|
|
|
|
return results
|
|
|
|
|
|
def load_repositories_from_file(file_path: str) -> List[Dict[str, str]]:
|
|
"""
|
|
Load repository configurations from JSON file.
|
|
|
|
Expected format:
|
|
[
|
|
{"path": "/path/to/repo", "output": "custom.xml"},
|
|
{"path": "owner/repo", "remote": true},
|
|
...
|
|
]
|
|
|
|
Args:
|
|
file_path: Path to JSON file
|
|
|
|
Returns:
|
|
List of repository configurations
|
|
"""
|
|
try:
|
|
with open(file_path, 'r', encoding='utf-8') as f:
|
|
data = json.load(f)
|
|
if isinstance(data, list):
|
|
return data
|
|
else:
|
|
print(f"Error: Expected array in {file_path}", file=sys.stderr)
|
|
return []
|
|
except json.JSONDecodeError as e:
|
|
print(f"Error: Invalid JSON in {file_path}: {e}", file=sys.stderr)
|
|
return []
|
|
except Exception as e:
|
|
print(f"Error: Failed to read {file_path}: {e}", file=sys.stderr)
|
|
return []
|
|
|
|
|
|
def main():
|
|
"""Main entry point for the script."""
|
|
parser = argparse.ArgumentParser(
|
|
description="Batch process multiple repositories with repomix"
|
|
)
|
|
|
|
# Input options
|
|
parser.add_argument(
|
|
"repos",
|
|
nargs="*",
|
|
help="Repository paths or URLs to process"
|
|
)
|
|
parser.add_argument(
|
|
"-f", "--file",
|
|
help="JSON file containing repository configurations"
|
|
)
|
|
|
|
# Output options
|
|
parser.add_argument(
|
|
"--style",
|
|
choices=["xml", "markdown", "json", "plain"],
|
|
default="xml",
|
|
help="Output format (default: xml)"
|
|
)
|
|
parser.add_argument(
|
|
"-o", "--output-dir",
|
|
default="repomix-output",
|
|
help="Output directory (default: repomix-output)"
|
|
)
|
|
|
|
# Processing options
|
|
parser.add_argument(
|
|
"--remove-comments",
|
|
action="store_true",
|
|
help="Remove comments from source files"
|
|
)
|
|
parser.add_argument(
|
|
"--include",
|
|
help="Include pattern (glob)"
|
|
)
|
|
parser.add_argument(
|
|
"--ignore",
|
|
help="Ignore pattern (glob)"
|
|
)
|
|
parser.add_argument(
|
|
"--no-security-check",
|
|
action="store_true",
|
|
help="Disable security checks"
|
|
)
|
|
parser.add_argument(
|
|
"-v", "--verbose",
|
|
action="store_true",
|
|
help="Verbose output"
|
|
)
|
|
parser.add_argument(
|
|
"--remote",
|
|
action="store_true",
|
|
help="Treat all repos as remote URLs"
|
|
)
|
|
|
|
args = parser.parse_args()
|
|
|
|
# Create configuration
|
|
config = RepomixConfig(
|
|
style=args.style,
|
|
output_dir=args.output_dir,
|
|
remove_comments=args.remove_comments,
|
|
include_pattern=args.include,
|
|
ignore_pattern=args.ignore,
|
|
no_security_check=args.no_security_check,
|
|
verbose=args.verbose
|
|
)
|
|
|
|
# Initialize processor
|
|
processor = RepomixBatchProcessor(config)
|
|
|
|
# Check if repomix is installed
|
|
if not processor.check_repomix_installed():
|
|
print("Error: repomix is not installed or not in PATH", file=sys.stderr)
|
|
print("Install with: npm install -g repomix", file=sys.stderr)
|
|
return 1
|
|
|
|
# Collect repositories to process
|
|
repositories = []
|
|
|
|
# Load from file if specified
|
|
if args.file:
|
|
repositories.extend(load_repositories_from_file(args.file))
|
|
|
|
# Add command line repositories
|
|
if args.repos:
|
|
for repo_path in args.repos:
|
|
repositories.append({
|
|
"path": repo_path,
|
|
"remote": args.remote
|
|
})
|
|
|
|
# Validate we have repositories to process
|
|
if not repositories:
|
|
print("Error: No repositories specified", file=sys.stderr)
|
|
print("Use: repomix_batch.py <repo1> <repo2> ...", file=sys.stderr)
|
|
print("Or: repomix_batch.py -f repos.json", file=sys.stderr)
|
|
return 1
|
|
|
|
# Process batch
|
|
print(f"Processing {len(repositories)} repositories...")
|
|
results = processor.process_batch(repositories)
|
|
|
|
# Print summary
|
|
print("\n" + "=" * 50)
|
|
print(f"Success: {len(results['success'])}")
|
|
print(f"Failed: {len(results['failed'])}")
|
|
|
|
if results['failed']:
|
|
print("\nFailed repositories:")
|
|
for failure in results['failed']:
|
|
print(f" - {failure}")
|
|
|
|
return 0 if not results['failed'] else 1
|
|
|
|
|
|
if __name__ == "__main__":
|
|
sys.exit(main())
|