Initial commit
This commit is contained in:
455
skills/repomix/scripts/repomix_batch.py
Normal file
455
skills/repomix/scripts/repomix_batch.py
Normal file
@@ -0,0 +1,455 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Batch process multiple repositories using Repomix.
|
||||
|
||||
This script processes multiple repositories (local or remote) using the repomix CLI tool.
|
||||
Supports configuration through environment variables loaded from multiple .env file locations.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import subprocess
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
from dataclasses import dataclass
|
||||
import argparse
|
||||
|
||||
|
||||
@dataclass
class RepomixConfig:
    """Options applied to every repomix invocation in a batch.

    Attributes:
        style: Output format name handed to ``--style``.
        output_dir: Directory that receives the generated files.
        remove_comments: When true, ``--remove-comments`` is passed.
        include_pattern: Value for ``--include``; left out when unset.
        ignore_pattern: Value for ``-i``; left out when unset.
        no_security_check: When true, ``--no-security-check`` is passed.
        verbose: When true, ``--verbose`` is passed and each command is
            echoed before it runs.
    """

    # Output selection
    style: str = "xml"
    output_dir: str = "repomix-output"

    # Content filtering
    remove_comments: bool = False
    include_pattern: Optional[str] = None
    ignore_pattern: Optional[str] = None

    # Behaviour switches
    no_security_check: bool = False
    verbose: bool = False
|
||||
|
||||
|
||||
class EnvLoader:
    """Load environment variables from multiple .env file locations."""

    @staticmethod
    def load_env_files() -> Dict[str, str]:
        """
        Load environment variables from .env files in order of precedence.

        Order: process.env > skill/.env > skills/.env > .claude/.env

        The locations are resolved relative to this script's directory.

        Returns:
            Dictionary of environment variables
        """
        env_vars: Dict[str, str] = {}
        script_dir = Path(__file__).parent.resolve()

        # Define search paths in reverse order (lowest to highest priority)
        search_paths = [
            script_dir.parent.parent.parent / ".env",  # .claude/.env
            script_dir.parent.parent / ".env",  # skills/.env
            script_dir.parent / ".env",  # skill/.env (repomix/.env)
        ]

        # Load from files (lower priority first). is_file() rather than
        # exists(): a directory that happens to be named ".env" is not a
        # config file and must not be handed to open().
        for env_path in search_paths:
            if env_path.is_file():
                env_vars.update(EnvLoader._parse_env_file(env_path))

        # Override with process environment (highest priority)
        env_vars.update(os.environ)

        return env_vars

    @staticmethod
    def _parse_env_file(path: Path) -> Dict[str, str]:
        """
        Parse a .env file and return key-value pairs.

        Blank lines, lines starting with ``#``, lines without ``=`` and lines
        with an empty key are ignored. Whitespace around keys and values is
        stripped, and one pair of matching surrounding quotes (single or
        double) is removed from a value.

        Read or decode failures are reported on stderr as a warning; whatever
        was parsed before the failure is still returned.

        Args:
            path: Path to .env file

        Returns:
            Dictionary of environment variables
        """
        env_vars: Dict[str, str] = {}
        try:
            # utf-8-sig decodes plain UTF-8 as well, but drops a leading BOM
            # so it does not end up glued to the first key.
            with open(path, "r", encoding="utf-8-sig") as f:
                for raw_line in f:
                    line = raw_line.strip()
                    # Skip comments and empty lines
                    if not line or line.startswith("#"):
                        continue

                    # Parse KEY=VALUE; only the first '=' separates them
                    key, separator, value = line.partition("=")
                    key = key.strip()
                    if not separator or not key:
                        # No '=' at all, or "=value": an empty name is not a
                        # usable environment variable.
                        continue

                    value = value.strip()
                    # Remove quotes if present. The length check keeps a
                    # value that is a single quote character intact instead
                    # of treating it as both the opening and closing quote.
                    if (
                        len(value) >= 2
                        and value[0] == value[-1]
                        and value[0] in ('"', "'")
                    ):
                        value = value[1:-1]

                    env_vars[key] = value
        except (OSError, UnicodeDecodeError) as e:
            print(f"Warning: Failed to parse {path}: {e}", file=sys.stderr)

        return env_vars
|
||||
|
||||
|
||||
class RepomixBatchProcessor:
    """Process multiple repositories with repomix."""

    def __init__(self, config: "RepomixConfig"):
        """
        Initialize batch processor.

        Args:
            config: Repomix configuration
        """
        self.config = config
        # Merged .env + process environment; passed as the complete
        # environment of every subprocess this class starts.
        self.env_vars = EnvLoader.load_env_files()

    def check_repomix_installed(self) -> bool:
        """
        Check if repomix is installed and accessible.

        Runs ``repomix --version`` with a 5 second timeout.

        Returns:
            True if repomix is installed, False otherwise
        """
        try:
            result = subprocess.run(
                ["repomix", "--version"],
                capture_output=True,
                text=True,
                timeout=5,
                env=self.env_vars,
            )
        except (subprocess.SubprocessError, OSError):
            # OSError covers FileNotFoundError (not on PATH) as well as
            # PermissionError (found but not executable).
            return False
        return result.returncode == 0

    def process_repository(
        self,
        repo_path: str,
        output_name: Optional[str] = None,
        is_remote: bool = False
    ) -> Tuple[bool, str]:
        """
        Process a single repository with repomix.

        Failures (non-zero exit, timeout, inability to create the output
        directory or start the process) are reported through the returned
        tuple rather than raised.

        Args:
            repo_path: Path to local repository or remote repository URL
            output_name: Custom output filename (optional)
            is_remote: Whether repo_path is a remote URL

        Returns:
            Tuple of (success, message)
        """
        # Create output directory if it doesn't exist
        output_dir = Path(self.config.output_dir)
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            # Keep the (success, message) contract so one bad output
            # directory does not abort the rest of a batch.
            return False, f"Error processing {repo_path}: {str(e)}"

        # Determine output filename
        if output_name:
            output_file = output_dir / output_name
        else:
            repo_name = self._derive_repo_name(repo_path, is_remote)
            extension = self._get_extension(self.config.style)
            output_file = output_dir / f"{repo_name}-output.{extension}"

        # Build repomix command
        cmd = self._build_command(repo_path, output_file, is_remote)

        if self.config.verbose:
            print(f"Executing: {' '.join(cmd)}")

        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=300,  # 5 minute timeout
                env=self.env_vars,
            )
        except subprocess.TimeoutExpired:
            return False, f"Timeout processing {repo_path} (exceeded 5 minutes)"
        except Exception as e:
            return False, f"Error processing {repo_path}: {str(e)}"

        if result.returncode == 0:
            return True, f"Successfully processed {repo_path} -> {output_file}"

        error_msg = result.stderr or result.stdout or "Unknown error"
        return False, f"Failed to process {repo_path}: {error_msg}"

    @staticmethod
    def _derive_repo_name(repo_path: str, is_remote: bool) -> str:
        """
        Derive the base name used for a default output file.

        Args:
            repo_path: Path to local repository or remote repository URL
            is_remote: Whether repo_path is a remote URL

        Returns:
            Last path/URL component, without a trailing ``.git`` for remotes;
            ``"repo"`` when no component can be determined
        """
        if is_remote:
            # Extract repo name from URL
            name = repo_path.rstrip("/").split("/")[-1]
            if name.endswith(".git"):
                name = name[: -len(".git")]
        else:
            name = Path(repo_path).name
            # "." has an empty name and ".." is named "..": resolve those to
            # the real directory so the output is not "-output.xml".
            if name in ("", ".", ".."):
                name = Path(repo_path).resolve().name
        return name or "repo"

    def _build_command(
        self,
        repo_path: str,
        output_file: Path,
        is_remote: bool
    ) -> List[str]:
        """
        Build repomix command with configuration options.

        Remote repositories are run as ``npx repomix --remote <repo>``;
        local ones as ``repomix <path>``.

        Args:
            repo_path: Path to repository
            output_file: Output file path
            is_remote: Whether this is a remote repository

        Returns:
            Command as list of strings
        """
        if is_remote:
            cmd = ["npx", "repomix", "--remote", repo_path]
        else:
            cmd = ["repomix", repo_path]

        # Add configuration options
        cmd.extend(["--style", self.config.style])
        cmd.extend(["-o", str(output_file)])

        if self.config.remove_comments:
            cmd.append("--remove-comments")

        if self.config.include_pattern:
            cmd.extend(["--include", self.config.include_pattern])

        if self.config.ignore_pattern:
            cmd.extend(["-i", self.config.ignore_pattern])

        if self.config.no_security_check:
            cmd.append("--no-security-check")

        if self.config.verbose:
            cmd.append("--verbose")

        return cmd

    @staticmethod
    def _get_extension(style: str) -> str:
        """
        Get file extension for output style.

        Args:
            style: Output style (xml, markdown, json, plain)

        Returns:
            File extension; "xml" for an unrecognised style
        """
        extensions = {
            "xml": "xml",
            "markdown": "md",
            "json": "json",
            "plain": "txt",
        }
        return extensions.get(style, "xml")

    def process_batch(
        self,
        repositories: List[Dict[str, str]]
    ) -> Dict[str, List[str]]:
        """
        Process multiple repositories.

        Each processed repository's result message is printed as it
        completes. Entries that are not dicts or lack a 'path' are recorded
        as failures without being run.

        Args:
            repositories: List of repository configurations
                Each dict should contain:
                - 'path': Repository path or URL
                - 'output': Optional output filename
                - 'remote': Optional boolean for remote repos

        Returns:
            Dictionary with 'success' and 'failed' lists
        """
        results: Dict[str, List[str]] = {"success": [], "failed": []}

        for repo in repositories:
            # Entries may come straight from user-written JSON, so they are
            # not guaranteed to be objects.
            if not isinstance(repo, dict):
                results["failed"].append(
                    f"Invalid repository config (expected object): {repo!r}"
                )
                continue

            repo_path = repo.get("path")
            if not repo_path:
                results["failed"].append("Missing 'path' in repository config")
                continue

            output_name = repo.get("output")
            is_remote = repo.get("remote", False)

            success, message = self.process_repository(
                repo_path,
                output_name,
                is_remote
            )

            if success:
                results["success"].append(message)
            else:
                results["failed"].append(message)

            print(message)

        return results
|
||||
|
||||
|
||||
def load_repositories_from_file(file_path: str) -> List[Dict[str, str]]:
    """
    Load repository configurations from JSON file.

    Expected format:
    [
        {"path": "/path/to/repo", "output": "custom.xml"},
        {"path": "owner/repo", "remote": true},
        ...
    ]

    Problems are reported on stderr and never raised: an unreadable file,
    invalid JSON, or a top-level value that is not an array yields an empty
    list, and array entries that are not objects are skipped with a warning.

    Args:
        file_path: Path to JSON file

    Returns:
        List of repository configurations
    """
    try:
        # utf-8-sig also reads plain UTF-8; it only strips a leading BOM,
        # which json.load would otherwise reject.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON in {file_path}: {e}", file=sys.stderr)
        return []
    except Exception as e:
        # Deliberately broad: this loader is best-effort and must not raise.
        print(f"Error: Failed to read {file_path}: {e}", file=sys.stderr)
        return []

    if not isinstance(data, list):
        print(f"Error: Expected array in {file_path}", file=sys.stderr)
        return []

    repositories = []
    for index, entry in enumerate(data):
        if isinstance(entry, dict):
            repositories.append(entry)
        else:
            # Consumers call .get() on every entry, so anything that is not
            # an object cannot be processed.
            print(
                f"Warning: Skipping entry {index} in {file_path}: expected object",
                file=sys.stderr,
            )
    return repositories
|
||||
|
||||
|
||||
def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the batch script."""
    parser = argparse.ArgumentParser(
        description="Batch process multiple repositories with repomix"
    )

    # Input options
    parser.add_argument(
        "repos",
        nargs="*",
        help="Repository paths or URLs to process"
    )
    parser.add_argument(
        "-f", "--file",
        help="JSON file containing repository configurations"
    )

    # Output options
    parser.add_argument(
        "--style",
        choices=["xml", "markdown", "json", "plain"],
        default="xml",
        help="Output format (default: xml)"
    )
    parser.add_argument(
        "-o", "--output-dir",
        default="repomix-output",
        help="Output directory (default: repomix-output)"
    )

    # Processing options
    parser.add_argument(
        "--remove-comments",
        action="store_true",
        help="Remove comments from source files"
    )
    parser.add_argument(
        "--include",
        help="Include pattern (glob)"
    )
    parser.add_argument(
        "--ignore",
        help="Ignore pattern (glob)"
    )
    parser.add_argument(
        "--no-security-check",
        action="store_true",
        help="Disable security checks"
    )
    parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        help="Verbose output"
    )
    parser.add_argument(
        "--remote",
        action="store_true",
        help="Treat all repos as remote URLs"
    )

    return parser


def main(argv: Optional[List[str]] = None) -> int:
    """
    Main entry point for the script.

    Args:
        argv: Command-line arguments without the program name. When None
            (the default) argparse reads them from sys.argv, so existing
            ``main()`` callers behave as before.

    Returns:
        Process exit code: 0 when every repository succeeded, 1 when
        repomix is unavailable, no repositories were given, or any
        repository failed.
    """
    args = _build_arg_parser().parse_args(argv)

    # Create configuration
    config = RepomixConfig(
        style=args.style,
        output_dir=args.output_dir,
        remove_comments=args.remove_comments,
        include_pattern=args.include,
        ignore_pattern=args.ignore,
        no_security_check=args.no_security_check,
        verbose=args.verbose
    )

    # Initialize processor
    processor = RepomixBatchProcessor(config)

    # Check if repomix is installed
    if not processor.check_repomix_installed():
        print("Error: repomix is not installed or not in PATH", file=sys.stderr)
        print("Install with: npm install -g repomix", file=sys.stderr)
        return 1

    # Collect repositories to process
    repositories = []

    # Load from file if specified
    if args.file:
        repositories.extend(load_repositories_from_file(args.file))

    # Add command line repositories; --remote applies to these only, file
    # entries carry their own 'remote' flag.
    repositories.extend(
        {"path": repo_path, "remote": args.remote} for repo_path in args.repos
    )

    # Validate we have repositories to process
    if not repositories:
        print("Error: No repositories specified", file=sys.stderr)
        print("Use: repomix_batch.py <repo1> <repo2> ...", file=sys.stderr)
        print("Or: repomix_batch.py -f repos.json", file=sys.stderr)
        return 1

    # Process batch
    print(f"Processing {len(repositories)} repositories...")
    results = processor.process_batch(repositories)

    # Print summary
    print("\n" + "=" * 50)
    print(f"Success: {len(results['success'])}")
    print(f"Failed: {len(results['failed'])}")

    if results['failed']:
        print("\nFailed repositories:")
        for failure in results['failed']:
            print(f"  - {failure}")

    return 0 if not results['failed'] else 1
|
||||
|
||||
|
||||
# Run as a script: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user