Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:48:27 +08:00
commit f9789b0110
16 changed files with 4715 additions and 0 deletions

View File

@@ -0,0 +1,419 @@
#!/usr/bin/env python3
"""
Dependency evaluator script for gathering package ecosystem data.
Automates command execution and data collection for dependency analysis.
Uses only Python standard library - no external dependencies required.
"""
import argparse
import json
import subprocess
import sys
import urllib.error
import urllib.request
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple
class DependencyEvaluator:
    """Main class for evaluating dependencies across package ecosystems.

    Gathers registry metadata (npm, PyPI, crates.io, Go modules), GitHub
    repository statistics, and placeholder security/dependency data for a
    single package. Every external failure (missing CLI tool, network
    error, malformed JSON) is recorded in ``self.errors`` or
    ``self.warnings`` instead of being raised, so ``evaluate`` always
    returns a complete result dictionary.
    """

    def __init__(self, package_name: str, ecosystem: str):
        """
        Initialize the dependency evaluator.

        Args:
            package_name: Name of the package to evaluate
            ecosystem: Package ecosystem (npm, pypi, cargo, go)
        """
        self.package_name = package_name
        # Lower-cased once so all later ecosystem comparisons are case-insensitive.
        self.ecosystem = ecosystem.lower()
        self.errors: List[str] = []    # fatal problems (unknown ecosystem, 404s)
        self.warnings: List[str] = []  # recoverable problems (missing CLIs, timeouts)

    def run_command(self, cmd: List[str], timeout: int = 30) -> Tuple[bool, str, str]:
        """
        Execute a shell command and return results.

        Args:
            cmd: Command and arguments as list
            timeout: Command timeout in seconds

        Returns:
            Tuple of (success, stdout, stderr)
        """
        try:
            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=timeout
            )
            return (result.returncode == 0, result.stdout, result.stderr)
        except subprocess.TimeoutExpired:
            self.warnings.append(f"Command timed out after {timeout}s: {' '.join(cmd)}")
            return (False, "", f"Timeout after {timeout}s")
        except FileNotFoundError:
            # The tool (npm/go/gh/...) is simply not installed; degrade gracefully.
            self.warnings.append(f"Command not found: {cmd[0]}")
            return (False, "", f"Command not found: {cmd[0]}")
        except Exception as e:
            self.warnings.append(f"Command failed: {' '.join(cmd)} - {str(e)}")
            return (False, "", str(e))

    def fetch_url(self, url: str, timeout: int = 10) -> Optional[Dict[str, Any]]:
        """
        Fetch JSON data from a URL.

        Args:
            url: URL to fetch
            timeout: Request timeout in seconds

        Returns:
            Parsed JSON data or None on failure
        """
        try:
            req = urllib.request.Request(url)
            # Some registries reject requests that carry no User-Agent.
            req.add_header('User-Agent', 'dependency-evaluator/1.0')
            with urllib.request.urlopen(req, timeout=timeout) as response:
                data = response.read().decode('utf-8')
            return json.loads(data)
        except urllib.error.HTTPError as e:
            if e.code == 404:
                # A missing package/repo is a hard error, not just a warning.
                self.errors.append(f"Resource not found: {url}")
            elif e.code == 403:
                self.warnings.append(f"Access forbidden (rate limit?): {url}")
            else:
                self.warnings.append(f"HTTP {e.code} error fetching {url}")
            return None
        except urllib.error.URLError as e:
            self.warnings.append(f"Network error fetching {url}: {str(e)}")
            return None
        except json.JSONDecodeError as e:
            self.warnings.append(f"Invalid JSON from {url}: {str(e)}")
            return None
        except Exception as e:
            self.warnings.append(f"Error fetching {url}: {str(e)}")
            return None

    def gather_npm_data(self) -> Dict[str, Any]:
        """Gather data for npm packages via the ``npm view`` CLI.

        Returns:
            Dict with latest_version/license/description/etc. — empty on
            any failure (missing npm, unparseable output).
        """
        data: Dict[str, Any] = {}

        # Package metadata.
        success, stdout, stderr = self.run_command(['npm', 'view', self.package_name, '--json'])
        if success and stdout:
            try:
                npm_data = json.loads(stdout)
                data['latest_version'] = npm_data.get('version', '')
                data['license'] = npm_data.get('license', '')
                data['description'] = npm_data.get('description', '')
                data['homepage'] = npm_data.get('homepage', '')
                # 'repository' may be either a plain URL string or {'url': ...}.
                data['repository_url'] = npm_data.get('repository', {}).get('url', '') if isinstance(npm_data.get('repository'), dict) else npm_data.get('repository', '')
                data['maintainers'] = npm_data.get('maintainers', [])
                data['keywords'] = npm_data.get('keywords', [])
            except json.JSONDecodeError:
                self.warnings.append("Failed to parse npm view output")

        # Version publish history ('time' maps version -> ISO timestamp,
        # plus synthetic 'created'/'modified' keys we exclude from the count).
        success, stdout, stderr = self.run_command(['npm', 'view', self.package_name, 'time', '--json'])
        if success and stdout:
            try:
                time_data = json.loads(stdout)
                data['publish_history'] = time_data
                data['versions_count'] = len([k for k in time_data.keys() if k not in ['created', 'modified']])
            except json.JSONDecodeError:
                self.warnings.append("Failed to parse npm time output")

        # Complete version list (npm returns a bare string for single-version packages).
        success, stdout, stderr = self.run_command(['npm', 'view', self.package_name, 'versions', '--json'])
        if success and stdout:
            try:
                versions = json.loads(stdout)
                data['all_versions'] = versions if isinstance(versions, list) else [versions]
            except json.JSONDecodeError:
                self.warnings.append("Failed to parse npm versions output")

        return data

    def gather_pypi_data(self) -> Dict[str, Any]:
        """Gather data for PyPI packages via the JSON API.

        Returns:
            Dict of registry metadata; empty when the package is not found.
        """
        data: Dict[str, Any] = {}
        pypi_url = f"https://pypi.org/pypi/{self.package_name}/json"
        pypi_data = self.fetch_url(pypi_url)
        if pypi_data:
            info = pypi_data.get('info', {})
            data['latest_version'] = info.get('version', '')
            data['license'] = info.get('license', '')
            data['description'] = info.get('summary', '')
            data['homepage'] = info.get('home_page', '')
            # BUGFIX: PyPI serves "project_urls": null for many packages;
            # .get() with a default does not guard a stored None, so the
            # original chained .get('Source', ...) raised AttributeError.
            project_urls = info.get('project_urls') or {}
            data['repository_url'] = project_urls.get('Source', info.get('project_url', ''))
            data['author'] = info.get('author', '')
            data['keywords'] = info.get('keywords', '').split(',') if info.get('keywords') else []

            # Release history: first upload time per version (or '' for yanked/empty releases).
            releases = pypi_data.get('releases', {})
            data['versions_count'] = len(releases)
            data['publish_history'] = {
                version: release_list[0].get('upload_time', '') if release_list else ''
                for version, release_list in releases.items()
            }
        return data

    def gather_cargo_data(self) -> Dict[str, Any]:
        """Gather data for Cargo/Rust crates via the crates.io API.

        Returns:
            Dict of registry metadata; empty when the crate is not found.
        """
        data: Dict[str, Any] = {}
        crates_url = f"https://crates.io/api/v1/crates/{self.package_name}"
        crate_data = self.fetch_url(crates_url)
        if crate_data and 'crate' in crate_data:
            crate = crate_data['crate']
            data['latest_version'] = crate.get('max_version', '')
            # BUGFIX: crates.io can return "license": null; guard before
            # splitting the SPDX "A OR B" expression into a comma list.
            data['license'] = ', '.join((crate.get('license') or '').split(' OR '))
            data['description'] = crate.get('description', '')
            data['homepage'] = crate.get('homepage', '')
            data['repository_url'] = crate.get('repository', '')
            data['downloads'] = crate.get('downloads', 0)
            data['recent_downloads'] = crate.get('recent_downloads', 0)

        # Full version list (separate endpoint).
        versions_url = f"https://crates.io/api/v1/crates/{self.package_name}/versions"
        versions_data = self.fetch_url(versions_url)
        if versions_data and 'versions' in versions_data:
            data['versions_count'] = len(versions_data['versions'])
            data['all_versions'] = [v.get('num', '') for v in versions_data['versions']]
        return data

    def gather_go_data(self) -> Dict[str, Any]:
        """Gather data for Go modules via ``go list -m -json``.

        Returns:
            Dict with module path / latest version / publish time; empty on failure.
        """
        data: Dict[str, Any] = {}
        success, stdout, stderr = self.run_command(['go', 'list', '-m', '-json', self.package_name])
        if success and stdout:
            try:
                go_data = json.loads(stdout)
                data['module_path'] = go_data.get('Path', '')
                data['latest_version'] = go_data.get('Version', '')
                data['time'] = go_data.get('Time', '')
            except json.JSONDecodeError:
                self.warnings.append("Failed to parse go list output")
        return data

    def extract_github_repo(self, repo_url: str) -> Optional[Tuple[str, str]]:
        """
        Extract owner and repo name from GitHub URL.

        Args:
            repo_url: GitHub repository URL (HTTPS, SSH, or git+ forms)

        Returns:
            Tuple of (owner, repo) or None
        """
        if not repo_url:
            return None
        import re
        # One pattern covers both forms: '[:/]' matches the HTTPS separator
        # and the SSH 'github.com:owner/repo' separator. (The original had a
        # second pattern that was a strict subset of this one.)
        match = re.search(r'github\.com[:/]([^/]+)/([^/\.]+)', repo_url)
        if not match:
            return None
        owner, repo = match.groups()
        # BUGFIX: strip '.git' only as a suffix; replace() would also mangle
        # repo names that merely contain '.git' in the middle.
        if repo.endswith('.git'):
            repo = repo[:-len('.git')]
        return (owner, repo)

    @staticmethod
    def _repo_summary(repo_data: Dict[str, Any]) -> Dict[str, Any]:
        """Pick the repository statistics we report out of a GitHub repo object."""
        return {
            'pushed_at': repo_data.get('pushed_at', ''),
            'open_issues_count': repo_data.get('open_issues_count', 0),
            'stargazers_count': repo_data.get('stargazers_count', 0),
            'forks_count': repo_data.get('forks_count', 0),
            'watchers_count': repo_data.get('watchers_count', 0),
            'default_branch': repo_data.get('default_branch', ''),
        }

    def gather_github_data(self, repo_url: str) -> Dict[str, Any]:
        """
        Gather data from GitHub repository.

        Args:
            repo_url: GitHub repository URL

        Returns:
            Dictionary of GitHub data
        """
        data: Dict[str, Any] = {}
        github_info = self.extract_github_repo(repo_url)
        if not github_info:
            self.warnings.append(f"Could not parse GitHub URL: {repo_url}")
            return data
        owner, repo = github_info
        data['repository_url'] = f"https://github.com/{owner}/{repo}"

        # Prefer the gh CLI (authenticated, higher rate limits); fall back
        # to the anonymous REST API if it is unavailable or fails.
        success, stdout, stderr = self.run_command(['gh', 'api', f'repos/{owner}/{repo}'])
        if success and stdout:
            try:
                data.update(self._repo_summary(json.loads(stdout)))
            except json.JSONDecodeError:
                self.warnings.append("Failed to parse gh api output")
        else:
            api_url = f"https://api.github.com/repos/{owner}/{repo}"
            repo_data = self.fetch_url(api_url)
            if repo_data:
                data.update(self._repo_summary(repo_data))

        # Community health profile (best-effort; silently skipped on bad JSON).
        success, stdout, stderr = self.run_command(['gh', 'api', f'repos/{owner}/{repo}/community/profile'])
        if success and stdout:
            try:
                community_data = json.loads(stdout)
                data['community_health'] = {
                    'health_percentage': community_data.get('health_percentage', 0),
                    'files': community_data.get('files', {})
                }
            except json.JSONDecodeError:
                pass

        # Contributors count via gh's built-in jq filter.
        success, stdout, stderr = self.run_command(['gh', 'api', f'repos/{owner}/{repo}/contributors', '--jq', 'length'])
        if success and stdout.strip().isdigit():
            data['contributors_count'] = int(stdout.strip())

        # License SPDX identifier.
        success, stdout, stderr = self.run_command(['gh', 'api', f'repos/{owner}/{repo}/license', '--jq', '.license.spdx_id'])
        if success and stdout.strip():
            data['license_info'] = {'spdx_id': stdout.strip()}

        return data

    def gather_security_data(self) -> Dict[str, Any]:
        """Gather security-related data (currently a documented stub).

        Returns:
            Empty dict; npm auditing needs a package.json context we lack.
        """
        data: Dict[str, Any] = {}
        if self.ecosystem == 'npm':
            # Note: npm audit requires package.json, which we don't have in isolation.
            # This is a limitation - would need to create temp package.json.
            self.warnings.append("npm audit requires package.json context - skipping")
        return data

    def gather_dependency_footprint(self) -> Dict[str, Any]:
        """Gather dependency tree information (currently a documented stub).

        Returns:
            Dict with zeroed counts; npm ls needs an installed package.
        """
        data = {
            'direct_dependencies': 0,
            'total_dependencies': 0,
            'tree_depth': 1
        }
        if self.ecosystem == 'npm':
            # npm ls requires the package to be installed locally.
            self.warnings.append("npm ls requires package installation - skipping")
        return data

    def evaluate(self) -> Dict[str, Any]:
        """
        Run the full evaluation and return structured results.

        Returns:
            Dictionary containing all gathered data
        """
        result: Dict[str, Any] = {
            'package': self.package_name,
            'ecosystem': self.ecosystem,
            # utcnow() is deprecated since Python 3.12; this aware-UTC form
            # produces the identical '...Z'-suffixed naive-ISO string.
            'timestamp': datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + 'Z',
            'registry_data': {},
            'github_data': {},
            'security_data': {},
            'dependency_footprint': {},
            'errors': [],
            'warnings': []
        }

        # Ecosystem-specific registry data.
        if self.ecosystem == 'npm':
            result['registry_data'] = self.gather_npm_data()
        elif self.ecosystem == 'pypi':
            result['registry_data'] = self.gather_pypi_data()
        elif self.ecosystem == 'cargo':
            result['registry_data'] = self.gather_cargo_data()
        elif self.ecosystem == 'go':
            result['registry_data'] = self.gather_go_data()
        else:
            # Unknown ecosystem: return early with only the error recorded.
            self.errors.append(f"Unsupported ecosystem: {self.ecosystem}")
            result['errors'] = self.errors
            result['warnings'] = self.warnings
            return result

        # GitHub data, if the registry exposed a GitHub repository URL.
        repo_url = result['registry_data'].get('repository_url', '')
        if repo_url and 'github.com' in repo_url:
            result['github_data'] = self.gather_github_data(repo_url)

        result['security_data'] = self.gather_security_data()
        result['dependency_footprint'] = self.gather_dependency_footprint()

        result['errors'] = self.errors
        result['warnings'] = self.warnings
        return result
def main():
    """Main entry point: parse CLI arguments, run the evaluation, emit JSON."""
    parser = argparse.ArgumentParser(
        description='Evaluate a package dependency across different ecosystems'
    )
    parser.add_argument('package', help='Package name to evaluate')
    parser.add_argument(
        'ecosystem',
        choices=['npm', 'pypi', 'cargo', 'go'],
        help='Package ecosystem'
    )
    args = parser.parse_args()

    report = DependencyEvaluator(args.package, args.ecosystem).evaluate()

    # Machine-readable report on stdout; exit non-zero when hard errors occurred.
    print(json.dumps(report, indent=2))
    sys.exit(1 if report['errors'] else 0)


if __name__ == '__main__':
    main()