#!/usr/bin/env python3
"""
Dockerfile Optimization Analyzer

Analyzes Dockerfiles for optimization opportunities including multi-stage builds,
security issues, size reduction, and best practices.

Usage:
    python docker-optimize.py Dockerfile
    python docker-optimize.py --json Dockerfile
    python docker-optimize.py --verbose Dockerfile
"""

import argparse
import json
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple


class DockerfileAnalyzer:
    """Analyze a Dockerfile for optimization opportunities.

    Findings are collected in two lists of plain dictionaries:

    * ``issues`` -- entries with ``line``, ``severity`` (``'error'`` or
      ``'warning'``), ``category``, ``message`` and ``suggestion`` keys.
    * ``suggestions`` -- entries with ``line``, ``category``, ``message``
      and ``suggestion`` keys.

    A ``line`` of ``0`` marks a finding that applies to the whole file.

    All checks work on *logical* instructions: backslash-continued lines
    are joined, comment and blank lines are skipped, and instruction
    keywords are matched case-insensitively. The checks are heuristics;
    parser directives (such as a custom escape character) and heredocs
    are not interpreted.
    """

    # Dependency manifests that should be copied before the source tree.
    _DEPENDENCY_FILES = ('package.json', 'requirements.txt', 'go.mod')
    # Substrings of RUN commands that indicate build-time tooling.
    _BUILD_TOOLS = ('gcc', 'make', 'build-essential', 'npm install', 'pip install')
    # Substrings of ENV/ARG instructions that hint at embedded credentials.
    _SECRET_MARKERS = ('PASSWORD', 'SECRET', 'TOKEN', 'API_KEY')
    # Commands that populate the apt package lists, and the matching cleanup.
    _APT_INSTALL = ('apt-get install', 'apt install')
    _APT_CLEANUP = 'rm -rf /var/lib/apt/lists/*'

    def __init__(self, dockerfile_path: Path, verbose: bool = False):
        """
        Initialize analyzer.

        Args:
            dockerfile_path: Path to Dockerfile
            verbose: Enable verbose output (stored; not used by the checks)
        """
        self.dockerfile_path = Path(dockerfile_path)
        self.verbose = verbose
        self.lines: List[str] = []
        self.issues: List[Dict] = []
        self.suggestions: List[Dict] = []

    def load_dockerfile(self) -> bool:
        """
        Load the Dockerfile into ``self.lines``.

        Returns:
            True if loaded successfully

        Raises:
            FileNotFoundError: If Dockerfile doesn't exist
        """
        if not self.dockerfile_path.exists():
            raise FileNotFoundError(f"Dockerfile not found: {self.dockerfile_path}")

        # Decode explicitly so the result does not depend on the platform
        # locale; undecodable bytes are replaced rather than aborting the run.
        with open(self.dockerfile_path, 'r', encoding='utf-8', errors='replace') as f:
            self.lines = f.readlines()

        return True

    def _instructions(self) -> List[Tuple[int, int, str, str]]:
        """
        Split ``self.lines`` into logical Dockerfile instructions.

        Returns:
            A list of ``(first_line, last_line, keyword, text)`` tuples where
            the line numbers are 1-based, ``keyword`` is the upper-cased
            instruction name and ``text`` is the whole instruction with its
            continuation lines joined by single spaces.
        """
        instructions = []
        parts: List[str] = []
        first = 0
        last = 0

        for number, raw in enumerate(self.lines, 1):
            text = raw.strip()
            # Comments and blank lines are not instructions and do not end
            # an instruction that is being continued.
            if not text or text.startswith('#'):
                continue
            if not parts:
                first = number
            last = number
            continued = text.endswith('\\')
            parts.append(text[:-1].rstrip() if continued else text)
            if not continued:
                instructions.append(self._build_instruction(first, last, parts))
                parts = []

        if parts:
            # The file ended in the middle of a continued instruction.
            instructions.append(self._build_instruction(first, last, parts))

        return instructions

    @staticmethod
    def _build_instruction(first: int, last: int, parts: List[str]) -> Tuple[int, int, str, str]:
        """Join the physical lines of one instruction into a single tuple."""
        text = ' '.join(part for part in parts if part)
        keyword = text.split(None, 1)[0].upper() if text else ''
        return (first, last, keyword, text)

    def _first_matching_line(self, first: int, last: int, needles: Tuple[str, ...]) -> int:
        """
        Locate the physical line inside an instruction that triggered a match.

        Returns:
            The number of the first line in ``first..last`` containing any of
            ``needles`` (case-insensitive), or ``first`` if none does.
        """
        lowered_needles = [needle.lower() for needle in needles]
        for number in range(first, last + 1):
            lowered = self.lines[number - 1].lower()
            if any(needle in lowered for needle in lowered_needles):
                return number
        return first

    @staticmethod
    def _operands(text: str) -> List[str]:
        """Return an instruction's arguments without ``--flag`` options."""
        return [token for token in text.split()[1:] if not token.startswith('--')]

    @staticmethod
    def _parse_from(text: str) -> Tuple[str, Optional[str]]:
        """Return ``(image_reference, stage_alias_or_None)`` of a FROM instruction."""
        operands = DockerfileAnalyzer._operands(text)
        image = operands[0] if operands else ''
        alias = None
        if len(operands) >= 3 and operands[1].upper() == 'AS':
            alias = operands[2]
        return image, alias

    @staticmethod
    def _is_unpinned(image: str) -> bool:
        """Return True if the image reference has no tag or uses ``latest``."""
        if '@' in image:
            # Pinned by digest.
            return False
        # Only inspect the part after the last '/', so that a registry port
        # (``localhost:5000/app``) is not mistaken for a tag.
        name, colon, tag = image.rsplit('/', 1)[-1].partition(':')
        if colon:
            return tag in ('', 'latest')
        # A bare variable (``FROM ${BASE_IMAGE}``) may carry its own tag.
        return '$' not in name

    @staticmethod
    def _copies_build_context(text: str) -> bool:
        """Return True if a COPY instruction copies ``.`` or a ``./`` path from the build context."""
        tokens = text.split()[1:]
        if any(token.startswith('--from=') for token in tokens):
            # Copies from another stage or image, not from the build context.
            return False
        # Strip JSON-form punctuation so COPY [".", "/app"] is recognised too.
        operands = [token.strip('[],"\'') for token in tokens if not token.startswith('--')]
        sources = [operand for operand in operands if operand][:-1]
        return any(source == '.' or source.startswith('./') for source in sources)

    def analyze_base_image(self) -> None:
        """
        Check every FROM instruction for unpinned or oversized base images.

        ``FROM scratch`` and references to an earlier build stage are skipped
        because they do not pull a tagged image.
        """
        stage_names = set()

        for first, _last, keyword, text in self._instructions():
            if keyword != 'FROM':
                continue

            image, alias = self._parse_from(text)
            lowered = image.lower()
            pulls_image = bool(image) and lowered != 'scratch' and lowered not in stage_names
            if alias:
                stage_names.add(alias.lower())
            if not pulls_image:
                continue

            if self._is_unpinned(image):
                self.issues.append({
                    'line': first,
                    'severity': 'warning',
                    'category': 'base_image',
                    'message': 'Base image uses :latest or no tag',
                    'suggestion': 'Use specific version tags for reproducibility'
                })

            # Look at the image reference only, so a stage alias such as
            # "AS node_deps" neither triggers nor hides the suggestion.
            if 'node' in lowered and 'alpine' not in lowered:
                self.suggestions.append({
                    'line': first,
                    'category': 'size',
                    'message': 'Consider using Alpine variant',
                    'suggestion': 'node:20-alpine is ~10x smaller than node:20'
                })

    def analyze_multi_stage(self) -> None:
        """Warn when a single-stage build runs build tooling in a RUN instruction."""
        instructions = self._instructions()
        from_count = sum(1 for _f, _l, keyword, _t in instructions if keyword == 'FROM')

        if from_count != 1:
            return

        has_build_tools = any(
            keyword == 'RUN' and any(tool in text.lower() for tool in self._BUILD_TOOLS)
            for _f, _l, keyword, text in instructions
        )

        if has_build_tools:
            self.issues.append({
                'line': 0,
                'severity': 'warning',
                'category': 'optimization',
                'message': 'Single-stage build with build tools',
                'suggestion': 'Use multi-stage build to exclude build dependencies from final image'
            })

    def analyze_layer_caching(self) -> None:
        """
        Warn when the source tree is copied before any dependency manifest.

        COPY instructions are examined in file order. The check stops at the
        first one that mentions a dependency manifest (good ordering) or that
        copies the build context (reported with its line number).
        """
        for first, _last, keyword, text in self._instructions():
            if keyword != 'COPY':
                continue

            if any(name in text for name in self._DEPENDENCY_FILES):
                # Dependencies are copied first; later source copies are fine.
                return

            if self._copies_build_context(text):
                self.issues.append({
                    'line': first,
                    'severity': 'warning',
                    'category': 'caching',
                    'message': 'Source copied before dependencies',
                    'suggestion': 'Copy dependency files first (package.json, requirements.txt) then run install, then copy source'
                })
                return

    def analyze_security(self) -> None:
        """
        Check for a root runtime user and for secrets in ENV/ARG instructions.

        The runtime user is taken from the last USER instruction of the final
        build stage, since earlier stages and earlier USER instructions do not
        decide what the resulting image runs as.
        """
        instructions = self._instructions()

        final_stage_index = 0
        for index, (_f, _l, keyword, _t) in enumerate(instructions):
            if keyword == 'FROM':
                final_stage_index = index

        effective_user = None
        user_line = 0
        for first, _last, keyword, text in instructions[final_stage_index:]:
            if keyword == 'USER':
                operands = text.split()[1:]
                # "USER name:group" -- only the user part matters here.
                effective_user = operands[0].split(':', 1)[0] if operands else ''
                user_line = first

        if effective_user is None or effective_user.lower() in ('', 'root', '0'):
            self.issues.append({
                # 0 when no USER exists, otherwise the offending USER line.
                'line': user_line,
                'severity': 'error',
                'category': 'security',
                'message': 'Container runs as root',
                'suggestion': 'Create and use non-root user with USER instruction'
            })

        # Check for secrets in build
        for first, last, keyword, text in instructions:
            if keyword not in ('ENV', 'ARG'):
                continue
            upper = text.upper()
            if any(marker in upper for marker in self._SECRET_MARKERS):
                self.issues.append({
                    'line': self._first_matching_line(first, last, self._SECRET_MARKERS),
                    'severity': 'error',
                    'category': 'security',
                    'message': 'Potential secret in Dockerfile',
                    # ARG is one of the flagged instructions, so build
                    # arguments are not offered as the remedy.
                    'suggestion': 'Use build secrets (RUN --mount=type=secret) or runtime environment variables'
                })

    def analyze_apt_cache(self) -> None:
        """Suggest apt list cleanup for RUN instructions that install packages without it."""
        for first, last, keyword, text in self._instructions():
            if keyword != 'RUN':
                continue
            lowered = text.lower()
            if not any(command in lowered for command in self._APT_INSTALL):
                continue
            # The cleanup only saves space when it is part of the same RUN
            # instruction, which may span several continued lines.
            if self._APT_CLEANUP not in text:
                self.suggestions.append({
                    'line': self._first_matching_line(first, last, self._APT_INSTALL),
                    'category': 'size',
                    'message': 'apt cache not cleaned in same layer',
                    'suggestion': 'Add && rm -rf /var/lib/apt/lists/* to reduce image size'
                })

    def analyze_combine_run(self) -> None:
        """Suggest merging runs of two or more consecutive RUN instructions."""
        streak = 0
        streak_start = 0

        # The trailing sentinel closes a streak that reaches the end of the file.
        for first, _last, keyword, _text in self._instructions() + [(0, 0, '', '')]:
            if keyword == 'RUN':
                if streak == 0:
                    streak_start = first
                streak += 1
                continue

            if streak > 1:
                self.suggestions.append({
                    'line': streak_start,
                    'category': 'layers',
                    'message': f'{streak} consecutive RUN commands',
                    'suggestion': 'Combine related RUN commands with && to reduce layers'
                })
            streak = 0

    def analyze_workdir(self) -> None:
        """Suggest WORKDIR when the Dockerfile never sets one."""
        has_workdir = any(keyword == 'WORKDIR' for _f, _l, keyword, _t in self._instructions())

        if not has_workdir:
            self.suggestions.append({
                'line': 0,
                'category': 'best_practice',
                'message': 'No WORKDIR specified',
                'suggestion': 'Use WORKDIR to set working directory instead of cd commands'
            })

    def analyze(self) -> Dict:
        """
        Load the Dockerfile and run all analyses.

        Findings from a previous call are discarded first, so calling this
        method repeatedly does not duplicate results.

        Returns:
            Analysis results dictionary with ``dockerfile``, ``total_lines``,
            ``issues``, ``suggestions`` and ``summary`` keys

        Raises:
            FileNotFoundError: If Dockerfile doesn't exist
        """
        self.load_dockerfile()

        self.issues = []
        self.suggestions = []

        self.analyze_base_image()
        self.analyze_multi_stage()
        self.analyze_layer_caching()
        self.analyze_security()
        self.analyze_apt_cache()
        self.analyze_combine_run()
        self.analyze_workdir()

        return {
            'dockerfile': str(self.dockerfile_path),
            'total_lines': len(self.lines),
            'issues': self.issues,
            'suggestions': self.suggestions,
            'summary': {
                'errors': sum(1 for i in self.issues if i.get('severity') == 'error'),
                'warnings': sum(1 for i in self.issues if i.get('severity') == 'warning'),
                'suggestions': len(self.suggestions)
            }
        }

    def print_results(self, results: Dict) -> None:
        """
        Print analysis results in human-readable format.

        Args:
            results: Analysis results from analyze()
        """
        print(f"\nDockerfile Analysis: {results['dockerfile']}")
        print(f"Total lines: {results['total_lines']}")
        print("\nSummary:")
        print(f"  Errors: {results['summary']['errors']}")
        print(f"  Warnings: {results['summary']['warnings']}")
        print(f"  Suggestions: {results['summary']['suggestions']}")

        if results['issues']:
            print(f"\n{'='*60}")
            print("ISSUES:")
            print('='*60)
            for issue in results['issues']:
                severity = issue.get('severity', 'info').upper()
                # Line 0 marks a finding about the file as a whole.
                line_info = f"Line {issue['line']}" if issue['line'] > 0 else "General"
                print(f"\n[{severity}] {line_info} - {issue['category']}")
                print(f"  {issue['message']}")
                print(f"  → {issue['suggestion']}")

        if results['suggestions']:
            print(f"\n{'='*60}")
            print("SUGGESTIONS:")
            print('='*60)
            for sugg in results['suggestions']:
                line_info = f"Line {sugg['line']}" if sugg['line'] > 0 else "General"
                print(f"\n{line_info} - {sugg['category']}")
                print(f"  {sugg['message']}")
                print(f"  → {sugg['suggestion']}")

        print()


def main():
    """
    Command-line entry point.

    Parses the arguments, analyzes the given Dockerfile and prints the
    results either as JSON or as a human-readable report. The process exits
    with status 1 when the analysis reports at least one error or when the
    analysis itself fails.
    """
    cli = argparse.ArgumentParser(
        description="Analyze Dockerfile for optimization opportunities",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument("dockerfile", type=str, help="Path to Dockerfile")
    cli.add_argument("--json", action="store_true", help="Output results as JSON")
    cli.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
    options = cli.parse_args()

    try:
        analyzer = DockerfileAnalyzer(
            dockerfile_path=options.dockerfile,
            verbose=options.verbose,
        )
        report = analyzer.analyze()

        if options.json:
            print(json.dumps(report, indent=2))
        else:
            analyzer.print_results(report)

        # A non-zero status signals that error-level findings exist.
        # SystemExit is not an Exception subclass, so it passes through
        # the handler below.
        if report['summary']['errors'] > 0:
            sys.exit(1)

    except Exception as err:
        # A missing Dockerfile gets a plain message; anything else is
        # labelled as unexpected.
        if isinstance(err, FileNotFoundError):
            print(f"Error: {err}", file=sys.stderr)
        else:
            print(f"Unexpected error: {err}", file=sys.stderr)
        sys.exit(1)


# Run the CLI only when this file is executed as a script, so that importing
# the module does not parse arguments or exit the process.
if __name__ == "__main__":
    main()