Files
gh-rafaelcalleja-claude-mar…/skills/devops/scripts/docker_optimize.py
2025-11-30 08:48:52 +08:00

321 lines
11 KiB
Python

#!/usr/bin/env python3
"""
Dockerfile Optimization Analyzer
Analyzes Dockerfiles for optimization opportunities including multi-stage builds,
security issues, size reduction, and best practices.
Usage:
    python docker_optimize.py Dockerfile
    python docker_optimize.py --json Dockerfile
    python docker_optimize.py --verbose Dockerfile
"""
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple
class DockerfileAnalyzer:
    """Analyze a Dockerfile for optimization opportunities.

    Findings are collected in two lists of dictionaries:

    * ``issues``      - entries with 'line', 'severity', 'category',
                        'message' and 'suggestion' keys.
    * ``suggestions`` - entries with 'line', 'category', 'message' and
                        'suggestion' keys.

    A 'line' of 0 means the finding applies to the file as a whole.

    All checks work on *logical* instructions (backslash continuations are
    joined, blank lines and comment lines are skipped), so a multi-line
    ``RUN`` is analyzed as the single layer it produces.
    """

    # Substrings of a RUN instruction that indicate build tooling.
    _BUILD_TOOL_MARKERS = ('gcc', 'make', 'build-essential', 'npm install', 'pip install')
    # Dependency manifests that should be copied before the rest of the source.
    _DEPENDENCY_MANIFESTS = ('package.json', 'requirements.txt', 'go.mod')
    # Upper-case substrings that suggest a secret value in ENV/ARG.
    _SECRET_MARKERS = ('PASSWORD', 'SECRET', 'TOKEN', 'API_KEY')
    # Cleanup command expected in the same layer as an apt install.
    _APT_CLEANUP = 'rm -rf /var/lib/apt/lists/*'
    # Matches "apt install" / "apt-get install", allowing options in between
    # (e.g. "apt-get -y install").
    _APT_INSTALL_RE = re.compile(r'\bapt(?:-get)?\s+(?:-\S+\s+)*install\b')

    def __init__(self, dockerfile_path: Path, verbose: bool = False):
        """
        Initialize analyzer.

        Args:
            dockerfile_path: Path to Dockerfile
            verbose: Enable verbose output (stored; not used by the checks)
        """
        self.dockerfile_path = Path(dockerfile_path)
        self.verbose = verbose
        self.lines: List[str] = []
        self.issues: List[Dict] = []
        self.suggestions: List[Dict] = []

    def load_dockerfile(self) -> bool:
        """
        Load the Dockerfile into ``self.lines``.

        Returns:
            True if loaded successfully

        Raises:
            FileNotFoundError: If Dockerfile doesn't exist
        """
        if not self.dockerfile_path.exists():
            raise FileNotFoundError(f"Dockerfile not found: {self.dockerfile_path}")
        # Explicit encoding keeps results independent of the platform locale;
        # undecodable bytes are replaced so a stray byte cannot abort analysis.
        with open(self.dockerfile_path, 'r', encoding='utf-8', errors='replace') as f:
            self.lines = f.readlines()
        return True

    @staticmethod
    def _build_instruction(start_line: int, fragments: List[str]) -> Optional[Tuple[int, str, str]]:
        """Join continuation fragments into one (line, KEYWORD, text) tuple."""
        joined = ' '.join(fragment for fragment in fragments if fragment)
        if not joined:
            return None
        keyword = joined.split(None, 1)[0].upper()
        return (start_line, keyword, joined)

    def _instructions(self) -> List[Tuple[int, str, str]]:
        """
        Parse ``self.lines`` into logical Dockerfile instructions.

        Blank lines and lines starting with '#' are skipped; lines ending
        with a backslash are joined with the lines that follow.

        Returns:
            List of (first_line_number, upper-cased keyword, full text)
        """
        instructions: List[Tuple[int, str, str]] = []
        fragments: List[str] = []
        start_line = 0
        continuing = False
        for number, raw in enumerate(self.lines, 1):
            text = raw.strip()
            if not text or text.startswith('#'):
                continue
            if not continuing:
                start_line = number
                fragments = []
            continuing = text.endswith('\\')
            fragments.append(text[:-1].strip() if continuing else text)
            if continuing:
                continue
            instruction = self._build_instruction(start_line, fragments)
            if instruction is not None:
                instructions.append(instruction)
        if continuing:
            # File ended in the middle of a continuation; keep what we have.
            instruction = self._build_instruction(start_line, fragments)
            if instruction is not None:
                instructions.append(instruction)
        return instructions

    @staticmethod
    def _is_unpinned_image(image: str) -> bool:
        """Return True if *image* has no tag, an empty tag, or ':latest'."""
        if '@' in image:
            # Pinned by digest.
            return False
        # Only a ':' after the last '/' separates a tag; an earlier one is a
        # registry port (e.g. localhost:5000/app).
        last_segment = image.rsplit('/', 1)[-1]
        _, separator, tag = last_segment.partition(':')
        if not separator:
            # A bare build-arg reference (FROM ${BASE}) may carry its own tag.
            return '$' not in last_segment
        return tag in ('', 'latest')

    def analyze_base_image(self) -> None:
        """Check each FROM instruction for unpinned or oversized base images."""
        stage_names = set()
        for line_no, keyword, text in self._instructions():
            if keyword != 'FROM':
                continue
            # Drop flags such as --platform=linux/amd64.
            operands = [tok for tok in text.split()[1:] if not tok.startswith('--')]
            if not operands:
                continue
            image = operands[0]
            refers_to_stage = image.lower() in stage_names
            if len(operands) >= 3 and operands[1].upper() == 'AS':
                stage_names.add(operands[2].lower())
            # 'scratch' and earlier build stages are not registry images.
            if refers_to_stage or image.lower() == 'scratch':
                continue

            # Check for 'latest' tag
            if self._is_unpinned_image(image):
                self.issues.append({
                    'line': line_no,
                    'severity': 'warning',
                    'category': 'base_image',
                    'message': 'Base image uses :latest or no tag',
                    'suggestion': 'Use specific version tags for reproducibility'
                })

            # Check for non-alpine node images
            if 'node' in image.lower() and 'alpine' not in image.lower():
                self.suggestions.append({
                    'line': line_no,
                    'category': 'size',
                    'message': 'Consider using Alpine variant',
                    'suggestion': 'node:20-alpine is ~10x smaller than node:20'
                })

    def analyze_multi_stage(self) -> None:
        """Flag single-stage builds whose RUN steps use build tooling."""
        instructions = self._instructions()
        from_count = sum(1 for _, keyword, _ in instructions if keyword == 'FROM')
        if from_count != 1:
            return

        # Check if build tools are installed
        has_build_tools = any(
            keyword == 'RUN' and any(tool in text.lower() for tool in self._BUILD_TOOL_MARKERS)
            for _, keyword, text in instructions
        )
        if has_build_tools:
            self.issues.append({
                'line': 0,
                'severity': 'warning',
                'category': 'optimization',
                'message': 'Single-stage build with build tools',
                'suggestion': 'Use multi-stage build to exclude build dependencies from final image'
            })

    def analyze_layer_caching(self) -> None:
        """
        Check that dependency manifests are copied before the source tree.

        Reports the first COPY of '.' or './...' from the build context that
        is not preceded by a COPY of a dependency manifest.
        """
        manifest_copied = False
        for line_no, keyword, text in self._instructions():
            if keyword != 'COPY':
                continue
            tokens = text.split()[1:]
            if any(tok.startswith('--from=') for tok in tokens):
                # Copies from another stage/image, not from the build context.
                continue
            if any(name in text for name in self._DEPENDENCY_MANIFESTS):
                manifest_copied = True
                continue
            operands = [tok for tok in tokens if not tok.startswith('--')]
            sources = operands[:-1] if len(operands) > 1 else operands
            copies_source = any(src == '.' or src.startswith('./') for src in sources)
            if copies_source and not manifest_copied:
                self.issues.append({
                    'line': line_no,
                    'severity': 'warning',
                    'category': 'caching',
                    'message': 'Source copied before dependencies',
                    'suggestion': 'Copy dependency files first (package.json, requirements.txt) then run install, then copy source'
                })
                return

    def analyze_security(self) -> None:
        """Check the effective user of the final stage and secrets in ENV/ARG."""
        instructions = self._instructions()

        # The user that counts is the last USER of the last stage: every FROM
        # starts a new stage, and a later USER overrides an earlier one.
        effective_user = None
        for _, keyword, text in instructions:
            if keyword == 'FROM':
                effective_user = None
            elif keyword == 'USER':
                parts = text.split()
                effective_user = parts[1] if len(parts) > 1 else None

        runs_as_root = True
        if effective_user is not None:
            # USER may be given as user:group; only the user part matters.
            user_name = effective_user.split(':', 1)[0].lower()
            runs_as_root = user_name in ('root', '0')
        if runs_as_root:
            self.issues.append({
                'line': 0,
                'severity': 'error',
                'category': 'security',
                'message': 'Container runs as root',
                'suggestion': 'Create and use non-root user with USER instruction'
            })

        # Check for secrets in build
        for line_no, keyword, text in instructions:
            if keyword not in ('ENV', 'ARG'):
                continue
            if any(secret in text.upper() for secret in self._SECRET_MARKERS):
                self.issues.append({
                    'line': line_no,
                    'severity': 'error',
                    'category': 'security',
                    'message': 'Potential secret in Dockerfile',
                    'suggestion': 'Use build-time arguments or runtime environment variables'
                })

    def analyze_apt_cache(self) -> None:
        """Check that RUN instructions installing via apt clean the cache."""
        for line_no, keyword, text in self._instructions():
            if keyword != 'RUN':
                continue
            if not self._APT_INSTALL_RE.search(text.lower()):
                continue
            # Check if same RUN command cleans cache
            if self._APT_CLEANUP not in text:
                self.suggestions.append({
                    'line': line_no,
                    'category': 'size',
                    'message': 'apt cache not cleaned in same layer',
                    'suggestion': 'Add && rm -rf /var/lib/apt/lists/* to reduce image size'
                })

    def analyze_combine_run(self) -> None:
        """Check for multiple consecutive RUN commands."""
        consecutive_runs = 0
        first_run_line = 0
        # The trailing sentinel flushes a streak that reaches the end of file.
        for line_no, keyword, _ in self._instructions() + [(0, '', '')]:
            if keyword == 'RUN':
                if consecutive_runs == 0:
                    first_run_line = line_no
                consecutive_runs += 1
                continue
            if consecutive_runs > 1:
                self.suggestions.append({
                    'line': first_run_line,
                    'category': 'layers',
                    'message': f'{consecutive_runs} consecutive RUN commands',
                    'suggestion': 'Combine related RUN commands with && to reduce layers'
                })
            consecutive_runs = 0

    def analyze_workdir(self) -> None:
        """Check for WORKDIR usage."""
        has_workdir = any(keyword == 'WORKDIR' for _, keyword, _ in self._instructions())
        if not has_workdir:
            self.suggestions.append({
                'line': 0,
                'category': 'best_practice',
                'message': 'No WORKDIR specified',
                'suggestion': 'Use WORKDIR to set working directory instead of cd commands'
            })

    def analyze(self) -> Dict:
        """
        Load the Dockerfile and run all analyses.

        Findings from a previous call are discarded, so calling this method
        repeatedly does not duplicate results.

        Returns:
            Analysis results dictionary

        Raises:
            FileNotFoundError: If Dockerfile doesn't exist
        """
        self.issues = []
        self.suggestions = []
        self.load_dockerfile()
        self.analyze_base_image()
        self.analyze_multi_stage()
        self.analyze_layer_caching()
        self.analyze_security()
        self.analyze_apt_cache()
        self.analyze_combine_run()
        self.analyze_workdir()
        return {
            'dockerfile': str(self.dockerfile_path),
            'total_lines': len(self.lines),
            'issues': self.issues,
            'suggestions': self.suggestions,
            'summary': {
                'errors': sum(1 for i in self.issues if i.get('severity') == 'error'),
                'warnings': sum(1 for i in self.issues if i.get('severity') == 'warning'),
                'suggestions': len(self.suggestions)
            }
        }

    def print_results(self, results: Dict) -> None:
        """
        Print analysis results in human-readable format.

        Args:
            results: Analysis results from analyze()
        """
        print(f"\nDockerfile Analysis: {results['dockerfile']}")
        print(f"Total lines: {results['total_lines']}")
        print(f"\nSummary:")
        print(f" Errors: {results['summary']['errors']}")
        print(f" Warnings: {results['summary']['warnings']}")
        print(f" Suggestions: {results['summary']['suggestions']}")

        if results['issues']:
            print(f"\n{'='*60}")
            print("ISSUES:")
            print('='*60)
            for issue in results['issues']:
                severity = issue.get('severity', 'info').upper()
                # Line 0 marks a finding about the file as a whole.
                line_info = f"Line {issue['line']}" if issue['line'] > 0 else "General"
                print(f"\n[{severity}] {line_info} - {issue['category']}")
                print(f" {issue['message']}")
                print(f"{issue['suggestion']}")

        if results['suggestions']:
            print(f"\n{'='*60}")
            print("SUGGESTIONS:")
            print('='*60)
            for sugg in results['suggestions']:
                line_info = f"Line {sugg['line']}" if sugg['line'] > 0 else "General"
                print(f"\n{line_info} - {sugg['category']}")
                print(f" {sugg['message']}")
                print(f"{sugg['suggestion']}")
        print()
def main():
    """
    Command-line entry point.

    Parses the arguments, analyzes the given Dockerfile and prints the
    report either as JSON (--json) or as human-readable text. The process
    exits with status 1 when the report contains at least one error, when
    the Dockerfile is missing, or when any other exception is raised.
    """
    cli = argparse.ArgumentParser(
        description="Analyze Dockerfile for optimization opportunities",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument("dockerfile", type=str, help="Path to Dockerfile")
    cli.add_argument("--json", action="store_true", help="Output results as JSON")
    cli.add_argument("--verbose", "-v", action="store_true", help="Enable verbose output")
    options = cli.parse_args()

    try:
        analyzer = DockerfileAnalyzer(
            dockerfile_path=options.dockerfile,
            verbose=options.verbose,
        )
        report = analyzer.analyze()

        if not options.json:
            analyzer.print_results(report)
        else:
            print(json.dumps(report, indent=2))

        # A report containing errors is signalled through the exit status.
        error_count = report['summary']['errors']
        if error_count > 0:
            sys.exit(1)
    except FileNotFoundError as missing:
        print(f"Error: {missing}", file=sys.stderr)
        sys.exit(1)
    except Exception as failure:
        print(f"Unexpected error: {failure}", file=sys.stderr)
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()