Initial commit
This commit is contained in:
156
scripts/applicationset_generator.py
Normal file
156
scripts/applicationset_generator.py
Normal file
@@ -0,0 +1,156 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Generate ArgoCD ApplicationSet manifests for multi-cluster deployments.
|
||||
Supports Cluster, List, and Matrix generators (ArgoCD 3.x).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import yaml
|
||||
|
||||
|
||||
APPLICATIONSET_TEMPLATE = """---
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: ApplicationSet
|
||||
metadata:
|
||||
name: {name}
|
||||
namespace: argocd
|
||||
spec:
|
||||
goTemplate: true
|
||||
goTemplateOptions: ["missingkey=error"]
|
||||
generators:
|
||||
{generators}
|
||||
template:
|
||||
metadata:
|
||||
name: '{{{{.name}}}}-{name}'
|
||||
labels:
|
||||
environment: '{{{{.environment}}}}'
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: {repo_url}
|
||||
targetRevision: {target_revision}
|
||||
path: '{path}'
|
||||
destination:
|
||||
server: '{{{{.server}}}}'
|
||||
namespace: {namespace}
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
"""
|
||||
|
||||
|
||||
def generate_cluster_generator(label_selector: str = "") -> str:
    """Generate a Cluster generator entry for the ``generators:`` list.

    Args:
        label_selector: Optional ``key: value`` line used as a
            ``matchLabels`` entry to restrict which registered clusters
            are selected.  Empty string selects every cluster.

    Returns:
        A YAML fragment indented to nest under the template's
        2-space ``generators:`` key.
    """
    # The ApplicationSet cluster generator key is 'clusters' (plural);
    # 'cluster' is rejected by the ApplicationSet CRD schema.
    if not label_selector:
        return "    - clusters: {}"
    # Block-style selector: the previous flow-style "{...}" form produced
    # invalid YAML when a selector (with embedded newlines) was present.
    return (
        "    - clusters:\n"
        "        selector:\n"
        "          matchLabels:\n"
        f"            {label_selector}"
    )
|
||||
|
||||
|
||||
def generate_list_generator(clusters: list) -> str:
    """Generate a List generator entry for the ``generators:`` list.

    Args:
        clusters: Dicts with required keys ``name`` and ``server`` and an
            optional ``environment`` (defaults to ``production``).

    Returns:
        A YAML fragment indented to nest under the template's
        2-space ``generators:`` key.
    """
    element_lines = []
    for cluster in clusters:
        # Each element is a mapping under 'elements:'; keys are aligned
        # two spaces past the '-' so the YAML nests correctly.
        element_lines.append(f"          - name: {cluster['name']}")
        element_lines.append(f"            server: {cluster['server']}")
        element_lines.append(
            f"            environment: {cluster.get('environment', 'production')}"
        )
    elements = "\n".join(element_lines)
    return f"    - list:\n        elements:\n{elements}"
|
||||
|
||||
|
||||
def generate_matrix_generator(cluster_label: str, git_directories: list,
                              repo_url: str = "https://github.com/example/apps") -> str:
    """Generate a Matrix generator (Cluster x Git directories).

    Args:
        cluster_label: Value matched against the clusters' ``environment``
            label via ``matchLabels``.
        git_directories: Directory paths for the git generator.
        repo_url: Git repository URL for the git generator.  Defaults to
            the previously hard-coded example URL for backward
            compatibility.

    Returns:
        A YAML fragment indented to nest under the template's
        2-space ``generators:`` key.
    """
    dir_lines = "\n".join(f"              - path: {d}" for d in git_directories)
    # Child generator key is 'clusters' (plural) — 'cluster' is rejected
    # by the ApplicationSet CRD schema.
    return (
        "    - matrix:\n"
        "        generators:\n"
        "          - clusters:\n"
        "              selector:\n"
        "                matchLabels:\n"
        f"                  environment: {cluster_label}\n"
        "          - git:\n"
        f"              repoURL: {repo_url}\n"
        "              revision: HEAD\n"
        "              directories:\n"
        f"{dir_lines}"
    )
|
||||
|
||||
|
||||
def main():
    """CLI entry point: build and emit an ApplicationSet manifest.

    Parses the generator type plus its options, renders the matching
    generators fragment, substitutes it into APPLICATIONSET_TEMPLATE,
    and writes the manifest to --output or stdout.

    Exits with status 1 on missing or malformed generator options.
    """
    parser = argparse.ArgumentParser(
        description='Generate ArgoCD ApplicationSet manifests',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Cluster generator (all clusters)
  python3 applicationset_generator.py cluster \\
    --name my-apps \\
    --repo-url https://github.com/org/repo \\
    --path apps/

  # List generator (specific clusters)
  python3 applicationset_generator.py list \\
    --name my-apps \\
    --clusters prod=https://prod.k8s.local,staging=https://staging.k8s.local

  # Matrix generator (cluster x directories)
  python3 applicationset_generator.py matrix \\
    --name my-apps \\
    --cluster-label production \\
    --directories app1,app2,app3
"""
    )

    parser.add_argument('generator_type', choices=['cluster', 'list', 'matrix'],
                        help='Generator type')
    parser.add_argument('--name', required=True, help='ApplicationSet name')
    parser.add_argument('--repo-url', default='https://github.com/example/repo',
                        help='Git repository URL')
    parser.add_argument('--path', default='apps/', help='Path in repository')
    parser.add_argument('--namespace', default='default', help='Target namespace')
    parser.add_argument('--target-revision', default='main', help='Git branch/tag')
    parser.add_argument('--cluster-label', help='Cluster label selector')
    parser.add_argument('--clusters', help='Cluster list (name=server,name=server)')
    parser.add_argument('--directories', help='Git directories (comma-separated)')
    parser.add_argument('--output', help='Output file')

    args = parser.parse_args()

    # Build the generators fragment for the requested type.
    if args.generator_type == 'cluster':
        generators = generate_cluster_generator(args.cluster_label or "")
    elif args.generator_type == 'list':
        if not args.clusters:
            print("❌ --clusters required for list generator", file=sys.stderr)
            sys.exit(1)
        cluster_list = []
        # Each entry must look like name=server.  partition() (unlike a
        # bare split('=')) tolerates '=' characters inside the server URL
        # and lets us report malformed entries instead of crashing with
        # an unhandled ValueError.
        for entry in args.clusters.split(','):
            name, sep, server = entry.partition('=')
            if not sep or not name.strip() or not server.strip():
                print(f"❌ Invalid --clusters entry: '{entry}' (expected name=server)",
                      file=sys.stderr)
                sys.exit(1)
            cluster_list.append({'name': name.strip(), 'server': server.strip()})
        generators = generate_list_generator(cluster_list)
    else:  # 'matrix' — guaranteed by argparse choices
        if not args.cluster_label or not args.directories:
            print("❌ --cluster-label and --directories required for matrix generator",
                  file=sys.stderr)
            sys.exit(1)
        directories = [d.strip() for d in args.directories.split(',') if d.strip()]
        generators = generate_matrix_generator(args.cluster_label, directories)

    # Substitute the fragment and scalar options into the manifest template.
    appset = APPLICATIONSET_TEMPLATE.format(
        name=args.name,
        generators=generators,
        repo_url=args.repo_url,
        target_revision=args.target_revision,
        path=args.path,
        namespace=args.namespace
    )

    # Write to the requested file, or dump to stdout for piping.
    if args.output:
        with open(args.output, 'w') as f:
            f.write(appset)
        print(f"✅ ApplicationSet written to: {args.output}")
    else:
        print(appset)


if __name__ == '__main__':
    main()
|
||||
275
scripts/check_argocd_health.py
Normal file
275
scripts/check_argocd_health.py
Normal file
@@ -0,0 +1,275 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Check ArgoCD application health and diagnose sync issues.
|
||||
Supports ArgoCD 3.x API with annotation-based tracking.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
from typing import Dict, List, Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
try:
|
||||
import requests
|
||||
except ImportError:
|
||||
print("⚠️ Warning: 'requests' library not found. Install with: pip install requests")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
from tabulate import tabulate
|
||||
except ImportError:
|
||||
tabulate = None
|
||||
|
||||
|
||||
class ArgoCDHealthChecker:
    """Query the ArgoCD REST API and diagnose application health/sync issues.

    Authenticates either with a pre-issued bearer token or by exchanging a
    username/password for a session token via /api/v1/session.
    """

    def __init__(self, server: str, token: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None):
        # Normalize the base URL so path joins below don't double the slash.
        self.server = server.rstrip('/')
        self.token = token
        self.session = requests.Session()

        if token:
            self.session.headers['Authorization'] = f'Bearer {token}'
        elif username and password:
            # Login to get token
            self._login(username, password)
        else:
            raise ValueError("Either --token or --username/--password must be provided")

    def _login(self, username: str, password: str):
        """Login to ArgoCD and get auth token.

        Exits the process with status 1 on any failure (network, HTTP
        error, missing 'token' field).
        """
        try:
            # NOTE(review): verify=False disables TLS certificate
            # verification — credentials travel over an unverified channel.
            # Consider a --insecure flag or a CA bundle instead.
            response = self.session.post(
                f"{self.server}/api/v1/session",
                json={"username": username, "password": password},
                verify=False
            )
            response.raise_for_status()
            self.token = response.json()['token']
            self.session.headers['Authorization'] = f'Bearer {self.token}'
        except Exception as e:
            print(f"❌ Failed to login to ArgoCD: {e}")
            sys.exit(1)

    def get_applications(self, name: Optional[str] = None) -> List[Dict]:
        """Get ArgoCD applications.

        Returns a one-element list for a named application, all
        applications otherwise, or [] on any API error (error is printed,
        not raised).
        """
        try:
            if name:
                url = f"{self.server}/api/v1/applications/{name}"
                # NOTE(review): verify=False — TLS verification disabled here too.
                response = self.session.get(url, verify=False)
                response.raise_for_status()
                # Wrap the single app so callers always get a list.
                return [response.json()]
            else:
                url = f"{self.server}/api/v1/applications"
                response = self.session.get(url, verify=False)
                response.raise_for_status()
                return response.json().get('items', [])
        except Exception as e:
            print(f"❌ Failed to get applications: {e}")
            return []

    def check_application_health(self, app: Dict) -> Dict[str, Any]:
        """Check application health and sync status.

        Inspects the app's status.health, status.sync and
        status.operationState and returns a flat summary dict with
        'issues' and 'recommendations' lists populated for anything
        actionable.
        """
        name = app['metadata']['name']
        health = app.get('status', {}).get('health', {})
        sync = app.get('status', {}).get('sync', {})
        operation_state = app.get('status', {}).get('operationState', {})

        result = {
            'name': name,
            'health_status': health.get('status', 'Unknown'),
            'health_message': health.get('message', ''),
            'sync_status': sync.get('status', 'Unknown'),
            # Abbreviate the git SHA to 8 chars for display.
            'sync_revision': sync.get('revision', 'N/A')[:8] if sync.get('revision') else 'N/A',
            'operation_phase': operation_state.get('phase', 'N/A'),
            'issues': [],
            'recommendations': []
        }

        # Check for common issues
        if result['health_status'] not in ['Healthy', 'Unknown']:
            result['issues'].append(f"Application is {result['health_status']}")
            if result['health_message']:
                result['issues'].append(f"Health message: {result['health_message']}")

        if result['sync_status'] == 'OutOfSync':
            result['issues'].append("Application is out of sync with Git")
            result['recommendations'].append("Run: argocd app sync " + name)
            result['recommendations'].append("Check if manual sync is required (sync policy)")

        if result['sync_status'] == 'Unknown':
            result['issues'].append("Sync status is unknown")
            result['recommendations'].append("Check ArgoCD application controller logs")
            result['recommendations'].append(f"kubectl logs -n argocd -l app.kubernetes.io/name=argocd-application-controller")

        # Check for failed operations
        if operation_state.get('phase') == 'Failed':
            result['issues'].append(f"Last operation failed")
            if 'message' in operation_state:
                result['issues'].append(f"Operation message: {operation_state['message']}")
            result['recommendations'].append("Check operation details in ArgoCD UI")
            result['recommendations'].append(f"argocd app get {name}")

        # Check resource conditions (ArgoCD 3.x)
        resources = app.get('status', {}).get('resources', [])
        unhealthy_resources = [r for r in resources if r.get('health', {}).get('status') not in ['Healthy', 'Unknown', '']]
        if unhealthy_resources:
            result['issues'].append(f"{len(unhealthy_resources)} resources are unhealthy")
            for r in unhealthy_resources[:3]:  # Show first 3
                kind = r.get('kind', 'Unknown')
                # NOTE(review): 'name' here shadows the application name
                # bound above; harmless today because the app name is not
                # used after this point, but fragile.
                name = r.get('name', 'Unknown')
                status = r.get('health', {}).get('status', 'Unknown')
                result['issues'].append(f"  - {kind}/{name}: {status}")
            result['recommendations'].append(f"kubectl get {unhealthy_resources[0]['kind']} -n {app['spec']['destination']['namespace']}")

        # Check for annotation-based tracking (ArgoCD 3.x default)
        # NOTE(review): this reads spec.syncPolicy.syncOptions; confirm
        # that 'UseLabel=true' is actually surfaced there in your ArgoCD
        # version (tracking method is often configured in argocd-cm).
        tracking_method = app.get('spec', {}).get('syncPolicy', {}).get('syncOptions', [])
        has_label_tracking = 'UseLabel=true' in tracking_method
        if has_label_tracking:
            result['recommendations'].append("⚠️ Using legacy label-based tracking. Consider migrating to annotation-based tracking (ArgoCD 3.x default)")

        return result

    def check_all_applications(self, name: Optional[str] = None, show_healthy: bool = False) -> List[Dict]:
        """Check all applications or specific application.

        Returns one summary dict per app; apps without issues are
        filtered out unless show_healthy is True.
        """
        apps = self.get_applications(name)
        results = []

        for app in apps:
            result = self.check_application_health(app)
            if show_healthy or result['issues']:
                results.append(result)

        return results

    def print_summary(self, results: List[Dict]):
        """Print summary of application health.

        Renders a table (via tabulate when installed, plain lines
        otherwise) followed by a detailed issues/recommendations section.
        """
        if not results:
            print("✅ No applications found or all healthy (use --show-healthy to see healthy apps)")
            return

        # Summary statistics
        total = len(results)
        with_issues = len([r for r in results if r['issues']])

        print(f"\n📊 Summary: {with_issues}/{total} applications have issues\n")

        # Table output
        if tabulate:
            table_data = []
            for r in results:
                status_icon = "❌" if r['issues'] else "✅"
                table_data.append([
                    status_icon,
                    r['name'],
                    r['health_status'],
                    r['sync_status'],
                    r['sync_revision'],
                    len(r['issues'])
                ])

            print(tabulate(
                table_data,
                headers=['', 'Application', 'Health', 'Sync', 'Revision', 'Issues'],
                tablefmt='simple'
            ))
        else:
            # Fallback when 'tabulate' is not installed (import set it to None).
            for r in results:
                status_icon = "❌" if r['issues'] else "✅"
                print(f"{status_icon} {r['name']}: Health={r['health_status']}, Sync={r['sync_status']}, Issues={len(r['issues'])}")

        # Detailed issues and recommendations
        print("\n🔍 Detailed Issues:\n")
        for r in results:
            if not r['issues']:
                continue

            print(f"Application: {r['name']}")
            print(f"  Health: {r['health_status']}")
            print(f"  Sync: {r['sync_status']}")

            if r['issues']:
                print("  Issues:")
                for issue in r['issues']:
                    print(f"    • {issue}")

            if r['recommendations']:
                print("  Recommendations:")
                for rec in r['recommendations']:
                    print(f"    → {rec}")
            print()
|
||||
|
||||
|
||||
def main():
    """CLI entry point: authenticate, collect health results, print or dump JSON."""
    parser = argparse.ArgumentParser(
        description='Check ArgoCD application health and diagnose sync issues (ArgoCD 3.x compatible)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Check all applications
  python3 check_argocd_health.py \\
    --server https://argocd.example.com \\
    --token $ARGOCD_TOKEN

  # Check specific application
  python3 check_argocd_health.py \\
    --server https://argocd.example.com \\
    --username admin \\
    --password $ARGOCD_PASSWORD \\
    --app my-app

  # Show all applications including healthy ones
  python3 check_argocd_health.py \\
    --server https://argocd.example.com \\
    --token $ARGOCD_TOKEN \\
    --show-healthy

ArgoCD 3.x Features:
  - Annotation-based tracking (default)
  - Fine-grained RBAC support
  - Enhanced resource health checks
"""
    )

    parser.add_argument('--server', required=True, help='ArgoCD server URL')
    parser.add_argument('--token', help='ArgoCD auth token (or set ARGOCD_TOKEN env var)')
    parser.add_argument('--username', help='ArgoCD username')
    parser.add_argument('--password', help='ArgoCD password')
    parser.add_argument('--app', help='Specific application name to check')
    parser.add_argument('--show-healthy', action='store_true', help='Show healthy applications')
    parser.add_argument('--json', action='store_true', help='Output as JSON')

    args = parser.parse_args()

    # Get token from env if not provided
    import os  # NOTE(review): function-local import; consider moving to module top
    token = args.token or os.getenv('ARGOCD_TOKEN')

    try:
        # ArgoCDHealthChecker raises ValueError when no credentials were
        # supplied; that surfaces via the broad except below.
        checker = ArgoCDHealthChecker(
            server=args.server,
            token=token,
            username=args.username,
            password=args.password
        )

        results = checker.check_all_applications(
            name=args.app,
            show_healthy=args.show_healthy
        )

        if args.json:
            print(json.dumps(results, indent=2))
        else:
            checker.print_summary(results)

    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report and exit non-zero for CI consumers.
        print(f"❌ Error: {e}")
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
418
scripts/check_flux_health.py
Normal file
418
scripts/check_flux_health.py
Normal file
@@ -0,0 +1,418 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Check Flux CD health and diagnose reconciliation issues.
|
||||
Supports Flux v2.7+ with OCI artifacts, image automation, and source-watcher.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import json
|
||||
from typing import Dict, List, Any, Optional
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
try:
|
||||
from kubernetes import client, config
|
||||
from kubernetes.client.rest import ApiException
|
||||
except ImportError:
|
||||
print("⚠️ Warning: 'kubernetes' library not found. Install with: pip install kubernetes")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
from tabulate import tabulate
|
||||
except ImportError:
|
||||
tabulate = None
|
||||
|
||||
|
||||
class FluxHealthChecker:
    """Inspect Flux CD controllers and custom resources via the Kubernetes API.

    Collects per-resource readiness (Ready condition) plus type-specific
    diagnostics, and checks the Flux controller pods in the Flux namespace.
    """

    def __init__(self, namespace: str = "flux-system", kubeconfig: Optional[str] = None):
        self.namespace = namespace

        # Load kubeconfig: explicit file if given, otherwise the default
        # kubeconfig, falling back to in-cluster config (e.g. when run
        # from a pod).
        try:
            if kubeconfig:
                config.load_kube_config(config_file=kubeconfig)
            else:
                try:
                    config.load_kube_config()
                except:  # NOTE(review): bare except also swallows KeyboardInterrupt
                    config.load_incluster_config()
        except Exception as e:
            print(f"❌ Failed to load kubeconfig: {e}")
            sys.exit(1)

        self.api = client.ApiClient()
        self.custom_api = client.CustomObjectsApi(self.api)
        self.core_api = client.CoreV1Api(self.api)

    def get_flux_resources(self, resource_type: str, namespace: Optional[str] = None) -> List[Dict]:
        """Get Flux custom resources.

        Returns [] for unknown resource types, missing CRDs (HTTP 404),
        or API errors (a warning is printed for the latter).
        """
        ns = namespace or self.namespace

        # Map short resource-type name -> (API group, version, plural)
        # used for the CustomObjects call.
        resource_map = {
            'gitrepositories': ('source.toolkit.fluxcd.io', 'v1', 'gitrepositories'),
            'ocirepositories': ('source.toolkit.fluxcd.io', 'v1beta2', 'ocirepositories'),
            'helmrepositories': ('source.toolkit.fluxcd.io', 'v1', 'helmrepositories'),
            'buckets': ('source.toolkit.fluxcd.io', 'v1beta2', 'buckets'),
            'kustomizations': ('kustomize.toolkit.fluxcd.io', 'v1', 'kustomizations'),
            'helmreleases': ('helm.toolkit.fluxcd.io', 'v2', 'helmreleases'),
            'imageupdateautomations': ('image.toolkit.fluxcd.io', 'v1beta2', 'imageupdateautomations'),
            'imagerepositories': ('image.toolkit.fluxcd.io', 'v1beta2', 'imagerepositories'),
        }

        if resource_type not in resource_map:
            return []

        group, version, plural = resource_map[resource_type]

        try:
            response = self.custom_api.list_namespaced_custom_object(
                group=group,
                version=version,
                namespace=ns,
                plural=plural
            )
            return response.get('items', [])
        except ApiException as e:
            if e.status == 404:
                # CRD not installed (e.g. image automation components not
                # deployed) — treat as "no resources".
                return []
            print(f"⚠️ Warning: Failed to get {resource_type}: {e}")
            return []

    def check_resource_health(self, resource: Dict, resource_type: str) -> Dict[str, Any]:
        """Check resource health and reconciliation status.

        Builds a summary from the resource's Ready condition, then runs
        a type-specific check that may append issues/recommendations.
        """
        name = resource['metadata']['name']
        namespace = resource['metadata']['namespace']
        status = resource.get('status', {})

        # Get conditions: Flux reports overall readiness via 'Ready'.
        conditions = status.get('conditions', [])
        ready_condition = next((c for c in conditions if c['type'] == 'Ready'), None)

        result = {
            'type': resource_type,
            'name': name,
            'namespace': namespace,
            'ready': ready_condition.get('status', 'Unknown') if ready_condition else 'Unknown',
            'message': ready_condition.get('message', '') if ready_condition else '',
            'last_reconcile': status.get('lastHandledReconcileAt', 'N/A'),
            'issues': [],
            'recommendations': []
        }

        # Check if ready ('True'/'False'/'Unknown' condition strings)
        if result['ready'] != 'True':
            result['issues'].append(f"{resource_type} is not ready")
            if result['message']:
                result['issues'].append(f"Message: {result['message']}")

        # Type-specific checks
        if resource_type == 'gitrepositories':
            self._check_git_repository(resource, result)
        elif resource_type == 'ocirepositories':
            self._check_oci_repository(resource, result)
        elif resource_type == 'kustomizations':
            self._check_kustomization(resource, result)
        elif resource_type == 'helmreleases':
            self._check_helm_release(resource, result)
        elif resource_type == 'imageupdateautomations':
            self._check_image_automation(resource, result)

        return result

    def _check_git_repository(self, resource: Dict, result: Dict):
        """Check GitRepository-specific issues (mutates result in place)."""
        status = resource.get('status', {})

        # Check artifact: absence means the source was never fetched.
        if not status.get('artifact'):
            result['issues'].append("No artifact available")
            result['recommendations'].append("Check repository URL and credentials")
            result['recommendations'].append(f"flux reconcile source git {result['name']} -n {result['namespace']}")

        # Check for auth errors by scanning the Ready condition message.
        if 'authentication' in result['message'].lower() or 'credentials' in result['message'].lower():
            result['recommendations'].append("Check Git credentials secret")
            result['recommendations'].append(f"kubectl get secret -n {result['namespace']}")

    def _check_oci_repository(self, resource: Dict, result: Dict):
        """Check OCIRepository-specific issues (Flux v2.6+ feature)."""
        status = resource.get('status', {})

        # Check artifact
        if not status.get('artifact'):
            result['issues'].append("No OCI artifact available")
            result['recommendations'].append("Check OCI repository URL and credentials")
            result['recommendations'].append("Verify OCI artifact exists in registry")

        # Check signature verification (Flux v2.7+)
        spec = resource.get('spec', {})
        if spec.get('verify'):
            # NOTE(review): observedGeneration tracks reconciliation, not
            # verification outcome specifically — confirm against the Flux
            # OCIRepository status API before relying on this signal.
            verify_status = status.get('observedGeneration')
            if not verify_status:
                result['issues'].append("Signature verification configured but not completed")
                result['recommendations'].append("Check cosign or notation configuration")

    def _check_kustomization(self, resource: Dict, result: Dict):
        """Check Kustomization-specific issues (mutates result in place)."""
        status = resource.get('status', {})

        # Check source reference
        spec = resource.get('spec', {})
        source_ref = spec.get('sourceRef', {})
        if not source_ref:
            result['issues'].append("No source reference configured")

        # Check inventory (list of objects this Kustomization manages)
        inventory = status.get('inventory')
        if inventory and 'entries' in inventory:
            total_resources = len(inventory['entries'])
            # NOTE(review): informational, not really a "recommendation".
            result['recommendations'].append(f"Managing {total_resources} resources")

        # Check for prune errors via the Ready condition message.
        if 'prune' in result['message'].lower():
            result['recommendations'].append("Check for resources blocking pruning")
            result['recommendations'].append("Review finalizers on deleted resources")

    def _check_helm_release(self, resource: Dict, result: Dict):
        """Check HelmRelease-specific issues (mutates result in place)."""
        status = resource.get('status', {})

        # Check install/upgrade status using the failure counters Flux
        # maintains on the HelmRelease status.
        install_failures = status.get('installFailures', 0)
        upgrade_failures = status.get('upgradeFailures', 0)

        if install_failures > 0:
            result['issues'].append(f"Install failed {install_failures} times")
            result['recommendations'].append("Check Helm values and chart compatibility")

        if upgrade_failures > 0:
            result['issues'].append(f"Upgrade failed {upgrade_failures} times")
            result['recommendations'].append("Review Helm upgrade logs")
            result['recommendations'].append(f"kubectl logs -n {result['namespace']} -l app=helm-controller")

        # Check for timeout issues
        if 'timeout' in result['message'].lower():
            result['recommendations'].append("Increase timeout in HelmRelease spec")
            result['recommendations'].append("Check pod startup times and readiness probes")

    def _check_image_automation(self, resource: Dict, result: Dict):
        """Check ImageUpdateAutomation-specific issues (Flux v2.7+ GA)."""
        status = resource.get('status', {})

        # Check last automation time: no recorded run suggests the
        # automation never executed successfully.
        last_automation = status.get('lastAutomationRunTime')
        if not last_automation:
            result['issues'].append("No automation runs recorded")
            result['recommendations'].append("Check ImagePolicy and git write access")

    def check_flux_controllers(self) -> List[Dict]:
        """Check health of Flux controller pods.

        Looks up each known controller by its 'app=<name>' label in the
        Flux namespace and reports phase, restart count and issues.
        """
        results = []

        controller_labels = [
            'source-controller',
            'kustomize-controller',
            'helm-controller',
            'notification-controller',
            'image-reflector-controller',
            'image-automation-controller',
        ]

        for controller in controller_labels:
            try:
                pods = self.core_api.list_namespaced_pod(
                    namespace=self.namespace,
                    label_selector=f'app={controller}'
                )

                if not pods.items:
                    # Controller may legitimately be absent (e.g. image
                    # automation components not installed).
                    results.append({
                        'controller': controller,
                        'status': 'Not Found',
                        'issues': [f'{controller} not found'],
                        'recommendations': ['Check Flux installation']
                    })
                    continue

                # Only the first pod is inspected (controllers run one replica).
                pod = pods.items[0]
                pod_status = pod.status.phase

                result = {
                    'controller': controller,
                    'status': pod_status,
                    'issues': [],
                    'recommendations': []
                }

                if pod_status != 'Running':
                    result['issues'].append(f'Controller not running (status: {pod_status})')
                    result['recommendations'].append(f'kubectl describe pod -n {self.namespace} -l app={controller}')
                    result['recommendations'].append(f'kubectl logs -n {self.namespace} -l app={controller}')

                # Check container restarts (threshold 5 is a heuristic)
                for container_status in pod.status.container_statuses or []:
                    if container_status.restart_count > 5:
                        result['issues'].append(f'High restart count: {container_status.restart_count}')
                        result['recommendations'].append('Check controller logs for crash loops')

                results.append(result)

            except ApiException as e:
                results.append({
                    'controller': controller,
                    'status': 'Error',
                    'issues': [f'Failed to check: {e}'],
                    'recommendations': []
                })

        return results

    def print_summary(self, resource_results: List[Dict], controller_results: List[Dict]):
        """Print summary of Flux health.

        Renders controller and resource tables (tabulate when installed,
        plain lines otherwise), then a detailed issues section.
        """
        # Controller health
        print("\n🎛️ Flux Controllers:\n")

        if tabulate:
            controller_table = []
            for r in controller_results:
                status_icon = "✅" if r['status'] == 'Running' and not r['issues'] else "❌"
                controller_table.append([
                    status_icon,
                    r['controller'],
                    r['status'],
                    len(r['issues'])
                ])
            print(tabulate(
                controller_table,
                headers=['', 'Controller', 'Status', 'Issues'],
                tablefmt='simple'
            ))
        else:
            for r in controller_results:
                status_icon = "✅" if r['status'] == 'Running' and not r['issues'] else "❌"
                print(f"{status_icon} {r['controller']}: {r['status']} ({len(r['issues'])} issues)")

        # Resource health
        if resource_results:
            print("\n📦 Flux Resources:\n")

            if tabulate:
                resource_table = []
                for r in resource_results:
                    status_icon = "✅" if r['ready'] == 'True' and not r['issues'] else "❌"
                    resource_table.append([
                        status_icon,
                        r['type'],
                        r['name'],
                        r['namespace'],
                        r['ready'],
                        len(r['issues'])
                    ])
                print(tabulate(
                    resource_table,
                    headers=['', 'Type', 'Name', 'Namespace', 'Ready', 'Issues'],
                    tablefmt='simple'
                ))
            else:
                for r in resource_results:
                    status_icon = "✅" if r['ready'] == 'True' and not r['issues'] else "❌"
                    print(f"{status_icon} {r['type']}/{r['name']}: {r['ready']} ({len(r['issues'])} issues)")

        # Detailed issues across both controllers and resources
        all_results = controller_results + resource_results
        issues_found = [r for r in all_results if r.get('issues')]

        if issues_found:
            print("\n🔍 Detailed Issues:\n")
            for r in issues_found:
                # Controllers have 'controller', resources have 'type'/'name'.
                print(f"{r.get('controller') or r.get('type')}/{r.get('name', 'N/A')}:")
                for issue in r['issues']:
                    print(f"  • {issue}")
                if r.get('recommendations'):
                    print("  Recommendations:")
                    for rec in r['recommendations']:
                        print(f"    → {rec}")
                print()
        else:
            print("\n✅ No issues found!")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: check Flux controllers and resources, print or dump JSON."""
    parser = argparse.ArgumentParser(
        description='Check Flux CD health and diagnose reconciliation issues (Flux v2.7+ compatible)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Check Flux controllers and all resources
  python3 check_flux_health.py

  # Check specific namespace
  python3 check_flux_health.py --namespace my-app

  # Check only GitRepositories
  python3 check_flux_health.py --type gitrepositories

  # Check OCI repositories (Flux v2.6+)
  python3 check_flux_health.py --type ocirepositories

  # Output as JSON
  python3 check_flux_health.py --json

Flux v2.7+ Features:
  - OCI artifact support (GA in v2.6)
  - Image automation (GA in v2.7)
  - Source-watcher component
  - OpenTelemetry tracing
"""
    )

    parser.add_argument('--namespace', default='flux-system', help='Flux namespace (default: flux-system)')
    parser.add_argument('--type', help='Check specific resource type only')
    parser.add_argument('--kubeconfig', help='Path to kubeconfig file')
    parser.add_argument('--json', action='store_true', help='Output as JSON')

    args = parser.parse_args()

    try:
        checker = FluxHealthChecker(namespace=args.namespace, kubeconfig=args.kubeconfig)

        # Check controllers
        controller_results = checker.check_flux_controllers()

        # Check resources: either the single requested type, or the
        # default set below.  Unknown --type values yield no resources
        # (get_flux_resources returns [] for unmapped types).
        resource_results = []
        resource_types = [args.type] if args.type else [
            'gitrepositories',
            'ocirepositories',
            'helmrepositories',
            'kustomizations',
            'helmreleases',
            'imageupdateautomations',
        ]

        for resource_type in resource_types:
            resources = checker.get_flux_resources(resource_type)
            for resource in resources:
                result = checker.check_resource_health(resource, resource_type)
                resource_results.append(result)

        if args.json:
            print(json.dumps({
                'controllers': controller_results,
                'resources': resource_results
            }, indent=2))
        else:
            checker.print_summary(resource_results, controller_results)

    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: print the error plus a traceback for
        # debugging, then exit non-zero.
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
150
scripts/oci_artifact_checker.py
Normal file
150
scripts/oci_artifact_checker.py
Normal file
@@ -0,0 +1,150 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validate Flux OCI artifact references and verify signatures.
|
||||
Supports Flux v2.6+ OCI artifacts with cosign/notation verification.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import subprocess
|
||||
import json
|
||||
|
||||
try:
|
||||
from kubernetes import client, config
|
||||
except ImportError:
|
||||
print("⚠️ 'kubernetes' not found. Install with: pip install kubernetes")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def check_oci_repository(name: str, namespace: str = 'flux-system'):
    """Print the status of a Flux OCIRepository custom resource.

    Args:
        name: OCIRepository object name.
        namespace: Namespace the object lives in (default: flux-system).

    Returns:
        True when the resource's Ready condition reports 'True',
        False otherwise (including any kubeconfig/API error).
    """
    try:
        config.load_kube_config()
        custom_api = client.CustomObjectsApi()

        resource = custom_api.get_namespaced_custom_object(
            group='source.toolkit.fluxcd.io',
            version='v1beta2',
            namespace=namespace,
            plural='ocirepositories',
            name=name,
        )

        status = resource.get('status', {})

        # Locate the Ready condition, if the controller has reported one.
        ready = None
        for cond in status.get('conditions', []):
            if cond['type'] == 'Ready':
                ready = cond
                break

        ready_state = ready.get('status') if ready else 'Unknown'
        ready_msg = ready.get('message', 'N/A') if ready else 'N/A'
        print(f"📦 OCIRepository: {name}")
        print(f" Ready: {ready_state}")
        print(f" Message: {ready_msg}")

        # Latest fetched artifact, if the source-controller produced one.
        artifact = status.get('artifact')
        if not artifact:
            print(" ⚠️ No artifact available")
        else:
            print(f" Artifact: {artifact.get('revision', 'N/A')}")
            print(f" Digest: {artifact.get('digest', 'N/A')}")

        # Signature verification settings live under spec.verify.
        verify_spec = resource.get('spec', {}).get('verify')
        if verify_spec:
            print(" ✓ Signature verification enabled")
            print(f" Provider: {verify_spec.get('provider', 'cosign')}")
        else:
            print(" ⚠️ No signature verification")

        return bool(ready) and ready.get('status') == 'True'

    except Exception as e:
        print(f"❌ Error checking OCIRepository: {e}")
        return False
|
||||
|
||||
|
||||
def verify_oci_artifact(image: str, provider: str = 'cosign'):
    """Verify an OCI artifact's signature with cosign or notation.

    Args:
        image: Fully qualified OCI image reference (e.g. ghcr.io/org/app:v1.0.0).
        provider: Verification tool to invoke: 'cosign' or 'notation'.

    Returns:
        True if the signature verified, False on verification failure or
        when the tool binary is missing, None for an unrecognized provider
        (unreachable from the CLI, which restricts choices).
    """
    print(f"\n🔐 Verifying {image} with {provider}...\n")

    # Install hints per supported verifier; the dict also serves as the
    # provider whitelist. (The cosign/notation branches were previously
    # duplicated verbatim — this table removes the duplication.)
    install_hints = {
        'cosign': 'https://github.com/sigstore/cosign',
        'notation': 'https://notaryproject.dev',
    }
    hint = install_hints.get(provider)
    if hint is None:
        # Preserve the original fall-through behavior for direct callers.
        return None

    try:
        # Both tools share the `<tool> verify <image>` CLI shape.
        result = subprocess.run(
            [provider, 'verify', image],
            capture_output=True,
            text=True
        )
    except FileNotFoundError:
        print(f"⚠️ {provider} not found. Install: {hint}")
        return False

    if result.returncode == 0:
        print("✅ Signature verification successful")
        return True

    print(f"❌ Verification failed: {result.stderr}")
    return False
|
||||
|
||||
|
||||
def main():
    """CLI entry point: check an OCIRepository and/or verify an image signature."""
    parser = argparse.ArgumentParser(
        description='Validate Flux OCI artifacts and verify signatures',
        # Keep the hand-formatted epilog intact; the default formatter would
        # collapse its line breaks (matches validate_gitops_repo.py).
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Check OCIRepository status
  python3 oci_artifact_checker.py --name my-app-oci --namespace flux-system

  # Verify OCI artifact signature with cosign
  python3 oci_artifact_checker.py --verify ghcr.io/org/app:v1.0.0

  # Verify with notation
  python3 oci_artifact_checker.py --verify myregistry.io/app:latest --provider notation

Requirements:
  - kubectl configured for cluster access
  - cosign (for signature verification)
  - notation (for notation verification)

Flux v2.6+ OCI Features:
  - OCIRepository for Helm charts and Kustomize overlays
  - Signature verification with cosign or notation
  - Digest pinning for immutability
"""
    )

    parser.add_argument('--name', help='OCIRepository name')
    parser.add_argument('--namespace', default='flux-system', help='Namespace')
    parser.add_argument('--verify', help='OCI image to verify')
    parser.add_argument('--provider', choices=['cosign', 'notation'], default='cosign',
                        help='Verification provider')

    args = parser.parse_args()

    # Validate up front (previously this check ran last, after the work).
    if not args.name and not args.verify:
        print("❌ Specify --name or --verify")
        sys.exit(1)

    if args.name:
        check_oci_repository(args.name, args.namespace)

    if args.verify:
        verify_oci_artifact(args.verify, args.provider)


if __name__ == '__main__':
    main()
|
||||
88
scripts/promotion_validator.py
Normal file
88
scripts/promotion_validator.py
Normal file
@@ -0,0 +1,88 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validate environment promotion workflows (dev → staging → production).
|
||||
Checks that changes are promoted through environments in the correct order.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def get_git_diff(ref1: str, ref2: str, path: str = ".") -> str:
    """Return the git diff of *path* between *ref1* and *ref2*.

    Uses the three-dot form (``ref1...ref2``), i.e. changes since the
    merge base. Exits the process with status 1 if git fails.
    """
    cmd = ['git', 'diff', f'{ref1}...{ref2}', '--', path]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as exc:
        print(f"❌ Git diff failed: {exc}")
        sys.exit(1)
    return proc.stdout
|
||||
|
||||
|
||||
def validate_promotion(source_env: str, target_env: str, repo_path: str):
    """Validate a promotion path from *source_env* to *target_env*.

    Exits with status 1 when either environment directory is missing;
    otherwise prints advisory checks and the manual next steps.
    """
    print(f"🔍 Validating promotion: {source_env} → {target_env}\n")

    base = Path(repo_path)
    source_path = base / f"environments/{source_env}"
    target_path = base / f"environments/{target_env}"

    # Both environment directories must exist before comparing histories.
    for label, env_path in (("Source", source_path), ("Target", target_path)):
        if not env_path.exists():
            print(f"❌ {label} environment not found: {env_path}")
            sys.exit(1)

    # When promoting out of dev, recent direct edits to the target are
    # suspicious — they may have bypassed the promotion path.
    recent_target_diff = get_git_diff('HEAD~10', 'HEAD', str(target_path))
    if recent_target_diff and source_env == 'dev':
        print("⚠️ Recent changes detected in target environment")
        print(" Verify changes were promoted from dev/staging first")

    print("✅ Promotion path is valid")
    print("\nNext steps:")
    print(f"1. Review changes in {source_env}")
    print(f"2. Test in {source_env} environment")
    print(f"3. Copy changes to {target_env}")
    print(f"4. Create PR for {target_env} promotion")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: parse source/target environments and validate."""
    parser = argparse.ArgumentParser(
        description='Validate environment promotion workflows',
        # Keep the hand-formatted epilog intact; the default formatter would
        # collapse its line breaks (matches validate_gitops_repo.py).
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Validate dev → staging promotion
  python3 promotion_validator.py --source dev --target staging

  # Validate staging → production promotion
  python3 promotion_validator.py --source staging --target production

Checks:
  - Environment directories exist
  - Changes flow through proper promotion path
  - No direct changes to production
"""
    )

    parser.add_argument('--source', required=True, help='Source environment (dev/staging)')
    parser.add_argument('--target', required=True, help='Target environment (staging/production)')
    parser.add_argument('--repo-path', default='.', help='Repository path')

    args = parser.parse_args()

    validate_promotion(args.source, args.target, args.repo_path)


if __name__ == '__main__':
    main()
|
||||
178
scripts/secret_audit.py
Normal file
178
scripts/secret_audit.py
Normal file
@@ -0,0 +1,178 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Audit secrets management in GitOps repositories.
|
||||
Checks for plain secrets, SOPS, Sealed Secrets, and External Secrets Operator.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import List, Dict
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("⚠️ 'pyyaml' not found. Install with: pip install pyyaml")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class SecretAuditor:
    """Audit a GitOps repository for secrets-management hygiene.

    Scans the tree for plain Kubernetes Secrets, SOPS configuration,
    Sealed Secrets, and External Secrets Operator manifests. Findings are
    dicts with 'severity', 'type', 'message', and 'recommendation' keys.
    """

    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path)
        # Finding dicts accumulated by the individual checks.
        self.findings: List[Dict] = []

    def audit(self) -> Dict:
        """Run all secret audits and return the summary report."""
        print(f"🔐 Auditing secrets in: {self.repo_path}\n")

        self._check_plain_secrets()
        self._check_sops_config()
        self._check_sealed_secrets()
        self._check_external_secrets()

        return self._generate_report()

    def _check_plain_secrets(self):
        """Flag unencrypted Kubernetes Secret manifests committed to Git."""
        secret_files = list(self.repo_path.rglob('*.yaml')) + list(self.repo_path.rglob('*.yml'))
        plain_secrets = []

        for sfile in secret_files:
            if '.git' in sfile.parts:
                continue

            try:
                with open(sfile) as f:
                    for doc in yaml.safe_load_all(f):
                        if not doc or doc.get('kind') != 'Secret':
                            continue
                        # Service-account tokens are generated, not secrets
                        # an operator committed — skip them.
                        if doc.get('type') == 'kubernetes.io/service-account-token':
                            continue
                        # Heuristic: SOPS-encrypted manifests embed a 'sops'
                        # metadata block. (kind is already 'Secret' here, so
                        # the previous extra SealedSecret check was dead code.)
                        if 'sops' not in str(doc):
                            plain_secrets.append(sfile.relative_to(self.repo_path))
            except (yaml.YAMLError, OSError, UnicodeDecodeError):
                # Narrowed from a bare `except: pass` so programming errors
                # are no longer silently swallowed; unparseable/unreadable
                # files are simply not this check's concern.
                continue

        if plain_secrets:
            self.findings.append({
                'severity': 'HIGH',
                'type': 'Plain Secrets',
                'count': len(plain_secrets),
                'message': f"Found {len(plain_secrets)} plain Kubernetes Secret manifests",
                'recommendation': 'Encrypt with SOPS, Sealed Secrets, or use External Secrets Operator',
                'files': [str(f) for f in plain_secrets[:5]]
            })
        else:
            print("✅ No plain secrets found in Git")

    def _check_sops_config(self):
        """Check SOPS configuration and which encryption backend it uses."""
        sops_config = self.repo_path / '.sops.yaml'

        if sops_config.exists():
            print("✅ SOPS config found (.sops.yaml)")
            try:
                with open(sops_config) as f:
                    config = yaml.safe_load(f)
            except (yaml.YAMLError, OSError) as e:
                # Previously an unreadable .sops.yaml crashed the audit.
                self.findings.append({
                    'severity': 'MEDIUM',
                    'type': 'SOPS Configuration',
                    'message': f'Could not parse .sops.yaml: {e}',
                    'recommendation': 'Fix the .sops.yaml syntax'
                })
                return

            # Crude substring probe over the serialized config; good enough
            # to distinguish age vs PGP creation rules.
            if 'age' in str(config):
                print(" ✓ Using age encryption (recommended)")
            elif 'pgp' in str(config):
                print(" ⚠️ Using PGP (consider migrating to age)")
                self.findings.append({
                    'severity': 'LOW',
                    'type': 'SOPS Configuration',
                    'message': 'Using PGP encryption',
                    'recommendation': 'Migrate to age for better security and simplicity'
                })
        else:
            # No config, but encrypted files present → settings drift risk.
            encrypted_files = list(self.repo_path.rglob('*.enc.yaml'))
            if encrypted_files:
                print("⚠️ SOPS encrypted files found but no .sops.yaml config")
                self.findings.append({
                    'severity': 'MEDIUM',
                    'type': 'SOPS Configuration',
                    'message': 'Encrypted files without .sops.yaml',
                    'recommendation': 'Add .sops.yaml for consistent encryption settings'
                })

    def _check_sealed_secrets(self):
        """Report Sealed Secrets usage (informational only)."""
        sealed_secrets = list(self.repo_path.rglob('*sealedsecret*.yaml'))

        if sealed_secrets:
            print(f"✅ Found {len(sealed_secrets)} Sealed Secrets")

    def _check_external_secrets(self):
        """Report External Secrets Operator usage (informational only)."""
        eso_files = list(self.repo_path.rglob('*externalsecret*.yaml')) + \
                    list(self.repo_path.rglob('*secretstore*.yaml'))

        if eso_files:
            print(f"✅ Found {len(eso_files)} External Secrets manifests")

    def _generate_report(self) -> Dict:
        """Summarize findings with per-severity counts."""
        severities = [f['severity'] for f in self.findings]
        return {
            'findings': self.findings,
            'total_issues': len(self.findings),
            'high_severity': severities.count('HIGH'),
            'medium_severity': severities.count('MEDIUM'),
            'low_severity': severities.count('LOW')
        }
|
||||
|
||||
|
||||
def main():
    """CLI entry point: audit a repository and print a severity summary."""
    parser = argparse.ArgumentParser(
        description='Audit secrets management in GitOps repositories',
        # Keep the hand-formatted epilog intact; the default formatter would
        # collapse its line breaks (matches validate_gitops_repo.py).
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Audit current directory
  python3 secret_audit.py .

  # Audit specific repo
  python3 secret_audit.py /path/to/gitops-repo

Checks:
  - Plain Kubernetes Secrets in Git (HIGH risk)
  - SOPS configuration and encryption method
  - Sealed Secrets usage
  - External Secrets Operator usage
"""
    )

    parser.add_argument('repo_path', help='Path to GitOps repository')

    args = parser.parse_args()

    auditor = SecretAuditor(args.repo_path)
    report = auditor.audit()

    # Print summary
    print("\n" + "="*60)
    print("📊 Audit Summary")
    print("="*60)

    if report['findings']:
        print(f"\n🔴 HIGH: {report['high_severity']}")
        print(f"🟡 MEDIUM: {report['medium_severity']}")
        print(f"🟢 LOW: {report['low_severity']}")

        print("\n📋 Findings:\n")
        for f in report['findings']:
            icon = {'HIGH': '🔴', 'MEDIUM': '🟡', 'LOW': '🟢'}[f['severity']]
            print(f"{icon} [{f['severity']}] {f['type']}")
            print(f" {f['message']}")
            print(f" → {f['recommendation']}")
            if 'files' in f and f['files']:
                print(f" Files: {', '.join(f['files'][:3])}")
            print()
    else:
        print("\n✅ No security issues found!")

    # Non-zero exit only for HIGH findings, so CI can gate on them.
    sys.exit(1 if report['high_severity'] > 0 else 0)


if __name__ == '__main__':
    main()
|
||||
144
scripts/sync_drift_detector.py
Normal file
144
scripts/sync_drift_detector.py
Normal file
@@ -0,0 +1,144 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Detect configuration drift between Git and Kubernetes cluster.
|
||||
Supports both ArgoCD and Flux CD deployments.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import subprocess
|
||||
import json
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
try:
|
||||
from kubernetes import client, config
|
||||
except ImportError:
|
||||
print("⚠️ 'kubernetes' library not found. Install with: pip install kubernetes")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("⚠️ 'pyyaml' library not found. Install with: pip install pyyaml")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def run_command(cmd: List[str]) -> tuple:
    """Run *cmd* and return a (stdout, error) pair.

    Exactly one element is None: (output, None) on success,
    (None, stderr) when the command exits non-zero.
    """
    try:
        completed = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as exc:
        return None, exc.stderr
    return completed.stdout, None
|
||||
|
||||
|
||||
def check_argocd_drift(app_name: Optional[str] = None):
    """Check drift for one ArgoCD application, or all apps when name is None.

    Delegates per-app work to check_single_app_drift().
    """
    print("🔍 Checking ArgoCD drift...\n")

    # FIX: the original built a `cmd` diff command that was never executed
    # (dead code) and shadowed the `app_name` parameter inside its loop.
    if app_name:
        check_single_app_drift(app_name)
        return

    # No app given: enumerate all applications and check each one.
    stdout, err = run_command(['argocd', 'app', 'list', '-o', 'json'])
    if err:
        print(f"❌ Failed to list apps: {err}")
        return

    for app in json.loads(stdout):
        check_single_app_drift(app['metadata']['name'])
|
||||
|
||||
|
||||
def check_single_app_drift(app_name: str):
    """Report whether a single ArgoCD application has drifted from Git."""
    stdout, err = run_command(['argocd', 'app', 'diff', app_name])

    # A real failure (anything other than "no differences") aborts the check.
    if err and 'no differences' not in err.lower():
        print(f"❌ {app_name}: Error checking drift")
        print(f" {err}")
        return

    combined = (stdout or '') + (err or '')
    if not stdout or 'no differences' in combined.lower():
        print(f"✅ {app_name}: No drift detected")
        return

    print(f"⚠️ {app_name}: Drift detected")
    print(f" Run: argocd app sync {app_name}")
|
||||
|
||||
|
||||
def check_flux_drift(namespace: str = 'flux-system'):
    """Report Flux Kustomizations and HelmReleases that are not Ready.

    Queries the flux CLI with a ready=false status selector; any listed
    resource is out of sync.
    """
    print("🔍 Checking Flux drift...\n")

    # Check kustomizations
    stdout, err = run_command(['flux', 'get', 'kustomizations', '-n', namespace, '--status-selector', 'ready=false'])
    if err:
        # FIX: errors were previously ignored, so a failed `flux` invocation
        # was misreported as "✅ All Kustomizations synced".
        print(f"❌ Failed to query Kustomizations: {err}")
    elif stdout:
        print("⚠️ Out-of-sync Kustomizations:")
        print(stdout)
    else:
        print("✅ All Kustomizations synced")

    # Check helmreleases
    stdout, err = run_command(['flux', 'get', 'helmreleases', '-n', namespace, '--status-selector', 'ready=false'])
    if err:
        print(f"❌ Failed to query HelmReleases: {err}")
    elif stdout:
        print("\n⚠️ Out-of-sync HelmReleases:")
        print(stdout)
    else:
        print("✅ All HelmReleases synced")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: dispatch to ArgoCD and/or Flux drift checks."""
    parser = argparse.ArgumentParser(
        description='Detect configuration drift between Git and cluster',
        # Keep the hand-formatted epilog intact; the default formatter would
        # collapse its line breaks (matches validate_gitops_repo.py).
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Check ArgoCD drift
  python3 sync_drift_detector.py --argocd

  # Check specific ArgoCD app
  python3 sync_drift_detector.py --argocd --app my-app

  # Check Flux drift
  python3 sync_drift_detector.py --flux

Requirements:
  - argocd CLI (for ArgoCD mode)
  - flux CLI (for Flux mode)
  - kubectl configured
"""
    )

    parser.add_argument('--argocd', action='store_true', help='Check ArgoCD drift')
    parser.add_argument('--flux', action='store_true', help='Check Flux drift')
    parser.add_argument('--app', help='Specific ArgoCD application name')
    parser.add_argument('--namespace', default='flux-system', help='Flux namespace')

    args = parser.parse_args()

    # At least one mode must be selected.
    if not args.argocd and not args.flux:
        print("❌ Specify --argocd or --flux")
        sys.exit(1)

    try:
        if args.argocd:
            check_argocd_drift(args.app)
        if args.flux:
            check_flux_drift(args.namespace)

    except KeyboardInterrupt:
        print("\n\nInterrupted")
        sys.exit(1)
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
299
scripts/validate_gitops_repo.py
Normal file
299
scripts/validate_gitops_repo.py
Normal file
@@ -0,0 +1,299 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Validate GitOps repository structure, manifests, and best practices.
|
||||
Supports both monorepo and polyrepo patterns with Kustomize and Helm.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
import os
|
||||
import glob
|
||||
from typing import Dict, List, Any, Tuple
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
import yaml
|
||||
except ImportError:
|
||||
print("⚠️ Warning: 'pyyaml' library not found. Install with: pip install pyyaml")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
class GitOpsRepoValidator:
    """Validate a GitOps repository's structure, manifests, and practices.

    Results accumulate in three buckets — blocking ``issues``,
    ``warnings``, and advisory ``recommendations`` — and ``validate()``
    returns all three.

    Raises:
        ValueError: if *repo_path* does not exist.
    """

    def __init__(self, repo_path: str):
        self.repo_path = Path(repo_path).resolve()
        if not self.repo_path.exists():
            raise ValueError(f"Path does not exist: {repo_path}")

        self.issues: List[str] = []
        self.warnings: List[str] = []
        self.recommendations: List[str] = []

    def validate(self) -> Dict[str, List[str]]:
        """Run all validations and return the collected results."""
        print(f"🔍 Validating GitOps repository: {self.repo_path}\n")

        # Structure validations
        self._check_repository_structure()
        self._check_kustomization_files()
        self._check_yaml_syntax()
        self._check_best_practices()
        self._check_secrets_management()

        return {
            'issues': self.issues,
            'warnings': self.warnings,
            'recommendations': self.recommendations
        }

    def _check_repository_structure(self):
        """Check repository structure and organization."""
        print("📁 Checking repository structure...")

        # Common monorepo layout markers. (The unused has_overlays local
        # from the original was removed.)
        has_apps = (self.repo_path / 'apps').exists()
        has_clusters = (self.repo_path / 'clusters').exists()
        has_infrastructure = (self.repo_path / 'infrastructure').exists()
        has_base = (self.repo_path / 'base').exists()

        if not any([has_apps, has_clusters, has_infrastructure, has_base]):
            self.warnings.append("No standard directory structure detected (apps/, clusters/, infrastructure/, base/)")
            self.recommendations.append("Consider organizing with: apps/ (applications), infrastructure/ (cluster config), clusters/ (per-cluster)")

        # Check for Flux bootstrap (if Flux)
        flux_system = self.repo_path / 'clusters' / 'flux-system'
        if flux_system.exists():
            print(" ✓ Flux bootstrap detected")
            if not (flux_system / 'gotk-components.yaml').exists():
                self.warnings.append("Flux bootstrap directory exists but gotk-components.yaml not found")

        # Check for ArgoCD bootstrap (if ArgoCD)
        argocd_patterns = list(self.repo_path.rglob('*argocd-*.yaml'))
        if argocd_patterns:
            print(" ✓ ArgoCD manifests detected")

    def _check_kustomization_files(self):
        """Check Kustomization files for validity."""
        print("\n🔧 Checking Kustomization files...")

        kustomization_files = list(self.repo_path.rglob('kustomization.yaml')) + \
                              list(self.repo_path.rglob('kustomization.yml'))

        if not kustomization_files:
            self.warnings.append("No kustomization.yaml files found")
            return

        print(f" Found {len(kustomization_files)} kustomization files")

        for kfile in kustomization_files:
            try:
                with open(kfile, 'r') as f:
                    content = yaml.safe_load(f)

                if not content:
                    self.issues.append(f"Empty kustomization file: {kfile.relative_to(self.repo_path)}")
                    continue

                # A kustomization without any inputs is almost certainly wrong.
                if 'resources' not in content and 'bases' not in content and 'components' not in content:
                    self.warnings.append(f"Kustomization has no resources/bases: {kfile.relative_to(self.repo_path)}")

                # 'bases' is deprecated in favor of 'resources'.
                if 'bases' in content:
                    self.warnings.append(f"Using deprecated 'bases' field: {kfile.relative_to(self.repo_path)}")
                    self.recommendations.append("Migrate 'bases:' to 'resources:' (Kustomize 5.0+)")

            except yaml.YAMLError as e:
                self.issues.append(f"Invalid YAML in {kfile.relative_to(self.repo_path)}: {e}")
            except Exception as e:
                self.issues.append(f"Error reading {kfile.relative_to(self.repo_path)}: {e}")

    def _check_yaml_syntax(self):
        """Check YAML files for syntax errors."""
        print("\n📝 Checking YAML syntax...")

        yaml_files = list(self.repo_path.rglob('*.yaml')) + list(self.repo_path.rglob('*.yml'))

        # Exclude certain directories
        exclude_dirs = {'.git', 'node_modules', 'vendor', '.github'}
        yaml_files = [f for f in yaml_files if not any(ex in f.parts for ex in exclude_dirs)]

        syntax_errors = 0
        for yfile in yaml_files:
            try:
                with open(yfile, 'r') as f:
                    # BUG FIX: safe_load_all() returns a lazy generator; the
                    # original never consumed it, so nothing was parsed and
                    # syntax errors were never detected.
                    for _ in yaml.safe_load_all(f):
                        pass
            except yaml.YAMLError as e:
                self.issues.append(f"YAML syntax error in {yfile.relative_to(self.repo_path)}: {e}")
                syntax_errors += 1
            except (OSError, UnicodeDecodeError):
                # Unreadable file — not a YAML syntax problem; skip.
                continue

        if syntax_errors == 0:
            print(f" ✓ All {len(yaml_files)} YAML files are valid")
        else:
            print(f" ✗ {syntax_errors} YAML files have syntax errors")

    def _check_best_practices(self):
        """Check GitOps best practices."""
        print("\n✨ Checking best practices...")

        # Check for namespace definitions
        namespace_files = list(self.repo_path.rglob('*namespace*.yaml'))
        if not namespace_files:
            self.recommendations.append("No namespace definitions found. Consider explicitly defining namespaces.")

        # Check for image tags (not 'latest')
        all_yamls = list(self.repo_path.rglob('*.yaml')) + list(self.repo_path.rglob('*.yml'))
        latest_tag_count = 0

        for yfile in all_yamls:
            try:
                with open(yfile, 'r') as f:
                    content = f.read()
                if ':latest' in content or 'image: latest' in content:
                    latest_tag_count += 1
            except (OSError, UnicodeDecodeError):
                # Narrowed from a bare except: only skip unreadable files.
                continue

        if latest_tag_count > 0:
            self.warnings.append(f"Found {latest_tag_count} files using ':latest' image tag")
            self.recommendations.append("Pin image tags to specific versions or digests for reproducibility")

        # Check for resource limits
        deployment_files = [f for f in all_yamls if 'deployment' in str(f).lower() or 'statefulset' in str(f).lower()]
        missing_limits = 0

        for dfile in deployment_files:
            try:
                with open(dfile, 'r') as f:
                    for doc in yaml.safe_load_all(f):
                        if not doc or doc.get('kind') not in ['Deployment', 'StatefulSet']:
                            continue

                        containers = doc.get('spec', {}).get('template', {}).get('spec', {}).get('containers', [])
                        for container in containers:
                            if 'resources' not in container or 'limits' not in container.get('resources', {}):
                                missing_limits += 1
                                break
            except (yaml.YAMLError, OSError, UnicodeDecodeError):
                continue

        if missing_limits > 0:
            self.recommendations.append(f"{missing_limits} Deployments/StatefulSets missing resource limits")

    def _check_secrets_management(self):
        """Check for secrets management practices."""
        print("\n🔐 Checking secrets management...")

        # Check for plain Kubernetes secrets
        secret_files = list(self.repo_path.rglob('*secret*.yaml'))
        plain_secrets = []

        for sfile in secret_files:
            try:
                with open(sfile, 'r') as f:
                    for doc in yaml.safe_load_all(f):
                        if doc and doc.get('kind') == 'Secret' and doc.get('type') != 'kubernetes.io/service-account-token':
                            # Skip SealedSecret / ExternalSecret kinds.
                            if doc.get('kind') not in ['SealedSecret'] and 'external-secrets.io' not in doc.get('apiVersion', ''):
                                plain_secrets.append(sfile.relative_to(self.repo_path))
            except (yaml.YAMLError, OSError, UnicodeDecodeError):
                continue

        if plain_secrets:
            self.issues.append(f"Found {len(plain_secrets)} plain Kubernetes Secret manifests in Git")
            self.recommendations.append("Use Sealed Secrets, External Secrets Operator, or SOPS for secrets management")
            for s in plain_secrets[:3]:  # Show first 3
                self.issues.append(f" - {s}")

        # Check for SOPS configuration
        sops_config = self.repo_path / '.sops.yaml'
        if sops_config.exists():
            print(" ✓ SOPS configuration found (.sops.yaml)")

        # Check for Sealed Secrets
        sealed_secrets = list(self.repo_path.rglob('*sealedsecret*.yaml'))
        if sealed_secrets:
            print(f" ✓ Found {len(sealed_secrets)} SealedSecret manifests")

        # Check for External Secrets
        external_secrets = [f for f in self.repo_path.rglob('*.yaml')
                            if 'externalsecret' in str(f).lower() or 'secretstore' in str(f).lower()]
        if external_secrets:
            print(f" ✓ Found {len(external_secrets)} External Secrets manifests")

        if not sops_config.exists() and not sealed_secrets and not external_secrets and plain_secrets:
            self.recommendations.append("No secrets management solution detected. Consider implementing Sealed Secrets, ESO, or SOPS+age")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: validate a repository and print a summary report."""
    parser = argparse.ArgumentParser(
        description='Validate GitOps repository structure and manifests',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Validate current directory
  python3 validate_gitops_repo.py .

  # Validate specific repository
  python3 validate_gitops_repo.py /path/to/gitops-repo

  # Show only issues (no warnings)
  python3 validate_gitops_repo.py . --errors-only

Checks:
  - Repository structure (monorepo/polyrepo patterns)
  - Kustomization file validity
  - YAML syntax errors
  - Best practices (image tags, resource limits, namespaces)
  - Secrets management (detect plain secrets, check for SOPS/Sealed Secrets/ESO)
"""
    )

    parser.add_argument('repo_path', help='Path to GitOps repository')
    parser.add_argument('--errors-only', action='store_true', help='Show only errors, not warnings')

    args = parser.parse_args()

    try:
        results = GitOpsRepoValidator(args.repo_path).validate()

        banner = "=" * 60
        print(f"\n{banner}\n📊 Validation Summary\n{banner}")

        issues = results['issues']
        if issues:
            print(f"\n❌ Issues ({len(issues)}):")
            for line in issues:
                print(f" • {line}")

        # Warnings and recommendations are advisory; suppressed by --errors-only.
        if not args.errors_only:
            if results['warnings']:
                print(f"\n⚠️ Warnings ({len(results['warnings'])}):")
                for line in results['warnings']:
                    print(f" • {line}")

            if results['recommendations']:
                print(f"\n💡 Recommendations ({len(results['recommendations'])}):")
                for line in results['recommendations']:
                    print(f" → {line}")

        if not issues and not results['warnings']:
            print("\n✅ No issues found! Repository structure looks good.")

        # Exit code: non-zero only for hard issues.
        sys.exit(1 if issues else 0)

    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"❌ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user