#!/usr/bin/env python3
"""
Workflow executor for Tailscale SSH Sync Agent.
Common multi-machine workflow automation.
"""

import sys
from pathlib import Path
from typing import Dict, List, Optional
import time
import logging

# Add utils to path
sys.path.insert(0, str(Path(__file__).parent))

from utils.helpers import format_duration, get_timestamp
from sshsync_wrapper import execute_on_group, execute_on_host, push_to_hosts
from load_balancer import get_group_capacity

logger = logging.getLogger(__name__)


def deploy_workflow(code_path: str, staging_group: str, prod_group: str,
                    run_tests: bool = True) -> Dict:
    """
    Full deployment pipeline: staging → test → production.

    Args:
        code_path: Path to code to deploy
        staging_group: Staging server group
        prod_group: Production server group
        run_tests: Whether to run tests on staging

    Returns:
        Dict with deployment results

    Example:
        >>> result = deploy_workflow("./dist", "staging", "production")
        >>> result['success']
        True
        >>> result['duration']
        "12m 45s"
    """
    start_time = time.time()
    results = {
        'stages': {},
        'success': False,
        'start_time': get_timestamp()
    }

    try:
        # Stage 1: Deploy to staging
        logger.info("Stage 1: Deploying to staging...")
        stage1 = push_to_hosts(
            local_path=code_path,
            remote_path="/var/www/app",
            group=staging_group,
            recurse=True
        )
        results['stages']['staging_deploy'] = stage1

        if not stage1.get('success'):
            results['error'] = 'Staging deployment failed'
            return results

        # Build on staging
        logger.info("Building on staging...")
        build_result = execute_on_group(
            staging_group,
            "cd /var/www/app && npm run build",
            timeout=300
        )
        results['stages']['staging_build'] = build_result

        if not build_result.get('success'):
            results['error'] = 'Staging build failed'
            return results

        # Stage 2: Run tests (if enabled)
        if run_tests:
            logger.info("Stage 2: Running tests...")
            test_result = execute_on_group(
                staging_group,
                "cd /var/www/app && npm test",
                timeout=600
            )
            results['stages']['tests'] = test_result

            if not test_result.get('success'):
                results['error'] = 'Tests failed on staging'
                return results

        # Stage 3: Validation (non-blocking: the trailing "|| echo" swallows
        # curl failures, and the result is recorded but not checked before
        # deploying to production)
        logger.info("Stage 3: Validating staging...")
        health_result = execute_on_group(
            staging_group,
            "curl -f http://localhost:3000/health || echo 'Health check failed'",
            timeout=10
        )
        results['stages']['staging_validation'] = health_result

        # Stage 4: Deploy to production
        logger.info("Stage 4: Deploying to production...")
        prod_deploy = push_to_hosts(
            local_path=code_path,
            remote_path="/var/www/app",
            group=prod_group,
            recurse=True
        )
        results['stages']['production_deploy'] = prod_deploy

        if not prod_deploy.get('success'):
            results['error'] = 'Production deployment failed'
            return results

        # Build and restart on production
        logger.info("Building and restarting production...")
        prod_build = execute_on_group(
            prod_group,
            "cd /var/www/app && npm run build && pm2 restart app",
            timeout=300
        )
        results['stages']['production_build'] = prod_build

        # Stage 5: Production verification
        logger.info("Stage 5: Verifying production...")
        prod_health = execute_on_group(
            prod_group,
            "curl -f http://localhost:3000/health",
            timeout=15
        )
        results['stages']['production_verification'] = prod_health

        # Success!
        results['success'] = True
        results['duration'] = format_duration(time.time() - start_time)
        return results

    except Exception as e:
        logger.error(f"Deployment workflow error: {e}")
        results['error'] = str(e)
        results['duration'] = format_duration(time.time() - start_time)
        return results


def backup_workflow(hosts: List[str], backup_paths: List[str],
                    destination: str) -> Dict:
    """
    Backup files from multiple hosts.

    Args:
        hosts: List of hosts to backup from
        backup_paths: Paths to backup on each host
        destination: Local destination directory

    Returns:
        Dict with backup results

    Example:
        >>> result = backup_workflow(
        ...     ["db-01", "db-02"],
        ...     ["/var/lib/mysql"],
        ...     "./backups"
        ... )
        >>> result['backed_up_hosts']
        2
    """
    from sshsync_wrapper import pull_from_host

    start_time = time.time()
    results = {
        'hosts': {},
        'success': True,
        'backed_up_hosts': 0
    }

    for host in hosts:
        host_results = []

        for backup_path in backup_paths:
            # Create timestamped backup directory
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            host_dest = f"{destination}/{host}_{timestamp}"

            result = pull_from_host(
                host=host,
                remote_path=backup_path,
                local_path=host_dest,
                recurse=True
            )
            host_results.append(result)

            if not result.get('success'):
                results['success'] = False

        results['hosts'][host] = host_results
        if all(r.get('success') for r in host_results):
            results['backed_up_hosts'] += 1

    results['duration'] = format_duration(time.time() - start_time)
    return results


def sync_workflow(source_host: str, target_group: str, paths: List[str]) -> Dict:
    """
    Sync files from one host to many.

    Args:
        source_host: Host to pull from
        target_group: Group to push to
        paths: Paths to sync

    Returns:
        Dict with sync results

    Example:
        >>> result = sync_workflow(
        ...     "master-db",
        ...     "replica-dbs",
        ...     ["/var/lib/mysql/config"]
        ... )
        >>> result['success']
        True
    """
    from sshsync_wrapper import pull_from_host, push_to_hosts
    import tempfile
    import shutil

    start_time = time.time()
    results = {'paths': {}, 'success': True}

    # Create temp directory
    with tempfile.TemporaryDirectory() as temp_dir:
        for path in paths:
            # Pull from source
            pull_result = pull_from_host(
                host=source_host,
                remote_path=path,
                local_path=f"{temp_dir}/{Path(path).name}",
                recurse=True
            )

            if not pull_result.get('success'):
                results['paths'][path] = {
                    'success': False,
                    'error': 'Pull from source failed'
                }
                results['success'] = False
                continue

            # Push to targets
            push_result = push_to_hosts(
                local_path=f"{temp_dir}/{Path(path).name}",
                remote_path=path,
                group=target_group,
                recurse=True
            )

            results['paths'][path] = {
                'pull': pull_result,
                'push': push_result,
                'success': push_result.get('success', False)
            }

            if not push_result.get('success'):
                results['success'] = False

    results['duration'] = format_duration(time.time() - start_time)
    return results


def rolling_restart(group: str, service_name: str, wait_between: int = 30) -> Dict:
    """
    Zero-downtime rolling restart of a service across a group.

    Args:
        group: Group to restart
        service_name: Service name (e.g., "nginx", "app")
        wait_between: Seconds to wait between restarts

    Returns:
        Dict with restart results

    Example:
        >>> result = rolling_restart("web-servers", "nginx")
        >>> result['restarted_count']
        3
    """
    from utils.helpers import parse_sshsync_config

    start_time = time.time()
    groups_config = parse_sshsync_config()
    hosts = groups_config.get(group, [])

    if not hosts:
        return {
            'success': False,
            'error': f'Group {group} not found or empty'
        }

    results = {
        'hosts': {},
        'restarted_count': 0,
        'failed_count': 0,
        'success': True
    }

    for host in hosts:
        logger.info(f"Restarting {service_name} on {host}...")

        # Restart service
        restart_result = execute_on_host(
            host,
            f"sudo systemctl restart {service_name} || sudo service {service_name} restart",
            timeout=30
        )

        # Health check
        time.sleep(5)  # Wait for service to start
        health_result = execute_on_host(
            host,
            f"sudo systemctl is-active {service_name} || sudo service {service_name} status",
            timeout=10
        )

        success = restart_result.get('success') and health_result.get('success')
        results['hosts'][host] = {
            'restart': restart_result,
            'health': health_result,
            'success': success
        }

        if success:
            results['restarted_count'] += 1
            logger.info(f"✓ {host} restarted successfully")
        else:
            results['failed_count'] += 1
            results['success'] = False
            logger.error(f"✗ {host} restart failed")

        # Wait before next restart (except last)
        if host != hosts[-1]:
            time.sleep(wait_between)

    results['duration'] = format_duration(time.time() - start_time)
    return results


def health_check_workflow(group: str, endpoint: str = "/health",
                          timeout: int = 10) -> Dict:
    """
    Check health endpoint across a group.

    Args:
        group: Group to check
        endpoint: Health endpoint path
        timeout: Request timeout in seconds

    Returns:
        Dict with health check results

    Example:
        >>> result = health_check_workflow("production", "/health")
        >>> result['healthy_count']
        3
    """
    from utils.helpers import parse_sshsync_config

    groups_config = parse_sshsync_config()
    hosts = groups_config.get(group, [])

    if not hosts:
        return {
            'success': False,
            'error': f'Group {group} not found or empty'
        }

    results = {
        'hosts': {},
        'healthy_count': 0,
        'unhealthy_count': 0
    }

    for host in hosts:
        health_result = execute_on_host(
            host,
            f"curl -f -s -o /dev/null -w '%{{http_code}}' http://localhost:3000{endpoint}",
            timeout=timeout
        )

        is_healthy = (
            health_result.get('success')
            and '200' in health_result.get('stdout', '')
        )

        results['hosts'][host] = {
            'healthy': is_healthy,
            'response': health_result.get('stdout', '').strip()
        }

        if is_healthy:
            results['healthy_count'] += 1
        else:
            results['unhealthy_count'] += 1

    results['success'] = results['unhealthy_count'] == 0
    return results


def main():
    """Test workflow executor functions."""
    print("Testing workflow executor...\n")
    print("Note: Workflow executor requires configured hosts and groups.")
    print("Tests would execute real SSH operations, so none are run here.\n")
    print("✅ Workflow executor ready")


if __name__ == "__main__":
    main()
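
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not executed): composing the workflows
# above into one release cycle. The group names "staging", "production", and
# "web-servers" and the service name "app" are assumptions — substitute the
# groups and services defined in your own sshsync config.
#
#   result = deploy_workflow("./dist", "staging", "production", run_tests=True)
#   if result['success']:
#       rolling_restart("web-servers", "app", wait_between=15)
#   health = health_check_workflow("production", "/health")
#   logger.info("healthy: %s / unhealthy: %s",
#               health.get('healthy_count'), health.get('unhealthy_count'))
# ---------------------------------------------------------------------------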