From 74075be734041b6c3116dce35bba907a4827077d Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 08:19:24 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 18 + README.md | 3 + agents/fairdb-automation-agent.md | 313 ++++++++++++ commands/fairdb-emergency-response.md | 480 ++++++++++++++++++ commands/fairdb-health-check.md | 459 +++++++++++++++++ commands/fairdb-onboard-customer.md | 446 ++++++++++++++++ commands/fairdb-setup-backup.md | 420 +++++++++++++++ plugin.lock.json | 117 +++++ skills/fairdb-backup-manager/SKILL.md | 191 +++++++ skills/fairdb-backup-manager/assets/README.md | 26 + .../references/README.md | 26 + .../fairdb-backup-manager/scripts/README.md | 24 + skills/skill-adapter/assets/README.md | 7 + .../skill-adapter/assets/config-template.json | 32 ++ skills/skill-adapter/assets/skill-schema.json | 28 + skills/skill-adapter/assets/test-data.json | 27 + skills/skill-adapter/references/README.md | 11 + .../references/best-practices.md | 69 +++ skills/skill-adapter/references/examples.md | 70 +++ skills/skill-adapter/scripts/README.md | 10 + .../skill-adapter/scripts/helper-template.sh | 42 ++ skills/skill-adapter/scripts/validation.sh | 32 ++ 22 files changed, 2851 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 agents/fairdb-automation-agent.md create mode 100644 commands/fairdb-emergency-response.md create mode 100644 commands/fairdb-health-check.md create mode 100644 commands/fairdb-onboard-customer.md create mode 100644 commands/fairdb-setup-backup.md create mode 100644 plugin.lock.json create mode 100644 skills/fairdb-backup-manager/SKILL.md create mode 100644 skills/fairdb-backup-manager/assets/README.md create mode 100644 skills/fairdb-backup-manager/references/README.md create mode 100644 skills/fairdb-backup-manager/scripts/README.md create mode 100644 skills/skill-adapter/assets/README.md create mode 100644 
skills/skill-adapter/assets/config-template.json create mode 100644 skills/skill-adapter/assets/skill-schema.json create mode 100644 skills/skill-adapter/assets/test-data.json create mode 100644 skills/skill-adapter/references/README.md create mode 100644 skills/skill-adapter/references/best-practices.md create mode 100644 skills/skill-adapter/references/examples.md create mode 100644 skills/skill-adapter/scripts/README.md create mode 100755 skills/skill-adapter/scripts/helper-template.sh create mode 100755 skills/skill-adapter/scripts/validation.sh diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..e04f2cc --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,18 @@ +{ + "name": "fairdb-operations-kit", + "description": "Complete operations kit for FairDB PostgreSQL as a Service - VPS setup, PostgreSQL management, customer provisioning, monitoring, and backup automation", + "version": "1.0.0", + "author": { + "name": "Jeremy Longshore", + "email": "jeremy@intentsolutions.io" + }, + "skills": [ + "./skills" + ], + "agents": [ + "./agents" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..22d739e --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# fairdb-operations-kit + +Complete operations kit for FairDB PostgreSQL as a Service - VPS setup, PostgreSQL management, customer provisioning, monitoring, and backup automation diff --git a/agents/fairdb-automation-agent.md b/agents/fairdb-automation-agent.md new file mode 100644 index 0000000..6e53c7e --- /dev/null +++ b/agents/fairdb-automation-agent.md @@ -0,0 +1,313 @@ +--- +name: fairdb-automation-agent +description: Intelligent automation agent for FairDB PostgreSQL operations +model: sonnet +capabilities: + - Proactive monitoring and alerting + - Automated incident response + - Resource optimization + - Customer provisioning + - Backup management +--- + +# FairDB 
Automation Agent + +I am an intelligent automation agent specialized in managing FairDB PostgreSQL as a Service operations. I can analyze situations, make decisions, and execute complex workflows autonomously. + +## Core Capabilities + +### 1. Proactive Monitoring +- Continuously analyze system health metrics +- Predict potential issues before they occur +- Automatically trigger preventive maintenance +- Optimize performance based on usage patterns + +### 2. Intelligent Problem Resolution +- Diagnose issues using pattern recognition +- Apply appropriate fixes based on historical data +- Escalate to humans only when necessary +- Learn from each incident for future prevention + +### 3. Resource Optimization +- Dynamically adjust PostgreSQL parameters +- Manage connection pools efficiently +- Balance workload across customers +- Optimize query performance automatically + +### 4. Automated Operations +- Handle routine maintenance tasks +- Execute backup and recovery procedures +- Manage customer provisioning workflows +- Perform security audits and updates + +## Decision Framework + +When handling any FairDB operation, I follow this decision tree: + +1. **Assess Situation** + - Gather all relevant metrics + - Check historical patterns + - Evaluate risk levels + +2. **Determine Action** + - Can this be automated safely? → Execute + - Does it require human approval? → Request permission + - Is it outside my scope? → Escalate with recommendations + +3. **Execute & Monitor** + - Perform the action with safety checks + - Monitor the results in real-time + - Rollback if unexpected outcomes occur + +4. 
**Learn & Improve** + - Document the outcome + - Update knowledge base + - Refine future responses + +## Automated Workflows + +### Daily Operations Cycle + +```bash +# Morning Health Check (6 AM) +/fairdb-health-check +# Analyze results and address any issues + +# Backup Verification (8 AM) +pgbackrest --stanza=fairdb check +# Ensure all customer backups are current + +# Performance Tuning (10 AM) +# Analyze query patterns and adjust parameters +# Vacuum and analyze tables as needed + +# Capacity Planning (2 PM) +# Review growth trends +# Predict resource needs +# Alert if scaling required + +# Security Audit (4 PM) +# Check for vulnerabilities +# Review access logs +# Update security policies + +# Evening Report (6 PM) +# Generate daily summary +# Highlight any concerns +# Plan next day's priorities +``` + +### Incident Response Workflow + +When an incident is detected: + +1. **Immediate Assessment** + - Determine severity (P1-P4) + - Identify affected customers + - Check for data integrity issues + +2. **Automatic Remediation** + - Apply known fixes for common issues + - Restart services if safe to do so + - Clear blocking locks or queries + - Free up resources if needed + +3. **Escalation Decision** + - If auto-fix successful → Monitor and document + - If auto-fix failed → Alert on-call engineer + - If data at risk → Immediate human intervention + +4. **Post-Incident Actions** + - Generate incident report + - Update runbooks + - Schedule preventive measures + +### Customer Onboarding Automation + +When a new customer signs up: + +1. **Validate Requirements** + - Check resource availability + - Verify plan limits + - Assess special requirements + +2. **Provision Resources** + - Execute `/fairdb-onboard-customer` + - Configure backups + - Set up monitoring + - Generate credentials + +3. **Quality Assurance** + - Test all connections + - Verify backup functionality + - Check performance baselines + +4. 
**Customer Communication** + - Send welcome email + - Provide connection details + - Schedule onboarding call + +## Intelligence Patterns + +### Performance Optimization + +I analyze patterns to optimize performance: + +- **Query Pattern Analysis**: Identify frequently run queries and suggest indexes +- **Connection Pattern Recognition**: Adjust pool sizes based on usage patterns +- **Resource Usage Prediction**: Anticipate peak loads and pre-scale resources +- **Maintenance Window Selection**: Choose optimal times for maintenance based on activity + +### Security Monitoring + +I continuously monitor for security threats: + +- **Anomaly Detection**: Identify unusual access patterns +- **Vulnerability Scanning**: Check for known PostgreSQL vulnerabilities +- **Access Audit**: Review and report suspicious login attempts +- **Compliance Checking**: Ensure adherence to security policies + +### Predictive Maintenance + +I predict and prevent issues: + +- **Disk Space Forecasting**: Alert before disks fill up +- **Performance Degradation**: Detect gradual performance decline +- **Hardware Failure Prediction**: Monitor SMART data and system logs +- **Backup Health**: Ensure backup integrity and test restores + +## Integration Points + +### Monitoring Systems +- Prometheus metrics collection +- Grafana dashboard updates +- Alert manager integration +- Custom webhook notifications + +### Ticketing Systems +- Auto-create tickets for issues +- Update ticket status automatically +- Attach diagnostic information +- Close tickets when resolved + +### Communication Channels +- Slack notifications for team +- Email alerts for customers +- SMS for critical issues +- Status page updates + +## Learning Mechanisms + +### Knowledge Base Updates +After each significant event, I update: +- Incident patterns database +- Resolution strategies +- Performance baselines +- Security threat signatures + +### Continuous Improvement +- Track success rates of automated fixes +- Measure time to 
resolution +- Analyze false positive rates +- Refine decision thresholds + +## Safety Constraints + +I will NEVER automatically: +- Delete customer data +- Modify backup retention policies +- Change security settings without approval +- Perform major version upgrades +- Alter billing or plan settings + +I will ALWAYS: +- Create backups before major changes +- Test in staging when possible +- Document all actions taken +- Maintain audit trail +- Respect maintenance windows + +## Activation Triggers + +I activate automatically when: +- System metrics exceed thresholds +- Scheduled tasks are due +- Incidents are detected +- Customer requests are received +- Patterns indicate future issues + +## Example Scenarios + +### Scenario 1: High Connection Usage +``` +Detected: Connection usage at 85% +Analysis: Spike from customer_xyz database +Action: Increase connection pool temporarily +Result: Issue resolved without downtime +Followup: Contact customer about upgrading plan +``` + +### Scenario 2: Disk Space Warning +``` +Detected: /var/lib/postgresql at 88% capacity +Analysis: Unexpected growth in analytics_db +Action: 1) Clean old logs 2) Vacuum full on large tables +Result: Reduced to 72% usage +Followup: Schedule discussion about archiving strategy +``` + +### Scenario 3: Slow Query Impact +``` +Detected: Query running >30 minutes blocking others +Analysis: Missing index on large table join +Action: 1) Kill query 2) Create index 3) Re-run query +Result: Query now completes in 2 seconds +Followup: Add to index recommendation report +``` + +## Reporting + +I generate these reports automatically: + +### Daily Report +- System health summary +- Customer usage statistics +- Incident summary +- Performance metrics +- Backup status + +### Weekly Report +- Capacity trends +- Security audit results +- Customer growth metrics +- Performance optimization suggestions +- Maintenance schedule + +### Monthly Report +- SLA compliance +- Cost analysis +- Growth projections +- Strategic 
recommendations +- Technology updates needed + +## Human Interaction + +When I need human assistance, I provide: +- Clear problem description +- All diagnostic data collected +- Actions already attempted +- Recommended next steps +- Urgency level and impact assessment + +I learn from human interventions to handle similar situations autonomously in the future. + +## Continuous Operation + +I operate 24/7 with these cycles: +- Health checks every 5 minutes +- Performance analysis every hour +- Security scans every 4 hours +- Backup verification daily +- Capacity planning weekly + +My goal is to maintain 99.99% uptime for all FairDB customers while continuously improving efficiency and reducing manual intervention requirements. \ No newline at end of file diff --git a/commands/fairdb-emergency-response.md b/commands/fairdb-emergency-response.md new file mode 100644 index 0000000..81e2754 --- /dev/null +++ b/commands/fairdb-emergency-response.md @@ -0,0 +1,480 @@ +--- +name: fairdb-emergency-response +description: Emergency incident response procedures for critical FairDB issues +model: sonnet +--- + +# FairDB Emergency Incident Response + +You are responding to a critical incident in the FairDB PostgreSQL infrastructure. Follow this structured approach to diagnose, contain, and resolve the issue. 
+ +## Incident Classification + +First, identify the incident type: +- **P1 Critical**: Complete service outage, data loss risk +- **P2 High**: Major degradation, affecting multiple customers +- **P3 Medium**: Single customer impact, performance issues +- **P4 Low**: Minor issues, cosmetic problems + +## Initial Assessment (First 5 Minutes) + +```bash +#!/bin/bash +# FairDB Emergency Response Script + +echo "================================================" +echo " FAIRDB EMERGENCY INCIDENT RESPONSE" +echo " Started: $(date '+%Y-%m-%d %H:%M:%S')" +echo "================================================" + +# Create incident log +INCIDENT_ID="INC-$(date +%Y%m%d-%H%M%S)" +INCIDENT_LOG="/opt/fairdb/incidents/${INCIDENT_ID}.log" +mkdir -p /opt/fairdb/incidents + +{ + echo "Incident ID: $INCIDENT_ID" + echo "Response started: $(date)" + echo "Responding user: $(whoami)" + echo "========================================" +} | tee $INCIDENT_LOG +``` + +## Step 1: Service Status Check + +```bash +echo -e "\n[STEP 1] SERVICE STATUS CHECK" | tee -a $INCIDENT_LOG +echo "------------------------------" | tee -a $INCIDENT_LOG + +# Check PostgreSQL service +if systemctl is-active --quiet postgresql; then + echo "✅ PostgreSQL: RUNNING" | tee -a $INCIDENT_LOG +else + echo "❌ CRITICAL: PostgreSQL is DOWN" | tee -a $INCIDENT_LOG + echo "Attempting emergency restart..." | tee -a $INCIDENT_LOG + + # Try to start the service + sudo systemctl start postgresql 2>&1 | tee -a $INCIDENT_LOG + + sleep 5 + + if systemctl is-active --quiet postgresql; then + echo "✅ PostgreSQL restarted successfully" | tee -a $INCIDENT_LOG + else + echo "❌ FAILED to restart PostgreSQL" | tee -a $INCIDENT_LOG + echo "Checking for port conflicts..." | tee -a $INCIDENT_LOG + sudo netstat -tulpn | grep :5432 | tee -a $INCIDENT_LOG + + # Check for corruption + echo "Checking for data corruption..." 
| tee -a $INCIDENT_LOG + sudo -u postgres /usr/lib/postgresql/16/bin/postgres -D /var/lib/postgresql/16/main -C data_directory 2>&1 | tee -a $INCIDENT_LOG + fi +fi + +# Check disk space +echo -e "\nDisk Space:" | tee -a $INCIDENT_LOG +df -h | grep -E "^/dev|^Filesystem" | tee -a $INCIDENT_LOG + +# Check for full disks +FULL_DISKS=$(df -h | grep -E "100%|9[5-9]%" | wc -l) +if [ $FULL_DISKS -gt 0 ]; then + echo "⚠️ CRITICAL: Disk space exhausted!" | tee -a $INCIDENT_LOG + echo "Emergency cleanup required..." | tee -a $INCIDENT_LOG + + # Emergency log cleanup + find /var/log/postgresql -name "*.log" -mtime +7 -delete 2>/dev/null + find /opt/fairdb/logs -name "*.log" -mtime +7 -delete 2>/dev/null + + echo "Old logs cleared. New disk usage:" | tee -a $INCIDENT_LOG + df -h | grep -E "^/dev" | tee -a $INCIDENT_LOG +fi +``` + +## Step 2: Connection Diagnostics + +```bash +echo -e "\n[STEP 2] CONNECTION DIAGNOSTICS" | tee -a $INCIDENT_LOG +echo "--------------------------------" | tee -a $INCIDENT_LOG + +# Test local connection +echo "Testing local connection..." | tee -a $INCIDENT_LOG +if sudo -u postgres psql -c "SELECT 1;" > /dev/null 2>&1; then + echo "✅ Local connections: OK" | tee -a $INCIDENT_LOG + + # Get connection stats + sudo -u postgres psql -t -c " + SELECT 'Active connections: ' || count(*) + FROM pg_stat_activity + WHERE state != 'idle';" | tee -a $INCIDENT_LOG + + # Check for connection exhaustion + MAX_CONN=$(sudo -u postgres psql -t -c "SHOW max_connections;") + CURRENT_CONN=$(sudo -u postgres psql -t -c "SELECT count(*) FROM pg_stat_activity;") + + echo "Connections: $CURRENT_CONN / $MAX_CONN" | tee -a $INCIDENT_LOG + + if [ $CURRENT_CONN -gt $(( MAX_CONN * 90 / 100 )) ]; then + echo "⚠️ WARNING: Connection pool nearly exhausted" | tee -a $INCIDENT_LOG + echo "Terminating idle connections..." 
| tee -a $INCIDENT_LOG + + # Kill idle connections older than 10 minutes + sudo -u postgres psql << 'EOF' | tee -a $INCIDENT_LOG + SELECT pg_terminate_backend(pid) + FROM pg_stat_activity + WHERE state = 'idle' + AND state_change < NOW() - INTERVAL '10 minutes' + AND pid != pg_backend_pid(); +EOF + fi +else + echo "❌ CRITICAL: Cannot connect to PostgreSQL" | tee -a $INCIDENT_LOG + echo "Checking PostgreSQL logs..." | tee -a $INCIDENT_LOG + tail -50 /var/log/postgresql/postgresql-*.log | tee -a $INCIDENT_LOG +fi + +# Check network connectivity +echo -e "\nNetwork status:" | tee -a $INCIDENT_LOG +ip addr show | grep "inet " | tee -a $INCIDENT_LOG +``` + +## Step 3: Performance Emergency Response + +```bash +echo -e "\n[STEP 3] PERFORMANCE TRIAGE" | tee -a $INCIDENT_LOG +echo "----------------------------" | tee -a $INCIDENT_LOG + +# Find and kill long-running queries +echo "Checking for blocked/long queries..." | tee -a $INCIDENT_LOG + +sudo -u postgres psql << 'EOF' | tee -a $INCIDENT_LOG +-- Queries running longer than 5 minutes +SELECT + pid, + now() - query_start as duration, + state, + LEFT(query, 100) as query_preview +FROM pg_stat_activity +WHERE state != 'idle' +AND now() - query_start > interval '5 minutes' +ORDER BY duration DESC; + +-- Kill queries running longer than 30 minutes +SELECT pg_cancel_backend(pid) +FROM pg_stat_activity +WHERE state != 'idle' +AND now() - query_start > interval '30 minutes' +AND pid != pg_backend_pid(); +EOF + +# Check for locks +echo -e "\nChecking for lock conflicts..." 
| tee -a $INCIDENT_LOG +sudo -u postgres psql << 'EOF' | tee -a $INCIDENT_LOG +SELECT + blocked_locks.pid AS blocked_pid, + blocked_activity.usename AS blocked_user, + blocking_locks.pid AS blocking_pid, + blocking_activity.usename AS blocking_user, + blocked_activity.query AS blocked_statement, + blocking_activity.query AS blocking_statement +FROM pg_catalog.pg_locks blocked_locks +JOIN pg_catalog.pg_stat_activity blocked_activity ON blocked_activity.pid = blocked_locks.pid +JOIN pg_catalog.pg_locks blocking_locks ON blocking_locks.locktype = blocked_locks.locktype + AND blocking_locks.DATABASE IS NOT DISTINCT FROM blocked_locks.DATABASE + AND blocking_locks.relation IS NOT DISTINCT FROM blocked_locks.relation + AND blocking_locks.page IS NOT DISTINCT FROM blocked_locks.page + AND blocking_locks.tuple IS NOT DISTINCT FROM blocked_locks.tuple + AND blocking_locks.virtualxid IS NOT DISTINCT FROM blocked_locks.virtualxid + AND blocking_locks.transactionid IS NOT DISTINCT FROM blocked_locks.transactionid + AND blocking_locks.classid IS NOT DISTINCT FROM blocked_locks.classid + AND blocking_locks.objid IS NOT DISTINCT FROM blocked_locks.objid + AND blocking_locks.objsubid IS NOT DISTINCT FROM blocked_locks.objsubid + AND blocking_locks.pid != blocked_locks.pid +JOIN pg_catalog.pg_stat_activity blocking_activity ON blocking_activity.pid = blocking_locks.pid +WHERE NOT blocked_locks.GRANTED; +EOF +``` + +## Step 4: Data Integrity Check + +```bash +echo -e "\n[STEP 4] DATA INTEGRITY CHECK" | tee -a $INCIDENT_LOG +echo "------------------------------" | tee -a $INCIDENT_LOG + +# Check for corruption indicators +echo "Checking for corruption indicators..." | tee -a $INCIDENT_LOG + +# Check PostgreSQL data directory +DATA_DIR="/var/lib/postgresql/16/main" +if [ -d "$DATA_DIR" ]; then + echo "Data directory exists: $DATA_DIR" | tee -a $INCIDENT_LOG + + # Check for recovery in progress + if [ -f "$DATA_DIR/recovery.signal" ]; then + echo "⚠️ Recovery in progress!" 
| tee -a $INCIDENT_LOG + fi + + # Check WAL status + WAL_COUNT=$(ls -1 $DATA_DIR/pg_wal/*.partial 2>/dev/null | wc -l) + if [ $WAL_COUNT -gt 0 ]; then + echo "⚠️ Partial WAL files detected: $WAL_COUNT" | tee -a $INCIDENT_LOG + fi +else + echo "❌ CRITICAL: Data directory not found!" | tee -a $INCIDENT_LOG +fi + +# Run basic integrity check +echo -e "\nRunning integrity checks..." | tee -a $INCIDENT_LOG +for DB in $(sudo -u postgres psql -t -c "SELECT datname FROM pg_database WHERE datistemplate = false;"); do + echo "Checking database: $DB" | tee -a $INCIDENT_LOG + sudo -u postgres psql -d $DB -c "SELECT 1;" > /dev/null 2>&1 + if [ $? -eq 0 ]; then + echo " ✅ Database $DB is accessible" | tee -a $INCIDENT_LOG + else + echo " ❌ Database $DB has issues!" | tee -a $INCIDENT_LOG + fi +done +``` + +## Step 5: Emergency Recovery Actions + +```bash +echo -e "\n[STEP 5] RECOVERY ACTIONS" | tee -a $INCIDENT_LOG +echo "-------------------------" | tee -a $INCIDENT_LOG + +# Determine if recovery is needed +read -p "Do you need to initiate emergency recovery? (yes/no): " NEED_RECOVERY + +if [ "$NEED_RECOVERY" = "yes" ]; then + echo "Starting emergency recovery procedures..." 
| tee -a $INCIDENT_LOG + + # Option 1: Restart in single-user mode for repairs + echo "Option 1: Single-user mode repair" | tee -a $INCIDENT_LOG + echo "Command: sudo -u postgres /usr/lib/postgresql/16/bin/postgres --single -D $DATA_DIR" | tee -a $INCIDENT_LOG + + # Option 2: Restore from backup (case 2 below passes --delta so pgBackRest can restore over the existing, non-empty data directory) + echo "Option 2: Restore from backup" | tee -a $INCIDENT_LOG + + # Check available backups + if command -v pgbackrest &> /dev/null; then + echo "Available backups:" | tee -a $INCIDENT_LOG + sudo -u postgres pgbackrest --stanza=fairdb info 2>&1 | tee -a $INCIDENT_LOG + fi + + # Option 3: Point-in-time recovery + echo "Option 3: Point-in-time recovery" | tee -a $INCIDENT_LOG + echo "Use: /opt/fairdb/scripts/restore-pitr.sh 'YYYY-MM-DD HH:MM:SS'" | tee -a $INCIDENT_LOG + + read -p "Select recovery option (1/2/3/none): " RECOVERY_OPTION + + case $RECOVERY_OPTION in + 1) + echo "Starting single-user mode..." | tee -a $INCIDENT_LOG + sudo systemctl stop postgresql + sudo -u postgres /usr/lib/postgresql/16/bin/postgres --single -D $DATA_DIR + ;; + 2) + echo "Starting backup restore..." | tee -a $INCIDENT_LOG + read -r -p "Enter backup label to restore: " BACKUP_LABEL + sudo systemctl stop postgresql + sudo -u postgres pgbackrest --stanza=fairdb --set="$BACKUP_LABEL" --delta restore + sudo systemctl start postgresql + ;; + 3) + echo "Starting PITR..." 
| tee -a $INCIDENT_LOG + read -p "Enter target time (YYYY-MM-DD HH:MM:SS): " TARGET_TIME + /opt/fairdb/scripts/restore-pitr.sh "$TARGET_TIME" + ;; + *) + echo "No recovery action taken" | tee -a $INCIDENT_LOG + ;; + esac +fi +``` + +## Step 6: Customer Communication + +```bash +echo -e "\n[STEP 6] CUSTOMER IMPACT ASSESSMENT" | tee -a $INCIDENT_LOG +echo "------------------------------------" | tee -a $INCIDENT_LOG + +# Identify affected customers +echo "Affected customer databases:" | tee -a $INCIDENT_LOG + +AFFECTED_DBS=$(sudo -u postgres psql -t -c " + SELECT datname FROM pg_database + WHERE datname NOT IN ('postgres', 'template0', 'template1') + ORDER BY datname;") + +for DB in $AFFECTED_DBS; do + # Check if database is accessible + if sudo -u postgres psql -d $DB -c "SELECT 1;" > /dev/null 2>&1; then + echo " ✅ $DB - Operational" | tee -a $INCIDENT_LOG + else + echo " ❌ $DB - IMPACTED" | tee -a $INCIDENT_LOG + fi +done + +# Generate customer notification +cat << EOF | tee -a $INCIDENT_LOG + +CUSTOMER NOTIFICATION TEMPLATE +=============================== +Subject: FairDB Service Incident - $INCIDENT_ID + +Dear Customer, + +We are currently experiencing a service incident affecting FairDB PostgreSQL services. + +Incident ID: $INCIDENT_ID +Start Time: $(date) +Severity: [P1/P2/P3/P4] +Status: Investigating / Identified / Monitoring / Resolved + +Impact: +[Describe customer impact] + +Current Actions: +[List recovery actions being taken] + +Next Update: +We will provide an update within 30 minutes or sooner if the situation changes. + +We apologize for any inconvenience and are working to resolve this as quickly as possible. 
+ +For urgent matters, please contact our emergency hotline: [PHONE] + +Regards, +FairDB Operations Team +EOF +``` + +## Step 7: Post-Incident Checklist + +```bash +echo -e "\n[STEP 7] STABILIZATION CHECKLIST" | tee -a $INCIDENT_LOG +echo "---------------------------------" | tee -a $INCIDENT_LOG + +# Verification checklist +cat << 'EOF' | tee -a $INCIDENT_LOG +Post-Recovery Verification: +[ ] PostgreSQL service running +[ ] All customer databases accessible +[ ] Backup system operational +[ ] Monitoring alerts cleared +[ ] Network connectivity verified +[ ] Disk space adequate (>20% free) +[ ] CPU usage normal (<80%) +[ ] Memory usage normal (<90%) +[ ] No blocking locks +[ ] No long-running queries +[ ] Recent backup available +[ ] Customer access verified +[ ] Incident documented +[ ] Root cause identified +[ ] Prevention plan created +EOF + +# Final status +echo -e "\n[FINAL STATUS]" | tee -a $INCIDENT_LOG +echo "==============" | tee -a $INCIDENT_LOG +/usr/local/bin/fairdb-health-check | head -20 | tee -a $INCIDENT_LOG +``` + +## Step 8: Root Cause Analysis + +```bash +echo -e "\n[STEP 8] ROOT CAUSE ANALYSIS" | tee -a $INCIDENT_LOG +echo "-----------------------------" | tee -a $INCIDENT_LOG + +# Collect evidence +echo "Collecting evidence for RCA..." 
| tee -a $INCIDENT_LOG + +# System logs +echo -e "\nSystem logs (last hour):" | tee -a $INCIDENT_LOG +sudo journalctl --since "1 hour ago" -p err --no-pager | tail -20 | tee -a $INCIDENT_LOG + +# PostgreSQL logs +echo -e "\nPostgreSQL error logs:" | tee -a $INCIDENT_LOG +find /var/log/postgresql -name "*.log" -mmin -60 -exec grep -i "error\|fatal\|panic" {} \; | tail -20 | tee -a $INCIDENT_LOG + +# Resource history +echo -e "\nResource usage history:" | tee -a $INCIDENT_LOG +sar -u -f /var/log/sysstat/sa$(date +%d) | tail -10 | tee -a $INCIDENT_LOG 2>/dev/null + +# Create RCA document +cat << EOF | tee /opt/fairdb/incidents/${INCIDENT_ID}-rca.md +# Root Cause Analysis - $INCIDENT_ID + +## Incident Summary +- **Date/Time**: $(date) +- **Duration**: [TO BE FILLED] +- **Severity**: [P1/P2/P3/P4] +- **Impact**: [Number of customers/databases affected] + +## Timeline +[Document sequence of events] + +## Root Cause +[Identify primary cause] + +## Contributing Factors +[List any contributing factors] + +## Resolution +[Describe how the incident was resolved] + +## Lessons Learned +[What was learned from this incident] + +## Action Items +[ ] [Prevention measure 1] +[ ] [Prevention measure 2] +[ ] [Monitoring improvement] + +## Metrics +- Time to Detection: [minutes] +- Time to Resolution: [minutes] +- Customer Impact Duration: [minutes] + +Generated: $(date) +EOF + +echo -e "\n================================================" | tee -a $INCIDENT_LOG +echo " INCIDENT RESPONSE COMPLETED" | tee -a $INCIDENT_LOG +echo " Incident ID: $INCIDENT_ID" | tee -a $INCIDENT_LOG +echo " Log saved to: $INCIDENT_LOG" | tee -a $INCIDENT_LOG +echo " RCA template: /opt/fairdb/incidents/${INCIDENT_ID}-rca.md" | tee -a $INCIDENT_LOG +echo "================================================" | tee -a $INCIDENT_LOG +``` + +## Emergency Contacts + +Keep these contacts readily available: +- PostgreSQL Expert: [Contact info] +- Infrastructure Team: [Contact info] +- Customer Success: [Contact info] 
+- Management Escalation: [Contact info] + +## Quick Reference Commands + +```bash +# Emergency service control +sudo systemctl stop postgresql +sudo systemctl start postgresql +sudo systemctl restart postgresql + +# Kill all connections +sudo -u postgres psql -c "SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid != pg_backend_pid();" + +# Emergency single-user mode +sudo -u postgres /usr/lib/postgresql/16/bin/postgres --single -D /var/lib/postgresql/16/main + +# Force checkpoint +sudo -u postgres psql -c "CHECKPOINT;" + +# Emergency statistics refresh (--analyze-in-stages runs ANALYZE only; it does not VACUUM) +sudo -u postgres vacuumdb --all --analyze-in-stages + +# Check data checksums (the server must be cleanly shut down first) +sudo -u postgres /usr/lib/postgresql/16/bin/pg_checksums -D /var/lib/postgresql/16/main --check +``` \ No newline at end of file diff --git a/commands/fairdb-health-check.md b/commands/fairdb-health-check.md new file mode 100644 index 0000000..e278d9c --- /dev/null +++ b/commands/fairdb-health-check.md @@ -0,0 +1,459 @@ +--- +name: fairdb-health-check +description: Comprehensive health check for FairDB PostgreSQL infrastructure +model: sonnet +--- + +# FairDB System Health Check + +Perform a comprehensive health check of the FairDB PostgreSQL infrastructure including server resources, database status, backup integrity, and customer databases. 
+ +## System Health Overview + +```bash +#!/bin/bash +# FairDB Comprehensive Health Check + +echo "================================================" +echo " FairDB System Health Check" +echo " $(date '+%Y-%m-%d %H:%M:%S')" +echo "================================================" +``` + +## Step 1: Server Resources Check + +```bash +echo -e "\n[1/10] SERVER RESOURCES" +echo "------------------------" + +# CPU Usage +CPU_USAGE=$(top -bn1 | grep "Cpu(s)" | awk '{print $2}' | cut -d'%' -f1) +echo "CPU Usage: ${CPU_USAGE}%" +if (( $(echo "$CPU_USAGE > 80" | bc -l) )); then + echo "⚠️ WARNING: High CPU usage detected" +fi + +# Memory Usage +MEM_INFO=$(free -m | awk 'NR==2{printf "Memory: %s/%sMB (%.2f%%)\n", $3,$2,$3*100/$2 }') +echo "$MEM_INFO" +MEM_PERCENT=$(free | grep Mem | awk '{print $3/$2 * 100.0}') +if (( $(echo "$MEM_PERCENT > 90" | bc -l) )); then + echo "⚠️ WARNING: High memory usage detected" +fi + +# Disk Usage +echo "Disk Usage:" +df -h | grep -E '^/dev/' | while read line; do + USAGE=$(echo $line | awk '{print $5}' | sed 's/%//') + MOUNT=$(echo $line | awk '{print $6}') + echo " $MOUNT: $line" + if [ $USAGE -gt 85 ]; then + echo " ⚠️ WARNING: Disk space critical on $MOUNT" + fi +done + +# Load Average +LOAD=$(uptime | awk -F'load average:' '{print $2}') +echo "Load Average:$LOAD" +CORES=$(nproc) +LOAD_1=$(echo $LOAD | cut -d, -f1 | tr -d ' ') +if (( $(echo "$LOAD_1 > $CORES" | bc -l) )); then + echo "⚠️ WARNING: High load average detected" +fi +``` + +## Step 2: PostgreSQL Service Status + +```bash +echo -e "\n[2/10] POSTGRESQL SERVICE" +echo "-------------------------" + +# Check if PostgreSQL is running +if systemctl is-active --quiet postgresql; then + echo "✅ PostgreSQL service: RUNNING" + + # Get version and uptime + sudo -u postgres psql -t -c "SELECT version();" | head -1 + + UPTIME=$(sudo -u postgres psql -t -c " + SELECT now() - pg_postmaster_start_time() as uptime;") + echo "Uptime: $UPTIME" +else + echo "❌ CRITICAL: PostgreSQL service is NOT 
running!" + echo "Attempting to start..." + sudo systemctl start postgresql + sleep 5 + if systemctl is-active --quiet postgresql; then + echo "✅ Service restarted successfully" + else + echo "❌ Failed to start PostgreSQL - manual intervention required!" + exit 1 + fi +fi + +# Check PostgreSQL cluster status +sudo pg_lsclusters +``` + +## Step 3: Database Connections + +```bash +echo -e "\n[3/10] DATABASE CONNECTIONS" +echo "---------------------------" + +# Connection statistics +sudo -u postgres psql -t << EOF +SELECT + 'Total Connections: ' || count(*) || '/' || setting AS connection_info +FROM pg_stat_activity, pg_settings +WHERE pg_settings.name = 'max_connections' +GROUP BY setting; +EOF + +# Connections by database +echo -e "\nConnections by database:" +sudo -u postgres psql -t -c " + SELECT datname, count(*) as connections + FROM pg_stat_activity + GROUP BY datname + ORDER BY connections DESC;" + +# Connections by user +echo -e "\nConnections by user:" +sudo -u postgres psql -t -c " + SELECT usename, count(*) as connections + FROM pg_stat_activity + GROUP BY usename + ORDER BY connections DESC;" + +# Check for idle connections +IDLE_COUNT=$(sudo -u postgres psql -t -c " + SELECT count(*) + FROM pg_stat_activity + WHERE state = 'idle' + AND state_change < NOW() - INTERVAL '10 minutes';") + +if [ $IDLE_COUNT -gt 10 ]; then + echo "⚠️ WARNING: $IDLE_COUNT idle connections older than 10 minutes" +fi +``` + +## Step 4: Database Performance Metrics + +```bash +echo -e "\n[4/10] PERFORMANCE METRICS" +echo "--------------------------" + +# Cache hit ratio +sudo -u postgres psql -t << 'EOF' +SELECT + 'Cache Hit Ratio: ' || + ROUND(100.0 * sum(heap_blks_hit) / + NULLIF(sum(heap_blks_hit) + sum(heap_blks_read), 0), 2) || '%' +FROM pg_statio_user_tables; +EOF + +# Transaction statistics +sudo -u postgres psql -t -c " + SELECT + 'Transactions: ' || xact_commit || ' commits, ' || + xact_rollback || ' rollbacks, ' || + ROUND(100.0 * xact_rollback / NULLIF(xact_commit + 
xact_rollback, 0), 2) || '% rollback rate' + FROM pg_stat_database + WHERE datname = 'postgres';" + +# Longest running queries +echo -e "\nLong-running queries (>1 minute):" +sudo -u postgres psql -t -c " + SELECT pid, now() - query_start as duration, + LEFT(query, 50) as query_preview + FROM pg_stat_activity + WHERE state = 'active' + AND now() - query_start > interval '1 minute' + ORDER BY duration DESC + LIMIT 5;" + +# Table bloat check +echo -e "\nTable bloat (top 5):" +sudo -u postgres psql -t << 'EOF' +SELECT + schemaname || '.' || tablename AS table, + pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size, + ROUND(100 * pg_total_relation_size(schemaname||'.'||tablename) / + NULLIF(sum(pg_total_relation_size(schemaname||'.'||tablename)) + OVER (), 0), 2) AS percentage +FROM pg_tables +WHERE schemaname NOT IN ('pg_catalog', 'information_schema') +ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC +LIMIT 5; +EOF +``` + +## Step 5: Backup Status + +```bash +echo -e "\n[5/10] BACKUP STATUS" +echo "--------------------" + +# Check pgBackRest status +if command -v pgbackrest &> /dev/null; then + echo "pgBackRest Status:" + + # Get all stanzas + STANZAS=$(sudo -u postgres pgbackrest info --output=json 2>/dev/null | jq -r '.[].name' 2>/dev/null) + + if [ -z "$STANZAS" ]; then + echo "⚠️ WARNING: No backup stanzas configured" + else + for STANZA in $STANZAS; do + echo -e "\nStanza: $STANZA" + + # Get last backup info + LAST_BACKUP=$(sudo -u postgres pgbackrest --stanza=$STANZA info --output=json 2>/dev/null | \ + jq -r '.[] | select(.name=="'$STANZA'") | .backup[-1].timestamp.stop' 2>/dev/null) + + if [ ! 
-z "$LAST_BACKUP" ] && [ "$LAST_BACKUP" != "null" ]; then + echo " Last backup: $(date -d "@$LAST_BACKUP")" + + # Calculate age in hours (pgBackRest reports timestamp.stop as a Unix epoch) + BACKUP_AGE=$(( ($(date +%s) - LAST_BACKUP) / 3600 )) + + if [ $BACKUP_AGE -gt 25 ]; then + echo " ⚠️ WARNING: Last backup is $BACKUP_AGE hours old" + else + echo " ✅ Backup is current ($BACKUP_AGE hours old)" + fi + else + echo " ❌ ERROR: No backups found for this stanza" + fi + done + fi +else + echo "❌ ERROR: pgBackRest is not installed" +fi + +# Check WAL archiving +WAL_STATUS=$(sudo -u postgres psql -t -c "SHOW archive_mode;") +echo -e "\nWAL Archiving: $WAL_STATUS" + +if [ "$WAL_STATUS" = " on" ]; then + LAST_ARCHIVED=$(sudo -u postgres psql -t -c " + SELECT age(now(), last_archived_time) + FROM pg_stat_archiver;") + echo "Last WAL archived: $LAST_ARCHIVED ago" +fi +``` + +## Step 6: Replication Status + +```bash +echo -e "\n[6/10] REPLICATION STATUS" +echo "-------------------------" + +# Check if this is a primary or replica +IS_PRIMARY=$(sudo -u postgres psql -t -c "SELECT pg_is_in_recovery();") + +if [ "$IS_PRIMARY" = " f" ]; then + echo "Role: PRIMARY" + + # Check replication slots + REP_SLOTS=$(sudo -u postgres psql -t -c " + SELECT count(*) FROM pg_replication_slots WHERE active = true;") + echo "Active replication slots: $REP_SLOTS" + + # Check connected replicas + sudo -u postgres psql -t -c " + SELECT client_addr, state, sync_state, + pg_size_pretty(pg_wal_lsn_diff(sent_lsn, replay_lsn)) as lag + FROM pg_stat_replication;" 2>/dev/null +else + echo "Role: REPLICA" + + # Check replication lag + LAG=$(sudo -u postgres psql -t -c " + SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())) AS lag;") + echo "Replication lag: ${LAG} seconds" + + if (( $(echo "$LAG > 60" | bc -l) )); then + echo "⚠️ WARNING: High replication lag detected" + fi +fi +``` + +## Step 7: Security Audit + +```bash +echo -e "\n[7/10] SECURITY AUDIT" +echo "---------------------" + +# Check for default passwords +echo "Checking for common issues..."
+ +# SSL status +SSL_STATUS=$(sudo -u postgres psql -t -c "SHOW ssl;") +echo "SSL: $SSL_STATUS" +if [ "$SSL_STATUS" != " on" ]; then + echo "⚠️ WARNING: SSL is not enabled" +fi + +# Check for users without passwords +NO_PASS=$(sudo -u postgres psql -t -c " + SELECT count(*) FROM pg_shadow WHERE passwd IS NULL;") +if [ $NO_PASS -gt 0 ]; then + echo "⚠️ WARNING: $NO_PASS users without passwords" +fi + +# Check firewall status +if sudo ufw status | grep -q "Status: active"; then + echo "✅ Firewall: ACTIVE" +else + echo "⚠️ WARNING: Firewall is not active" +fi + +# Check fail2ban status +if systemctl is-active --quiet fail2ban; then + echo "✅ Fail2ban: RUNNING" + JAIL_STATUS=$(sudo fail2ban-client status postgresql 2>/dev/null | grep "Currently banned" || echo "Jail not configured") + echo " PostgreSQL jail: $JAIL_STATUS" +else + echo "⚠️ WARNING: Fail2ban is not running" +fi +``` + +## Step 8: Customer Database Health + +```bash +echo -e "\n[8/10] CUSTOMER DATABASES" +echo "-------------------------" + +# Check each customer database +CUSTOMER_DBS=$(sudo -u postgres psql -t -c " + SELECT datname FROM pg_database + WHERE datname NOT IN ('postgres', 'template0', 'template1') + ORDER BY datname;") + +for DB in $CUSTOMER_DBS; do + echo -e "\nDatabase: $DB" + + # Size + SIZE=$(sudo -u postgres psql -t -c " + SELECT pg_size_pretty(pg_database_size('$DB'));") + echo " Size: $SIZE" + + # Connection count + CONN=$(sudo -u postgres psql -t -c " + SELECT count(*) FROM pg_stat_activity WHERE datname = '$DB';") + echo " Connections: $CONN" + + # Transaction rate + TPS=$(sudo -u postgres psql -t -c " + SELECT xact_commit + xact_rollback as transactions + FROM pg_stat_database WHERE datname = '$DB';") + echo " Total transactions: $TPS" + + # Check for locks + LOCKS=$(sudo -u postgres psql -t -d $DB -c " + SELECT count(*) FROM pg_locks WHERE granted = false;") + if [ $LOCKS -gt 0 ]; then + echo " ⚠️ WARNING: $LOCKS blocked locks detected" + fi +done +``` + +## Step 9: System Logs 
Analysis + +```bash +echo -e "\n[9/10] LOG ANALYSIS" +echo "-------------------" + +# Check PostgreSQL logs for errors +LOG_DIR="/var/log/postgresql" +if [ -d "$LOG_DIR" ]; then + echo "Recent PostgreSQL errors (last 24 hours):" + find $LOG_DIR -name "*.log" -mtime -1 -exec grep -i "error\|fatal\|panic" {} \; | \ + tail -10 | head -5 + + ERROR_COUNT=$(find $LOG_DIR -name "*.log" -mtime -1 -exec grep -i "error\|fatal\|panic" {} \; | wc -l) + echo "Total errors in last 24 hours: $ERROR_COUNT" + + if [ $ERROR_COUNT -gt 100 ]; then + echo "⚠️ WARNING: High error rate detected" + fi +fi + +# Check system logs +echo -e "\nRecent system issues:" +sudo journalctl -p err --since "24 hours ago" --no-pager | tail -5 +``` + +## Step 10: Recommendations + +```bash +echo -e "\n[10/10] HEALTH SUMMARY & RECOMMENDATIONS" +echo "=========================================" + +# Collect all warnings +WARNINGS=0 +CRITICAL=0 + +# Generate recommendations based on findings +echo -e "\nRecommendations:" + +# Check if vacuum is needed +LAST_VACUUM=$(sudo -u postgres psql -t -c " + SELECT MAX(last_autovacuum) FROM pg_stat_user_tables;") +echo "- Last autovacuum: $LAST_VACUUM" + +# Check if analyze is needed +LAST_ANALYZE=$(sudo -u postgres psql -t -c " + SELECT MAX(last_autoanalyze) FROM pg_stat_user_tables;") +echo "- Last autoanalyze: $LAST_ANALYZE" + +# Generate overall health score +echo -e "\n━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" +if [ $CRITICAL -eq 0 ] && [ $WARNINGS -lt 3 ]; then + echo "✅ OVERALL HEALTH: GOOD" +elif [ $CRITICAL -eq 0 ] && [ $WARNINGS -lt 10 ]; then + echo "⚠️ OVERALL HEALTH: FAIR - Review warnings" +else + echo "❌ OVERALL HEALTH: POOR - Immediate action required" +fi +echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" + +# Save report +REPORT_FILE="/opt/fairdb/logs/health-check-$(date +%Y%m%d-%H%M%S).log" +echo -e "\nFull report saved to: $REPORT_FILE" +``` + +## Actions Based on Results + +### If Critical Issues Found: +1. Check PostgreSQL service status +2.
Review disk space availability +3. Verify backup integrity +4. Check for data corruption +5. Review security vulnerabilities + +### If Warnings Found: +1. Schedule maintenance window +2. Plan capacity upgrades +3. Review query performance +4. Update monitoring thresholds +5. Document issues for trending + +### Regular Maintenance Tasks: +1. Run VACUUM ANALYZE on large tables +2. Update table statistics +3. Review and optimize slow queries +4. Clean up old logs +5. Test backup restoration + +## Schedule Next Health Check + +```bash +# Schedule regular health checks +echo "30 */6 * * * root /usr/local/bin/fairdb-health-check > /dev/null 2>&1" | \ + sudo tee /etc/cron.d/fairdb-health-check + +echo "Health checks scheduled every 6 hours" +``` \ No newline at end of file diff --git a/commands/fairdb-onboard-customer.md b/commands/fairdb-onboard-customer.md new file mode 100644 index 0000000..c8ba8bd --- /dev/null +++ b/commands/fairdb-onboard-customer.md @@ -0,0 +1,446 @@ +--- +name: fairdb-onboard-customer +description: Complete customer onboarding workflow for FairDB PostgreSQL service +model: sonnet +--- + +# FairDB Customer Onboarding Workflow + +You are onboarding a new customer for FairDB PostgreSQL as a Service. This comprehensive workflow creates their database, users, configures access, sets up backups, and provides connection details. + +## Step 1: Gather Customer Information + +Collect these details: +1. **Customer Name**: Company/organization name +2. **Database Name**: Preferred database name (lowercase, no spaces) +3. **Primary Contact**: Name and email +4. **Plan Type**: Starter/Professional/Enterprise +5. **IP Allowlist**: Customer IP addresses for access +6. **Special Requirements**: Extensions, configurations, etc. 
+ +## Step 2: Validate Resources + +```bash +# Check available resources +df -h /var/lib/postgresql +free -h +sudo -u postgres psql -c "SELECT count(*) as database_count FROM pg_database WHERE datistemplate = false;" + +# Check current connections +sudo -u postgres psql -c "SELECT count(*) FROM pg_stat_activity;" +``` + +## Step 3: Create Customer Database + +```bash +# Set customer variables +CUSTOMER_NAME="customer_name" # Replace with actual +DB_NAME="${CUSTOMER_NAME}_db" +DB_OWNER="${CUSTOMER_NAME}_owner" +DB_USER="${CUSTOMER_NAME}_user" +DB_READONLY="${CUSTOMER_NAME}_readonly" + +# Generate secure passwords +DB_OWNER_PASS=$(openssl rand -base64 32) +DB_USER_PASS=$(openssl rand -base64 32) +DB_READONLY_PASS=$(openssl rand -base64 32) + +# Create database and users +sudo -u postgres psql << EOF +-- Create database owner role +CREATE ROLE ${DB_OWNER} WITH LOGIN PASSWORD '${DB_OWNER_PASS}' + CREATEDB CREATEROLE CONNECTION LIMIT 5; + +-- Create application user +CREATE ROLE ${DB_USER} WITH LOGIN PASSWORD '${DB_USER_PASS}' + CONNECTION LIMIT 50; + +-- Create read-only user +CREATE ROLE ${DB_READONLY} WITH LOGIN PASSWORD '${DB_READONLY_PASS}' + CONNECTION LIMIT 10; + +-- Create customer database +CREATE DATABASE ${DB_NAME} + WITH OWNER = ${DB_OWNER} + ENCODING = 'UTF8' + LC_COLLATE = 'en_US.UTF-8' + LC_CTYPE = 'en_US.UTF-8' + TEMPLATE = template0 + CONNECTION LIMIT = 100; + +-- Configure database +\c ${DB_NAME} + +-- Create schema +CREATE SCHEMA IF NOT EXISTS ${CUSTOMER_NAME} AUTHORIZATION ${DB_OWNER}; + +-- Grant permissions +GRANT CONNECT ON DATABASE ${DB_NAME} TO ${DB_USER}, ${DB_READONLY}; +GRANT USAGE ON SCHEMA ${CUSTOMER_NAME} TO ${DB_USER}, ${DB_READONLY}; +GRANT CREATE ON SCHEMA ${CUSTOMER_NAME} TO ${DB_USER}; + +-- Default privileges for tables +ALTER DEFAULT PRIVILEGES FOR ROLE ${DB_OWNER} IN SCHEMA ${CUSTOMER_NAME} + GRANT ALL ON TABLES TO ${DB_USER}; + +ALTER DEFAULT PRIVILEGES FOR ROLE ${DB_OWNER} IN SCHEMA ${CUSTOMER_NAME} + GRANT SELECT ON TABLES TO 
${DB_READONLY}; + +-- Default privileges for sequences +ALTER DEFAULT PRIVILEGES FOR ROLE ${DB_OWNER} IN SCHEMA ${CUSTOMER_NAME} + GRANT ALL ON SEQUENCES TO ${DB_USER}; + +ALTER DEFAULT PRIVILEGES FOR ROLE ${DB_OWNER} IN SCHEMA ${CUSTOMER_NAME} + GRANT SELECT ON SEQUENCES TO ${DB_READONLY}; + +-- Enable useful extensions +CREATE EXTENSION IF NOT EXISTS pg_stat_statements; +CREATE EXTENSION IF NOT EXISTS pgcrypto; +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; +CREATE EXTENSION IF NOT EXISTS citext; +EOF + +echo "Database ${DB_NAME} created successfully" +``` + +## Step 4: Configure Network Access + +```bash +# Add customer IP to pg_hba.conf +CUSTOMER_IP="203.0.113.0/32" # Replace with actual customer IP + +# Backup pg_hba.conf +sudo cp /etc/postgresql/16/main/pg_hba.conf /etc/postgresql/16/main/pg_hba.conf.$(date +%Y%m%d) + +# Add customer access rules +cat << EOF | sudo tee -a /etc/postgresql/16/main/pg_hba.conf + +# Customer: ${CUSTOMER_NAME} +hostssl ${DB_NAME} ${DB_OWNER} ${CUSTOMER_IP} scram-sha-256 +hostssl ${DB_NAME} ${DB_USER} ${CUSTOMER_IP} scram-sha-256 +hostssl ${DB_NAME} ${DB_READONLY} ${CUSTOMER_IP} scram-sha-256 +EOF + +# Update firewall +sudo ufw allow from ${CUSTOMER_IP} to any port 5432 comment "FairDB: ${CUSTOMER_NAME}" + +# Reload PostgreSQL configuration +sudo systemctl reload postgresql +``` + +## Step 5: Set Resource Limits + +```bash +# Configure per-database resource limits based on plan +case "${PLAN_TYPE}" in + "starter") + MAX_CONN=50 + WORK_MEM="4MB" + SHARED_BUFFERS="256MB" + ;; + "professional") + MAX_CONN=100 + WORK_MEM="8MB" + SHARED_BUFFERS="1GB" + ;; + "enterprise") + MAX_CONN=200 + WORK_MEM="16MB" + SHARED_BUFFERS="4GB" + ;; +esac + +# Apply database-specific settings +sudo -u postgres psql -d ${DB_NAME} << EOF +-- Set connection limit +ALTER DATABASE ${DB_NAME} CONNECTION LIMIT ${MAX_CONN}; + +-- Set database parameters +ALTER DATABASE ${DB_NAME} SET work_mem = '${WORK_MEM}'; +ALTER DATABASE ${DB_NAME} SET maintenance_work_mem = 
'${WORK_MEM}'; +ALTER DATABASE ${DB_NAME} SET effective_cache_size = '${SHARED_BUFFERS}'; +ALTER DATABASE ${DB_NAME} SET random_page_cost = 1.1; +ALTER DATABASE ${DB_NAME} SET log_statement = 'all'; +ALTER DATABASE ${DB_NAME} SET log_duration = on; +EOF +``` + +## Step 6: Configure Backup Policy + +```bash +# Create customer-specific backup configuration +cat << EOF | sudo tee -a /opt/fairdb/configs/backup-${CUSTOMER_NAME}.conf +# Backup configuration for ${CUSTOMER_NAME} +DATABASE=${DB_NAME} +BACKUP_RETENTION_DAYS=30 +BACKUP_SCHEDULE="0 3 * * *" # Daily at 3 AM +BACKUP_TYPE="full" +S3_PREFIX="${CUSTOMER_NAME}/" +EOF + +# Add to pgBackRest configuration +sudo tee -a /etc/pgbackrest/pgbackrest.conf << EOF + +[${CUSTOMER_NAME}] +pg1-path=/var/lib/postgresql/16/main +pg1-database=${DB_NAME} +pg1-port=5432 +backup-user=backup_user +process-max=2 +repo1-retention-full=4 +repo1-retention-diff=7 +EOF + +# Create backup stanza for customer +sudo -u postgres pgbackrest --stanza=${CUSTOMER_NAME} stanza-create + +# Schedule customer backup +echo "0 3 * * * postgres pgbackrest --stanza=${CUSTOMER_NAME} --type=full backup" | \ + sudo tee -a /etc/cron.d/fairdb-customer-${CUSTOMER_NAME} +``` + +## Step 7: Setup Monitoring + +```bash +# Create monitoring user and grants +sudo -u postgres psql -d ${DB_NAME} << EOF +-- Grant monitoring permissions +GRANT pg_monitor TO ${DB_READONLY}; +GRANT EXECUTE ON FUNCTION pg_stat_statements_reset() TO ${DB_OWNER}; +EOF + +# Create customer monitoring script (heredoc is literal; sed bakes in the customer values) +cat << 'EOF' | sed -e "s/[$]{DB_NAME}/${DB_NAME}/g" -e "s/[$]{CUSTOMER_NAME}/${CUSTOMER_NAME}/g" | sudo tee /opt/fairdb/scripts/monitor-${CUSTOMER_NAME}.sh +#!/bin/bash +# Monitoring script for ${CUSTOMER_NAME} + +DB_NAME="${DB_NAME}" +ALERT_THRESHOLD_CONNECTIONS=80 +ALERT_THRESHOLD_SIZE_GB=100 + +# Check connection usage +CONN_USAGE=$(sudo -u postgres psql -t -c " + SELECT (count(*) * 100.0 / setting::int)::int as pct + FROM pg_stat_activity, pg_settings + WHERE name = 'max_connections' + AND datname = '${DB_NAME}' + GROUP BY setting;") + +if [ ${CONN_USAGE:-0} -gt
$ALERT_THRESHOLD_CONNECTIONS ]; then + echo "ALERT: Connection usage at ${CONN_USAGE}% for ${CUSTOMER_NAME}" +fi + +# Check database size +DB_SIZE_GB=$(sudo -u postgres psql -t -c " + SELECT pg_database_size('${DB_NAME}') / 1024 / 1024 / 1024;") + +if [ ${DB_SIZE_GB:-0} -gt $ALERT_THRESHOLD_SIZE_GB ]; then + echo "ALERT: Database size is ${DB_SIZE_GB}GB for ${CUSTOMER_NAME}" +fi + +# Check for long-running queries +sudo -u postgres psql -d ${DB_NAME} -c " + SELECT pid, now() - pg_stat_activity.query_start AS duration, query + FROM pg_stat_activity + WHERE (now() - pg_stat_activity.query_start) > interval '5 minutes' + AND state = 'active';" +EOF + +sudo chmod +x /opt/fairdb/scripts/monitor-${CUSTOMER_NAME}.sh + +# Add to monitoring cron +echo "*/10 * * * * root /opt/fairdb/scripts/monitor-${CUSTOMER_NAME}.sh" | \ + sudo tee -a /etc/cron.d/fairdb-monitor-${CUSTOMER_NAME} +``` + +## Step 8: Generate SSL Certificates + +```bash +# Create customer SSL certificate +sudo mkdir -p /etc/postgresql/16/main/ssl/${CUSTOMER_NAME} +cd /etc/postgresql/16/main/ssl/${CUSTOMER_NAME} + +# Generate customer-specific SSL cert +sudo openssl req -new -x509 -days 365 -nodes \ + -out server.crt -keyout server.key \ + -subj "/C=US/ST=State/L=City/O=FairDB/OU=${CUSTOMER_NAME}/CN=${CUSTOMER_NAME}.fairdb.io" + +# Set permissions +sudo chmod 600 server.key +sudo chown postgres:postgres server.* + +# Create client certificate +sudo openssl req -new -nodes \ + -out client.csr -keyout client.key \ + -subj "/C=US/ST=State/L=City/O=FairDB/OU=${CUSTOMER_NAME}/CN=${DB_USER}" + +sudo openssl x509 -req -CAcreateserial \ + -in client.csr -CA server.crt -CAkey server.key \ + -out client.crt -days 365 + +# Package client certificates +tar czf /tmp/${CUSTOMER_NAME}-ssl-bundle.tar.gz client.crt client.key server.crt +``` + +## Step 9: Create Connection Documentation + +```bash +# Generate connection details document +cat << EOF > /tmp/${CUSTOMER_NAME}-connection-details.md +# FairDB PostgreSQL Connection 
Details +## Customer: ${CUSTOMER_NAME} + +### Database Information +- **Database Name**: ${DB_NAME} +- **Host**: fairdb-prod.example.com +- **Port**: 5432 +- **SSL Required**: Yes + +### User Credentials +#### Database Owner (DDL Operations) +- **Username**: ${DB_OWNER} +- **Password**: ${DB_OWNER_PASS} +- **Connection Limit**: 5 +- **Permissions**: Full database owner + +#### Application User (DML Operations) +- **Username**: ${DB_USER} +- **Password**: ${DB_USER_PASS} +- **Connection Limit**: 50 +- **Permissions**: CRUD operations on all tables + +#### Read-Only User (Reporting) +- **Username**: ${DB_READONLY} +- **Password**: ${DB_READONLY_PASS} +- **Connection Limit**: 10 +- **Permissions**: SELECT only + +### Connection Strings +\`\`\` +# Standard connection +postgresql://${DB_USER}:${DB_USER_PASS}@fairdb-prod.example.com:5432/${DB_NAME}?sslmode=require + +# With SSL certificate +postgresql://${DB_USER}:${DB_USER_PASS}@fairdb-prod.example.com:5432/${DB_NAME}?sslmode=require&sslcert=client.crt&sslkey=client.key&sslrootcert=server.crt + +# JDBC URL +jdbc:postgresql://fairdb-prod.example.com:5432/${DB_NAME}?ssl=true&sslmode=require + +# psql command +psql "host=fairdb-prod.example.com port=5432 dbname=${DB_NAME} user=${DB_USER} sslmode=require" +\`\`\` + +### Resource Limits +- **Plan**: ${PLAN_TYPE} +- **Max Connections**: ${MAX_CONN} +- **Storage Quota**: Unlimited (pay per GB) +- **Backup Retention**: 30 days +- **Backup Schedule**: Daily at 3:00 AM UTC + +### Support Information +- **Email**: support@fairdb.io +- **Emergency**: +1-xxx-xxx-xxxx +- **Documentation**: https://docs.fairdb.io +- **Status Page**: https://status.fairdb.io + +### Important Notes +1. Always use SSL connections +2. Rotate passwords every 90 days +3. Monitor connection pool usage +4. Test restore procedures quarterly +5. Keep IP allowlist updated + +### Next Steps +1. Download SSL certificates: ${CUSTOMER_NAME}-ssl-bundle.tar.gz +2. Test connection with provided credentials +3. 
Configure application connection pool +4. Set up monitoring dashboards +5. Review security best practices + +Generated: $(date) +EOF + +echo "Connection details saved to /tmp/${CUSTOMER_NAME}-connection-details.md" +``` + +## Step 10: Final Verification + +```bash +# Test all user connections +echo "Testing database connections..." + +# Test owner connection +PGPASSWORD=${DB_OWNER_PASS} psql -h localhost -U ${DB_OWNER} -d ${DB_NAME} -c "SELECT current_user, current_database();" + +# Test app user connection +PGPASSWORD=${DB_USER_PASS} psql -h localhost -U ${DB_USER} -d ${DB_NAME} -c "SELECT current_user, current_database();" + +# Test readonly connection +PGPASSWORD=${DB_READONLY_PASS} psql -h localhost -U ${DB_READONLY} -d ${DB_NAME} -c "SELECT current_user, current_database();" + +# Verify backup configuration +sudo -u postgres pgbackrest --stanza=${CUSTOMER_NAME} check + +# Check monitoring +/opt/fairdb/scripts/monitor-${CUSTOMER_NAME}.sh + +# Generate onboarding summary +echo " +=========================================== +FairDB Customer Onboarding Complete +=========================================== +Customer: ${CUSTOMER_NAME} +Database: ${DB_NAME} +Created: $(date) +Plan: ${PLAN_TYPE} + +Files Generated: +- /tmp/${CUSTOMER_NAME}-connection-details.md +- /tmp/${CUSTOMER_NAME}-ssl-bundle.tar.gz + +Next Actions: +1. Send connection details to customer +2. Schedule onboarding call +3. Monitor initial usage +4. 
Follow up in 24 hours +=========================================== +" +``` + +## Onboarding Checklist + +Verify completion: +- [ ] Database created +- [ ] Users created with secure passwords +- [ ] Network access configured +- [ ] Resource limits applied +- [ ] Backup policy configured +- [ ] Monitoring enabled +- [ ] SSL certificates generated +- [ ] Documentation created +- [ ] Connection tests passed +- [ ] Customer notified + +## Rollback Procedure + +If onboarding fails: +```bash +# Remove database and users +sudo -u postgres psql << EOF +DROP DATABASE IF EXISTS ${DB_NAME}; +DROP ROLE IF EXISTS ${DB_OWNER}; +DROP ROLE IF EXISTS ${DB_USER}; +DROP ROLE IF EXISTS ${DB_READONLY}; +EOF + +# Remove configurations +sudo rm -f /etc/cron.d/fairdb-customer-${CUSTOMER_NAME} +sudo rm -f /etc/cron.d/fairdb-monitor-${CUSTOMER_NAME} +sudo rm -f /opt/fairdb/scripts/monitor-${CUSTOMER_NAME}.sh +sudo rm -rf /etc/postgresql/16/main/ssl/${CUSTOMER_NAME} + +# Remove firewall rule +sudo ufw delete allow from ${CUSTOMER_IP} to any port 5432 + +echo "Customer ${CUSTOMER_NAME} rollback complete" +``` \ No newline at end of file diff --git a/commands/fairdb-setup-backup.md b/commands/fairdb-setup-backup.md new file mode 100644 index 0000000..733b357 --- /dev/null +++ b/commands/fairdb-setup-backup.md @@ -0,0 +1,420 @@ +--- +name: fairdb-setup-backup +description: Configure pgBackRest with Wasabi S3 for automated PostgreSQL backups +model: sonnet +--- + +# FairDB pgBackRest Backup Configuration with Wasabi S3 + +You are configuring pgBackRest with Wasabi S3 storage for automated PostgreSQL backups. Follow SOP-003 precisely. + +## Prerequisites Check + +Verify before starting: +1. PostgreSQL 16 is installed and running +2. Wasabi S3 account is active with bucket created +3. AWS CLI credentials are available +4. 
At least 50GB free disk space for local backups + +## Step 1: Install pgBackRest + +```bash +# Add pgBackRest repository +sudo apt-get install -y software-properties-common +sudo add-apt-repository -y ppa:pgbackrest/backrest +sudo apt-get update + +# Install pgBackRest +sudo apt-get install -y pgbackrest + +# Verify installation +pgbackrest version +``` + +## Step 2: Configure Wasabi S3 Credentials + +```bash +# Create pgBackRest configuration directory +sudo mkdir -p /etc/pgbackrest +sudo mkdir -p /var/lib/pgbackrest +sudo mkdir -p /var/log/pgbackrest +sudo mkdir -p /var/spool/pgbackrest + +# Set ownership +sudo chown -R postgres:postgres /var/lib/pgbackrest +sudo chown -R postgres:postgres /var/log/pgbackrest +sudo chown -R postgres:postgres /var/spool/pgbackrest + +# Store Wasabi credentials (secure these!) +export WASABI_ACCESS_KEY="YOUR_WASABI_ACCESS_KEY" +export WASABI_SECRET_KEY="YOUR_WASABI_SECRET_KEY" +export WASABI_BUCKET="fairdb-backups" +export WASABI_REGION="us-east-1" # Or your Wasabi region +export WASABI_ENDPOINT="s3.us-east-1.wasabisys.com" # Adjust for your region +``` + +## Step 3: Create pgBackRest Configuration + +```bash +# Create main configuration file +sudo tee /etc/pgbackrest/pgbackrest.conf << EOF +[global] +# General Options +process-max=4 +log-level-console=info +log-level-file=detail +start-fast=y +stop-auto=y +archive-async=y +archive-push-queue-max=4GB +spool-path=/var/spool/pgbackrest + +# S3 Repository Configuration +repo1-type=s3 +repo1-s3-endpoint=${WASABI_ENDPOINT} +repo1-s3-bucket=${WASABI_BUCKET} +repo1-s3-region=${WASABI_REGION} +repo1-s3-key=${WASABI_ACCESS_KEY} +repo1-s3-key-secret=${WASABI_SECRET_KEY} +repo1-path=/pgbackrest +repo1-retention-full=4 +repo1-retention-diff=12 +repo1-retention-archive=30 +repo1-cipher-type=aes-256-cbc +repo1-cipher-pass=CHANGE_THIS_PASSPHRASE + +# Local Repository (for faster restores) +repo2-type=posix +repo2-path=/var/lib/pgbackrest +repo2-retention-full=2 +repo2-retention-diff=6 + +[fairdb] 
+# PostgreSQL Configuration +pg1-path=/var/lib/postgresql/16/main +pg1-port=5432 +pg1-user=postgres + +# Archive Configuration +archive-timeout=60 +archive-check=y +backup-standby=n + +# Backup Options +compress-type=lz4 +compress-level=3 +backup-user=backup_user +delta=y +process-max=2 +EOF + +# Secure the configuration file +sudo chmod 640 /etc/pgbackrest/pgbackrest.conf +sudo chown postgres:postgres /etc/pgbackrest/pgbackrest.conf +``` + +## Step 4: Configure PostgreSQL for pgBackRest + +```bash +# Update PostgreSQL configuration +sudo tee -a /etc/postgresql/16/main/postgresql.conf << 'EOF' + +# pgBackRest Archive Configuration +archive_mode = on +archive_command = 'pgbackrest --stanza=fairdb archive-push %p' +archive_timeout = 60 +max_wal_senders = 3 +wal_level = replica +wal_log_hints = on +EOF + +# Restart PostgreSQL +sudo systemctl restart postgresql +``` + +## Step 5: Initialize Backup Stanza + +```bash +# Create the stanza +sudo -u postgres pgbackrest --stanza=fairdb stanza-create + +# Verify stanza +sudo -u postgres pgbackrest --stanza=fairdb check +``` + +## Step 6: Create Backup Scripts + +```bash +# Full backup script +sudo tee /opt/fairdb/scripts/backup-full.sh << 'EOF' +#!/bin/bash +set -eo pipefail + +LOG_FILE="/var/log/fairdb/backup-full-$(date +%Y%m%d-%H%M%S).log" +echo "Starting full backup at $(date)" | tee -a $LOG_FILE + +# Perform full backup to both repositories +sudo -u postgres pgbackrest --stanza=fairdb --type=full --repo=1 backup 2>&1 | tee -a $LOG_FILE +sudo -u postgres pgbackrest --stanza=fairdb --type=full --repo=2 backup 2>&1 | tee -a $LOG_FILE + +# Verify backup +sudo -u postgres pgbackrest --stanza=fairdb --repo=1 info 2>&1 | tee -a $LOG_FILE + +echo "Full backup completed at $(date)" | tee -a $LOG_FILE + +# Send notification (implement webhook/email here) +curl -X POST "$FAIRDB_MONITORING_WEBHOOK" \ + -H 'Content-Type: application/json' \ + -d "{\"text\":\"FairDB full backup completed successfully\"}" 2>/dev/null || true +EOF + +# Incremental
backup script +sudo tee /opt/fairdb/scripts/backup-incremental.sh << 'EOF' +#!/bin/bash +set -eo pipefail + +LOG_FILE="/var/log/fairdb/backup-incr-$(date +%Y%m%d-%H%M%S).log" +echo "Starting incremental backup at $(date)" | tee -a $LOG_FILE + +# Perform incremental backup +sudo -u postgres pgbackrest --stanza=fairdb --type=incr --repo=1 backup 2>&1 | tee -a $LOG_FILE + +echo "Incremental backup completed at $(date)" | tee -a $LOG_FILE +EOF + +# Differential backup script +sudo tee /opt/fairdb/scripts/backup-differential.sh << 'EOF' +#!/bin/bash +set -eo pipefail + +LOG_FILE="/var/log/fairdb/backup-diff-$(date +%Y%m%d-%H%M%S).log" +echo "Starting differential backup at $(date)" | tee -a $LOG_FILE + +# Perform differential backup +sudo -u postgres pgbackrest --stanza=fairdb --type=diff --repo=1 backup 2>&1 | tee -a $LOG_FILE + +echo "Differential backup completed at $(date)" | tee -a $LOG_FILE +EOF + +# Make scripts executable +sudo chmod +x /opt/fairdb/scripts/backup-*.sh +``` + +## Step 7: Schedule Automated Backups + +```bash +# Install a system cron file (/etc/cron.d) for automated backups +cat << 'EOF' | sudo tee /etc/cron.d/fairdb-backups +# FairDB Automated Backup Schedule +SHELL=/bin/bash +PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin + +# Weekly full backup (Sunday 2 AM) +0 2 * * 0 root /opt/fairdb/scripts/backup-full.sh + +# Daily differential backup (Mon-Sat 2 AM) +0 2 * * 1-6 root /opt/fairdb/scripts/backup-differential.sh + +# Hourly incremental backup (business hours) +0 9-18 * * 1-5 root /opt/fairdb/scripts/backup-incremental.sh + +# Backup verification (daily at 5 AM) +0 5 * * * postgres pgbackrest --stanza=fairdb --repo=1 check + +# Archive expiration (daily at 3 AM) +0 3 * * * postgres pgbackrest --stanza=fairdb --repo=1 expire +EOF +``` + +## Step 8: Create Restore Procedures + +```bash +# Point-in-time recovery script +sudo tee /opt/fairdb/scripts/restore-pitr.sh << 'EOF' +#!/bin/bash +# FairDB Point-in-Time Recovery Script + +if [ $# -ne 1 ]; then + echo "Usage: $0
'YYYY-MM-DD HH:MM:SS'" + exit 1 +fi + +TARGET_TIME="$1" +BACKUP_PATH="/var/lib/postgresql/16/main" + +echo "WARNING: This will restore the database to $TARGET_TIME" +echo "Current data will be LOST. Continue? (yes/no)" +read -r CONFIRM + +if [ "$CONFIRM" != "yes" ]; then + echo "Restore cancelled" + exit 1 +fi + +# Stop PostgreSQL +sudo systemctl stop postgresql + +# Clear data directory (run the traversal as root; abort if the path is empty) +sudo find "${BACKUP_PATH:?}" -mindepth 1 -delete + +# Restore to target time +sudo -u postgres pgbackrest --stanza=fairdb \ + --type=time \ + --target="$TARGET_TIME" \ + --target-action=promote \ + restore + +# Start PostgreSQL +sudo systemctl start postgresql + +echo "Restore completed. Verify data integrity." +EOF + +sudo chmod +x /opt/fairdb/scripts/restore-pitr.sh +``` + +## Step 9: Test Backup and Restore + +```bash +# Perform test backup +sudo -u postgres pgbackrest --stanza=fairdb --type=full backup + +# Check backup info +sudo -u postgres pgbackrest --stanza=fairdb info + +# List backups +sudo -u postgres pgbackrest --stanza=fairdb info --output=json + +# Test restore to alternate location +sudo mkdir -p /tmp/pgbackrest-test +sudo chown postgres:postgres /tmp/pgbackrest-test +sudo -u postgres pgbackrest --stanza=fairdb \ + --pg1-path=/tmp/pgbackrest-test \ + --type=default \ + restore +``` + +## Step 10: Monitor Backup Health + +```bash +# Create monitoring script +sudo tee /opt/fairdb/scripts/check-backup-health.sh << 'EOF' +#!/bin/bash +# FairDB Backup Health Check + +# Check last backup time +LAST_BACKUP=$(sudo -u postgres pgbackrest --stanza=fairdb info --output=json | \ + jq -r '.[] | .backup[-1].timestamp.stop') + +# timestamp.stop is already a Unix epoch; "@" tells date to read it as one +LAST_BACKUP_EPOCH=$(date -d "@$LAST_BACKUP" +%s) +CURRENT_EPOCH=$(date +%s) +HOURS_AGO=$(( ($CURRENT_EPOCH - $LAST_BACKUP_EPOCH) / 3600 )) + +# Alert if backup is older than 25 hours +if [ $HOURS_AGO -gt 25 ]; then + echo "ALERT: Last backup was $HOURS_AGO hours ago!"
+ # Send alert (implement notification here) + exit 1 +fi + +echo "Backup health OK - last backup $HOURS_AGO hours ago" + +# Check S3 connectivity +aws s3 ls s3://${WASABI_BUCKET}/pgbackrest/ \ + --endpoint-url=https://${WASABI_ENDPOINT} > /dev/null 2>&1 +if [ $? -ne 0 ]; then + echo "ALERT: Cannot connect to Wasabi S3!" + exit 1 +fi + +echo "S3 connectivity OK" +EOF + +sudo chmod +x /opt/fairdb/scripts/check-backup-health.sh + +# Add to monitoring cron +echo "*/30 * * * * root /opt/fairdb/scripts/check-backup-health.sh" | \ + sudo tee -a /etc/cron.d/fairdb-monitoring +``` + +## Step 11: Document Backup Configuration + +```bash +cat > /opt/fairdb/configs/backup-info.txt << EOF +FairDB Backup Configuration +=========================== +Backup Solution: pgBackRest +Primary Repository: Wasabi S3 (${WASABI_BUCKET}) +Secondary Repository: Local (/var/lib/pgbackrest) +Stanza Name: fairdb +Encryption: AES-256-CBC + +Retention Policy: +- Full Backups: 4 (S3), 2 (Local) +- Differential: 12 (S3), 6 (Local) +- WAL Archives: 30 days + +Schedule: +- Full: Weekly (Sunday 2 AM) +- Differential: Daily (Mon-Sat 2 AM) +- Incremental: Hourly (9 AM - 6 PM weekdays) + +Restore Procedures: +- Latest: pgbackrest --stanza=fairdb restore +- PITR: /opt/fairdb/scripts/restore-pitr.sh 'YYYY-MM-DD HH:MM:SS' + +Monitoring: +- Health checks: Every 30 minutes +- Verification: Daily at 5 AM +- Expiration: Daily at 3 AM +EOF +``` + +## Verification Checklist + +Confirm these items: +- [ ] pgBackRest installed and configured +- [ ] Wasabi S3 credentials configured +- [ ] Stanza created and verified +- [ ] PostgreSQL archive_command configured +- [ ] Backup scripts created and executable +- [ ] Automated schedule configured +- [ ] Test backup successful +- [ ] Test restore successful +- [ ] Monitoring scripts in place +- [ ] Documentation complete + +## Security Notes + +- Store Wasabi credentials securely (use AWS Secrets Manager in production) +- Encrypt backup repository with strong passphrase +- 
Regularly test restore procedures +- Monitor backup logs for failures +- Keep pgBackRest updated + +## Output Summary + +Provide the user with: +1. Backup stanza status: `pgbackrest --stanza=fairdb info` +2. Next full backup time from cron schedule +3. Location of backup scripts and logs +4. Restore procedure documentation +5. Monitoring webhook configuration needed + +## Important Commands + +```bash +# Manual backup commands +sudo -u postgres pgbackrest --stanza=fairdb --type=full backup # Full +sudo -u postgres pgbackrest --stanza=fairdb --type=diff backup # Differential +sudo -u postgres pgbackrest --stanza=fairdb --type=incr backup # Incremental + +# Check backup status +sudo -u postgres pgbackrest --stanza=fairdb info +sudo -u postgres pgbackrest --stanza=fairdb check + +# Restore commands +sudo -u postgres pgbackrest --stanza=fairdb restore # Latest +sudo -u postgres pgbackrest --stanza=fairdb --type=time --target="2024-01-01 12:00:00" restore # PITR +``` \ No newline at end of file diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..c8cbdf8 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,117 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:jeremylongshore/claude-code-plugins-plus:plugins/devops/fairdb-operations-kit", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "97c05e758c4c9baa24934e9410b054c0de961144", + "treeHash": "13b9db23758bd97c1dd11f8920fd56f7c72370e2211dc4da132747394e41b936", + "generatedAt": "2025-11-28T10:18:26.745426Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "fairdb-operations-kit", + "description": "Complete operations kit for FairDB PostgreSQL as a Service - VPS setup, PostgreSQL management, 
customer provisioning, monitoring, and backup automation", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "c798fbb21f1b56ddff99da72a2b3a95c31a4435ee78539273879a8be8b12ea36" + }, + { + "path": "agents/fairdb-automation-agent.md", + "sha256": "c8ca4d9d064bf658260622158cdc025dc6979df5aea9f7064b6ba650aaad19ad" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "99499fbe2d5c6fb1ea34ddcd1f1df53c6e28ae61394a238076c454aa509d75c3" + }, + { + "path": "commands/fairdb-emergency-response.md", + "sha256": "eff1dd5567f185d08bde94724eddf0e9ec85c2d7263afebbbfd061b5c6f6bd8e" + }, + { + "path": "commands/fairdb-setup-backup.md", + "sha256": "d67b979ea1fb0a9e45a7a4dc68396211d38502beb7b94f72a789a04465df44d8" + }, + { + "path": "commands/fairdb-onboard-customer.md", + "sha256": "ffe547caf589930fa03cfab7f3334919d5d85e5c4b66260d92d44d081e4d5eeb" + }, + { + "path": "commands/fairdb-health-check.md", + "sha256": "40a682c90cb61d12e35f57c29a2cece9e5999ce3b4e3d55513cd17ea4a2b2a30" + }, + { + "path": "skills/fairdb-backup-manager/SKILL.md", + "sha256": "be66924d8bc85d5151fc7af6291e8683bb6958dbac8a27cea7465399b6471aa6" + }, + { + "path": "skills/fairdb-backup-manager/references/README.md", + "sha256": "db9680278e03728fef93321fc76c435387bc0c8fe1dcc9870bdf2fa236ea8ac3" + }, + { + "path": "skills/fairdb-backup-manager/scripts/README.md", + "sha256": "f042646ad5b685556c044080a6b73202a490fb8288be8219328faefc12d5a30e" + }, + { + "path": "skills/fairdb-backup-manager/assets/README.md", + "sha256": "33bfb083485b48c78a1738368c52cd9f202724a414bce507db181d8291b83aec" + }, + { + "path": "skills/skill-adapter/references/examples.md", + "sha256": "922bbc3c4ebf38b76f515b5c1998ebde6bf902233e00e2c5a0e9176f975a7572" + }, + { + "path": "skills/skill-adapter/references/best-practices.md", + "sha256": "c8f32b3566252f50daacd346d7045a1060c718ef5cfb07c55a0f2dec5f1fb39e" + }, + { + "path": "skills/skill-adapter/references/README.md", + "sha256": 
"c2e9f1c23ddc3b7c1eefb4d468bf979231f498334a45d4af27167b3e4211799b" + }, + { + "path": "skills/skill-adapter/scripts/helper-template.sh", + "sha256": "0881d5660a8a7045550d09ae0acc15642c24b70de6f08808120f47f86ccdf077" + }, + { + "path": "skills/skill-adapter/scripts/validation.sh", + "sha256": "92551a29a7f512d2036e4f1fb46c2a3dc6bff0f7dde4a9f699533e446db48502" + }, + { + "path": "skills/skill-adapter/scripts/README.md", + "sha256": "4650b0c92686b0b9b3f7c042b6d220ef7d2820d7fb99c45247f0cd3d6e18afc6" + }, + { + "path": "skills/skill-adapter/assets/test-data.json", + "sha256": "ac17dca3d6e253a5f39f2a2f1b388e5146043756b05d9ce7ac53a0042eee139d" + }, + { + "path": "skills/skill-adapter/assets/README.md", + "sha256": "3706526734e41a1a40f975cda2ccf4a2db12ccdfcadbb403333a4304a999fbad" + }, + { + "path": "skills/skill-adapter/assets/skill-schema.json", + "sha256": "f5639ba823a24c9ac4fb21444c0717b7aefde1a4993682897f5bf544f863c2cd" + }, + { + "path": "skills/skill-adapter/assets/config-template.json", + "sha256": "0c2ba33d2d3c5ccb266c0848fc43caa68a2aa6a80ff315d4b378352711f83e1c" + } + ], + "dirSha256": "13b9db23758bd97c1dd11f8920fd56f7c72370e2211dc4da132747394e41b936" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/fairdb-backup-manager/SKILL.md b/skills/fairdb-backup-manager/SKILL.md new file mode 100644 index 0000000..3115a72 --- /dev/null +++ b/skills/fairdb-backup-manager/SKILL.md @@ -0,0 +1,191 @@ +--- +name: fairdb-backup-manager +description: | + Automatically manages PostgreSQL backups with pgBackRest and Wasabi S3 storage when working with FairDB databases Activates when you request "fairdb backup manager" functionality. 
+allowed-tools: Read, Write, Edit, Grep, Glob, Bash +version: 1.0.0 +--- + +# FairDB Backup Manager + +## Purpose +I automatically handle all backup-related operations for FairDB PostgreSQL databases, including scheduling, verification, restoration, and monitoring of pgBackRest backups with Wasabi S3 storage. + +## Activation Triggers +I activate when you: +- Mention "backup", "restore", "pgbackrest", or "recovery" in the context of FairDB +- Work with PostgreSQL backup configurations +- Need to verify backup integrity +- Discuss disaster recovery or data protection +- Experience data loss or corruption issues + +## Core Capabilities + +### Backup Operations +- Configure pgBackRest with Wasabi S3 +- Execute full, differential, and incremental backups +- Manage backup schedules and retention policies +- Compress and encrypt backup data +- Monitor backup health and success rates + +### Restore Operations +- Perform point-in-time recovery (PITR) +- Restore specific databases or tables +- Test restore procedures without impacting production +- Validate restored data integrity +- Document recovery time objectives (RTO) + +### Monitoring & Verification +- Check backup completion status +- Verify backup integrity with test restores +- Monitor backup size and growth trends +- Alert on backup failures or delays +- Generate backup compliance reports + +## Automated Workflows + +When activated, I will: + +1. **Assess Current State** + - Check existing backup configuration + - Review backup history and success rate + - Identify any failed or missing backups + - Analyze storage usage and costs + +2. **Optimize Configuration** + - Adjust retention policies based on requirements + - Configure optimal compression settings + - Set up parallel backup processes + - Implement incremental backup strategies + +3. **Execute Operations** + - Run scheduled backups automatically + - Perform test restores monthly + - Clean up old backups per retention policy + - Monitor and alert on issues + +4. 
**Document & Report** + - Maintain backup/restore runbooks + - Generate compliance reports + - Track metrics and trends + - Document recovery procedures + +## Integration with FairDB Commands + +I work seamlessly with these FairDB commands: +- `/fairdb-setup-backup` - Initial configuration +- `/fairdb-onboard-customer` - Customer-specific backups +- `/fairdb-emergency-response` - Disaster recovery +- `/fairdb-health-check` - Backup health monitoring + +## Best Practices I Enforce + +### Backup Strategy +- Full backups weekly (Sunday 2 AM) +- Differential backups daily +- Incremental backups hourly during business hours +- WAL archiving for point-in-time recovery +- Geographical redundancy with Wasabi regions + +### Security +- AES-256 encryption for all backups +- Secure key management +- Access control and audit logging +- Encrypted transport to S3 +- Immutable backup storage + +### Testing +- Monthly restore tests +- Quarterly disaster recovery drills +- Automated integrity verification +- Performance benchmarking +- Documentation updates + +## Proactive Monitoring + +I continuously monitor for: +- Backup failures or delays +- Storage capacity issues +- Unusual backup sizes +- Performance degradation +- Compliance violations + +## Emergency Response + +During data loss incidents, I: +1. Assess the extent of data loss +2. Identify the best recovery point +3. Execute restore procedures +4. Verify data integrity +5. 
Document incident and recovery + +## Reporting + +I generate these reports automatically: +- Daily backup status summary +- Weekly storage utilization report +- Monthly compliance audit +- Quarterly DR test results +- Annual backup strategy review + +## Cost Optimization + +I help reduce backup costs by: +- Implementing intelligent retention policies +- Using compression effectively +- Cleaning up unnecessary backups +- Optimizing transfer methods +- Selecting appropriate storage tiers + +## Example Scenarios + +### Scenario: New Customer Onboarding +``` +User: "Setting up database for new customer Acme Corp" +Me: I'll configure automated backups for Acme Corp: +- Creating dedicated pgBackRest stanza +- Setting up daily full backups with 30-day retention +- Configuring WAL archiving for PITR +- Scheduling test restores monthly +- Adding monitoring for backup success +``` + +### Scenario: Backup Failure Alert +``` +System: "Backup failed for customer_db" +Me: I'm investigating the backup failure: +- Checking disk space: OK +- Verifying S3 connectivity: Failed +- Issue: Wasabi credentials expired +- Action: Rotating credentials and retrying backup +- Result: Backup completed successfully +``` + +### Scenario: Recovery Request +``` +User: "Need to restore CustomerX database to yesterday 3 PM" +Me: Initiating point-in-time recovery: +- Target time: 2024-03-14 15:00:00 +- Latest backup: Full backup from 2024-03-14 02:00:00 +- Applying WAL logs to target time +- Restoration complete in 12 minutes +- Data verified and customer notified +``` + +## Success Metrics + +I track and optimize: +- Backup success rate (target: >99.9%) +- Recovery time objective (target: <1 hour) +- Recovery point objective (target: <5 minutes) +- Storage efficiency (compression ratio >3:1) +- Cost per GB backed up + +## Continuous Improvement + +I learn from each operation to: +- Refine backup schedules +- Improve recovery procedures +- Optimize resource usage +- Enhance monitoring alerts +- 
Update documentation \ No newline at end of file diff --git a/skills/fairdb-backup-manager/assets/README.md b/skills/fairdb-backup-manager/assets/README.md new file mode 100644 index 0000000..148d1c3 --- /dev/null +++ b/skills/fairdb-backup-manager/assets/README.md @@ -0,0 +1,26 @@ +# Skill Assets + +This directory contains static assets used by this skill. + +## Purpose + +Assets can include: +- Configuration files (JSON, YAML) +- Data files +- Templates +- Schemas +- Test fixtures + +## Guidelines + +- Keep assets small and focused +- Document asset purpose and format +- Use standard file formats +- Include schema validation where applicable + +## Common Asset Types + +- **config.json** - Configuration templates +- **schema.json** - JSON schemas +- **template.yaml** - YAML templates +- **test-data.json** - Test fixtures diff --git a/skills/fairdb-backup-manager/references/README.md b/skills/fairdb-backup-manager/references/README.md new file mode 100644 index 0000000..bf97184 --- /dev/null +++ b/skills/fairdb-backup-manager/references/README.md @@ -0,0 +1,26 @@ +# Skill References + +This directory contains reference materials that enhance this skill's capabilities. 
+ +## Purpose + +References can include: +- Code examples +- Style guides +- Best practices documentation +- Template files +- Configuration examples + +## Guidelines + +- Keep references concise and actionable +- Use markdown for documentation +- Include clear examples +- Link to external resources when appropriate + +## Types of References + +- **examples.md** - Usage examples +- **style-guide.md** - Coding standards +- **templates/** - Reusable templates +- **patterns.md** - Design patterns diff --git a/skills/fairdb-backup-manager/scripts/README.md b/skills/fairdb-backup-manager/scripts/README.md new file mode 100644 index 0000000..9f9c7ad --- /dev/null +++ b/skills/fairdb-backup-manager/scripts/README.md @@ -0,0 +1,24 @@ +# Skill Scripts + +This directory contains optional helper scripts that support this skill's functionality. + +## Purpose + +Scripts here can be: +- Referenced by the skill for automation +- Used as examples for users +- Executed during skill activation + +## Guidelines + +- All scripts should be well-documented +- Include usage examples in comments +- Make scripts executable (`chmod +x`) +- Use `#!/bin/bash` or `#!/usr/bin/env python3` shebangs + +## Adding Scripts + +1. Create script file (e.g., `analyze.sh`, `process.py`) +2. Add documentation header +3. Make executable: `chmod +x script-name.sh` +4. Test thoroughly before committing diff --git a/skills/skill-adapter/assets/README.md b/skills/skill-adapter/assets/README.md new file mode 100644 index 0000000..c05c77c --- /dev/null +++ b/skills/skill-adapter/assets/README.md @@ -0,0 +1,7 @@ +# Assets + +Bundled resources for the fairdb-operations-kit skill. + +- [ ] `customer_onboarding_template.md`: Template for customer onboarding documentation. +- [ ] `health_check_report_template.json`: Template for health check reports. +- [ ] `incident_response_checklist.md`: Checklist for incident response procedures. 
diff --git a/skills/skill-adapter/assets/config-template.json b/skills/skill-adapter/assets/config-template.json new file mode 100644 index 0000000..16f1712 --- /dev/null +++ b/skills/skill-adapter/assets/config-template.json @@ -0,0 +1,32 @@ +{ + "skill": { + "name": "skill-name", + "version": "1.0.0", + "enabled": true, + "settings": { + "verbose": false, + "autoActivate": true, + "toolRestrictions": true + } + }, + "triggers": { + "keywords": [ + "example-trigger-1", + "example-trigger-2" + ], + "patterns": [] + }, + "tools": { + "allowed": [ + "Read", + "Grep", + "Bash" + ], + "restricted": [] + }, + "metadata": { + "author": "Plugin Author", + "category": "general", + "tags": [] + } +} diff --git a/skills/skill-adapter/assets/skill-schema.json b/skills/skill-adapter/assets/skill-schema.json new file mode 100644 index 0000000..8dc154c --- /dev/null +++ b/skills/skill-adapter/assets/skill-schema.json @@ -0,0 +1,28 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Claude Skill Configuration", + "type": "object", + "required": ["name", "description"], + "properties": { + "name": { + "type": "string", + "pattern": "^[a-z0-9-]+$", + "maxLength": 64, + "description": "Skill identifier (lowercase, hyphens only)" + }, + "description": { + "type": "string", + "maxLength": 1024, + "description": "What the skill does and when to use it" + }, + "allowed-tools": { + "type": "string", + "description": "Comma-separated list of allowed tools" + }, + "version": { + "type": "string", + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "description": "Semantic version (x.y.z)" + } + } +} diff --git a/skills/skill-adapter/assets/test-data.json b/skills/skill-adapter/assets/test-data.json new file mode 100644 index 0000000..f0cd871 --- /dev/null +++ b/skills/skill-adapter/assets/test-data.json @@ -0,0 +1,27 @@ +{ + "testCases": [ + { + "name": "Basic activation test", + "input": "trigger phrase example", + "expected": { + "activated": true, + "toolsUsed": ["Read", 
"Grep"], + "success": true + } + }, + { + "name": "Complex workflow test", + "input": "multi-step trigger example", + "expected": { + "activated": true, + "steps": 3, + "toolsUsed": ["Read", "Write", "Bash"], + "success": true + } + } + ], + "fixtures": { + "sampleInput": "example data", + "expectedOutput": "processed result" + } +} diff --git a/skills/skill-adapter/references/README.md b/skills/skill-adapter/references/README.md new file mode 100644 index 0000000..d575274 --- /dev/null +++ b/skills/skill-adapter/references/README.md @@ -0,0 +1,11 @@ +# References + +Bundled resources for fairdb-operations-kit skill + +- [ ] contabo_api_reference.md Contabo API documentation for VPS provisioning. +- [ ] postgres_configuration.md PostgreSQL 16 configuration best practices. +- [ ] pgbackrest_configuration.md pgBackRest configuration guide. +- [ ] wasabi_s3_configuration.md Wasabi S3 storage setup for backups. +- [ ] sop_001.md Standard Operating Procedure for VPS provisioning. +- [ ] sop_002.md Standard Operating Procedure for PostgreSQL installation. +- [ ] sop_003.md Standard Operating Procedure for backup configuration. diff --git a/skills/skill-adapter/references/best-practices.md b/skills/skill-adapter/references/best-practices.md new file mode 100644 index 0000000..3505048 --- /dev/null +++ b/skills/skill-adapter/references/best-practices.md @@ -0,0 +1,69 @@ +# Skill Best Practices + +Guidelines for optimal skill usage and development. + +## For Users + +### Activation Best Practices + +1. **Use Clear Trigger Phrases** + - Match phrases from skill description + - Be specific about intent + - Provide necessary context + +2. **Provide Sufficient Context** + - Include relevant file paths + - Specify scope of analysis + - Mention any constraints + +3. 
**Understand Tool Permissions** + - Check allowed-tools in frontmatter + - Know what the skill can/cannot do + - Request appropriate actions + +### Workflow Optimization + +- Start with simple requests +- Build up to complex workflows +- Verify each step before proceeding +- Use skill consistently for related tasks + +## For Developers + +### Skill Development Guidelines + +1. **Clear Descriptions** + - Include explicit trigger phrases + - Document all capabilities + - Specify limitations + +2. **Proper Tool Permissions** + - Use minimal necessary tools + - Document security implications + - Test with restricted tools + +3. **Comprehensive Documentation** + - Provide usage examples + - Document common pitfalls + - Include troubleshooting guide + +### Maintenance + +- Keep version updated +- Test after tool updates +- Monitor user feedback +- Iterate on descriptions + +## Performance Tips + +- Scope skills to specific domains +- Avoid overlapping trigger phrases +- Keep descriptions under 1024 chars +- Test activation reliability + +## Security Considerations + +- Never include secrets in skill files +- Validate all inputs +- Use read-only tools when possible +- Document security requirements diff --git a/skills/skill-adapter/references/examples.md b/skills/skill-adapter/references/examples.md new file mode 100644 index 0000000..b1d8bd2 --- /dev/null +++ b/skills/skill-adapter/references/examples.md @@ -0,0 +1,70 @@ +# Skill Usage Examples + +This document provides practical examples of how to use this skill effectively. + +## Basic Usage + +### Example 1: Simple Activation + +**User Request:** +``` +[Describe trigger phrase here] +``` + +**Skill Response:** +1. Analyzes the request +2. Performs the required action +3. Returns results + +### Example 2: Complex Workflow + +**User Request:** +``` +[Describe complex scenario] +``` + +**Workflow:** +1. Step 1: Initial analysis +2. Step 2: Data processing +3. Step 3: Result generation +4. 
Step 4: Validation + +## Advanced Patterns + +### Pattern 1: Chaining Operations + +Combine this skill with other tools: +``` +Step 1: Use this skill for [purpose] +Step 2: Chain with [other tool] +Step 3: Finalize with [action] +``` + +### Pattern 2: Error Handling + +If issues occur: +- Check trigger phrase matches +- Verify context is available +- Review allowed-tools permissions + +## Tips & Best Practices + +- ✅ Be specific with trigger phrases +- ✅ Provide necessary context +- ✅ Check tool permissions match needs +- ❌ Avoid vague requests +- ❌ Don't mix unrelated tasks + +## Common Issues + +**Issue:** Skill doesn't activate +**Solution:** Use exact trigger phrases from description + +**Issue:** Unexpected results +**Solution:** Check input format and context + +## See Also + +- Main SKILL.md for full documentation +- scripts/ for automation helpers +- assets/ for configuration examples diff --git a/skills/skill-adapter/scripts/README.md b/skills/skill-adapter/scripts/README.md new file mode 100644 index 0000000..26d5d4d --- /dev/null +++ b/skills/skill-adapter/scripts/README.md @@ -0,0 +1,10 @@ +# Scripts + +Bundled resources for the fairdb-operations-kit skill. + +- [ ] `fairdb_provision_vps.sh`: Automates VPS provisioning using the Contabo API. +- [ ] `fairdb_install_postgres.sh`: Installs and configures PostgreSQL 16. +- [ ] `fairdb_setup_backup.sh`: Configures pgBackRest with Wasabi S3 storage. +- [ ] `fairdb_onboard_customer.sh`: Automates the customer onboarding process. +- [ ] `fairdb_health_check.sh`: Performs comprehensive system health verification. +- [ ] `fairdb_emergency_response.sh`: Guides incident response procedures. 
diff --git a/skills/skill-adapter/scripts/helper-template.sh b/skills/skill-adapter/scripts/helper-template.sh new file mode 100755 index 0000000..c4aae90 --- /dev/null +++ b/skills/skill-adapter/scripts/helper-template.sh @@ -0,0 +1,42 @@ +#!/bin/bash +# Helper script template for skill automation +# Customize this for your skill's specific needs + +set -e + +function show_usage() { + echo "Usage: $0 [options]" + echo "" + echo "Options:" + echo " -h, --help Show this help message" + echo " -v, --verbose Enable verbose output" + echo "" +} + +# Parse arguments +VERBOSE=false + +while [[ $# -gt 0 ]]; do + case $1 in + -h|--help) + show_usage + exit 0 + ;; + -v|--verbose) + VERBOSE=true + shift + ;; + *) + echo "Unknown option: $1" + show_usage + exit 1 + ;; + esac +done + +# Your skill logic here +if [ "$VERBOSE" = true ]; then + echo "Running skill automation..." +fi + +echo "✅ Complete" diff --git a/skills/skill-adapter/scripts/validation.sh b/skills/skill-adapter/scripts/validation.sh new file mode 100755 index 0000000..590af58 --- /dev/null +++ b/skills/skill-adapter/scripts/validation.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# Skill validation helper +# Validates skill activation and functionality + +set -e + +echo "🔍 Validating skill..." + +# Check if SKILL.md exists +if [ ! -f "../SKILL.md" ]; then + echo "❌ Error: SKILL.md not found" + exit 1 +fi + +# Validate frontmatter +if ! grep -q "^---$" "../SKILL.md"; then + echo "❌ Error: No frontmatter found" + exit 1 +fi + +# Check required fields +if ! grep -q "^name:" "../SKILL.md"; then + echo "❌ Error: Missing 'name' field" + exit 1 +fi + +if ! grep -q "^description:" "../SKILL.md"; then + echo "❌ Error: Missing 'description' field" + exit 1 +fi + +echo "✅ Skill validation passed"