Initial commit

2025-11-30 08:19:54 +08:00
commit 67c918fc64
11 changed files with 616 additions and 0 deletions
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -0,0 +1,15 @@
+{
+  "name": "monitoring-stack-deployer",
+  "description": "Deploy monitoring stacks (Prometheus, Grafana, Datadog)",
+  "version": "1.0.0",
+  "author": {
+    "name": "Claude Code Plugins",
+    "email": "[email protected]"
+  },
+  "skills": [
+    "./skills"
+  ],
+  "commands": [
+    "./commands"
+  ]
+}
--- a/README.md
+++ b/README.md
@@ -0,0 +1,3 @@
+# monitoring-stack-deployer
+
+Deploy monitoring stacks (Prometheus, Grafana, Datadog)
--- a/commands/monitor-deploy.md
+++ b/commands/monitor-deploy.md
@@ -0,0 +1,25 @@
+---
+description: Deploy monitoring stacks (Prometheus, Grafana, Datadog)
+---
+
+# Monitoring Stack Deployer
+
+Deploy monitoring stacks (Prometheus, Grafana, Datadog)
+
+## Key Features
+
+- Production-ready configurations
+- Best practices implementation
+- Security-first approach
+- Scalable architecture
+- Comprehensive documentation
+- Multi-platform support
+
+## Example Usage
+
+This plugin generates complete configurations for your DevOps needs.
+Specify your requirements and get production-ready code instantly.
+
+## When Invoked
+
+Generate configurations and setup code based on your specific requirements and infrastructure needs.
--- a/plugin.lock.json
+++ b/plugin.lock.json
@@ -0,0 +1,73 @@
+{
+  "$schema": "internal://schemas/plugin.lock.v1.json",
+  "pluginId": "gh:jeremylongshore/claude-code-plugins-plus:plugins/devops/monitoring-stack-deployer",
+  "normalized": {
+    "repo": null,
+    "ref": "refs/tags/v20251128.0",
+    "commit": "906077f1c052705a79e8c0fea7dd082db8ac8a6b",
+    "treeHash": "75882ab9ae372304934bd1f91f2f94e36c81418adda5a043bcd141a9aff18c60",
+    "generatedAt": "2025-11-28T10:18:35.776603Z",
+    "toolVersion": "publish_plugins.py@0.2.0"
+  },
+  "origin": {
+    "remote": "git@github.com:zhongweili/42plugin-data.git",
+    "branch": "master",
+    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
+    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
+  },
+  "manifest": {
+    "name": "monitoring-stack-deployer",
+    "description": "Deploy monitoring stacks (Prometheus, Grafana, Datadog)",
+    "version": "1.0.0"
+  },
+  "content": {
+    "files": [
+      {
+        "path": "README.md",
+        "sha256": "cc1e6aef7b3e912930c45891026c28612dc931d0d147bc46573caa5a8d8dc838"
+      },
+      {
+        "path": ".claude-plugin/plugin.json",
+        "sha256": "81f2d68d42c22730cca785bda013c6fe374f9b399dbacf637b77a04958abf513"
+      },
+      {
+        "path": "commands/monitor-deploy.md",
+        "sha256": "353f80054a90cda1e6716da3628115ce829307fbbb83a15b64f1d37c96224a99"
+      },
+      {
+        "path": "skills/monitoring-stack-deployer/SKILL.md",
+        "sha256": "a263dfb49bf94d03240d39d48acf04845dfce04143290ed60a9c5fda3a72bad8"
+      },
+      {
+        "path": "skills/monitoring-stack-deployer/references/README.md",
+        "sha256": "00660baca83cc7f54955ac4ef73cb48c009e65f5761686ea4a240af6258d5516"
+      },
+      {
+        "path": "skills/monitoring-stack-deployer/scripts/README.md",
+        "sha256": "e8da841ad87123d378711d00f0a26b453249861ced42d7e94a5e4db1d655a6ae"
+      },
+      {
+        "path": "skills/monitoring-stack-deployer/assets/prometheus_config_template.yml",
+        "sha256": "516a85b113c9273d1519a5a817bfe1ac6721337c7630f0f5cd0e0924256c5348"
+      },
+      {
+        "path": "skills/monitoring-stack-deployer/assets/datadog_agent_config_template.yml",
+        "sha256": "de09213e794646acbe9abb8ec56ddb77df1be8a2bf57f762971f7789f4851b62"
+      },
+      {
+        "path": "skills/monitoring-stack-deployer/assets/README.md",
+        "sha256": "ec88b8a4d0bb790f7d3c2c079c77de2fe87b1af5b8ece2f4d71d795d110078ef"
+      },
+      {
+        "path": "skills/monitoring-stack-deployer/assets/grafana_dashboard_template.json",
+        "sha256": "52c7eb0c2553715ff9a59edc8850b7ce10acaf9dbf0a6b00bf2ee3d314876c8e"
+      }
+    ],
+    "dirSha256": "75882ab9ae372304934bd1f91f2f94e36c81418adda5a043bcd141a9aff18c60"
+  },
+  "security": {
+    "scannedAt": null,
+    "scannerVersion": null,
+    "flags": []
+  }
+}
--- a/skills/monitoring-stack-deployer/SKILL.md
+++ b/skills/monitoring-stack-deployer/SKILL.md
@@ -0,0 +1,52 @@
+---
+name: deploying-monitoring-stacks
+description: |
+  This skill deploys monitoring stacks, including Prometheus, Grafana, and Datadog. It is used when the user needs to set up or configure monitoring infrastructure for applications or systems. The skill generates production-ready configurations, implements best practices, and supports multi-platform deployments. Use this when the user explicitly requests to deploy a monitoring stack, or mentions Prometheus, Grafana, or Datadog in the context of infrastructure setup.
+allowed-tools: Read, Write, Edit, Grep, Glob, Bash
+version: 1.0.0
+---
+
+## Overview
+
+This skill empowers Claude to automate the deployment of comprehensive monitoring solutions. It simplifies the setup of Prometheus, Grafana, and Datadog, ensuring best practices and production-ready configurations.
+
+## How It Works
+
+1. **Configuration Gathering**: Claude gathers the specific requirements for the monitoring stack, including the desired platform and tools.
+2. **Stack Generation**: Based on the requirements, Claude generates the necessary configuration files and deployment scripts for the selected monitoring stack.
+3. **Deployment Instructions**: Claude provides clear, step-by-step instructions for deploying the generated configuration to the target environment.
+
+## When to Use This Skill
+
+This skill activates when you need to:
+- Deploy a new monitoring stack (Prometheus, Grafana, Datadog).
+- Configure an existing monitoring stack.
+- Generate production-ready monitoring configurations.
+
+## Examples
+
+### Example 1: Setting up Prometheus and Grafana on Kubernetes
+
+User request: "I need to set up Prometheus and Grafana on my Kubernetes cluster to monitor my application."
+
+The skill will:
+1. Generate Kubernetes manifests for deploying Prometheus and Grafana.
+2. Provide instructions for configuring Prometheus to scrape application metrics and Grafana to visualize them.
+
+### Example 2: Deploying Datadog Agent
+
+User request: "Deploy Datadog agent to monitor our servers."
+
+The skill will:
+1. Generate configuration files for the Datadog agent based on the target environment.
+2. Provide instructions for installing and configuring the Datadog agent on the specified servers.
+
+## Best Practices
+
+- **Security**: Always follow security best practices when deploying monitoring stacks, including using secure credentials and limiting access to sensitive data.
+- **Scalability**: Design your monitoring stack to be scalable to handle increasing data volumes and traffic.
+- **Documentation**: Thoroughly document your monitoring setup, including configuration details and deployment procedures.
+
+## Integration
+
+This skill works seamlessly with other Claude Code skills for infrastructure provisioning and application deployment. It can be integrated into automated CI/CD pipelines for continuous monitoring.
--- a/skills/monitoring-stack-deployer/assets/README.md
+++ b/skills/monitoring-stack-deployer/assets/README.md
@@ -0,0 +1,9 @@
+# Assets
+
+Bundled resources for monitoring-stack-deployer skill
+
+- [x] prometheus_config_template.yml: Template for Prometheus configuration files.
+- [x] grafana_dashboard_template.json: Template for Grafana dashboard configurations.
+- [x] datadog_agent_config_template.yml: Template for Datadog agent configuration files.
+- [ ] example_k8s_manifests/: Example Kubernetes manifests for deploying the monitoring stack.
+- [ ] example_terraform_configurations/: Example Terraform configurations for deploying the monitoring stack.
--- a/skills/monitoring-stack-deployer/assets/datadog_agent_config_template.yml
+++ b/skills/monitoring-stack-deployer/assets/datadog_agent_config_template.yml
@@ -0,0 +1,90 @@
+# Datadog Agent Configuration Template
+
+# This file provides a template for the Agent's main configuration file (datadog.yaml).
+# Replace placeholders with your specific values.
+
+# Agent Configuration
+hostname: REPLACE_ME_HOSTNAME # The hostname reported to Datadog. Delete this line to let the Agent use the system hostname.
+# tags:  # Optional: List of tags to apply to all metrics sent by this agent.
+#   - env:production
+#   - role:webserver
+
+# API Key
+api_key: YOUR_DATADOG_API_KEY # Your Datadog API key. Required.
+
+# Site
+# site: datadoghq.com  # The Datadog site.  Defaults to datadoghq.com
+
+# Listen Address
+# bind_host: 0.0.0.0  # The host the Agent listens on for DogStatsD and traces. Defaults to localhost
+
+# Log Level
+log_level: info # Valid values: trace, debug, info, warn, error, critical, off
+
+# Log to file
+# log_file: /var/log/datadog/agent.log  # Uncomment to log to a file.  Ensure proper permissions are set.
+
+# Enable/Disable Agent
+# enabled: true  # Defaults to true
+
+# --- Integrations ---
+# NOTE(review): the Agent reads check settings from conf.d/<check_name>.d/conf.yaml
+# (under init_config / instances), not from datadog.yaml. The blocks below are kept
+# as a reference for the values to set; move them into the matching conf.d files
+# before relying on them, and verify the option names against each check's
+# conf.yaml.example.
+
+# Example: System Check
+system_core_check:
+  enabled: true
+  collect_count: true # Collect CPU core counts
+  # use_mount: false # Disable mount point metrics (defaults to true)
+
+# Example: Network Check
+network:
+  enabled: true
+  collect_connection_state: true # Collect TCP connection states
+  excluded_interfaces:
+    - lo # Exclude loopback interface
+
+# Example: Disk Check
+disk:
+  enabled: true
+  all_partitions: false # Only monitor certain partitions
+  partitions:
+    - /
+    - /var
+    - /tmp
+  # use_mount: false # Disable mount point metrics (defaults to true)
+  # excluded_filesystems:  # Optional: List of filesystem types to exclude.
+  #   - tmpfs
+
+# --- Advanced Configuration ---
+
+# Proxy settings (if required)
+# proxy:
+#   http: http://YOUR_PROXY_SERVER:YOUR_PROXY_PORT
+#   https: https://YOUR_PROXY_SERVER:YOUR_PROXY_PORT
+#   no_proxy:  # List of hosts that must bypass the proxy.
+#     - localhost
+#     - 127.0.0.1
+#     - YOUR_INTERNAL_DOMAIN
+
+# DogStatsD Configuration (for custom metrics)
+dogstatsd_port: 8125 # UDP port for DogStatsD
+# dogstatsd_non_local_traffic: true # Enable for non-local traffic (security implications!)
+
+# --- Autodiscovery ---
+# For containerized environments (Docker, Kubernetes)
+# listeners:
+#   - name: docker
+#   - name: kubelet
+
+# Example: Custom check
+# confd_path: /etc/datadog-agent/conf.d
+
+# --- Process Agent ---
+# process_config:
+#   enabled: true # Enable process collection
+#   process_collection:
+#     enabled: true
+#   container_collection:
+#     enabled: true
+#   process_discovery:
+#     enabled: false # Disable process discovery by default for security
+
+# --- Security Agent ---
+# security_agent:
+#   enabled: false # Disable security agent by default
--- a/skills/monitoring-stack-deployer/assets/grafana_dashboard_template.json
+++ b/skills/monitoring-stack-deployer/assets/grafana_dashboard_template.json
@@ -0,0 +1,258 @@
+{
+  "_comment": "Grafana Dashboard Template for Monitoring Stack Deployer",
+  "dashboard": {
+    "annotations": {
+      "list": []
+    },
+    "editable": true,
+    "gnetId": null,
+    "graphTooltip": 0,
+    "id": null,
+    "links": [],
+    "panels": [
+      {
+        "_comment": "Example: CPU Usage Panel",
+        "aliasColors": {},
+        "bars": false,
+        "dashLength": 10,
+        "dashes": false,
+        "datasource": "${DS_PROMETHEUS}",
+        "decimals": 2,
+        "fill": 1,
+        "fillGradient": 0,
+        "gridPos": {
+          "h": 8,
+          "w": 12,
+          "x": 0,
+          "y": 0
+        },
+        "hiddenSeries": false,
+        "id": 1,
+        "interval": null,
+        "legend": {
+          "avg": true,
+          "current": true,
+          "max": true,
+          "min": true,
+          "show": true,
+          "total": false,
+          "values": true
+        },
+        "lines": true,
+        "linewidth": 1,
+        "nullPointMode": "null",
+        "options": {
+          "dataLinks": []
+        },
+        "percentage": false,
+        "pluginVersion": "7.5.7",
+        "pointradius": 2,
+        "points": false,
+        "renderer": "flot",
+        "seriesOverrides": [],
+        "spaceLength": 10,
+        "stack": false,
+        "steppedLine": false,
+        "targets": [
+          {
+            "expr": "rate(process_cpu_seconds_total[5m])",
+            "instant": false,
+            "legendFormat": "{{instance}}",
+            "refId": "A"
+          }
+        ],
+        "thresholds": [],
+        "timeFrom": null,
+        "timeRegions": [],
+        "timeShift": null,
+        "title": "CPU Usage",
+        "tooltip": {
+          "shared": true,
+          "sort": 0,
+          "value_type": "individual"
+        },
+        "type": "graph",
+        "xaxis": {
+          "buckets": null,
+          "mode": "time",
+          "name": null,
+          "show": true,
+          "values": []
+        },
+        "yaxes": [
+          {
+            "decimals": 2,
+            "format": "percentunit",
+            "label": null,
+            "logBase": 1,
+            "max": null,
+            "min": "0",
+            "show": true
+          },
+          {
+            "format": "short",
+            "label": null,
+            "logBase": 1,
+            "max": null,
+            "min": null,
+            "show": true
+          }
+        ]
+      },
+      {
+        "_comment": "Example: Memory Usage Panel",
+        "aliasColors": {},
+        "bars": false,
+        "dashLength": 10,
+        "dashes": false,
+        "datasource": "${DS_PROMETHEUS}",
+        "decimals": 2,
+        "fill": 1,
+        "fillGradient": 0,
+        "gridPos": {
+          "h": 8,
+          "w": 12,
+          "x": 12,
+          "y": 0
+        },
+        "hiddenSeries": false,
+        "id": 2,
+        "interval": null,
+        "legend": {
+          "avg": true,
+          "current": true,
+          "max": true,
+          "min": true,
+          "show": true,
+          "total": false,
+          "values": true
+        },
+        "lines": true,
+        "linewidth": 1,
+        "nullPointMode": "null",
+        "options": {
+          "dataLinks": []
+        },
+        "percentage": false,
+        "pluginVersion": "7.5.7",
+        "pointradius": 2,
+        "points": false,
+        "renderer": "flot",
+        "seriesOverrides": [],
+        "spaceLength": 10,
+        "stack": false,
+        "steppedLine": false,
+        "targets": [
+          {
+            "expr": "sum(container_memory_usage_bytes) by (instance)",
+            "instant": false,
+            "legendFormat": "{{instance}}",
+            "refId": "A"
+          }
+        ],
+        "thresholds": [],
+        "timeFrom": null,
+        "timeRegions": [],
+        "timeShift": null,
+        "title": "Memory Usage",
+        "tooltip": {
+          "shared": true,
+          "sort": 0,
+          "value_type": "individual"
+        },
+        "type": "graph",
+        "xaxis": {
+          "buckets": null,
+          "mode": "time",
+          "name": null,
+          "show": true,
+          "values": []
+        },
+        "yaxes": [
+          {
+            "format": "bytes",
+            "label": null,
+            "logBase": 1,
+            "max": null,
+            "min": "0",
+            "show": true
+          },
+          {
+            "format": "short",
+            "label": null,
+            "logBase": 1,
+            "max": null,
+            "min": null,
+            "show": true
+          }
+        ]
+      }
+    ],
+    "refresh": "5s",
+    "schemaVersion": 26,
+    "style": "dark",
+    "tags": [
+      "monitoring"
+    ],
+    "templating": {
+      "list": [
+        {
+          "_comment": "Example Prometheus Datasource Variable",
+          "current": {
+            "selected": false,
+            "text": "Prometheus",
+            "value": "Prometheus"
+          },
+          "datasource": null,
+          "definition": "Prometheus",
+          "hide": 0,
+          "includeAll": false,
+          "label": "Prometheus",
+          "multi": false,
+          "name": "DS_PROMETHEUS",
+          "options": [],
+          "query": "prometheus",
+          "refresh": 1,
+          "regex": "",
+          "sort": 0,
+          "tagValuesQuery": "",
+          "tagsQuery": "",
+          "type": "datasource"
+        }
+      ]
+    },
+    "time": {
+      "from": "now-1h",
+      "to": "now"
+    },
+    "timepicker": {
+      "refresh_intervals": [
+        "5s",
+        "10s",
+        "30s",
+        "1m",
+        "5m",
+        "15m",
+        "30m",
+        "1h",
+        "2h",
+        "1d"
+      ],
+      "time_options": [
+        "5m",
+        "15m",
+        "1h",
+        "6h",
+        "12h",
+        "24h",
+        "2d",
+        "7d",
+        "30d"
+      ]
+    },
+    "timezone": "",
+    "title": "Monitoring Dashboard",
+    "uid": "example-dashboard",
+    "version": 1
+  }
+}
--- a/skills/monitoring-stack-deployer/assets/prometheus_config_template.yml
+++ b/skills/monitoring-stack-deployer/assets/prometheus_config_template.yml
@@ -0,0 +1,73 @@
+# Prometheus server configuration template.
+
+# Defaults shared by every scrape job.
+global:
+  # Time between scrapes of each target; tune to your needs.
+  scrape_interval: 30s
+  # Time between rule evaluations; tune to your needs.
+  evaluation_interval: 30s
+  # scrape_timeout is left at the global default (10s).
+
+  # Labels that identify this Prometheus instance.
+  external_labels:
+    # Placeholder: name of this Prometheus instance.
+    monitor: "REPLACE_ME_MONITORING_INSTANCE"
+
+# Files containing alerting rules.
+rule_files:
+  # - "prometheus.rules"  # Example: uncomment and point at your own rules file.
+
+# Targets to scrape.
+scrape_configs:
+  # Prometheus scraping itself, useful for watching its own health.
+  # metrics_path ('/metrics') and scheme ('http') are left at their defaults.
+  - job_name: "prometheus"
+    static_configs:
+      - targets:
+          - "localhost:9090"  # Default Prometheus port; change if needed.
+
+  # node_exporter: system-level metrics.
+  - job_name: "node_exporter"
+    static_configs:
+      - targets:
+          - "REPLACE_ME_NODE_EXPORTER_ADDRESS:9100"  # Replace with the actual node_exporter address and port.
+
+  # cadvisor: container-level metrics.
+  - job_name: "cadvisor"
+    static_configs:
+      - targets:
+          - "REPLACE_ME_CADVISOR_ADDRESS:8080"  # Replace with the actual cadvisor address and port.
+
+  # Kubernetes pods selected through prometheus.io/* annotations.
+  - job_name: "kubernetes-pods"
+    kubernetes_sd_configs:
+      - role: pod
+
+    relabel_configs:
+      # Keep only pods whose prometheus.io/scrape annotation matches "true".
+      - action: keep
+        source_labels:
+          - __meta_kubernetes_pod_annotation_prometheus_io_scrape
+        regex: true
+      # Use the prometheus.io/path annotation, when present, as the metrics path.
+      - action: replace
+        source_labels:
+          - __meta_kubernetes_pod_annotation_prometheus_io_path
+        regex: (.+)
+        target_label: __metrics_path__
+      # Rewrite the scrape address to use the port from the prometheus.io/port annotation.
+      - action: replace
+        source_labels:
+          - __address__
+          - __meta_kubernetes_pod_annotation_prometheus_io_port
+        regex: ([^:]+)(?::\d+)?;(\d+)
+        replacement: $1:$2
+        target_label: __address__
+      # Map prometheus.io/param_<name> annotations to __param_<name> labels.
+      - action: labelmap
+        regex: __meta_kubernetes_pod_annotation_prometheus_io_param_(.+)
+        replacement: __param_$1
+      # Copy the pod's namespace into a "namespace" label.
+      - action: replace
+        source_labels:
+          - __meta_kubernetes_namespace
+        target_label: namespace
+      # Copy the pod's name into a "pod" label.
+      - source_labels:
+          - __meta_kubernetes_pod_name
+        target_label: pod
+
+# Where to send alerts.
+alerting:
+  alertmanagers:
+    - static_configs:
+        - targets:
+            - "REPLACE_ME_ALERTMANAGER_ADDRESS:9093"  # Replace with your Alertmanager address.
+
+# Optional: forward metrics to a remote endpoint (e.g., Grafana Cloud, Cortex).
+# remote_write:
+#   - url: "YOUR_VALUE_HERE" # Example: Grafana Cloud remote write endpoint.
+#     basic_auth:
+#       username: "YOUR_VALUE_HERE" # Example: Grafana Cloud username.
+#       password: "YOUR_VALUE_HERE" # Example: Grafana Cloud API key.
--- a/skills/monitoring-stack-deployer/references/README.md
+++ b/skills/monitoring-stack-deployer/references/README.md
@@ -0,0 +1,9 @@
+# References
+
+Bundled resources for monitoring-stack-deployer skill
+
+- [ ] prometheus_config_best_practices.md: Provides best practices for configuring Prometheus.
+- [ ] grafana_dashboard_best_practices.md: Provides best practices for creating Grafana dashboards.
+- [ ] datadog_agent_best_practices.md: Provides best practices for configuring the Datadog agent.
+- [ ] monitoring_stack_architecture.md: Describes the overall architecture of the monitoring stack.
+- [ ] supported_platforms.md: Lists the supported platforms for deployment (e.g., Kubernetes, AWS, Azure, GCP).
--- a/skills/monitoring-stack-deployer/scripts/README.md
+++ b/skills/monitoring-stack-deployer/scripts/README.md
@@ -0,0 +1,9 @@
+# Scripts
+
+Bundled resources for monitoring-stack-deployer skill
+
+- [ ] deploy_prometheus.sh: Automates Prometheus deployment and configuration.
+- [ ] deploy_grafana.sh: Automates Grafana deployment and configuration.
+- [ ] deploy_datadog_agent.sh: Automates Datadog agent deployment and configuration.
+- [ ] validate_config.py: Validates the generated configuration files for Prometheus, Grafana, and Datadog.
+- [ ] rollback_deployment.sh: Provides a rollback mechanism to revert to a previous configuration.