Initial commit

Zhongwei Li
2025-11-30 08:18:56 +08:00
commit 5ff654ec1a
11 changed files with 382 additions and 0 deletions


@@ -0,0 +1,7 @@
# Assets
Bundled resources for the auto-scaling-configurator skill.
- [ ] config_template.yaml: YAML template for auto-scaling configuration files, providing a starting point for users.
- [ ] example_hpa.yaml: Example Kubernetes Horizontal Pod Autoscaler (HPA) configuration file.
- [ ] example_aws_scaling_policy.json: Example AWS auto-scaling policy configuration file.


@@ -0,0 +1,79 @@
# Auto-Scaling Configuration Template

# --- Global Settings ---
global:
  # Enable or disable auto-scaling globally
  enabled: true
  # Default cooldown period (in seconds) after a scaling event
  cooldown: 300  # seconds (5 minutes)

# --- Application Configuration ---
application:
  name: REPLACE_ME  # Application Name (e.g., web-app, api-server)
  type: web         # Application type (e.g., web, api, worker)
  # Resource limits (CPU, Memory)
  resources:
    cpu:
      min: 0.5   # Minimum CPU units
      max: 4     # Maximum CPU units
    memory:
      min: 512   # Minimum memory in MB
      max: 4096  # Maximum memory in MB

# --- Scaling Policies ---
scaling_policies:
  # --- CPU Utilization Scaling ---
  cpu_utilization:
    enabled: true
    target_utilization: 70    # Target CPU utilization percentage
    scale_up_threshold: 80    # CPU usage percentage to trigger scale-up
    scale_down_threshold: 40  # CPU usage percentage to trigger scale-down
    scale_up_increment: 1     # Number of instances to add during scale-up
    scale_down_decrement: 1   # Number of instances to remove during scale-down

  # --- Memory Utilization Scaling ---
  memory_utilization:
    enabled: true
    target_utilization: 75    # Target Memory utilization percentage
    scale_up_threshold: 85    # Memory usage percentage to trigger scale-up
    scale_down_threshold: 50  # Memory usage percentage to trigger scale-down
    scale_up_increment: 1     # Number of instances to add during scale-up
    scale_down_decrement: 1   # Number of instances to remove during scale-down

  # --- Request Latency Scaling ---
  request_latency:
    enabled: false             # Enable only if latency metrics are available
    target_latency: 200        # Target request latency in milliseconds
    scale_up_threshold: 500    # Latency in milliseconds to trigger scale-up
    scale_down_threshold: 100  # Latency in milliseconds to trigger scale-down
    scale_up_increment: 1      # Number of instances to add during scale-up
    scale_down_decrement: 1    # Number of instances to remove during scale-down

  # --- Custom Metric Scaling ---
  custom_metric:
    enabled: false                         # Enable only if a custom metric is available
    metric_name: YOUR_VALUE_HERE           # Name of the custom metric
    target_value: YOUR_VALUE_HERE          # Target value for the custom metric
    scale_up_threshold: YOUR_VALUE_HERE    # Threshold to trigger scale-up
    scale_down_threshold: YOUR_VALUE_HERE  # Threshold to trigger scale-down
    scale_up_increment: 1                  # Number of instances to add during scale-up
    scale_down_decrement: 1                # Number of instances to remove during scale-down

# --- Infrastructure Configuration ---
infrastructure:
  platform: aws             # Cloud platform (e.g., aws, azure, gcp, on-prem)
  region: us-east-1         # Cloud region
  instance_type: t3.medium  # Instance type for new instances
  min_instances: 1          # Minimum number of instances
  max_instances: 5          # Maximum number of instances

# --- Monitoring Configuration ---
monitoring:
  # Integration with monitoring tools (e.g., CloudWatch, Prometheus)
  # Configure details for your monitoring system here
  # Example:
  type: cloudwatch              # Monitoring system type
  namespace: MyApp              # Monitoring namespace
  metric_prefix: MyAppInstance  # Metric prefix
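
The template is plain YAML, so a configurator script can load it and sanity-check the values before any platform-specific output is generated. Below is a minimal sketch in Python, assuming PyYAML is installed and the template is saved as config_template.yaml; the script name, file path, and specific checks are illustrative, not part of the bundled assets.

```python
# validate_config.py -- illustrative sketch, not part of the bundled assets.
# Assumes PyYAML is installed and the template above is saved as config_template.yaml.
import sys
import yaml


def load_config(path: str) -> dict:
    """Load the auto-scaling template from a YAML file."""
    with open(path, "r", encoding="utf-8") as f:
        return yaml.safe_load(f)


def validate(cfg: dict) -> list:
    """Return a list of human-readable problems found in the configuration."""
    errors = []

    # Required top-level sections from the template.
    for section in ("global", "application", "scaling_policies", "infrastructure"):
        if section not in cfg:
            errors.append(f"missing section: {section}")

    # Instance bounds must be consistent.
    infra = cfg.get("infrastructure", {})
    if infra.get("min_instances", 0) > infra.get("max_instances", 0):
        errors.append("infrastructure.min_instances exceeds max_instances")

    # Thresholds should bracket the target so the policy does not oscillate.
    cpu = cfg.get("scaling_policies", {}).get("cpu_utilization", {})
    if cpu.get("enabled") and not (
        cpu.get("scale_down_threshold", 0)
        < cpu.get("target_utilization", 0)
        < cpu.get("scale_up_threshold", 100)
    ):
        errors.append("cpu_utilization thresholds should bracket target_utilization")

    return errors


if __name__ == "__main__":
    path = sys.argv[1] if len(sys.argv) > 1 else "config_template.yaml"
    problems = validate(load_config(path))
    for problem in problems:
        print(f"WARNING: {problem}")
    sys.exit(1 if problems else 0)
```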


@@ -0,0 +1,70 @@
{
  "_comment": "Example AWS auto scaling policy configuration. A real policy uses a single PolicyType; target tracking and step scaling are shown together here only for illustration.",
  "PolicyName": "WebAppCPUPolicy",
  "PolicyType": "TargetTrackingScaling",
  "TargetTrackingConfiguration": {
    "_comment": "Target CPU utilization to track, plus cooldown periods for scale-out and scale-in.",
    "TargetValue": 70.0,
    "PredefinedMetricSpecification": {
      "PredefinedMetricType": "ASGAverageCPUUtilization"
    },
    "ScaleOutCooldown": 300,
    "ScaleInCooldown": 600
  },
  "_comment_resource": "Resource identifiers for the Auto Scaling group being scaled.",
  "ResourceId": "autoScalingGroupName/my-web-app-asg",
  "ScalableDimension": "autoscaling:autoScalingGroup:DesiredCapacity",
  "ServiceNamespace": "autoscaling",
  "_comment_step": "Alternative step scaling configuration, used when PolicyType is StepScaling instead of TargetTrackingScaling.",
  "StepScalingPolicyConfiguration": {
    "_comment": "AdjustmentType can be ChangeInCapacity, PercentChangeInCapacity, or ExactCapacity; StepAdjustments define how much to scale for each metric interval.",
    "AdjustmentType": "ChangeInCapacity",
    "MinAdjustmentMagnitude": 1,
    "Cooldown": 300,
    "StepAdjustments": [
      {
        "MetricIntervalLowerBound": 0.0,
        "MetricIntervalUpperBound": 10.0,
        "ScalingAdjustment": -1
      },
      {
        "MetricIntervalLowerBound": 70.0,
        "ScalingAdjustment": 1
      }
    ]
  },
  "_comment_alarms": "Optional CloudWatch alarms for a step-scaling variant; target tracking creates and manages its own alarms.",
  "Alarms": [
    {
      "AlarmName": "CPUHigh",
      "MetricName": "CPUUtilization",
      "Namespace": "AWS/EC2",
      "Statistic": "Average",
      "Period": 60,
      "EvaluationPeriods": 5,
      "Threshold": 80.0,
      "ComparisonOperator": "GreaterThanThreshold",
      "AlarmActions": [
        "arn:aws:sns:us-east-1:123456789012:HighCPUAlarm"
      ]
    },
    {
      "AlarmName": "CPULow",
      "MetricName": "CPUUtilization",
      "Namespace": "AWS/EC2",
      "Statistic": "Average",
      "Period": 60,
      "EvaluationPeriods": 5,
      "Threshold": 20.0,
      "ComparisonOperator": "LessThanThreshold",
      "AlarmActions": [
        "arn:aws:sns:us-east-1:123456789012:LowCPUAlarm"
      ]
    }
  ]
}
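
The target-tracking portion of this example maps onto the EC2 Auto Scaling PutScalingPolicy API. A minimal boto3 sketch follows, assuming AWS credentials are configured and an Auto Scaling group named my-web-app-asg already exists; the group name and region are taken from the example and are illustrative.

```python
# put_policy.py -- illustrative sketch, not part of the bundled assets.
# Assumes boto3 is installed, AWS credentials are configured, and an Auto Scaling
# group named "my-web-app-asg" already exists in us-east-1.
import boto3

autoscaling = boto3.client("autoscaling", region_name="us-east-1")

response = autoscaling.put_scaling_policy(
    AutoScalingGroupName="my-web-app-asg",
    PolicyName="WebAppCPUPolicy",
    PolicyType="TargetTrackingScaling",
    TargetTrackingConfiguration={
        "PredefinedMetricSpecification": {
            "PredefinedMetricType": "ASGAverageCPUUtilization"
        },
        "TargetValue": 70.0,      # same target as the example above
        "DisableScaleIn": False,  # allow the policy to remove capacity as well
    },
)
print(response["PolicyARN"])
```

Target tracking creates and manages its own CloudWatch alarms, so the alarm definitions in the example are only needed if a step-scaling variant is configured by hand.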


@@ -0,0 +1,43 @@
# example_hpa.yaml
# This is an example Kubernetes Horizontal Pod Autoscaler (HPA) configuration file.
# It defines how the number of pods in a Deployment (or another scalable workload)
# is automatically adjusted based on observed CPU and memory utilization.
apiVersion: autoscaling/v2  # Use autoscaling/v2 for multiple metrics and behavior tuning
kind: HorizontalPodAutoscaler
metadata:
  name: example-hpa   # The name of the HPA resource
  namespace: default  # The namespace where the HPA is deployed (REPLACE_ME if needed)
spec:
  scaleTargetRef:          # The target resource to scale
    apiVersion: apps/v1    # API version of the target resource
    kind: Deployment       # The kind of resource to scale (e.g., Deployment, StatefulSet)
    name: example-deployment  # The name of the Deployment to scale (REPLACE_ME)
  minReplicas: 2           # The minimum number of replicas to maintain
  maxReplicas: 10          # The maximum number of replicas to scale to
  metrics:                 # The metrics used to drive scaling decisions
    - type: Resource       # Scale based on resource utilization
      resource:
        name: cpu          # The resource to monitor (CPU)
        target:
          type: Utilization       # Target a utilization percentage
          averageUtilization: 70  # Target CPU utilization (70%)
    - type: Resource       # Scale based on memory utilization
      resource:
        name: memory       # The resource to monitor (memory)
        target:
          type: Utilization       # Target a utilization percentage
          averageUtilization: 80  # Target memory utilization (80%)
  behavior:                # Optional: tune scaling behavior
    scaleUp:
      stabilizationWindowSeconds: 300  # Window over which scale-up recommendations are stabilized
      policies:            # Limits on how fast scaling may happen (percent or fixed number of pods)
        - type: Percent    # Scale up by a percentage of current replicas
          value: 20        # At most a 20% increase...
          periodSeconds: 60  # ...per 60-second period
    scaleDown:
      stabilizationWindowSeconds: 300  # Window over which scale-down recommendations are stabilized
      policies:            # Limits on how fast scaling may happen (percent or fixed number of pods)
        - type: Percent    # Scale down by a percentage of current replicas
          value: 10        # At most a 10% decrease...
          periodSeconds: 60  # ...per 60-second period
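
Once the REPLACE_ME placeholders are filled in, the manifest can be applied with `kubectl apply -f example_hpa.yaml`, or programmatically. Below is a minimal sketch using the official Python Kubernetes client, assuming a client version that exposes AutoscalingV2Api, kubeconfig access to the cluster, and that the manifest is saved as example_hpa.yaml; these details are illustrative.

```python
# apply_hpa.py -- illustrative sketch, not part of the bundled assets.
# Assumes the official `kubernetes` Python client (a version with AutoscalingV2Api)
# and that the manifest above is saved as example_hpa.yaml.
from kubernetes import client, config, utils

config.load_kube_config()  # use config.load_incluster_config() when running inside a pod
api_client = client.ApiClient()

# Create the HPA object defined in the manifest.
utils.create_from_yaml(api_client, "example_hpa.yaml")

# Read it back through the autoscaling/v2 API to confirm it exists.
hpa = client.AutoscalingV2Api().read_namespaced_horizontal_pod_autoscaler(
    name="example-hpa", namespace="default"
)
print(hpa.spec.min_replicas, hpa.spec.max_replicas)
```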