Initial commit
.claude-plugin/plugin.json (new file, 11 lines)
@@ -0,0 +1,11 @@
{
  "name": "iac-terraform",
  "description": "Infrastructure as Code with Terraform and Terragrunt. Use for creating, validating, troubleshooting, and managing Terraform configurations, modules, and state. Covers Terraform workflows, best practices, module development, state management, Terragrunt patterns, and common issue resolution.",
  "version": "1.0.0",
  "author": {
    "name": "Ahmad Asmar"
  },
  "skills": [
    "./skills"
  ]
}
README.md (new file, 3 lines)
@@ -0,0 +1,3 @@
# iac-terraform

Infrastructure as Code with Terraform and Terragrunt. Use for creating, validating, troubleshooting, and managing Terraform configurations, modules, and state. Covers Terraform workflows, best practices, module development, state management, Terragrunt patterns, and common issue resolution.
plugin.lock.json (new file, 85 lines)
@@ -0,0 +1,85 @@
{
  "$schema": "internal://schemas/plugin.lock.v1.json",
  "pluginId": "gh:ahmedasmar/devops-claude-skills:iac-terraform",
  "normalized": {
    "repo": null,
    "ref": "refs/tags/v20251128.0",
    "commit": "ef35d7225d53657284e4421b6f4afd1ea018ae4f",
    "treeHash": "ec9262ed7506c133df183d9d41d069e7bee2b50322269a241a2677eb1e1ce4ea",
    "generatedAt": "2025-11-28T10:13:02.517015Z",
    "toolVersion": "publish_plugins.py@0.2.0"
  },
  "origin": {
    "remote": "git@github.com:zhongweili/42plugin-data.git",
    "branch": "master",
    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
  },
  "manifest": {
    "name": "iac-terraform",
    "description": "Infrastructure as Code with Terraform and Terragrunt. Use for creating, validating, troubleshooting, and managing Terraform configurations, modules, and state. Covers Terraform workflows, best practices, module development, state management, Terragrunt patterns, and common issue resolution.",
    "version": "1.0.0"
  },
  "content": {
    "files": [
      {
        "path": "README.md",
        "sha256": "1dbc09c34503cc9ffa64944b47c8e463b2650c71f052eefa9ecaf521b5775739"
      },
      {
        "path": ".claude-plugin/plugin.json",
        "sha256": "2ed43cf8e3b5308fef8f392b7d4e25af9483817c273d36b82b34153ee6e89f00"
      },
      {
        "path": "skills/SKILL.md",
        "sha256": "5526132ddf44b78c057fbbcf7be324ac0492d2ca4a418873cf0ae0445034f0f0"
      },
      {
        "path": "skills/references/troubleshooting.md",
        "sha256": "c82b631b347b6008328f92a765e147edaf575152045a0fff24728480bd1ffb8d"
      },
      {
        "path": "skills/references/cost_optimization.md",
        "sha256": "9f7a61bbd4da91dc42251341bb2ad6e8c153eb8fe297209a27333b15e295e3af"
      },
      {
        "path": "skills/references/best_practices.md",
        "sha256": "6a99937e9af783588fcfd91671ee069831e7d3bb6914411dc918bb77c29a718b"
      },
      {
        "path": "skills/scripts/inspect_state.py",
        "sha256": "77e36f4d1f5f71e8a0b7ee701b15143108c51c738c7c7ffcd024af3d306b6962"
      },
      {
        "path": "skills/scripts/validate_module.py",
        "sha256": "a9780462cf52450718fef743d6784cdb519aee3815cbc742655f0cd487ab4791"
      },
      {
        "path": "skills/scripts/init_module.py",
        "sha256": "5ea86e54cdbedb963d63fe7a7bc25f7a3d33ef8effbabad5d3f81c74048e3f24"
      },
      {
        "path": "skills/assets/workflows/gitlab-ci-terraform.yml",
        "sha256": "6e9cfadd7d9d529d3bda4f60d87f82d0b1c990bd31be8be1ff7717bc49e14188"
      },
      {
        "path": "skills/assets/workflows/github-actions-terragrunt.yml",
        "sha256": "1e5207d1b711f76e359a4fa573b70ab20d7a4ffe7a74df6defa769482d06a5ef"
      },
      {
        "path": "skills/assets/workflows/github-actions-terraform.yml",
        "sha256": "6d4902021aea6e8addb58d22c9fec2446400a494f6ee5c5407f9cde5c1bf5e5b"
      },
      {
        "path": "skills/assets/templates/MODULE_TEMPLATE.md",
        "sha256": "ecec0831c41a49a3dbe3ffcea639a14d5ec9a5a6a374a52e80fcf1866a6944a6"
      }
    ],
    "dirSha256": "ec9262ed7506c133df183d9d41d069e7bee2b50322269a241a2677eb1e1ce4ea"
  },
  "security": {
    "scannedAt": null,
    "scannerVersion": null,
    "flags": []
  }
}
skills/SKILL.md (new file, 653 lines)
@@ -0,0 +1,653 @@
---
name: iac-terraform
description: Infrastructure as Code with Terraform and Terragrunt. Use for creating, validating, troubleshooting, and managing Terraform configurations, modules, and state. Covers Terraform workflows, best practices, module development, state management, Terragrunt patterns, and common issue resolution.
---

# Infrastructure as Code - Terraform & Terragrunt

Comprehensive guidance for infrastructure as code using Terraform and Terragrunt, from development through production deployment.

## When to Use This Skill

Use this skill when:
- Writing or refactoring Terraform configurations
- Creating reusable Terraform modules
- Troubleshooting Terraform/Terragrunt errors
- Managing Terraform state
- Implementing IaC best practices
- Setting up Terragrunt project structure
- Reviewing infrastructure code
- Debugging plan/apply issues

## Core Workflows

### 1. New Infrastructure Development

**Workflow Decision Tree:**

```
Is this reusable across environments/projects?
├─ Yes → Create a Terraform module
│        └─ See "Creating Terraform Modules" below
└─ No  → Create environment-specific configuration
         └─ See "Environment Configuration" below
```

#### Creating Terraform Modules

When building reusable infrastructure:

1. **Scaffold new module with script:**
   ```bash
   python3 scripts/init_module.py my-module-name
   ```

   This automatically creates:
   - Standard module file structure
   - Template files with proper formatting
   - Examples directory
   - README with documentation

2. **Use module template structure:**
   - See `assets/templates/MODULE_TEMPLATE.md` for complete structure
   - Required files: `main.tf`, `variables.tf`, `outputs.tf`, `versions.tf`, `README.md`
   - Recommended: `examples/` directory with working examples

3. **Follow module best practices** (see the sketch after this procedure):
   - Single responsibility - one module, one purpose
   - Sensible defaults for optional variables
   - Complete descriptions for all variables and outputs
   - Input validation using `validation` blocks
   - Mark sensitive values with `sensitive = true`

4. **Validate module:**
   ```bash
   python3 scripts/validate_module.py /path/to/module
   ```

   This checks for:
   - Required files present
   - Variables have descriptions and types
   - Outputs have descriptions
   - README exists and is complete
   - Naming conventions followed
   - Sensitive values properly marked

5. **Test module:**
   ```bash
   cd examples/complete
   terraform init
   terraform plan
   ```

6. **Document module:**
   - Use terraform-docs to auto-generate: `terraform-docs markdown . > README.md`
   - Include usage examples
   - Document all inputs and outputs
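The practices in step 3 look like this in a module's `variables.tf` (a minimal sketch; the variable names are illustrative and not part of the scaffold generated by `init_module.py`):

```hcl
variable "environment" {
  description = "Deployment environment for the resources created by this module"
  type        = string
  default     = "dev" # sensible default for an optional variable

  validation {
    condition     = contains(["dev", "staging", "prod"], var.environment)
    error_message = "Environment must be one of: dev, staging, prod."
  }
}

variable "db_password" {
  description = "Master password passed to the database resource"
  type        = string
  sensitive   = true # keeps the value out of plan output and logs
}
```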
**Key Module Patterns:**

See `references/best_practices.md` "Module Design" section for:
- Composability patterns
- Variable organization
- Output design
- Module versioning strategies

#### Environment Configuration

For environment-specific infrastructure:

1. **Structure by environment:**
   ```
   environments/
   ├── dev/
   ├── staging/
   └── prod/
   ```

2. **Use consistent file organization:**
   ```
   environment/
   ├── main.tf              # Resource definitions
   ├── variables.tf         # Variable declarations
   ├── terraform.tfvars     # Default values (committed)
   ├── secrets.auto.tfvars  # Sensitive values (.gitignore)
   ├── backend.tf           # State configuration
   ├── outputs.tf           # Output values
   └── versions.tf          # Version constraints
   ```

3. **Reference modules:**
   ```hcl
   module "vpc" {
     source = "git::https://github.com/company/terraform-modules.git//vpc?ref=v1.2.0"

     name        = "${var.environment}-vpc"
     vpc_cidr    = var.vpc_cidr
     environment = var.environment
   }
   ```
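Step 2 above assumes `secrets.auto.tfvars` and generated files never reach version control; a minimal `.gitignore` for such an environment directory might look like this (entries beyond the files named above are common additions, not requirements of the skill):

```
# Local Terraform working directory and state
.terraform/
*.tfstate
*.tfstate.backup
crash.log

# Sensitive variable files
secrets.auto.tfvars

# Saved plan files
tfplan
```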
### 2. State Management & Inspection

**When to inspect state:**
- Before major changes
- Investigating drift
- Debugging resource issues
- Auditing infrastructure

**Inspect state and check health:**
```bash
python3 scripts/inspect_state.py /path/to/terraform/directory
```

**Check for drift:**
```bash
python3 scripts/inspect_state.py /path/to/terraform/directory --check-drift
```

The script provides:
- Resource count and types
- Backend configuration
- Provider versions
- Issues with resources (tainted, etc.)
- Drift detection (if requested)
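If the helper script is not available, plain Terraform gives a similar signal: `-detailed-exitcode` makes `terraform plan` return exit code 2 whenever there are pending changes, including drift from manual modifications (a sketch, not part of the skill's scripts):

```bash
# Exit codes: 0 = no changes, 1 = error, 2 = changes/drift detected
terraform plan -detailed-exitcode >/dev/null
case $? in
  0) echo "No pending changes detected" ;;
  2) echo "Changes or drift detected - review with: terraform plan" ;;
  *) echo "Plan failed - check configuration and credentials" ;;
esac
```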
**Manual state operations:**
```bash
# List all resources
terraform state list

# Show specific resource
terraform state show aws_instance.web

# Remove from state (doesn't destroy)
terraform state rm aws_instance.web

# Move/rename resource
terraform state mv aws_instance.web aws_instance.web_server

# Import existing resource
terraform import aws_instance.web i-1234567890abcdef0
```

**State best practices:** See `references/best_practices.md` "State Management" section for:
- Remote backend setup (S3 + DynamoDB)
- State file organization strategies
- Encryption and security
- Backup and recovery procedures

### 3. Standard Terraform Workflow

```bash
# 1. Initialize (first time or after module changes)
terraform init

# 2. Format code
terraform fmt -recursive

# 3. Validate syntax
terraform validate

# 4. Plan changes (always review!)
terraform plan -out=tfplan

# 5. Apply changes
terraform apply tfplan

# 6. Verify outputs
terraform output
```

**With Terragrunt:**
```bash
# Run for single module
terragrunt plan
terragrunt apply

# Run for all modules in directory tree
terragrunt run-all plan
terragrunt run-all apply
```

### 4. Troubleshooting Issues

When encountering errors:

1. **Read the complete error message** - Don't skip details

2. **Check common issues:** See `references/troubleshooting.md` for:
   - State lock errors
   - State drift/corruption
   - Provider authentication failures
   - Resource errors (already exists, dependency errors, timeouts)
   - Module source issues
   - Terragrunt-specific issues (dependency cycles, hooks)
   - Performance problems

3. **Enable debug logging if needed:**
   ```bash
   export TF_LOG=DEBUG
   export TF_LOG_PATH=terraform-debug.log
   terraform plan
   ```

4. **Isolate the problem:**
   ```bash
   # Test specific resource
   terraform plan -target=aws_instance.web
   terraform apply -target=aws_instance.web
   ```

5. **Common quick fixes:**

   **State locked:**
   ```bash
   # Verify no one else is running, then:
   terraform force-unlock <lock-id>
   ```

   **Provider cache issues:**
   ```bash
   rm -rf .terraform
   terraform init -upgrade
   ```

   **Module cache issues:**
   ```bash
   rm -rf .terraform/modules
   terraform init
   ```

### 5. Code Review & Quality

**Before committing:**

1. **Format code:**
   ```bash
   terraform fmt -recursive
   ```

2. **Validate syntax:**
   ```bash
   terraform validate
   ```

3. **Lint with tflint:**
   ```bash
   tflint --module
   ```

4. **Security scan with checkov:**
   ```bash
   checkov -d .
   ```

5. **Validate modules:**
   ```bash
   python3 scripts/validate_module.py modules/vpc
   ```

6. **Generate documentation:**
   ```bash
   terraform-docs markdown modules/vpc > modules/vpc/README.md
   ```
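These six checks can be bundled into one local script so nothing is skipped before a commit; a minimal sketch (the module path is a placeholder, and it assumes terraform, tflint, checkov, and terraform-docs are installed):

```bash
#!/usr/bin/env bash
# Run the standard pre-commit checks for a Terraform module directory.
set -euo pipefail

MODULE_DIR="${1:-.}"   # directory to check, defaults to the current one

terraform -chdir="$MODULE_DIR" fmt -check -recursive
terraform -chdir="$MODULE_DIR" init -backend=false -input=false >/dev/null
terraform -chdir="$MODULE_DIR" validate

(cd "$MODULE_DIR" && tflint --init && tflint -f compact)
checkov -d "$MODULE_DIR" --framework terraform

python3 scripts/validate_module.py "$MODULE_DIR"
terraform-docs markdown "$MODULE_DIR" > "$MODULE_DIR/README.md"

echo "All pre-commit checks passed for $MODULE_DIR"
```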
**Review checklist:**
- [ ] All variables have descriptions
- [ ] Sensitive values marked as sensitive
- [ ] Outputs have descriptions
- [ ] Resources follow naming conventions
- [ ] No hardcoded values (use variables)
- [ ] README is complete and current
- [ ] Examples directory exists and works
- [ ] Version constraints specified
- [ ] Security best practices followed

See `references/best_practices.md` for comprehensive guidelines.

## Terragrunt Patterns

### Project Structure

```
terragrunt-project/
├── terragrunt.hcl          # Root config
├── account.hcl             # Account-level vars
├── region.hcl              # Region-level vars
└── environments/
    ├── dev/
    │   ├── env.hcl         # Environment vars
    │   └── us-east-1/
    │       ├── vpc/
    │       │   └── terragrunt.hcl
    │       └── eks/
    │           └── terragrunt.hcl
    └── prod/
        └── us-east-1/
            ├── vpc/
            └── eks/
```

### Dependency Management

```hcl
# In eks/terragrunt.hcl
dependency "vpc" {
  config_path = "../vpc"

  # Mock outputs for plan/validate
  mock_outputs = {
    vpc_id     = "vpc-mock"
    subnet_ids = ["subnet-mock"]
  }
  mock_outputs_allowed_terraform_commands = ["validate", "plan"]
}

inputs = {
  vpc_id     = dependency.vpc.outputs.vpc_id
  subnet_ids = dependency.vpc.outputs.private_subnet_ids
}
```

### Common Patterns

See `assets/templates/MODULE_TEMPLATE.md` for complete Terragrunt configuration templates including:
- Root terragrunt.hcl with provider generation
- Remote state configuration
- Module-level terragrunt.hcl patterns
- Dependency handling

## Reference Documentation

### references/best_practices.md

Comprehensive best practices covering:
- **Project Structure** - Recommended directory layouts
- **State Management** - Remote state, locking, organization
- **Module Design** - Single responsibility, composability, versioning
- **Variable Management** - Declarations, files hierarchy, secrets
- **Resource Naming** - Conventions and standards
- **Security Practices** - Least privilege, encryption, secret management
- **Testing & Validation** - Tools and approaches
- **CI/CD Integration** - Pipeline patterns

Read this when:
- Setting up new Terraform projects
- Establishing team standards
- Designing reusable modules
- Implementing security controls
- Setting up CI/CD pipelines

### references/troubleshooting.md

Detailed troubleshooting guide for:
- **State Issues** - Lock errors, drift, corruption
- **Provider Issues** - Version conflicts, authentication
- **Resource Errors** - Already exists, dependencies, timeouts
- **Module Issues** - Source not found, version conflicts
- **Terragrunt Specific** - Dependency cycles, hooks
- **Performance Issues** - Slow plans, optimization strategies

Read this when:
- Encountering specific error messages
- Investigating unexpected behavior
- Debugging failed deployments
- Performance tuning

Each issue includes:
- Symptom description
- Common causes
- Step-by-step resolution
- Prevention strategies

### references/cost_optimization.md

Cloud cost optimization strategies for Terraform-managed infrastructure:
- **Right-Sizing Resources** - Compute, database, and storage optimization
- **Spot and Reserved Instances** - Cost-effective instance strategies
- **Storage Optimization** - S3 lifecycle policies, EBS volume types
- **Networking Costs** - VPC endpoints, data transfer optimization
- **Resource Lifecycle** - Scheduled shutdown, cleanup automation
- **Cost Tagging** - Comprehensive tagging for cost allocation
- **Monitoring and Alerts** - Budget alerts, anomaly detection
- **Multi-Cloud** - Azure, GCP cost optimization patterns

Read this when:
- Planning infrastructure to minimize costs
- Conducting cost reviews or optimization initiatives
- Implementing auto-scaling and scheduling
- Setting up cost monitoring and alerts
- Designing cost-effective architectures

## CI/CD Workflows

Ready-to-use CI/CD pipeline templates in `assets/workflows/`:

### github-actions-terraform.yml

Complete GitHub Actions workflow including:
- Terraform validation and formatting checks
- TFLint linting
- Checkov security scanning
- Terraform plan on PRs with comment posting
- Terraform apply on main branch with approval
- OIDC authentication support

### github-actions-terragrunt.yml

Terragrunt-specific workflow featuring:
- Changed module detection
- Multi-module parallel planning
- Run-all commands
- Dependency-aware apply ordering
- Manual workflow dispatch with environment selection

### gitlab-ci-terraform.yml

GitLab CI/CD pipeline with:
- Multi-stage pipeline (validate, lint, security, plan, apply)
- Artifact management
- Manual deployment gates
- Multi-environment configuration examples

Use these templates as starting points for your CI/CD pipelines. Customize based on your:
- Cloud provider and authentication method
- Repository structure
- Team approval workflows
- Environment promotion strategy

## Scripts

### init_module.py

Scaffolds a new Terraform module with proper structure and template files.

**Usage:**
```bash
# Create module in current directory
python3 scripts/init_module.py my-vpc

# Create in specific path
python3 scripts/init_module.py my-vpc --path ./modules

# Get JSON output
python3 scripts/init_module.py my-vpc --json
```

**Creates:**
- `main.tf` - Resource definitions with TODO placeholders
- `variables.tf` - Input variables with validation examples
- `outputs.tf` - Output values with descriptions
- `versions.tf` - Terraform and provider version constraints
- `README.md` - Module documentation template
- `examples/complete/` - Complete usage example

**Use when:**
- Starting a new Terraform module
- Ensuring consistent module structure across team
- Quickly bootstrapping module development
- Teaching module best practices

### inspect_state.py

Comprehensive state inspection and health check.

**Usage:**
```bash
# Basic inspection
python3 scripts/inspect_state.py /path/to/terraform

# Include drift detection
python3 scripts/inspect_state.py /path/to/terraform --check-drift
```

**Provides:**
- State health status
- Resource counts and types
- Provider versions
- Backend configuration
- Resource issues (tainted, etc.)
- Configuration drift detection (optional)
- Actionable recommendations

**Use when:**
- Before major infrastructure changes
- Investigating resource issues
- Auditing infrastructure state
- Detecting configuration drift

### validate_module.py

Validates Terraform modules against best practices.

**Usage:**
```bash
python3 scripts/validate_module.py /path/to/module
```

**Checks:**
- Required files present (main.tf, variables.tf, outputs.tf)
- Variable descriptions and types
- Output descriptions
- Sensitive value handling
- README completeness
- Version constraints
- Example configurations
- Naming conventions
- Hard-coded values that should be variables

**Returns:**
- Issues (must fix)
- Warnings (should fix)
- Suggestions (consider)

**Use when:**
- Creating new modules
- Reviewing module code
- Before releasing module versions
- Establishing quality standards

## Assets

### templates/MODULE_TEMPLATE.md

Complete Terraform module template including:
- File-by-file structure and examples
- main.tf patterns
- variables.tf with validation
- outputs.tf best practices
- versions.tf constraints
- README.md template
- Example usage configurations
- Terragrunt configuration templates

**Use this when:**
- Creating new modules from scratch
- Standardizing module structure
- Onboarding team members
- Establishing module conventions

## Quick Reference

### Essential Commands

```bash
# Initialize
terraform init
terraform init -upgrade        # Update providers

# Validate
terraform validate
terraform fmt -recursive

# Plan
terraform plan
terraform plan -out=tfplan

# Apply
terraform apply
terraform apply tfplan
terraform apply -auto-approve  # CI/CD only

# State
terraform state list
terraform state show <resource>
terraform state rm <resource>
terraform state mv <old> <new>

# Import
terraform import <resource_address> <resource_id>

# Destroy
terraform destroy
terraform destroy -target=<resource>

# Outputs
terraform output
terraform output <output_name>
```

### Terragrunt Commands

```bash
# Single module
terragrunt init
terragrunt plan
terragrunt apply

# All modules
terragrunt run-all plan
terragrunt run-all apply
terragrunt run-all destroy

# With specific modules
terragrunt run-all apply --terragrunt-include-dir vpc --terragrunt-include-dir eks
```

## Best Practices Summary

**Always:**
- Use remote state with locking
- Plan before apply (review changes)
- Pin Terraform and provider versions
- Use modules for reusable components
- Mark sensitive values as sensitive
- Document everything
- Test in non-production first

**Never:**
- Commit secrets or credentials
- Manually edit state files
- Use root AWS credentials
- Skip code review for production changes
- Deploy without testing
- Ignore security scan findings

**Key Principles:**
- Infrastructure as code (everything in version control)
- DRY (Don't Repeat Yourself) - use modules
- Immutable infrastructure
- Environment parity (dev/staging/prod similar)
- Security by default
- Document for future you
skills/assets/templates/MODULE_TEMPLATE.md (new file, 386 lines)
@@ -0,0 +1,386 @@
# Terraform Module Template

This directory contains templates for creating well-structured Terraform modules.

## Module Structure

```
module-name/
├── main.tf          # Primary resource definitions
├── variables.tf     # Input variables
├── outputs.tf       # Output values
├── versions.tf      # Version constraints
├── README.md        # Module documentation
└── examples/        # Usage examples
    └── complete/
        ├── main.tf
        ├── variables.tf
        └── outputs.tf
```

## Template Files

### main.tf
```hcl
# Main resource definitions
terraform {
  # No backend configuration in modules
}

# Example: VPC Module
resource "aws_vpc" "main" {
  cidr_block           = var.vpc_cidr
  enable_dns_hostnames = var.enable_dns_hostnames
  enable_dns_support   = var.enable_dns_support

  tags = merge(
    var.tags,
    {
      Name = var.name
    }
  )
}

# Use locals for computed values
# (assumes a matching data "aws_availability_zones" "available" {} block and an az_count variable)
locals {
  availability_zones = slice(data.aws_availability_zones.available.names, 0, var.az_count)
}
```

### variables.tf
```hcl
variable "name" {
  description = "Name to be used on all resources as prefix"
  type        = string
}

variable "vpc_cidr" {
  description = "CIDR block for VPC"
  type        = string

  validation {
    condition     = can(cidrhost(var.vpc_cidr, 0))
    error_message = "Must be a valid IPv4 CIDR block."
  }
}

variable "enable_dns_hostnames" {
  description = "Enable DNS hostnames in the VPC"
  type        = bool
  default     = true
}

variable "tags" {
  description = "A map of tags to add to all resources"
  type        = map(string)
  default     = {}
}

# For sensitive values
variable "database_password" {
  description = "Master password for the database"
  type        = string
  sensitive   = true
}

# With validation
variable "environment" {
  description = "Environment name"
  type        = string

  validation {
    condition     = contains(["dev", "staging", "prod"], var.environment)
    error_message = "Environment must be one of: dev, staging, prod."
  }
}

# Complex types
variable "subnets" {
  description = "Map of subnet configurations"
  type = map(object({
    cidr_block        = string
    availability_zone = string
    public            = bool
  }))
  default = {}
}
```

### outputs.tf
```hcl
output "vpc_id" {
  description = "The ID of the VPC"
  value       = aws_vpc.main.id
}

output "vpc_cidr" {
  description = "The CIDR block of the VPC"
  value       = aws_vpc.main.cidr_block
}

output "private_subnet_ids" {
  description = "List of IDs of private subnets"
  value       = aws_subnet.private[*].id
}

# Sensitive outputs
output "database_endpoint" {
  description = "Database connection endpoint"
  value       = aws_db_instance.main.endpoint
  sensitive   = true
}

# Complex outputs
output "subnet_details" {
  description = "Detailed information about all subnets"
  value = {
    for subnet in aws_subnet.main :
    subnet.id => {
      cidr_block        = subnet.cidr_block
      availability_zone = subnet.availability_zone
      public            = subnet.map_public_ip_on_launch
    }
  }
}
```

### versions.tf
```hcl
terraform {
  required_version = ">= 1.3.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = ">= 5.0.0"
    }
  }
}
```

### README.md Template
```markdown
# Module Name

Brief description of what this module does.

## Usage

\`\`\`hcl
module "example" {
  source = "./modules/module-name"

  name        = "my-resource"
  vpc_cidr    = "10.0.0.0/16"
  environment = "prod"

  tags = {
    Environment = "prod"
    Project     = "example"
  }
}
\`\`\`

## Examples

- [Complete](./examples/complete) - Full example with all options

## Requirements

| Name | Version |
|------|---------|
| terraform | >= 1.3.0 |
| aws | >= 5.0.0 |

## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| name | Resource name | `string` | n/a | yes |
| vpc_cidr | VPC CIDR block | `string` | n/a | yes |
| environment | Environment name | `string` | n/a | yes |
| tags | Common tags | `map(string)` | `{}` | no |

## Outputs

| Name | Description |
|------|-------------|
| vpc_id | VPC identifier |
| private_subnet_ids | List of private subnet IDs |

## Authors

Module is maintained by [Your Team].

## License

Apache 2 Licensed. See LICENSE for full details.
```

## Example: Complete Usage Example

### examples/complete/main.tf
```hcl
module "vpc" {
  source = "../../"

  name                 = "example-vpc"
  vpc_cidr             = "10.0.0.0/16"
  environment          = "dev"
  enable_dns_hostnames = true

  tags = {
    Environment = "dev"
    Project     = "example"
    ManagedBy   = "Terraform"
  }
}
```

### examples/complete/outputs.tf
```hcl
output "vpc_id" {
  description = "The ID of the VPC"
  value       = module.vpc.vpc_id
}
```

### examples/complete/variables.tf
```hcl
variable "region" {
  description = "AWS region"
  type        = string
  default     = "us-east-1"
}
```

## Terragrunt Configuration Template

### terragrunt.hcl (root)
```hcl
# Root terragrunt.hcl
locals {
  # Load account-level variables
  account_vars = read_terragrunt_config(find_in_parent_folders("account.hcl"))

  # Load region-level variables
  region_vars = read_terragrunt_config(find_in_parent_folders("region.hcl"))

  # Load environment variables
  environment_vars = read_terragrunt_config(find_in_parent_folders("env.hcl"))

  # Extract commonly used variables
  account_name = local.account_vars.locals.account_name
  account_id   = local.account_vars.locals.account_id
  aws_region   = local.region_vars.locals.aws_region
  environment  = local.environment_vars.locals.environment
}

# Generate provider configuration
generate "provider" {
  path      = "provider.tf"
  if_exists = "overwrite_terragrunt"
  contents  = <<EOF
provider "aws" {
  region = "${local.aws_region}"

  assume_role {
    role_arn = "arn:aws:iam::${local.account_id}:role/TerraformRole"
  }

  default_tags {
    tags = {
      Environment = "${local.environment}"
      ManagedBy   = "Terragrunt"
      Account     = "${local.account_name}"
    }
  }
}
EOF
}

# Configure S3 backend
remote_state {
  backend = "s3"

  generate = {
    path      = "backend.tf"
    if_exists = "overwrite_terragrunt"
  }

  config = {
    bucket         = "${local.account_name}-terraform-state"
    key            = "${path_relative_to_include()}/terraform.tfstate"
    region         = local.aws_region
    encrypt        = true
    dynamodb_table = "${local.account_name}-terraform-lock"

    s3_bucket_tags = {
      Name        = "Terraform State"
      Environment = local.environment
    }

    dynamodb_table_tags = {
      Name        = "Terraform Lock"
      Environment = local.environment
    }
  }
}

# Global inputs for all modules
inputs = {
  account_name = local.account_name
  account_id   = local.account_id
  aws_region   = local.aws_region
  environment  = local.environment
}
```

### terragrunt.hcl (module level)
```hcl
# Include root configuration
include "root" {
  path = find_in_parent_folders()
}

# Define terraform source
terraform {
  source = "git::https://github.com/company/terraform-modules.git//vpc?ref=v1.0.0"
}

# Dependencies on other modules
dependency "iam" {
  config_path = "../iam"

  mock_outputs = {
    role_arn = "arn:aws:iam::123456789012:role/mock"
  }
  mock_outputs_allowed_terraform_commands = ["validate", "plan"]
}

# Module-specific inputs
inputs = {
  name     = "my-vpc"
  vpc_cidr = "10.0.0.0/16"

  # Use dependency outputs
  iam_role_arn = dependency.iam.outputs.role_arn

  # Module-specific tags
  tags = {
    Component = "networking"
    Module    = "vpc"
  }
}
```

## Best Practices

1. **Always include descriptions** for variables and outputs
2. **Use validation blocks** for important variables
3. **Mark sensitive values** as sensitive
4. **Provide sensible defaults** where appropriate
5. **Document everything** in README
6. **Include usage examples** in examples/ directory
7. **Version your modules** using Git tags (see the example below)
8. **Test modules** before tagging new versions
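Versioning a module (item 7) is usually just an annotated Git tag that consumers pin via `?ref=` in their `source`, as in the module-level terragrunt.hcl above; a small sketch (the version number and message are placeholders):

```bash
# Cut and publish a new module release
git tag -a v1.3.0 -m "vpc module: release v1.3.0"
git push origin v1.3.0
```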
skills/assets/workflows/github-actions-terraform.yml (new file, 224 lines)
@@ -0,0 +1,224 @@
name: Terraform CI/CD

on:
  pull_request:
    branches: [main, master]
    paths:
      - '**.tf'
      - '**.tfvars'
      - '.github/workflows/terraform.yml'
  push:
    branches: [main, master]
    paths:
      - '**.tf'
      - '**.tfvars'
  workflow_dispatch:

env:
  TF_VERSION: '1.5.0'
  TF_WORKING_DIR: '.'  # Change to your terraform directory

permissions:
  contents: read
  pull-requests: write
  id-token: write  # Required for OIDC

jobs:
  validate:
    name: Validate
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}

      - name: Terraform Format Check
        id: fmt
        run: terraform fmt -check -recursive
        working-directory: ${{ env.TF_WORKING_DIR }}
        continue-on-error: true

      - name: Terraform Init
        id: init
        run: terraform init -backend=false
        working-directory: ${{ env.TF_WORKING_DIR }}

      - name: Terraform Validate
        id: validate
        run: terraform validate -no-color
        working-directory: ${{ env.TF_WORKING_DIR }}

      - name: Comment PR - Validation Results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const output = `#### Terraform Format and Style 🖌\`${{ steps.fmt.outcome }}\`
            #### Terraform Initialization ⚙️\`${{ steps.init.outcome }}\`
            #### Terraform Validation 🤖\`${{ steps.validate.outcome }}\`

            *Pusher: @${{ github.actor }}, Action: \`${{ github.event_name }}\`, Working Directory: \`${{ env.TF_WORKING_DIR }}\`, Workflow: \`${{ github.workflow }}\`*`;

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: output
            })

  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup TFLint
        uses: terraform-linters/setup-tflint@v4

      - name: Init TFLint
        run: tflint --init

      - name: Run TFLint
        run: tflint -f compact
        working-directory: ${{ env.TF_WORKING_DIR }}

  security:
    name: Security Scan
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Run Checkov
        uses: bridgecrewio/checkov-action@master
        with:
          directory: ${{ env.TF_WORKING_DIR }}
          framework: terraform
          output_format: sarif
          output_file_path: reports/checkov.sarif
          soft_fail: true  # Don't fail the build, just report

      - name: Upload Checkov Results
        if: always()
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: reports/checkov.sarif

  plan:
    name: Plan
    runs-on: ubuntu-latest
    needs: [validate, lint]
    if: github.event_name == 'pull_request'
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Configure AWS Credentials (OIDC)
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: us-east-1

      # Alternative: Use access keys (not recommended)
      # - name: Configure AWS Credentials
      #   uses: aws-actions/configure-aws-credentials@v4
      #   with:
      #     aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
      #     aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
      #     aws-region: us-east-1

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}

      - name: Terraform Init
        run: terraform init
        working-directory: ${{ env.TF_WORKING_DIR }}

      - name: Terraform Plan
        id: plan
        run: |
          terraform plan -no-color -out=tfplan
          terraform show -no-color tfplan > plan_output.txt
        working-directory: ${{ env.TF_WORKING_DIR }}

      - name: Comment PR - Plan
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const plan = fs.readFileSync('${{ env.TF_WORKING_DIR }}/plan_output.txt', 'utf8');
            const maxLength = 65000;
            const truncatedPlan = plan.length > maxLength ? plan.substring(0, maxLength) + '\n... (truncated)' : plan;

            const output = `#### Terraform Plan 📖
            <details><summary>Show Plan</summary>

            \`\`\`terraform
            ${truncatedPlan}
            \`\`\`

            </details>

            *Pusher: @${{ github.actor }}, Action: \`${{ github.event_name }}\`, Working Directory: \`${{ env.TF_WORKING_DIR }}\`*`;

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: output
            })

      - name: Upload Plan
        uses: actions/upload-artifact@v4
        with:
          name: tfplan
          path: ${{ env.TF_WORKING_DIR }}/tfplan
          retention-days: 5

  apply:
    name: Apply
    runs-on: ubuntu-latest
    needs: [validate, lint, security]
    if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master')
    environment: production  # Requires approval in GitHub settings
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Configure AWS Credentials (OIDC)
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: us-east-1

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}

      - name: Terraform Init
        run: terraform init
        working-directory: ${{ env.TF_WORKING_DIR }}

      - name: Terraform Apply
        run: terraform apply -auto-approve
        working-directory: ${{ env.TF_WORKING_DIR }}

      - name: Notify Success
        if: success()
        run: |
          echo "✅ Terraform apply completed successfully"
          # Add notification logic here (Slack, email, etc.)

      - name: Notify Failure
        if: failure()
        run: |
          echo "❌ Terraform apply failed"
          # Add notification logic here (Slack, email, etc.)
skills/assets/workflows/github-actions-terragrunt.yml (new file, 236 lines)
@@ -0,0 +1,236 @@
name: Terragrunt CI/CD

on:
  pull_request:
    branches: [main, master]
    paths:
      - '**.tf'
      - '**.hcl'
      - '**.tfvars'
  push:
    branches: [main, master]
    paths:
      - '**.tf'
      - '**.hcl'
      - '**.tfvars'
  workflow_dispatch:
    inputs:
      environment:
        description: 'Environment to deploy'
        required: true
        type: choice
        options:
          - dev
          - staging
          - prod
      action:
        description: 'Action to perform'
        required: true
        type: choice
        options:
          - plan
          - apply
          - destroy

env:
  TF_VERSION: '1.5.0'
  TG_VERSION: '0.55.0'
  WORKING_DIR: 'environments'  # Base directory for environments

permissions:
  contents: read
  pull-requests: write
  id-token: write

jobs:
  detect-changes:
    name: Detect Changed Modules
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
      has-changes: ${{ steps.set-matrix.outputs.has-changes }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Get Changed Files
        id: changed-files
        uses: tj-actions/changed-files@v42
        with:
          files: |
            **/*.tf
            **/*.hcl
            **/*.tfvars

      - name: Set Matrix
        id: set-matrix
        run: |
          # Find all directories with terragrunt.hcl that have changes
          changed_dirs=$(echo "${{ steps.changed-files.outputs.all_changed_files }}" | tr ' ' '\n' | xargs -I {} dirname {} | grep -E "environments/(dev|staging|prod)" | sort -u | jq -R -s -c 'split("\n")[:-1]')

          if [ "$changed_dirs" = "[]" ]; then
            echo "has-changes=false" >> $GITHUB_OUTPUT
          else
            echo "has-changes=true" >> $GITHUB_OUTPUT
            echo "matrix={\"directory\":$changed_dirs}" >> $GITHUB_OUTPUT
          fi

  validate:
    name: Validate
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request'
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}
          terraform_wrapper: false

      - name: Setup Terragrunt
        run: |
          wget -q https://github.com/gruntwork-io/terragrunt/releases/download/v${{ env.TG_VERSION }}/terragrunt_linux_amd64
          chmod +x terragrunt_linux_amd64
          sudo mv terragrunt_linux_amd64 /usr/local/bin/terragrunt
          terragrunt --version

      - name: Terragrunt Format Check
        run: terragrunt hclfmt --terragrunt-check
        working-directory: ${{ env.WORKING_DIR }}
        continue-on-error: true

      - name: Terragrunt Validate All
        run: terragrunt run-all validate --terragrunt-non-interactive
        working-directory: ${{ env.WORKING_DIR }}

  plan:
    name: Plan - ${{ matrix.directory }}
    runs-on: ubuntu-latest
    needs: [detect-changes, validate]
    if: needs.detect-changes.outputs.has-changes == 'true'
    strategy:
      matrix: ${{ fromJson(needs.detect-changes.outputs.matrix) }}
      fail-fast: false
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}
          terraform_wrapper: false

      - name: Setup Terragrunt
        run: |
          wget -q https://github.com/gruntwork-io/terragrunt/releases/download/v${{ env.TG_VERSION }}/terragrunt_linux_amd64
          chmod +x terragrunt_linux_amd64
          sudo mv terragrunt_linux_amd64 /usr/local/bin/terragrunt

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: us-east-1

      - name: Terragrunt Plan
        id: plan
        run: |
          terragrunt run-all plan --terragrunt-non-interactive -out=tfplan 2>&1 | tee plan_output.txt
        working-directory: ${{ matrix.directory }}
        continue-on-error: true

      - name: Upload Plan Output
        uses: actions/upload-artifact@v4
        with:
          name: plan-${{ hashFiles(matrix.directory) }}
          path: ${{ matrix.directory }}/plan_output.txt
          retention-days: 5

      - name: Comment PR
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const planPath = '${{ matrix.directory }}/plan_output.txt';
            let plan = 'Plan output not available';

            try {
              plan = fs.readFileSync(planPath, 'utf8');
              const maxLength = 65000;
              plan = plan.length > maxLength ? plan.substring(0, maxLength) + '\n... (truncated)' : plan;
            } catch (error) {
              plan = 'Error reading plan output: ' + error.message;
            }

            const output = `#### Terragrunt Plan for \`${{ matrix.directory }}\`
            <details><summary>Show Plan</summary>

            \`\`\`terraform
            ${plan}
            \`\`\`

            </details>

            *Workflow: ${{ github.workflow }}, Actor: @${{ github.actor }}*`;

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: output
            });

  apply:
    name: Apply - ${{ matrix.directory }}
    runs-on: ubuntu-latest
    needs: [detect-changes]
    if: |
      (github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master')) ||
      (github.event_name == 'workflow_dispatch' && github.event.inputs.action == 'apply')
    strategy:
      matrix:
        directory: ${{ github.event_name == 'workflow_dispatch' && fromJson(format('["environments/{0}"]', github.event.inputs.environment)) || fromJson(needs.detect-changes.outputs.matrix).directory }}
      fail-fast: false
      max-parallel: 1  # Apply one at a time
    environment:
      name: production  # Configure approval in GitHub settings
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Terraform
        uses: hashicorp/setup-terraform@v3
        with:
          terraform_version: ${{ env.TF_VERSION }}
          terraform_wrapper: false

      - name: Setup Terragrunt
        run: |
          wget -q https://github.com/gruntwork-io/terragrunt/releases/download/v${{ env.TG_VERSION }}/terragrunt_linux_amd64
          chmod +x terragrunt_linux_amd64
          sudo mv terragrunt_linux_amd64 /usr/local/bin/terragrunt

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_ROLE_ARN }}
          aws-region: us-east-1

      - name: Terragrunt Apply
        run: |
          terragrunt run-all apply --terragrunt-non-interactive -auto-approve
        working-directory: ${{ matrix.directory }}

      - name: Notify Success
        if: success()
        run: echo "✅ Terragrunt apply completed for ${{ matrix.directory }}"

      - name: Notify Failure
        if: failure()
        run: echo "❌ Terragrunt apply failed for ${{ matrix.directory }}"
skills/assets/workflows/gitlab-ci-terraform.yml (new file, 184 lines)
@@ -0,0 +1,184 @@
# GitLab CI/CD Pipeline for Terraform

variables:
  TF_VERSION: "1.5.0"
  TF_ROOT: ${CI_PROJECT_DIR}  # Change to your terraform directory
  TF_STATE_NAME: default

image:
  name: hashicorp/terraform:$TF_VERSION
  entrypoint: [""]

cache:
  key: "$CI_COMMIT_REF_SLUG"
  paths:
    - ${TF_ROOT}/.terraform

before_script:
  - cd ${TF_ROOT}
  - terraform --version

stages:
  - validate
  - lint
  - security
  - plan
  - apply

# Validate Terraform configuration
validate:
  stage: validate
  script:
    - terraform fmt -check -recursive
    - terraform init -backend=false
    - terraform validate
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'

# Lint with tflint
tflint:
  stage: lint
  image:
    name: ghcr.io/terraform-linters/tflint:latest
    entrypoint: [""]
  script:
    - cd ${TF_ROOT}
    - tflint --init
    - tflint -f compact
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'

# Security scan with Checkov
checkov:
  stage: security
  image:
    name: bridgecrew/checkov:latest
    entrypoint: [""]
  script:
    - checkov -d ${TF_ROOT} --framework terraform --output cli --soft-fail
  allow_failure: true
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'

# Plan Terraform changes
plan:
  stage: plan
  script:
    - terraform init
    - terraform plan -out=tfplan
    - terraform show -no-color tfplan > plan_output.txt
  artifacts:
    name: plan
    paths:
      - ${TF_ROOT}/tfplan
      - ${TF_ROOT}/plan_output.txt
    reports:
      terraform: ${TF_ROOT}/tfplan
    expire_in: 7 days
  rules:
    - if: '$CI_PIPELINE_SOURCE == "merge_request_event"'
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: manual

# Apply Terraform changes (manual trigger for production)
apply:
  stage: apply
  script:
    - terraform init
    - terraform apply -auto-approve
  dependencies:
    - plan
  rules:
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: manual
  environment:
    name: production
    action: start

# Destroy infrastructure (manual trigger, protected)
destroy:
  stage: apply
  script:
    - terraform init
    - terraform destroy -auto-approve
  rules:
    - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
      when: manual
  environment:
    name: production
    action: stop

# ========================================
# Multi-Environment Example
# ========================================
# Uncomment and customize for multiple environments

# .plan_template: &plan_template
#   stage: plan
#   script:
#     - terraform init
#     - terraform workspace select ${TF_WORKSPACE} || terraform workspace new ${TF_WORKSPACE}
#     - terraform plan -out=tfplan -var-file=environments/${TF_WORKSPACE}.tfvars
#   artifacts:
#     paths:
#       - ${TF_ROOT}/tfplan
#     expire_in: 7 days

# .apply_template: &apply_template
#   stage: apply
#   script:
#     - terraform init
#     - terraform workspace select ${TF_WORKSPACE}
#     - terraform apply -auto-approve tfplan
#   when: manual

# plan:dev:
#   <<: *plan_template
#   variables:
#     TF_WORKSPACE: dev
#   rules:
#     - if: '$CI_COMMIT_BRANCH == "develop"'

# apply:dev:
#   <<: *apply_template
#   variables:
#     TF_WORKSPACE: dev
#   rules:
#     - if: '$CI_COMMIT_BRANCH == "develop"'
#   environment:
#     name: dev

# plan:staging:
#   <<: *plan_template
#   variables:
#     TF_WORKSPACE: staging
#   rules:
#     - if: '$CI_COMMIT_BRANCH == "staging"'

# apply:staging:
#   <<: *apply_template
#   variables:
#     TF_WORKSPACE: staging
#   rules:
#     - if: '$CI_COMMIT_BRANCH == "staging"'
#   environment:
#     name: staging

# plan:prod:
#   <<: *plan_template
#   variables:
#     TF_WORKSPACE: prod
#   rules:
#     - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'

# apply:prod:
#   <<: *apply_template
#   variables:
#     TF_WORKSPACE: prod
#   rules:
#     - if: '$CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH'
#   environment:
#     name: production
709
skills/references/best_practices.md
Normal file
709
skills/references/best_practices.md
Normal file
@@ -0,0 +1,709 @@
|
||||
# Terraform Best Practices
|
||||
|
||||
Comprehensive guide to Terraform best practices for infrastructure as code.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Project Structure](#project-structure)
|
||||
2. [State Management](#state-management)
|
||||
3. [Module Design](#module-design)
|
||||
4. [Variable Management](#variable-management)
|
||||
5. [Resource Naming](#resource-naming)
|
||||
6. [Security Practices](#security-practices)
|
||||
7. [Testing & Validation](#testing--validation)
|
||||
8. [CI/CD Integration](#cicd-integration)
|
||||
|
||||
---
|
||||
|
||||
## Project Structure
|
||||
|
||||
### Recommended Directory Layout
|
||||
|
||||
```
|
||||
terraform-project/
|
||||
├── environments/
|
||||
│ ├── dev/
|
||||
│ │ ├── main.tf
|
||||
│ │ ├── variables.tf
|
||||
│ │ ├── outputs.tf
|
||||
│ │ ├── terraform.tfvars
|
||||
│ │ └── backend.tf
|
||||
│ ├── staging/
|
||||
│ └── prod/
|
||||
├── modules/
|
||||
│ ├── networking/
|
||||
│ │ ├── main.tf
|
||||
│ │ ├── variables.tf
|
||||
│ │ ├── outputs.tf
|
||||
│ │ ├── versions.tf
|
||||
│ │ └── README.md
|
||||
│ ├── compute/
|
||||
│ └── database/
|
||||
├── global/
|
||||
│ ├── iam/
|
||||
│ └── dns/
|
||||
└── README.md
|
||||
```
|
||||
|
||||
### Key Principles
|
||||
|
||||
**Separate Environments**
|
||||
- Use directories for each environment (dev, staging, prod)
|
||||
- Each environment has its own state file
|
||||
- Prevents accidental changes to wrong environment
|
||||
|
||||
**Reusable Modules**
|
||||
- Common infrastructure patterns in modules/
|
||||
- Modules are versioned and tested
|
||||
- Used across multiple environments
|
||||
|
||||
**Global Resources**
|
||||
- Resources shared across environments (IAM, DNS)
|
||||
- Separate state for better isolation
|
||||
- Carefully managed with extra review
|
||||
|
||||
---
|
||||
|
||||
## State Management
|
||||
|
||||
### Remote State is Essential
|
||||
|
||||
**Why Remote State:**
|
||||
- Team collaboration and locking
|
||||
- State backup and versioning
|
||||
- Secure credential handling
|
||||
- Disaster recovery
|
||||
|
||||
**Recommended Backend: S3 + DynamoDB**
|
||||
|
||||
```hcl
|
||||
terraform {
|
||||
backend "s3" {
|
||||
bucket = "company-terraform-state"
|
||||
key = "prod/networking/terraform.tfstate"
|
||||
region = "us-east-1"
|
||||
encrypt = true
|
||||
dynamodb_table = "terraform-state-lock"
|
||||
kms_key_id = "arn:aws:kms:us-east-1:ACCOUNT:key/KEY_ID"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**State Best Practices:**
|
||||
|
||||
1. **Enable Encryption**: Always encrypt state at rest
|
||||
2. **Enable Versioning**: On S3 bucket for state recovery (a bootstrap sketch for the bucket and lock table follows this list)
|
||||
3. **Use State Locking**: DynamoDB table prevents concurrent modifications
|
||||
4. **Restrict Access**: IAM policies limiting who can read/write state
|
||||
5. **Separate State Files**: Different states for different components
|
||||
6. **Regular Backups**: Automated backups of state files
|
||||
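The state bucket and lock table are usually created once in a small bootstrap configuration applied with local state. A minimal sketch, reusing the bucket and table names from the backend block above:

```hcl
# Bootstrap for the remote state backend (apply once, then migrate to it)
resource "aws_s3_bucket" "terraform_state" {
  bucket = "company-terraform-state"
}

resource "aws_s3_bucket_versioning" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id

  versioning_configuration {
    status = "Enabled"
  }
}

resource "aws_s3_bucket_server_side_encryption_configuration" "terraform_state" {
  bucket = aws_s3_bucket.terraform_state.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "aws:kms"
    }
  }
}

resource "aws_dynamodb_table" "terraform_state_lock" {
  name         = "terraform-state-lock"
  billing_mode = "PAY_PER_REQUEST"
  hash_key     = "LockID" # Key name Terraform expects for state locking

  attribute {
    name = "LockID"
    type = "S"
  }
}
```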
|
||||
### State File Organization
|
||||
|
||||
**Bad - Single State:**
|
||||
```
|
||||
terraform.tfstate (contains everything)
|
||||
```
|
||||
|
||||
**Good - Multiple States:**
|
||||
```
|
||||
networking/terraform.tfstate
|
||||
compute/terraform.tfstate
|
||||
database/terraform.tfstate
|
||||
```
|
||||
|
||||
**Benefits:**
|
||||
- Reduced blast radius
|
||||
- Faster plan/apply operations
|
||||
- Parallel team work
|
||||
- Easier to understand and debug
|
||||
|
||||
### State Management Commands
|
||||
|
||||
```bash
|
||||
# List resources in state
|
||||
terraform state list
|
||||
|
||||
# Show specific resource
|
||||
terraform state show aws_instance.example
|
||||
|
||||
# Move resource to different address
|
||||
terraform state mv aws_instance.old aws_instance.new
|
||||
|
||||
# Remove resource from state (doesn't destroy)
|
||||
terraform state rm aws_instance.example
|
||||
|
||||
# Import existing resource
|
||||
terraform import aws_instance.example i-1234567890abcdef0
|
||||
|
||||
# Pull state for inspection (read-only)
|
||||
terraform state pull > state.json
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Module Design
|
||||
|
||||
### Module Structure
|
||||
|
||||
Every module should have:
|
||||
|
||||
```
|
||||
module-name/
|
||||
├── main.tf # Primary resources
|
||||
├── variables.tf # Input variables
|
||||
├── outputs.tf # Output values
|
||||
├── versions.tf # Version constraints
|
||||
├── README.md # Documentation
|
||||
└── examples/ # Usage examples
|
||||
└── complete/
|
||||
├── main.tf
|
||||
└── variables.tf
|
||||
```
|
||||
|
||||
### Module Best Practices
|
||||
|
||||
**1. Single Responsibility**
|
||||
Each module should do one thing well:
|
||||
- ✅ `vpc-module` creates VPC with subnets, route tables, NACLs
|
||||
- ❌ `infrastructure` creates VPC, EC2, RDS, S3, everything
|
||||
|
||||
**2. Composability**
|
||||
Modules should work together:
|
||||
```hcl
|
||||
module "vpc" {
|
||||
source = "./modules/vpc"
|
||||
cidr = "10.0.0.0/16"
|
||||
}
|
||||
|
||||
module "eks" {
|
||||
source = "./modules/eks"
|
||||
vpc_id = module.vpc.vpc_id
|
||||
subnet_ids = module.vpc.private_subnet_ids
|
||||
}
|
||||
```
|
||||
|
||||
**3. Sensible Defaults**
|
||||
```hcl
|
||||
variable "instance_type" {
|
||||
type = string
|
||||
description = "EC2 instance type"
|
||||
default = "t3.micro" # Reasonable default
|
||||
}
|
||||
|
||||
variable "enable_monitoring" {
|
||||
type = bool
|
||||
description = "Enable detailed monitoring"
|
||||
default = false # Cost-effective default
|
||||
}
|
||||
```
|
||||
|
||||
**4. Complete Documentation**
|
||||
|
||||
```hcl
|
||||
variable "vpc_cidr" {
|
||||
type = string
|
||||
description = "CIDR block for VPC. Must be a valid IPv4 CIDR."
|
||||
|
||||
validation {
|
||||
condition = can(cidrhost(var.vpc_cidr, 0))
|
||||
error_message = "Must be a valid IPv4 CIDR block."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**5. Output Useful Values**
|
||||
|
||||
```hcl
|
||||
output "vpc_id" {
|
||||
description = "ID of the VPC"
|
||||
value = aws_vpc.main.id
|
||||
}
|
||||
|
||||
output "private_subnet_ids" {
|
||||
description = "List of private subnet IDs for deploying workloads"
|
||||
value = aws_subnet.private[*].id
|
||||
}
|
||||
|
||||
output "nat_gateway_ips" {
|
||||
description = "Elastic IPs of NAT gateways for firewall whitelisting"
|
||||
value = aws_eip.nat[*].public_ip
|
||||
}
|
||||
```
|
||||
|
||||
### Module Versioning
|
||||
|
||||
**Use Git Tags for Versioning:**
|
||||
```hcl
|
||||
module "vpc" {
|
||||
source = "git::https://github.com/company/terraform-modules.git//vpc?ref=v1.2.3"
|
||||
# Configuration...
|
||||
}
|
||||
```
|
||||
|
||||
**Semantic Versioning:**
|
||||
- v1.0.0 → First stable release
|
||||
- v1.1.0 → New features (backward compatible)
|
||||
- v1.1.1 → Bug fixes
|
||||
- v2.0.0 → Breaking changes
|
||||
|
||||
---
|
||||
|
||||
## Variable Management
|
||||
|
||||
### Variable Declaration
|
||||
|
||||
**Always Include:**
|
||||
```hcl
|
||||
variable "environment" {
|
||||
type = string
|
||||
description = "Environment name (dev, staging, prod)"
|
||||
|
||||
validation {
|
||||
condition = contains(["dev", "staging", "prod"], var.environment)
|
||||
error_message = "Environment must be dev, staging, or prod."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Variable Files Hierarchy
|
||||
|
||||
```
|
||||
terraform.tfvars # Default values (committed, no secrets)
|
||||
dev.tfvars # Dev overrides
|
||||
prod.tfvars # Prod overrides
|
||||
secrets.auto.tfvars # Auto-loaded (in .gitignore)
|
||||
```
|
||||
|
||||
**Usage:**
|
||||
```bash
|
||||
terraform apply -var-file="prod.tfvars"
|
||||
```
|
||||
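As an illustration of an environment override file (all values here are assumptions), `prod.tfvars` might contain:

```hcl
# prod.tfvars - production overrides (no secrets)
environment    = "prod"
instance_type  = "t3.large"
instance_count = 4

tags = {
  Environment = "prod"
  CostCenter  = "platform"
}
```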
|
||||
### Sensitive Variables
|
||||
|
||||
**Mark as Sensitive:**
|
||||
```hcl
|
||||
variable "database_password" {
|
||||
type = string
|
||||
description = "Master password for database"
|
||||
sensitive = true
|
||||
}
|
||||
```
|
||||
|
||||
**Never commit secrets:**
|
||||
```bash
|
||||
# .gitignore
|
||||
*.auto.tfvars
|
||||
secrets.tfvars
|
||||
terraform.tfvars # Only if it contains secrets
|
||||
```
|
||||
|
||||
**Better: Use External Secret Management**
|
||||
```hcl
|
||||
data "aws_secretsmanager_secret_version" "db_password" {
|
||||
secret_id = "prod/database/master-password"
|
||||
}
|
||||
|
||||
resource "aws_db_instance" "main" {
|
||||
password = data.aws_secretsmanager_secret_version.db_password.secret_string
|
||||
}
|
||||
```
|
||||
|
||||
### Variable Organization
|
||||
|
||||
**Group related variables:**
|
||||
```hcl
|
||||
# Network Configuration
|
||||
variable "vpc_cidr" { }
|
||||
variable "availability_zones" { }
|
||||
variable "public_subnet_cidrs" { }
|
||||
variable "private_subnet_cidrs" { }
|
||||
|
||||
# Application Configuration
|
||||
variable "app_name" { }
|
||||
variable "app_version" { }
|
||||
variable "instance_count" { }
|
||||
|
||||
# Tagging
|
||||
variable "tags" {
|
||||
type = map(string)
|
||||
description = "Common tags for all resources"
|
||||
default = {}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Resource Naming
|
||||
|
||||
### Naming Conventions
|
||||
|
||||
**Terraform Resources (snake_case):**
|
||||
```hcl
|
||||
resource "aws_vpc" "main_vpc" { }
|
||||
resource "aws_subnet" "public_subnet_az1" { }
|
||||
resource "aws_instance" "web_server_01" { }
|
||||
```
|
||||
|
||||
**AWS Resource Names (kebab-case):**
|
||||
```hcl
|
||||
resource "aws_s3_bucket" "logs" {
|
||||
bucket = "company-prod-application-logs"
|
||||
# company-{env}-{service}-{purpose}
|
||||
}
|
||||
|
||||
resource "aws_instance" "web" {
|
||||
tags = {
|
||||
Name = "prod-web-server-01"
|
||||
# {env}-{service}-{type}-{number}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Naming Standards
|
||||
|
||||
**Pattern: `{company}-{environment}-{service}-{resource_type}`**
|
||||
|
||||
Examples:
|
||||
- `acme-prod-api-alb`
|
||||
- `acme-dev-workers-asg`
|
||||
- `acme-staging-database-rds`
|
||||
|
||||
**Benefits:**
|
||||
- Easy filtering in AWS console
|
||||
- Clear ownership and purpose
|
||||
- Consistent across environments
|
||||
- Billing and cost tracking
|
||||
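One way to keep the pattern consistent is to build names once in a `locals` block. A minimal sketch, assuming `company`, `environment`, and `service` variables are defined elsewhere:

```hcl
locals {
  # {company}-{environment}-{service}
  name_prefix = "${var.company}-${var.environment}-${var.service}"
}

resource "aws_lb" "api" {
  name = "${local.name_prefix}-alb" # e.g. acme-prod-api-alb
  # ... remaining configuration ...
}
```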
|
||||
---
|
||||
|
||||
## Security Practices
|
||||
|
||||
### 1. Principle of Least Privilege
|
||||
|
||||
```hcl
|
||||
# Bad - Too permissive
|
||||
resource "aws_iam_policy" "bad" {
|
||||
policy = jsonencode({
|
||||
Statement = [{
|
||||
Effect = "Allow"
|
||||
Action = "*"
|
||||
Resource = "*"
|
||||
}]
|
||||
})
|
||||
}
|
||||
|
||||
# Good - Specific permissions
|
||||
resource "aws_iam_policy" "good" {
|
||||
policy = jsonencode({
|
||||
Statement = [{
|
||||
Effect = "Allow"
|
||||
Action = [
|
||||
"s3:GetObject",
|
||||
"s3:PutObject"
|
||||
]
|
||||
Resource = "arn:aws:s3:::my-bucket/*"
|
||||
}]
|
||||
})
|
||||
}
|
||||
```
|
||||
|
||||
### 2. Encryption Everywhere
|
||||
|
||||
```hcl
|
||||
# Encrypt S3 buckets
|
||||
resource "aws_s3_bucket" "secure" {
|
||||
bucket = "my-secure-bucket"
|
||||
}
|
||||
|
||||
resource "aws_s3_bucket_server_side_encryption_configuration" "secure" {
|
||||
bucket = aws_s3_bucket.secure.id
|
||||
|
||||
rule {
|
||||
apply_server_side_encryption_by_default {
|
||||
sse_algorithm = "aws:kms"
|
||||
kms_master_key_id = aws_kms_key.bucket.arn
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Encrypt EBS volumes
|
||||
resource "aws_instance" "secure" {
|
||||
root_block_device {
|
||||
encrypted = true
|
||||
}
|
||||
}
|
||||
|
||||
# Encrypt RDS databases
|
||||
resource "aws_db_instance" "secure" {
|
||||
storage_encrypted = true
|
||||
kms_key_id = aws_kms_key.rds.arn
|
||||
}
|
||||
```
|
||||
|
||||
### 3. Network Security
|
||||
|
||||
```hcl
|
||||
# Restrictive security groups
|
||||
resource "aws_security_group" "web" {
|
||||
name_prefix = "web-"
|
||||
|
||||
# Only allow specific inbound
|
||||
ingress {
|
||||
from_port = 443
|
||||
to_port = 443
|
||||
protocol = "tcp"
|
||||
cidr_blocks = ["0.0.0.0/0"] # Consider restricting further
|
||||
}
|
||||
|
||||
# Explicit outbound
|
||||
egress {
|
||||
from_port = 443
|
||||
to_port = 443
|
||||
protocol = "tcp"
|
||||
cidr_blocks = ["0.0.0.0/0"]
|
||||
}
|
||||
}
|
||||
|
||||
# Use private subnets for workloads
|
||||
resource "aws_subnet" "private" {
|
||||
map_public_ip_on_launch = false # No public IPs
|
||||
}
|
||||
```
|
||||
|
||||
### 4. Secret Management
|
||||
|
||||
**Never in Code:**
|
||||
```hcl
|
||||
# ❌ NEVER DO THIS
|
||||
resource "aws_db_instance" "bad" {
|
||||
password = "MySecretPassword123" # NEVER!
|
||||
}
|
||||
```
|
||||
|
||||
**Use AWS Secrets Manager:**
|
||||
```hcl
|
||||
# ✅ CORRECT APPROACH
|
||||
data "aws_secretsmanager_secret_version" "db" {
|
||||
secret_id = var.db_secret_arn
|
||||
}
|
||||
|
||||
resource "aws_db_instance" "good" {
|
||||
password = data.aws_secretsmanager_secret_version.db.secret_string
|
||||
}
|
||||
```
|
||||
|
||||
### 5. Resource Tagging
|
||||
|
||||
```hcl
|
||||
locals {
|
||||
common_tags = {
|
||||
Environment = var.environment
|
||||
ManagedBy = "Terraform"
|
||||
Owner = "platform-team"
|
||||
Project = var.project_name
|
||||
CostCenter = var.cost_center
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_instance" "web" {
|
||||
tags = merge(
|
||||
local.common_tags,
|
||||
{
|
||||
Name = "web-server"
|
||||
Role = "webserver"
|
||||
}
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Testing & Validation
|
||||
|
||||
### Pre-Deployment Validation
|
||||
|
||||
**1. Terraform Validate**
|
||||
```bash
|
||||
terraform validate
|
||||
```
|
||||
Checks syntax and configuration validity.
|
||||
|
||||
**2. Terraform Plan**
|
||||
```bash
|
||||
terraform plan -out=tfplan
|
||||
```
|
||||
Review changes before applying.
|
||||
|
||||
**3. tflint**
|
||||
```bash
|
||||
tflint --module
|
||||
```
|
||||
Linter for catching errors and enforcing conventions.
|
||||
|
||||
**4. checkov**
|
||||
```bash
|
||||
checkov -d .
|
||||
```
|
||||
Security and compliance scanning.
|
||||
|
||||
**5. terraform-docs**
|
||||
```bash
|
||||
terraform-docs markdown . > README.md
|
||||
```
|
||||
Auto-generate documentation.
|
||||
|
||||
### Automated Testing
|
||||
|
||||
**Terratest (Go):**
|
||||
```go
|
||||
func TestVPCCreation(t *testing.T) {
|
||||
terraformOptions := terraform.WithDefaultRetryableErrors(t, &terraform.Options{
|
||||
TerraformDir: "../examples/complete",
|
||||
})
|
||||
|
||||
defer terraform.Destroy(t, terraformOptions)
|
||||
terraform.InitAndApply(t, terraformOptions)
|
||||
|
||||
vpcId := terraform.Output(t, terraformOptions, "vpc_id")
|
||||
assert.NotEmpty(t, vpcId)
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## CI/CD Integration
|
||||
|
||||
### GitHub Actions Example
|
||||
|
||||
```yaml
|
||||
name: Terraform
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [main]
|
||||
push:
|
||||
branches: [main]
|
||||
|
||||
jobs:
|
||||
terraform:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Terraform
|
||||
uses: hashicorp/setup-terraform@v2
|
||||
|
||||
- name: Terraform Init
|
||||
run: terraform init
|
||||
|
||||
- name: Terraform Validate
|
||||
run: terraform validate
|
||||
|
||||
- name: Terraform Plan
|
||||
run: terraform plan -no-color
|
||||
if: github.event_name == 'pull_request'
|
||||
|
||||
- name: Terraform Apply
|
||||
run: terraform apply -auto-approve
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
```
|
||||
|
||||
### Best Practices for CI/CD
|
||||
|
||||
1. **Always run plan on PRs** - Review changes before merge
|
||||
2. **Require approvals** - Human review for production
|
||||
3. **Use workspaces or directories** - Separate pipeline per environment
|
||||
4. **Store state remotely** - S3 backend with locking
|
||||
5. **Use credential management** - OIDC or IAM roles, never store credentials (an OIDC trust sketch follows this list)
|
||||
6. **Run security scans** - checkov, tfsec in pipeline
|
||||
7. **Tag releases** - Version your infrastructure code
|
||||
|
||||
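A minimal sketch of the AWS side of GitHub Actions OIDC. The repository filter, role name, and thumbprint below are assumptions to adapt:

```hcl
resource "aws_iam_openid_connect_provider" "github" {
  url             = "https://token.actions.githubusercontent.com"
  client_id_list  = ["sts.amazonaws.com"]
  thumbprint_list = ["6938fd4d98bab03faadb97b34396831e3780aea1"] # placeholder; verify current value
}

data "aws_iam_policy_document" "github_trust" {
  statement {
    actions = ["sts:AssumeRoleWithWebIdentity"]

    principals {
      type        = "Federated"
      identifiers = [aws_iam_openid_connect_provider.github.arn]
    }

    condition {
      test     = "StringLike"
      variable = "token.actions.githubusercontent.com:sub"
      values   = ["repo:company/infrastructure:*"] # assumed repository
    }
  }
}

resource "aws_iam_role" "terraform_ci" {
  name               = "terraform-ci"
  assume_role_policy = data.aws_iam_policy_document.github_trust.json
}
```

The workflow then requests `id-token: write` permission and assumes this role instead of reading stored access keys.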
---
|
||||
|
||||
## Common Pitfalls to Avoid
|
||||
|
||||
### 1. Not Using Remote State
|
||||
- ❌ Local state doesn't work for teams
|
||||
- ✅ Use S3, Terraform Cloud, or other remote backend
|
||||
|
||||
### 2. Hardcoding Values
|
||||
- ❌ `region = "us-east-1"` in every resource
|
||||
- ✅ Use variables and locals
|
||||
|
||||
### 3. Not Using Modules
|
||||
- ❌ Copying code between environments
|
||||
- ✅ Create reusable modules
|
||||
|
||||
### 4. Ignoring State
|
||||
- ❌ Manually modifying infrastructure
|
||||
- ✅ All changes through Terraform
|
||||
|
||||
### 5. Poor Naming
|
||||
- ❌ `resource "aws_instance" "i1" { }`
|
||||
- ✅ `resource "aws_instance" "web_server_01" { }`
|
||||
|
||||
### 6. No Documentation
|
||||
- ❌ No README, no comments
|
||||
- ✅ Document everything
|
||||
|
||||
### 7. Massive State Files
|
||||
- ❌ Single state for entire infrastructure
|
||||
- ✅ Break into logical components
|
||||
|
||||
### 8. No Testing
|
||||
- ❌ Apply directly to production
|
||||
- ✅ Test in dev/staging first
|
||||
|
||||
---
|
||||
|
||||
## Quick Reference
|
||||
|
||||
### Essential Commands
|
||||
```bash
|
||||
# Initialize
|
||||
terraform init
|
||||
|
||||
# Validate configuration
|
||||
terraform validate
|
||||
|
||||
# Format code
|
||||
terraform fmt -recursive
|
||||
|
||||
# Plan changes
|
||||
terraform plan
|
||||
|
||||
# Apply changes
|
||||
terraform apply
|
||||
|
||||
# Destroy resources
|
||||
terraform destroy
|
||||
|
||||
# Show current state
|
||||
terraform show
|
||||
|
||||
# List resources
|
||||
terraform state list
|
||||
|
||||
# Output values
|
||||
terraform output
|
||||
```
|
||||
|
||||
### Useful Flags
|
||||
```bash
|
||||
# Plan without color
|
||||
terraform plan -no-color
|
||||
|
||||
# Apply without prompts
|
||||
terraform apply -auto-approve
|
||||
|
||||
# Destroy specific resource
|
||||
terraform destroy -target=aws_instance.example
|
||||
|
||||
# Use specific var file
|
||||
terraform apply -var-file="prod.tfvars"
|
||||
|
||||
# Set variable via CLI
|
||||
terraform apply -var="environment=prod"
|
||||
```
|
||||
665
skills/references/cost_optimization.md
Normal file
665
skills/references/cost_optimization.md
Normal file
@@ -0,0 +1,665 @@
|
||||
# Terraform Cost Optimization Guide
|
||||
|
||||
Strategies for optimizing cloud infrastructure costs when using Terraform.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [Right-Sizing Resources](#right-sizing-resources)
|
||||
2. [Spot and Reserved Instances](#spot-and-reserved-instances)
|
||||
3. [Storage Optimization](#storage-optimization)
|
||||
4. [Networking Costs](#networking-costs)
|
||||
5. [Resource Lifecycle](#resource-lifecycle)
|
||||
6. [Cost Tagging](#cost-tagging)
|
||||
7. [Monitoring and Alerts](#monitoring-and-alerts)
|
||||
8. [Multi-Cloud Considerations](#multi-cloud-considerations)
|
||||
|
||||
---
|
||||
|
||||
## Right-Sizing Resources
|
||||
|
||||
### Compute Resources
|
||||
|
||||
**Start small, scale up:**
|
||||
```hcl
|
||||
variable "instance_type" {
|
||||
type = string
|
||||
description = "EC2 instance type"
|
||||
default = "t3.micro" # Start with smallest reasonable size
|
||||
|
||||
validation {
|
||||
condition     = can(regex("^t[0-9][a-z]?\\.", var.instance_type))
|
||||
error_message = "Consider starting with burstable (t-series) instances for cost optimization."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Use auto-scaling instead of over-provisioning:**
|
||||
```hcl
|
||||
resource "aws_autoscaling_group" "app" {
|
||||
min_size = 2 # Minimum for HA
|
||||
desired_capacity = 2 # Normal load
|
||||
max_size = 10 # Peak load
|
||||
|
||||
# Scale based on actual usage
|
||||
target_group_arns = [aws_lb_target_group.app.arn]
|
||||
|
||||
tag {
|
||||
key = "Environment"
|
||||
value = var.environment
|
||||
propagate_at_launch = true
|
||||
}
|
||||
}
|
||||
```
|
||||
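The group above still needs a scaling policy to actually track load; a minimal target-tracking sketch (the 50% CPU target is an assumption):

```hcl
resource "aws_autoscaling_policy" "cpu_target" {
  name                   = "cpu-target-tracking"
  autoscaling_group_name = aws_autoscaling_group.app.name
  policy_type            = "TargetTrackingScaling"

  target_tracking_configuration {
    predefined_metric_specification {
      predefined_metric_type = "ASGAverageCPUUtilization"
    }
    target_value = 50.0
  }
}
```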
|
||||
### Database Right-Sizing
|
||||
|
||||
**Start with appropriate size:**
|
||||
```hcl
|
||||
resource "aws_db_instance" "main" {
|
||||
instance_class = var.environment == "prod" ? "db.t3.medium" : "db.t3.micro"
|
||||
|
||||
# Enable auto-scaling for storage
|
||||
allocated_storage = 20
|
||||
max_allocated_storage = 100 # Auto-scale up to 100GB
|
||||
|
||||
# Use cheaper storage for non-prod
|
||||
storage_type = var.environment == "prod" ? "io1" : "gp3"
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Spot and Reserved Instances
|
||||
|
||||
### Spot Instances for Non-Critical Workloads
|
||||
|
||||
**Launch Template for Spot:**
|
||||
```hcl
|
||||
resource "aws_launch_template" "spot" {
|
||||
name_prefix = "spot-"
|
||||
image_id = data.aws_ami.amazon_linux.id
|
||||
instance_type = "t3.medium"
|
||||
|
||||
instance_market_options {
|
||||
market_type = "spot"
|
||||
|
||||
spot_options {
|
||||
max_price = "0.05" # Set price limit
|
||||
spot_instance_type = "one-time"
|
||||
instance_interruption_behavior = "terminate"
|
||||
}
|
||||
}
|
||||
|
||||
tag_specifications {
|
||||
resource_type = "instance"
|
||||
tags = {
|
||||
Name = "spot-instance"
|
||||
Workload = "non-critical"
|
||||
CostSavings = "true"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_autoscaling_group" "spot" {
|
||||
desired_capacity = 5
|
||||
max_size = 10
|
||||
min_size = 0
|
||||
|
||||
mixed_instances_policy {
|
||||
instances_distribution {
|
||||
on_demand_percentage_above_base_capacity = 20 # 20% on-demand, 80% spot
|
||||
spot_allocation_strategy = "capacity-optimized"
|
||||
}
|
||||
|
||||
launch_template {
|
||||
launch_template_specification {
|
||||
launch_template_id = aws_launch_template.spot.id
|
||||
version = "$Latest"
|
||||
}
|
||||
|
||||
# Multiple instance types increase spot availability
|
||||
override {
|
||||
instance_type = "t3.medium"
|
||||
}
|
||||
override {
|
||||
instance_type = "t3.large"
|
||||
}
|
||||
override {
|
||||
instance_type = "t3a.medium"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Reserved Instances (Use Outside Terraform)
|
||||
|
||||
Terraform shouldn't manage reservations directly; instead, your configuration and process should:
|
||||
- Tag resources consistently for reservation planning
|
||||
- Use Savings Plans (Compute or EC2 Instance) for flexibility
|
||||
- Monitor usage patterns to inform reservation purchases
|
||||
|
||||
**Tagging for reservation analysis:**
|
||||
```hcl
|
||||
locals {
|
||||
reservation_tags = {
|
||||
ReservationCandidate = var.environment == "prod" ? "true" : "false"
|
||||
UsagePattern = "steady-state" # or "variable", "burst"
|
||||
CostCenter = var.cost_center
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Storage Optimization
|
||||
|
||||
### S3 Lifecycle Policies
|
||||
|
||||
**Automatic tiering:**
|
||||
```hcl
|
||||
resource "aws_s3_bucket_lifecycle_configuration" "logs" {
|
||||
bucket = aws_s3_bucket.logs.id
|
||||
|
||||
rule {
|
||||
id = "log-retention"
|
||||
status = "Enabled"
|
||||
|
||||
transition {
|
||||
days = 30
|
||||
storage_class = "STANDARD_IA" # Infrequent Access after 30 days
|
||||
}
|
||||
|
||||
transition {
|
||||
days = 90
|
||||
storage_class = "GLACIER_IR" # Instant Retrieval Glacier after 90 days
|
||||
}
|
||||
|
||||
transition {
|
||||
days = 180
|
||||
storage_class = "DEEP_ARCHIVE" # Deep Archive after 180 days
|
||||
}
|
||||
|
||||
expiration {
|
||||
days = 365 # Delete after 1 year
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Intelligent tiering for variable access:**
|
||||
```hcl
|
||||
resource "aws_s3_bucket_intelligent_tiering_configuration" "assets" {
|
||||
bucket = aws_s3_bucket.assets.id
|
||||
name = "entire-bucket"
|
||||
|
||||
tiering {
|
||||
access_tier = "ARCHIVE_ACCESS"
|
||||
days = 90
|
||||
}
|
||||
|
||||
tiering {
|
||||
access_tier = "DEEP_ARCHIVE_ACCESS"
|
||||
days = 180
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### EBS Volume Optimization
|
||||
|
||||
**Use appropriate volume types:**
|
||||
```hcl
|
||||
resource "aws_instance" "app" {
|
||||
ami = data.aws_ami.amazon_linux.id
|
||||
instance_type = "t3.medium"
|
||||
|
||||
root_block_device {
|
||||
volume_type = "gp3" # gp3 is cheaper than gp2 with better baseline
|
||||
volume_size = 20
|
||||
iops = 3000 # Default, only pay more if you need more
|
||||
throughput = 125 # Default
|
||||
encrypted = true
|
||||
|
||||
# Delete on termination to avoid orphaned volumes
|
||||
delete_on_termination = true
|
||||
}
|
||||
|
||||
tags = {
|
||||
Name = "app-server"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Snapshot lifecycle:**
|
||||
```hcl
|
||||
resource "aws_dlm_lifecycle_policy" "snapshots" {
|
||||
description = "EBS snapshot lifecycle"
|
||||
execution_role_arn = aws_iam_role.dlm.arn
|
||||
state = "ENABLED"
|
||||
|
||||
policy_details {
|
||||
resource_types = ["VOLUME"]
|
||||
|
||||
schedule {
|
||||
name = "Daily snapshots"
|
||||
|
||||
create_rule {
|
||||
interval = 24
|
||||
interval_unit = "HOURS"
|
||||
times = ["03:00"]
|
||||
}
|
||||
|
||||
retain_rule {
|
||||
count = 7 # Keep only 7 days of snapshots
|
||||
}
|
||||
|
||||
copy_tags = true
|
||||
}
|
||||
|
||||
target_tags = {
|
||||
BackupEnabled = "true"
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Networking Costs
|
||||
|
||||
### Minimize Data Transfer
|
||||
|
||||
**Use VPC endpoints to avoid NAT charges:**
|
||||
```hcl
|
||||
resource "aws_vpc_endpoint" "s3" {
|
||||
vpc_id = aws_vpc.main.id
|
||||
service_name = "com.amazonaws.${var.region}.s3"
|
||||
route_table_ids = [
|
||||
aws_route_table.private.id
|
||||
]
|
||||
|
||||
tags = {
|
||||
Name = "s3-endpoint"
|
||||
CostSavings = "reduces-nat-charges"
|
||||
}
|
||||
}
|
||||
|
||||
resource "aws_vpc_endpoint" "dynamodb" {
|
||||
vpc_id = aws_vpc.main.id
|
||||
service_name = "com.amazonaws.${var.region}.dynamodb"
|
||||
route_table_ids = [
|
||||
aws_route_table.private.id
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
**Interface endpoints for AWS services:**
|
||||
```hcl
|
||||
resource "aws_vpc_endpoint" "ecr_api" {
|
||||
vpc_id = aws_vpc.main.id
|
||||
service_name = "com.amazonaws.${var.region}.ecr.api"
|
||||
vpc_endpoint_type = "Interface"
|
||||
subnet_ids = aws_subnet.private[*].id
|
||||
security_group_ids = [aws_security_group.vpc_endpoints.id]
|
||||
private_dns_enabled = true
|
||||
|
||||
tags = {
|
||||
Name = "ecr-api-endpoint"
|
||||
CostSavings = "reduces-nat-data-transfer"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Regional Optimization
|
||||
|
||||
**Co-locate resources in same region/AZ:**
|
||||
```hcl
|
||||
# Bad - cross-region data transfer is expensive
|
||||
resource "aws_instance" "app" {
|
||||
availability_zone = "us-east-1a"
|
||||
}
|
||||
|
||||
resource "aws_rds_cluster" "main" {
|
||||
availability_zones = ["us-west-2a"] # Different region!
|
||||
}
|
||||
|
||||
# Good - same region and AZ when possible
|
||||
resource "aws_instance" "app" {
|
||||
availability_zone = var.availability_zone
|
||||
}
|
||||
|
||||
resource "aws_rds_cluster" "main" {
|
||||
availability_zones = [var.availability_zone] # Same AZ
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Resource Lifecycle
|
||||
|
||||
### Scheduled Shutdown for Non-Production
|
||||
|
||||
**Lambda to stop/start instances:**
|
||||
```hcl
|
||||
resource "aws_lambda_function" "scheduler" {
|
||||
filename = "scheduler.zip"
|
||||
function_name = "instance-scheduler"
|
||||
role = aws_iam_role.scheduler.arn
|
||||
handler = "scheduler.handler"
|
||||
runtime = "python3.9"
|
||||
|
||||
environment {
|
||||
variables = {
|
||||
TAG_KEY = "Schedule"
|
||||
TAG_VALUE = "business-hours"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# EventBridge rule to stop instances at night
|
||||
resource "aws_cloudwatch_event_rule" "stop_instances" {
|
||||
name = "stop-dev-instances"
|
||||
description = "Stop dev instances at 7 PM"
|
||||
schedule_expression = "cron(0 19 ? * MON-FRI *)" # 7 PM weekdays
|
||||
}
|
||||
|
||||
resource "aws_cloudwatch_event_target" "stop" {
|
||||
rule = aws_cloudwatch_event_rule.stop_instances.name
|
||||
target_id = "stop-instances"
|
||||
arn = aws_lambda_function.scheduler.arn
|
||||
|
||||
input = jsonencode({
|
||||
action = "stop"
|
||||
})
|
||||
}
|
||||
|
||||
# Start instances in the morning
|
||||
resource "aws_cloudwatch_event_rule" "start_instances" {
|
||||
name = "start-dev-instances"
|
||||
description = "Start dev instances at 8 AM"
|
||||
schedule_expression = "cron(0 8 ? * MON-FRI *)" # 8 AM weekdays
|
||||
}
|
||||
```
|
||||
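To complete the wiring, the start rule also needs a target, and EventBridge needs permission to invoke the function; a sketch using the same names as above:

```hcl
resource "aws_cloudwatch_event_target" "start" {
  rule      = aws_cloudwatch_event_rule.start_instances.name
  target_id = "start-instances"
  arn       = aws_lambda_function.scheduler.arn

  input = jsonencode({
    action = "start"
  })
}

resource "aws_lambda_permission" "allow_start_rule" {
  statement_id  = "AllowStartRule"
  action        = "lambda:InvokeFunction"
  function_name = aws_lambda_function.scheduler.function_name
  principal     = "events.amazonaws.com"
  source_arn    = aws_cloudwatch_event_rule.start_instances.arn
}

# A matching aws_lambda_permission is needed for the stop rule as well
```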
|
||||
**Tag instances for scheduling:**
|
||||
```hcl
|
||||
resource "aws_instance" "dev" {
|
||||
ami = data.aws_ami.amazon_linux.id
|
||||
instance_type = "t3.medium"
|
||||
|
||||
tags = {
|
||||
Name = "dev-server"
|
||||
Environment = "dev"
|
||||
Schedule = "business-hours" # Scheduler will stop/start based on this
|
||||
AutoShutdown = "true"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Cleanup Old Resources
|
||||
|
||||
**S3 lifecycle for temporary data:**
|
||||
```hcl
|
||||
resource "aws_s3_bucket_lifecycle_configuration" "temp" {
|
||||
bucket = aws_s3_bucket.temp.id
|
||||
|
||||
rule {
|
||||
id = "cleanup-temp-files"
|
||||
status = "Enabled"
|
||||
|
||||
filter {
|
||||
prefix = "temp/"
|
||||
}
|
||||
|
||||
expiration {
|
||||
days = 7 # Delete after 7 days
|
||||
}
|
||||
|
||||
abort_incomplete_multipart_upload {
|
||||
days_after_initiation = 1
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cost Tagging
|
||||
|
||||
### Comprehensive Tagging Strategy
|
||||
|
||||
**Define tagging locals:**
|
||||
```hcl
|
||||
locals {
|
||||
common_tags = {
|
||||
# Cost allocation tags
|
||||
CostCenter = var.cost_center
|
||||
Project = var.project_name
|
||||
Environment = var.environment
|
||||
Owner = var.team_email
|
||||
|
||||
# Operational tags
|
||||
ManagedBy = "Terraform"
|
||||
TerraformModule = basename(abspath(path.module))
|
||||
|
||||
# Cost optimization tags
|
||||
AutoShutdown = var.environment != "prod" ? "enabled" : "disabled"
|
||||
ReservationCandidate = var.environment == "prod" ? "true" : "false"
|
||||
CostOptimized = "true"
|
||||
}
|
||||
}
|
||||
|
||||
# Apply to all resources
|
||||
resource "aws_instance" "app" {
|
||||
# ... configuration ...
|
||||
|
||||
tags = merge(
|
||||
local.common_tags,
|
||||
{
|
||||
Name = "${var.environment}-app-server"
|
||||
Role = "application"
|
||||
}
|
||||
)
|
||||
}
|
||||
```
|
||||
|
||||
**Enforce tagging with AWS Config:**
|
||||
```hcl
|
||||
resource "aws_config_config_rule" "required_tags" {
|
||||
name = "required-tags"
|
||||
|
||||
source {
|
||||
owner = "AWS"
|
||||
source_identifier = "REQUIRED_TAGS"
|
||||
}
|
||||
|
||||
input_parameters = jsonencode({
|
||||
tag1Key = "CostCenter"
|
||||
tag2Key = "Environment"
|
||||
tag3Key = "Owner"
|
||||
})
|
||||
|
||||
depends_on = [aws_config_configuration_recorder.main]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Monitoring and Alerts
|
||||
|
||||
### Budget Alerts
|
||||
|
||||
**AWS Budgets with Terraform:**
|
||||
```hcl
|
||||
resource "aws_budgets_budget" "monthly" {
|
||||
name = "${var.environment}-monthly-budget"
|
||||
budget_type = "COST"
|
||||
limit_amount = var.monthly_budget
|
||||
limit_unit = "USD"
|
||||
time_unit = "MONTHLY"
|
||||
time_period_start = "2024-01-01_00:00"
|
||||
|
||||
cost_filter {
|
||||
name = "TagKeyValue"
|
||||
values = [
|
||||
"Environment$${var.environment}"
|
||||
]
|
||||
}
|
||||
|
||||
notification {
|
||||
comparison_operator = "GREATER_THAN"
|
||||
threshold = 80
|
||||
threshold_type = "PERCENTAGE"
|
||||
notification_type = "ACTUAL"
|
||||
subscriber_email_addresses = [var.budget_alert_email]
|
||||
}
|
||||
|
||||
notification {
|
||||
comparison_operator = "GREATER_THAN"
|
||||
threshold = 100
|
||||
threshold_type = "PERCENTAGE"
|
||||
notification_type = "ACTUAL"
|
||||
subscriber_email_addresses = [var.budget_alert_email]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Cost Anomaly Detection
|
||||
|
||||
```hcl
|
||||
resource "aws_ce_anomaly_monitor" "service" {
|
||||
name = "${var.environment}-service-monitor"
|
||||
monitor_type = "DIMENSIONAL"
|
||||
monitor_dimension = "SERVICE"
|
||||
}
|
||||
|
||||
resource "aws_ce_anomaly_subscription" "alerts" {
|
||||
name = "${var.environment}-anomaly-alerts"
|
||||
frequency = "DAILY"
|
||||
|
||||
monitor_arn_list = [
|
||||
aws_ce_anomaly_monitor.service.arn
|
||||
]
|
||||
|
||||
subscriber {
|
||||
type = "EMAIL"
|
||||
address = var.cost_alert_email
|
||||
}
|
||||
|
||||
threshold_expression {
|
||||
dimension {
|
||||
key = "ANOMALY_TOTAL_IMPACT_ABSOLUTE"
|
||||
values = ["100"] # Alert on $100+ anomalies
|
||||
match_options = ["GREATER_THAN_OR_EQUAL"]
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Multi-Cloud Considerations
|
||||
|
||||
### Azure Cost Optimization
|
||||
|
||||
**Use Azure Hybrid Benefit:**
|
||||
```hcl
|
||||
resource "azurerm_linux_virtual_machine" "main" {
|
||||
# ... configuration ...
|
||||
|
||||
# Use Azure Hybrid Benefit for licensing savings
|
||||
license_type = "RHEL_BYOS" # or "SLES_BYOS"
|
||||
}
|
||||
```
|
||||
|
||||
**Azure Reserved Instances (outside Terraform):**
|
||||
- Purchase through Azure Portal
|
||||
- Tag VMs with `ReservationGroup` for planning
|
||||
|
||||
### GCP Cost Optimization
|
||||
|
||||
**Committed use discounts are purchased outside Terraform; pair them with preemptible scheduling for non-prod:**
|
||||
```hcl
|
||||
resource "google_compute_instance" "main" {
|
||||
# ... configuration ...
|
||||
|
||||
# Committed use discounts are purchased separately; use preemptible for non-prod
|
||||
scheduling {
|
||||
automatic_restart = true
|
||||
on_host_maintenance = "MIGRATE"
|
||||
preemptible = var.environment != "prod" # Preemptible for non-prod
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**GCP Preemptible VMs:**
|
||||
```hcl
|
||||
resource "google_compute_instance_template" "preemptible" {
|
||||
machine_type = "n1-standard-1"
|
||||
|
||||
scheduling {
|
||||
automatic_restart = false
|
||||
on_host_maintenance = "TERMINATE"
|
||||
preemptible = true # Up to 80% cost reduction
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Cost Optimization Checklist
|
||||
|
||||
### Before Deployment
|
||||
- [ ] Right-size compute resources (start small)
|
||||
- [ ] Use appropriate storage tiers
|
||||
- [ ] Enable auto-scaling instead of over-provisioning
|
||||
- [ ] Implement tagging strategy
|
||||
- [ ] Configure lifecycle policies
|
||||
- [ ] Set up VPC endpoints for AWS services
|
||||
|
||||
### After Deployment
|
||||
- [ ] Monitor actual usage vs. provisioned capacity
|
||||
- [ ] Review cost allocation tags
|
||||
- [ ] Identify reservation opportunities
|
||||
- [ ] Configure budget alerts
|
||||
- [ ] Enable cost anomaly detection
|
||||
- [ ] Schedule non-production resource shutdown
|
||||
|
||||
### Ongoing
|
||||
- [ ] Monthly cost review
|
||||
- [ ] Quarterly right-sizing analysis
|
||||
- [ ] Annual reservation review
|
||||
- [ ] Remove unused resources
|
||||
- [ ] Optimize data transfer patterns
|
||||
- [ ] Update instance families (new generations are often cheaper)
|
||||
|
||||
---
|
||||
|
||||
## Cost Estimation Tools
|
||||
|
||||
### Use `infracost` in CI/CD
|
||||
|
||||
```bash
|
||||
# Install infracost
|
||||
curl -fsSL https://raw.githubusercontent.com/infracost/infracost/master/scripts/install.sh | sh
|
||||
|
||||
# Generate cost estimate
|
||||
infracost breakdown --path .
|
||||
|
||||
# Compare cost changes in a PR against a saved baseline
|
||||
infracost breakdown --path . --format json --out-file infracost-base.json
infracost diff --path . --compare-to infracost-base.json
|
||||
```
|
||||
|
||||
### Terraform Cloud Cost Estimation
|
||||
|
||||
Enable in Terraform Cloud workspace settings for automatic cost estimates on every plan.
|
||||
|
||||
---
|
||||
|
||||
## Additional Resources
|
||||
|
||||
- AWS Cost Optimization: https://aws.amazon.com/pricing/cost-optimization/
|
||||
- Azure Cost Management: https://azure.microsoft.com/en-us/products/cost-management/
|
||||
- GCP Cost Management: https://cloud.google.com/cost-management
|
||||
- Infracost: https://www.infracost.io/
|
||||
- Cloud Cost Optimization Tools: Kubecost, CloudHealth, CloudCheckr
|
||||
635
skills/references/troubleshooting.md
Normal file
635
skills/references/troubleshooting.md
Normal file
@@ -0,0 +1,635 @@
|
||||
# Terraform Troubleshooting Guide
|
||||
|
||||
Common Terraform and Terragrunt issues with solutions.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
1. [State Issues](#state-issues)
|
||||
2. [Provider Issues](#provider-issues)
|
||||
3. [Resource Errors](#resource-errors)
|
||||
4. [Module Issues](#module-issues)
|
||||
5. [Terragrunt Specific](#terragrunt-specific)
|
||||
6. [Performance Issues](#performance-issues)
|
||||
|
||||
---
|
||||
|
||||
## State Issues
|
||||
|
||||
### State Lock Error
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error locking state: Error acquiring the state lock
|
||||
Lock Info:
|
||||
ID: abc123...
|
||||
Path: terraform.tfstate
|
||||
Operation: OperationTypeApply
|
||||
Who: user@hostname
|
||||
Created: 2024-01-15 10:30:00 UTC
|
||||
```
|
||||
|
||||
**Common Causes:**
|
||||
1. Previous operation crashed or was interrupted
|
||||
2. Another user/process is running terraform
|
||||
3. State lock wasn't released properly
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Verify no one else is running terraform:**
|
||||
```bash
|
||||
# Check with team first!
|
||||
```
|
||||
|
||||
2. **Force unlock (use with caution):**
|
||||
```bash
|
||||
terraform force-unlock abc123
|
||||
```
|
||||
|
||||
3. **For DynamoDB backend, check lock table:**
|
||||
```bash
|
||||
aws dynamodb get-item \
|
||||
--table-name terraform-state-lock \
|
||||
--key '{"LockID": {"S": "path/to/state/terraform.tfstate-md5"}}'
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Use proper state locking backend (S3 + DynamoDB)
|
||||
- Implement timeout in CI/CD pipelines
|
||||
- Always let terraform complete or properly cancel
|
||||
|
||||
---
|
||||
|
||||
### State Drift Detected
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Note: Objects have changed outside of Terraform
|
||||
|
||||
Terraform detected the following changes made outside of Terraform
|
||||
since the last "terraform apply":
|
||||
```
|
||||
|
||||
**Common Causes:**
|
||||
1. Manual changes in AWS console
|
||||
2. Another tool modifying resources
|
||||
3. Auto-scaling or auto-remediation
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Review the drift:**
|
||||
```bash
|
||||
terraform plan -detailed-exitcode
|
||||
```
|
||||
|
||||
2. **Options:**
|
||||
- **Import changes:** Update terraform to match reality
|
||||
- **Revert changes:** Apply terraform to restore desired state
|
||||
- **Refresh state:** `terraform apply -refresh-only`
|
||||
|
||||
3. **Import specific changes:**
|
||||
```bash
|
||||
# Update your .tf files, then:
|
||||
terraform plan # Verify it matches
|
||||
terraform apply
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Implement policy to prevent manual changes
|
||||
- Use AWS Config rules to detect drift
|
||||
- Regular `terraform plan` to catch drift early
|
||||
- Consider using Terraform Cloud drift detection
|
||||
|
||||
---
|
||||
|
||||
### State Corruption
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: Failed to load state
|
||||
Error: state snapshot was created by Terraform v1.5.0,
|
||||
which is newer than current v1.3.0
|
||||
```
|
||||
|
||||
**Common Causes:**
|
||||
1. Using different Terraform versions
|
||||
2. State file manually edited
|
||||
3. Incomplete state upload
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Version mismatch:**
|
||||
```bash
|
||||
# Upgrade to matching version
|
||||
tfenv install 1.5.0
|
||||
tfenv use 1.5.0
|
||||
```
|
||||
|
||||
2. **Restore from backup:**
|
||||
```bash
|
||||
# For S3 backend with versioning
|
||||
aws s3api list-object-versions \
|
||||
--bucket terraform-state \
|
||||
--prefix prod/terraform.tfstate
|
||||
|
||||
# Restore specific version
|
||||
aws s3api get-object \
|
||||
--bucket terraform-state \
|
||||
--key prod/terraform.tfstate \
|
||||
--version-id VERSION_ID \
|
||||
terraform.tfstate
|
||||
```
|
||||
|
||||
3. **Rebuild state (last resort):**
|
||||
```bash
|
||||
# Remove corrupted state
|
||||
terraform state rm aws_instance.example
|
||||
|
||||
# Re-import resources
|
||||
terraform import aws_instance.example i-1234567890abcdef0
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Pin Terraform version in `versions.tf`
|
||||
- Enable S3 versioning for state bucket
|
||||
- Never manually edit state files
|
||||
- Use consistent Terraform versions across team
|
||||
|
||||
---
|
||||
|
||||
## Provider Issues
|
||||
|
||||
### Provider Version Conflict
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: Incompatible provider version
|
||||
|
||||
Provider registry.terraform.io/hashicorp/aws v5.0.0 does not have
|
||||
a package available for your current platform
|
||||
```
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Specify version constraints:**
|
||||
```hcl
|
||||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 4.67.0" # Use compatible version
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **Clean provider cache:**
|
||||
```bash
|
||||
rm -rf .terraform
|
||||
terraform init -upgrade
|
||||
```
|
||||
|
||||
3. **Lock file sync:**
|
||||
```bash
|
||||
terraform providers lock \
|
||||
-platform=darwin_amd64 \
|
||||
-platform=darwin_arm64 \
|
||||
-platform=linux_amd64
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Authentication Failures
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: error configuring Terraform AWS Provider:
|
||||
no valid credential sources found
|
||||
```
|
||||
|
||||
**Common Causes:**
|
||||
1. Missing AWS credentials
|
||||
2. Expired credentials
|
||||
3. Incorrect IAM permissions
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Verify credentials:**
|
||||
```bash
|
||||
aws sts get-caller-identity
|
||||
```
|
||||
|
||||
2. **Check credential order:**
|
||||
- Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
|
||||
- Shared credentials file (~/.aws/credentials)
|
||||
- IAM role (for EC2/ECS)
|
||||
|
||||
3. **Configure provider:**
|
||||
```hcl
|
||||
provider "aws" {
|
||||
region = "us-east-1"
|
||||
|
||||
# Option 1: Use profile
|
||||
profile = "production"
|
||||
|
||||
# Option 2: Assume role
|
||||
assume_role {
|
||||
role_arn = "arn:aws:iam::ACCOUNT:role/TerraformRole"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
4. **Check IAM permissions:**
|
||||
```bash
|
||||
# Test specific permission
|
||||
aws ec2 describe-instances --dry-run
|
||||
```
|
||||
|
||||
**Prevention:**
|
||||
- Use IAM roles in CI/CD
|
||||
- Implement OIDC for GitHub Actions
|
||||
- Regular credential rotation
|
||||
- Use AWS SSO for developers
|
||||
|
||||
---
|
||||
|
||||
## Resource Errors
|
||||
|
||||
### Resource Already Exists
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: creating EC2 Instance: EntityAlreadyExists:
|
||||
Resource with id 'i-1234567890abcdef0' already exists
|
||||
```
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Import existing resource** (on Terraform 1.5+ an `import` block can do this declaratively; see the sketch after this list):
|
||||
```bash
|
||||
terraform import aws_instance.web i-1234567890abcdef0
|
||||
```
|
||||
|
||||
2. **Verify configuration matches:**
|
||||
```bash
|
||||
terraform plan # Should show no changes after import
|
||||
```
|
||||
|
||||
3. **If configuration differs, update it:**
|
||||
```hcl
|
||||
resource "aws_instance" "web" {
|
||||
ami = "ami-abc123" # Match existing
|
||||
instance_type = "t3.micro" # Match existing
|
||||
}
|
||||
```
|
||||
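On Terraform 1.5 and later, the same import can be expressed declaratively with a config-driven `import` block; a minimal sketch:

```hcl
import {
  to = aws_instance.web
  id = "i-1234567890abcdef0"
}

resource "aws_instance" "web" {
  ami           = "ami-abc123"
  instance_type = "t3.micro"
}

# terraform plan -generate-config-out=generated.tf can scaffold the resource block
```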
|
||||
---
|
||||
|
||||
### Dependency Errors
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: resource depends on resource "aws_vpc.main" that
|
||||
is not declared in the configuration
|
||||
```
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Add explicit dependency:**
|
||||
```hcl
|
||||
resource "aws_subnet" "private" {
|
||||
vpc_id = aws_vpc.main.id
|
||||
|
||||
depends_on = [
|
||||
aws_internet_gateway.main # Explicit dependency
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
2. **Use data sources for existing resources:**
|
||||
```hcl
|
||||
data "aws_vpc" "existing" {
|
||||
id = "vpc-12345678"
|
||||
}
|
||||
|
||||
resource "aws_subnet" "new" {
|
||||
vpc_id = data.aws_vpc.existing.id
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Timeout Errors
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: timeout while waiting for state to become 'available'
|
||||
(last state: 'pending', timeout: 10m0s)
|
||||
```
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Increase timeout:**
|
||||
```hcl
|
||||
resource "aws_db_instance" "main" {
|
||||
# ... configuration ...
|
||||
|
||||
timeouts {
|
||||
create = "60m"
|
||||
update = "60m"
|
||||
delete = "60m"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
2. **Check resource status manually:**
|
||||
```bash
|
||||
aws rds describe-db-instances --db-instance-identifier mydb
|
||||
```
|
||||
|
||||
3. **Retry the operation:**
|
||||
```bash
|
||||
terraform apply
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Module Issues
|
||||
|
||||
### Module Source Not Found
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: Failed to download module
|
||||
|
||||
Could not download module "vpc" (main.tf:10) source:
|
||||
git::https://github.com/company/terraform-modules.git//vpc
|
||||
```
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Verify source URL:**
|
||||
```hcl
|
||||
module "vpc" {
|
||||
source = "git::https://github.com/company/terraform-modules.git//vpc?ref=v1.0.0"
|
||||
# Add authentication if private repo
|
||||
}
|
||||
```
|
||||
|
||||
2. **For private repos, configure Git auth:**
|
||||
```bash
|
||||
# SSH key
|
||||
git config --global url."git@github.com:".insteadOf "https://github.com/"
|
||||
|
||||
# Or use HTTPS with token
|
||||
git config --global url."https://oauth2:TOKEN@github.com/".insteadOf "https://github.com/"
|
||||
```
|
||||
|
||||
3. **Clear module cache:**
|
||||
```bash
|
||||
rm -rf .terraform/modules
|
||||
terraform init
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Module Version Conflicts
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: Inconsistent dependency lock file
|
||||
|
||||
Module has dependencies locked at version 1.0.0 but
|
||||
root module requires version 2.0.0
|
||||
```
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Update lock file:**
|
||||
```bash
|
||||
terraform init -upgrade
|
||||
```
|
||||
|
||||
2. **Pin module version:**
|
||||
```hcl
|
||||
module "vpc" {
|
||||
source = "terraform-aws-modules/vpc/aws"
|
||||
version = "~> 3.0" # Compatible with 3.x
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Terragrunt Specific
|
||||
|
||||
### Dependency Cycle Detected
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: Dependency cycle detected:
|
||||
module-a depends on module-b
|
||||
module-b depends on module-c
|
||||
module-c depends on module-a
|
||||
```
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Review dependencies in terragrunt.hcl:**
|
||||
```hcl
|
||||
dependency "vpc" {
|
||||
config_path = "../vpc"
|
||||
}
|
||||
|
||||
dependency "database" {
|
||||
config_path = "../database"
|
||||
}
|
||||
|
||||
# Don't create circular references!
|
||||
```
|
||||
|
||||
2. **Refactor to remove cycle:**
|
||||
- Split modules differently
|
||||
- Use data sources instead of dependencies
|
||||
- Pass values through variables
|
||||
|
||||
3. **Use mock outputs during planning:**
|
||||
```hcl
|
||||
dependency "vpc" {
|
||||
config_path = "../vpc"
|
||||
|
||||
mock_outputs = {
|
||||
vpc_id = "vpc-mock"
|
||||
}
|
||||
mock_outputs_allowed_terraform_commands = ["validate", "plan"]
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Hook Failures
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: Hook execution failed
|
||||
Command: pre_apply_hook.sh
|
||||
Exit code: 1
|
||||
```
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Debug the hook:**
|
||||
```bash
|
||||
# Run hook manually
|
||||
bash .terragrunt-cache/.../pre_apply_hook.sh
|
||||
```
|
||||
|
||||
2. **Add error handling to hook:**
|
||||
```bash
|
||||
#!/bin/bash
|
||||
set -e # Exit on error
|
||||
|
||||
# Your hook logic
|
||||
if ! command -v jq &> /dev/null; then
|
||||
echo "jq is required but not installed"
|
||||
exit 1
|
||||
fi
|
||||
```
|
||||
|
||||
3. **Make hook executable:**
|
||||
```bash
|
||||
chmod +x hooks/pre_apply_hook.sh
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Include Path Issues
|
||||
|
||||
**Symptom:**
|
||||
```
|
||||
Error: Cannot include file
|
||||
Path does not exist: ../common.hcl
|
||||
```
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Use correct relative path:**
|
||||
```hcl
|
||||
include "root" {
|
||||
path = find_in_parent_folders()
|
||||
}
|
||||
|
||||
include "common" {
|
||||
path = "${get_terragrunt_dir()}/../common.hcl"
|
||||
}
|
||||
```
|
||||
|
||||
2. **Verify file exists:**
|
||||
```bash
|
||||
ls -la ../common.hcl
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Performance Issues
|
||||
|
||||
### Slow Plans/Applies
|
||||
|
||||
**Symptoms:**
|
||||
- `terraform plan` takes >5 minutes
|
||||
- `terraform apply` very slow
|
||||
- State operations timing out
|
||||
|
||||
**Common Causes:**
|
||||
1. Too many resources in single state
|
||||
2. Slow provider API calls
|
||||
3. Large number of data sources
|
||||
4. Complex interpolations
|
||||
|
||||
**Resolution:**
|
||||
|
||||
1. **Split state files:**
|
||||
```
|
||||
networking/ # Separate state
|
||||
compute/ # Separate state
|
||||
database/ # Separate state
|
||||
```
|
||||
|
||||
2. **Use targeted operations:**
|
||||
```bash
|
||||
terraform plan -target=aws_instance.web
|
||||
terraform apply -target=module.vpc
|
||||
```
|
||||
|
||||
3. **Optimize data sources:**
|
||||
```hcl
|
||||
# Bad - queries every plan
|
||||
data "aws_ami" "ubuntu" {
|
||||
most_recent = true
|
||||
# ... filters
|
||||
}
|
||||
|
||||
# Better - use specific AMI
|
||||
variable "ami_id" {
|
||||
default = "ami-abc123" # Update periodically
|
||||
}
|
||||
```
|
||||
|
||||
4. **Enable parallelism:**
|
||||
```bash
|
||||
terraform apply -parallelism=20 # Default is 10
|
||||
```
|
||||
|
||||
5. **Use caching (Terragrunt):**
|
||||
```hcl
|
||||
remote_state {
|
||||
backend = "s3"
|
||||
config = {
|
||||
skip_credentials_validation = true # Faster
|
||||
skip_metadata_api_check = true
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Quick Diagnostic Steps
|
||||
|
||||
When encountering any Terraform error:
|
||||
|
||||
1. **Read the full error message** - Don't skip details
|
||||
2. **Check recent changes** - What changed since last successful run?
|
||||
3. **Verify versions** - Terraform, providers, modules
|
||||
4. **Check state** - Is it locked? Corrupted?
|
||||
5. **Test authentication** - Can you access resources manually?
|
||||
6. **Review logs** - Use TF_LOG=DEBUG for detailed output
|
||||
7. **Isolate the problem** - Use -target to test specific resources
|
||||
|
||||
### Enable Debug Logging
|
||||
|
||||
```bash
|
||||
export TF_LOG=DEBUG
|
||||
export TF_LOG_PATH=terraform-debug.log
|
||||
terraform plan
|
||||
```
|
||||
|
||||
### Test Configuration
|
||||
|
||||
```bash
|
||||
terraform validate # Syntax check
|
||||
terraform fmt -check # Format check
|
||||
tflint # Linting
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Prevention Checklist
|
||||
|
||||
- [ ] Use remote state with locking
|
||||
- [ ] Pin Terraform and provider versions
|
||||
- [ ] Implement pre-commit hooks
|
||||
- [ ] Run plan before every apply
|
||||
- [ ] Use modules for reusable components
|
||||
- [ ] Enable state versioning/backups
|
||||
- [ ] Document architecture and dependencies
|
||||
- [ ] Implement CI/CD with proper reviews
|
||||
- [ ] Regular terraform plan in CI to detect drift
|
||||
- [ ] Monitor and alert on state changes
|
||||
319
skills/scripts/init_module.py
Executable file
319
skills/scripts/init_module.py
Executable file
@@ -0,0 +1,319 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Terraform Module Scaffolding Tool
|
||||
Creates a new Terraform module with proper structure and template files
|
||||
"""
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
# Template content for module files
|
||||
TEMPLATES = {
|
||||
"main.tf": '''# {module_name} module - Main configuration
|
||||
|
||||
resource "example_resource" "main" {{
|
||||
# TODO: Replace with actual resources
|
||||
name = var.name
|
||||
|
||||
tags = merge(
|
||||
var.tags,
|
||||
{{
|
||||
Module = "{module_name}"
|
||||
}}
|
||||
)
|
||||
}}
|
||||
''',
|
||||
|
||||
"variables.tf": '''# Input variables for {module_name} module
|
||||
|
||||
variable "name" {{
|
||||
type = string
|
||||
description = "Name for the {module_name} resources"
|
||||
}}
|
||||
|
||||
variable "tags" {{
|
||||
type = map(string)
|
||||
description = "Common tags to apply to all resources"
|
||||
default = {{}}
|
||||
}}
|
||||
|
||||
variable "environment" {{
|
||||
type = string
|
||||
description = "Environment name (dev, staging, prod)"
|
||||
|
||||
validation {{
|
||||
condition = contains(["dev", "staging", "prod"], var.environment)
|
||||
error_message = "Environment must be dev, staging, or prod."
|
||||
}}
|
||||
}}
|
||||
''',
|
||||
|
||||
"outputs.tf": '''# Output values for {module_name} module
|
||||
|
||||
output "id" {{
|
||||
description = "ID of the created {module_name} resource"
|
||||
value = example_resource.main.id
|
||||
}}
|
||||
|
||||
output "arn" {{
|
||||
description = "ARN of the created {module_name} resource"
|
||||
value = example_resource.main.arn
|
||||
}}
|
||||
''',
|
||||
|
||||
"versions.tf": '''# Provider and Terraform version constraints
|
||||
|
||||
terraform {{
|
||||
required_version = ">= 1.5.0"
|
||||
|
||||
required_providers {{
|
||||
aws = {{
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 5.0"
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
''',
|
||||
|
||||
"README.md": '''# {module_title} Module
|
||||
|
||||
Terraform module for managing {module_name}.
|
||||
|
||||
## Usage
|
||||
|
||||
```hcl
|
||||
module "{module_name}" {{
|
||||
source = "./modules/{module_name}"
|
||||
|
||||
name = "example"
|
||||
environment = "dev"
|
||||
|
||||
tags = {{
|
||||
Project = "MyProject"
|
||||
Owner = "TeamName"
|
||||
}}
|
||||
}}
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
| Name | Version |
|
||||
|------|---------|
|
||||
| terraform | >= 1.5.0 |
|
||||
| aws | ~> 5.0 |
|
||||
|
||||
## Inputs
|
||||
|
||||
| Name | Description | Type | Default | Required |
|
||||
|------|-------------|------|---------|:--------:|
|
||||
| name | Name for the {module_name} resources | `string` | n/a | yes |
|
||||
| environment | Environment name (dev, staging, prod) | `string` | n/a | yes |
|
||||
| tags | Common tags to apply to all resources | `map(string)` | `{{}}` | no |
|
||||
|
||||
## Outputs
|
||||
|
||||
| Name | Description |
|
||||
|------|-------------|
|
||||
| id | ID of the created {module_name} resource |
|
||||
| arn | ARN of the created {module_name} resource |
|
||||
|
||||
## Examples
|
||||
|
||||
See the `examples/` directory for complete usage examples.
|
||||
|
||||
---
|
||||
|
||||
<!-- BEGIN_TF_DOCS -->
|
||||
<!-- Run: terraform-docs markdown . > README.md -->
|
||||
<!-- END_TF_DOCS -->
|
||||
''',
|
||||
|
||||
"examples/complete/main.tf": '''# Complete example for {module_name} module
|
||||
|
||||
module "{module_name}" {{
|
||||
source = "../.."
|
||||
|
||||
name = "example-{module_name}"
|
||||
environment = "dev"
|
||||
|
||||
tags = {{
|
||||
Project = "Example"
|
||||
Environment = "dev"
|
||||
ManagedBy = "Terraform"
|
||||
}}
|
||||
}}
|
||||
|
||||
output "{module_name}_id" {{
|
||||
description = "ID of the {module_name}"
|
||||
value = module.{module_name}.id
|
||||
}}
|
||||
''',
|
||||
|
||||
"examples/complete/versions.tf": '''terraform {{
|
||||
required_version = ">= 1.5.0"
|
||||
|
||||
required_providers {{
|
||||
aws = {{
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 5.0"
|
||||
}}
|
||||
}}
|
||||
}}
|
||||
|
||||
provider "aws" {{
|
||||
region = "us-east-1"
|
||||
}}
|
||||
''',
|
||||
|
||||
"examples/complete/README.md": '''# Complete Example
|
||||
|
||||
This example demonstrates the complete usage of the {module_name} module with all available options.
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
terraform init
|
||||
terraform plan
|
||||
terraform apply
|
||||
```
|
||||
|
||||
## Cleanup
|
||||
|
||||
```bash
|
||||
terraform destroy
|
||||
```
|
||||
'''
|
||||
}
|
||||
|
||||
|
||||
def create_module_structure(module_name: str, base_path: str = ".") -> Dict:
|
||||
"""
|
||||
Create the module directory structure with template files
|
||||
|
||||
Args:
|
||||
module_name: Name of the module
|
||||
base_path: Base directory where module should be created
|
||||
|
||||
Returns:
|
||||
Dict with status and details
|
||||
"""
|
||||
result = {
|
||||
"success": False,
|
||||
"module_name": module_name,
|
||||
"path": None,
|
||||
"files_created": [],
|
||||
"errors": []
|
||||
}
|
||||
|
||||
try:
|
||||
# Create base module directory
|
||||
module_path = Path(base_path) / module_name
|
||||
if module_path.exists():
|
||||
result["errors"].append(f"Module directory already exists: {module_path}")
|
||||
return result
|
||||
|
||||
module_path.mkdir(parents=True, exist_ok=True)
|
||||
result["path"] = str(module_path.absolute())
|
||||
|
||||
# Create examples directory
|
||||
examples_path = module_path / "examples" / "complete"
|
||||
examples_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Format module name for titles (replace hyphens with spaces, capitalize)
|
||||
module_title = module_name.replace("-", " ").replace("_", " ").title()
|
||||
|
||||
# Create files from templates
|
||||
for filename, template in TEMPLATES.items():
|
||||
file_path = module_path / filename
|
||||
file_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
content = template.format(
|
||||
module_name=module_name,
|
||||
module_title=module_title
|
||||
)
|
||||
|
||||
file_path.write_text(content)
|
||||
result["files_created"].append(str(file_path.relative_to(base_path)))
|
||||
|
||||
result["success"] = True
|
||||
|
||||
except Exception as e:
|
||||
result["errors"].append(f"Error creating module: {str(e)}")
|
||||
|
||||
return result


def main():
    parser = argparse.ArgumentParser(
        description="Initialize a new Terraform module with standard structure",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Create module in current directory
  %(prog)s my-vpc

  # Create module in specific path
  %(prog)s my-vpc --path ./modules

  # Output as JSON
  %(prog)s my-vpc --json
"""
    )

    parser.add_argument(
        "module_name",
        help="Name of the module to create (use lowercase with hyphens)"
    )

    parser.add_argument(
        "--path",
        default=".",
        help="Base directory where module should be created (default: current directory)"
    )

    parser.add_argument(
        "--json",
        action="store_true",
        help="Output result as JSON"
    )

    args = parser.parse_args()

    # Validate module name
    if not args.module_name.replace("-", "").replace("_", "").isalnum():
        print("Error: Module name should only contain lowercase letters, numbers, hyphens, and underscores",
              file=sys.stderr)
        sys.exit(1)

    # Create module
    result = create_module_structure(args.module_name, args.path)

    if args.json:
        print(json.dumps(result, indent=2))
    else:
        if result["success"]:
            print(f"✅ Successfully created module: {args.module_name}")
            print(f"📁 Location: {result['path']}")
            print(f"\n📝 Created {len(result['files_created'])} files:")
            for file in result["files_created"]:
                print(f" - {file}")
            print(f"\n🚀 Next steps:")
            print(f" 1. cd {result['path']}")
            print(f" 2. Update main.tf with your resources")
            print(f" 3. Run: terraform init")
            print(f" 4. Run: terraform validate")
            print(f" 5. Generate docs: terraform-docs markdown . > README.md")
        else:
            print(f"❌ Failed to create module: {args.module_name}", file=sys.stderr)
            for error in result["errors"]:
                print(f" Error: {error}", file=sys.stderr)
            sys.exit(1)

    sys.exit(0 if result["success"] else 1)


if __name__ == "__main__":
    main()
232
skills/scripts/inspect_state.py
Executable file
@@ -0,0 +1,232 @@
#!/usr/bin/env python3
"""
Terraform State Inspector & Drift Detector
Analyzes Terraform state and detects configuration drift
"""
import json
import subprocess
import sys
from typing import Dict, List, Any
from datetime import datetime

def run_command(cmd: List[str], cwd: str = ".") -> Dict[str, Any]:
    """Run a command and return the result"""
    try:
        result = subprocess.run(
            cmd,
            cwd=cwd,
            capture_output=True,
            text=True,
            check=True
        )
        # Include the return code on success as well; detect_drift() reads it directly.
        return {"success": True, "stdout": result.stdout, "stderr": result.stderr, "returncode": result.returncode}
    except subprocess.CalledProcessError as e:
        return {"success": False, "stdout": e.stdout, "stderr": e.stderr, "returncode": e.returncode}

def check_state_health(working_dir: str) -> Dict[str, Any]:
    """Check the health of the Terraform state"""
    print("🔍 Checking Terraform state health...\n")

    # Check if state exists
    result = run_command(["terraform", "state", "list"], working_dir)
    if not result["success"]:
        return {
            "healthy": False,
            "error": "Unable to read state. Is Terraform initialized?",
            "details": result["stderr"]
        }

    resources = result["stdout"].strip().split("\n") if result["stdout"].strip() else []

    return {
        "healthy": True,
        "resource_count": len(resources),
        "resources": resources
    }

def detect_drift(working_dir: str) -> Dict[str, Any]:
    """Run terraform plan to detect drift"""
    print("🔄 Detecting configuration drift...\n")

    result = run_command(["terraform", "plan", "-detailed-exitcode", "-no-color"], working_dir)

    # Exit codes: 0 = no changes, 1 = error, 2 = changes detected
    if result["returncode"] == 0:
        return {
            "drift_detected": False,
            "message": "No drift detected - infrastructure matches configuration"
        }
    elif result["returncode"] == 2:
        return {
            "drift_detected": True,
            "message": "Drift detected - infrastructure differs from configuration",
            "plan_output": result["stdout"]
        }
    else:
        return {
            "error": True,
            "message": "Error running terraform plan",
            "details": result["stderr"]
        }

def analyze_state_resources(working_dir: str) -> Dict[str, Any]:
    """Analyze resources in the state file"""
    print("📊 Analyzing state resources...\n")

    result = run_command(["terraform", "show", "-json"], working_dir)
    if not result["success"]:
        return {"error": "Unable to read state JSON", "details": result["stderr"]}

    try:
        state_data = json.loads(result["stdout"])
    except json.JSONDecodeError:
        return {"error": "Unable to parse state JSON"}

    resources = state_data.get("values", {}).get("root_module", {}).get("resources", [])

    # Categorize resources by type
    resource_types = {}
    for resource in resources:
        res_type = resource.get("type", "unknown")
        resource_types[res_type] = resource_types.get(res_type, 0) + 1

    # Identify potentially problematic resources
    issues = []
    for resource in resources:
        # Check for resources with tainted status
        if resource.get("tainted", False):
            issues.append(f"⚠️ Resource {resource['address']} is tainted")

    return {
        "total_resources": len(resources),
        "resource_types": resource_types,
        "issues": issues
    }
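
# Sketch of the `terraform show -json` structure consumed above (abbreviated; nested module
# resources live under "child_modules" and are not walked here):
#
#   {"values": {"root_module": {"resources": [
#       {"address": "aws_s3_bucket.example", "type": "aws_s3_bucket", "values": {...}}
#   ]}}}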

def check_provider_versions(working_dir: str) -> Dict[str, Any]:
    """Check provider versions and constraints"""
    print("📦 Checking provider versions...\n")

    result = run_command(["terraform", "version", "-json"], working_dir)
    if not result["success"]:
        return {"error": "Unable to get version info"}

    try:
        version_data = json.loads(result["stdout"])
        return {
            "terraform_version": version_data.get("terraform_version"),
            "provider_versions": version_data.get("provider_selections", {})
        }
    except json.JSONDecodeError:
        return {"error": "Unable to parse version JSON"}

def check_backend_config(working_dir: str) -> Dict[str, Any]:
    """Check backend configuration"""
    print("🗄️ Checking backend configuration...\n")

    result = run_command(["terraform", "show", "-json"], working_dir)
    if not result["success"]:
        return {"error": "Unable to read backend config"}

    try:
        state_data = json.loads(result["stdout"])
        backend = state_data.get("values", {}).get("backend", {})

        return {
            "backend_type": backend.get("type", "local"),
            "config": backend.get("config", {})
        }
    except json.JSONDecodeError:
        return {"error": "Unable to parse backend config"}

def main():
    if len(sys.argv) < 2:
        print("Usage: inspect_state.py <terraform-directory> [--check-drift]")
        sys.exit(1)

    working_dir = sys.argv[1]
    check_drift_flag = "--check-drift" in sys.argv

    print("=" * 70)
    print("🏗️ TERRAFORM STATE INSPECTOR")
    print("=" * 70)
    print(f"Working Directory: {working_dir}")
    print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

    # Check state health
    state_health = check_state_health(working_dir)
    if not state_health.get("healthy"):
        print(f"❌ State Health: UNHEALTHY")
        print(f" Error: {state_health.get('error')}")
        print(f" Details: {state_health.get('details')}")
        sys.exit(1)

    print(f"✅ State Health: HEALTHY")
    print(f" Total Resources: {state_health['resource_count']}\n")

    # Check provider versions
    versions = check_provider_versions(working_dir)
    if "error" not in versions:
        print(f"📦 Terraform Version: {versions['terraform_version']}")
        print(f" Providers:")
        for provider, version in versions.get('provider_versions', {}).items():
            print(f" • {provider}: {version}")
        print()

    # Check backend
    backend = check_backend_config(working_dir)
    if "error" not in backend:
        print(f"🗄️ Backend Type: {backend['backend_type']}")
        if backend['backend_type'] != 'local':
            print(f" Configuration: {backend.get('config', {})}")
        print()

    # Analyze resources
    analysis = analyze_state_resources(working_dir)
    if "error" not in analysis:
        print(f"📊 Resource Analysis:")
        print(f" Total Resources: {analysis['total_resources']}")
        print(f" Resource Types:")
        for res_type, count in sorted(analysis['resource_types'].items()):
            print(f" • {res_type}: {count}")

        if analysis['issues']:
            print(f"\n ⚠️ Issues Found:")
            for issue in analysis['issues']:
                print(f" {issue}")
        else:
            print(f"\n ✅ No issues detected")
        print()

    # Check for drift if requested
    if check_drift_flag:
        drift = detect_drift(working_dir)
        if drift.get("error"):
            print(f"❌ Drift Detection Failed:")
            print(f" {drift['message']}")
            print(f" {drift.get('details', '')}")
        elif drift.get("drift_detected"):
            print(f"⚠️ DRIFT DETECTED")
            print(f" {drift['message']}")
            print(f"\n Run 'terraform plan' for detailed differences")
        else:
            print(f"✅ No Drift Detected")
            print(f" {drift['message']}")
        print()

    print("=" * 70)
    print("✅ State inspection complete!")

    # Recommendations
    print("\n💡 Recommendations:")
    if state_health['resource_count'] == 0:
        print(" • No resources in state - consider running 'terraform apply'")
    if backend.get('backend_type') == 'local':
        print(" • Using local backend - consider remote backend for team collaboration")
    if not check_drift_flag:
        print(" • Run with --check-drift flag to detect configuration drift")

    print("\n" + "=" * 70)

if __name__ == "__main__":
    main()
227
skills/scripts/validate_module.py
Executable file
@@ -0,0 +1,227 @@
#!/usr/bin/env python3
"""
Terraform Module Validator
Validates Terraform modules against best practices
"""
import os
import sys
import re
from pathlib import Path
from typing import Dict, List, Any

class ModuleValidator:
    def __init__(self, module_path: str):
        self.module_path = Path(module_path)
        self.issues = []
        self.warnings = []
        self.suggestions = []

    def validate(self) -> Dict[str, Any]:
        """Run all validation checks"""
        print(f"🔍 Validating module: {self.module_path}\n")

        self.check_required_files()
        self.check_variables_file()
        self.check_outputs_file()
        self.check_readme()
        self.check_versions_file()
        self.check_examples()
        self.check_naming_conventions()

        return {
            "valid": len(self.issues) == 0,
            "issues": self.issues,
            "warnings": self.warnings,
            "suggestions": self.suggestions
        }

    def check_required_files(self):
        """Check for required module files"""
        required_files = ['main.tf', 'variables.tf', 'outputs.tf']

        for file in required_files:
            if not (self.module_path / file).exists():
                self.issues.append(f"Missing required file: {file}")

    def check_variables_file(self):
        """Check variables.tf for best practices"""
        vars_file = self.module_path / 'variables.tf'
        if not vars_file.exists():
            return

        content = vars_file.read_text()

        # Check for variable descriptions
        variable_blocks = re.findall(r'variable\s+"([^"]+)"\s*{([^}]+)}', content, re.DOTALL)

        for var_name, var_content in variable_blocks:
            if 'description' not in var_content:
                self.warnings.append(f"Variable '{var_name}' missing description")

            if 'type' not in var_content:
                self.warnings.append(f"Variable '{var_name}' missing type constraint")

            # Check for sensitive variables without sensitive flag
            if any(keyword in var_name.lower() for keyword in ['password', 'secret', 'key', 'token']):
                if 'sensitive' not in var_content or 'sensitive = true' not in var_content:
                    self.warnings.append(f"Variable '{var_name}' appears sensitive but not marked as sensitive")

    def check_outputs_file(self):
        """Check outputs.tf for best practices"""
        outputs_file = self.module_path / 'outputs.tf'
        if not outputs_file.exists():
            return

        content = outputs_file.read_text()

        # Check for output descriptions
        output_blocks = re.findall(r'output\s+"([^"]+)"\s*{([^}]+)}', content, re.DOTALL)

        if len(output_blocks) == 0:
            self.suggestions.append("Consider adding outputs to expose useful resource attributes")

        for output_name, output_content in output_blocks:
            if 'description' not in output_content:
                self.warnings.append(f"Output '{output_name}' missing description")

            # Check for sensitive outputs
            if any(keyword in output_name.lower() for keyword in ['password', 'secret', 'key', 'token']):
                if 'sensitive' not in output_content or 'sensitive = true' not in output_content:
                    self.warnings.append(f"Output '{output_name}' appears sensitive but not marked as sensitive")

    def check_readme(self):
        """Check for README documentation"""
        readme_files = ['README.md', 'readme.md', 'README.txt']
        has_readme = any((self.module_path / f).exists() for f in readme_files)

        if not has_readme:
            self.issues.append("Missing README.md - modules should be documented")
            return

        # Find which readme exists
        readme_path = None
        for f in readme_files:
            if (self.module_path / f).exists():
                readme_path = self.module_path / f
                break

        if readme_path:
            content = readme_path.read_text()

            # Check for key sections
            required_sections = ['Usage', 'Inputs', 'Outputs']
            for section in required_sections:
                if section.lower() not in content.lower():
                    self.suggestions.append(f"README missing '{section}' section")

            # Check for examples
            if 'example' not in content.lower():
                self.suggestions.append("README should include usage examples")

    def check_versions_file(self):
        """Check for versions.tf or terraform block"""
        versions_file = self.module_path / 'versions.tf'

        # Check versions.tf
        if versions_file.exists():
            content = versions_file.read_text()
            if 'required_version' not in content:
                self.warnings.append("versions.tf should specify required_version")
            if 'required_providers' not in content:
                self.warnings.append("versions.tf should specify required_providers with versions")
        else:
            # Check main.tf for terraform block
            main_file = self.module_path / 'main.tf'
            if main_file.exists():
                content = main_file.read_text()
                if 'terraform' not in content or 'required_version' not in content:
                    self.warnings.append("Module should specify Terraform version requirements")
            else:
                self.warnings.append("Consider creating versions.tf to specify version constraints")

    def check_examples(self):
        """Check for example usage"""
        examples_dir = self.module_path / 'examples'

        if not examples_dir.exists():
            self.suggestions.append("Consider adding 'examples/' directory with usage examples")
        elif examples_dir.is_dir():
            example_subdirs = [d for d in examples_dir.iterdir() if d.is_dir()]
            if len(example_subdirs) == 0:
                self.suggestions.append("examples/ directory is empty - add example configurations")

    def check_naming_conventions(self):
        """Check file and resource naming conventions"""
        tf_files = list(self.module_path.glob('*.tf'))

        for tf_file in tf_files:
            # Check for snake_case file names
            if not re.match(r'^[a-z0-9_]+\.tf$', tf_file.name):
                self.warnings.append(f"File '{tf_file.name}' should use snake_case naming")

            # Check file content for naming
            content = tf_file.read_text()

            # Check resource names use snake_case
            resources = re.findall(r'resource\s+"[^"]+"\s+"([^"]+)"', content)
            for resource_name in resources:
                if not re.match(r'^[a-z0-9_]+$', resource_name):
                    self.warnings.append(f"Resource name '{resource_name}' should use snake_case")

            # Check for hard-coded values that should be variables
            if re.search(r'= "us-east-1"', content):
                self.suggestions.append("Consider making region configurable via variable")

def main():
    if len(sys.argv) < 2:
        print("Usage: validate_module.py <module-directory>")
        sys.exit(1)

    module_path = sys.argv[1]

    if not os.path.isdir(module_path):
        print(f"❌ Error: {module_path} is not a directory")
        sys.exit(1)

    print("=" * 70)
    print("🏗️ TERRAFORM MODULE VALIDATOR")
    print("=" * 70)
    print()

    validator = ModuleValidator(module_path)
    result = validator.validate()

    # Print results
    if result['issues']:
        print("❌ ISSUES (Must Fix):")
        for issue in result['issues']:
            print(f" • {issue}")
        print()

    if result['warnings']:
        print("⚠️ WARNINGS (Should Fix):")
        for warning in result['warnings']:
            print(f" • {warning}")
        print()

    if result['suggestions']:
        print("💡 SUGGESTIONS (Consider):")
        for suggestion in result['suggestions']:
            print(f" • {suggestion}")
        print()

    # Summary
    print("=" * 70)
    if result['valid']:
        print("✅ Module validation PASSED!")
        if not result['warnings'] and not result['suggestions']:
            print(" No issues, warnings, or suggestions - excellent work!")
    else:
        print("❌ Module validation FAILED!")
        print(f" {len(result['issues'])} issues must be fixed before using this module")
    print("=" * 70)

    sys.exit(0 if result['valid'] else 1)

if __name__ == "__main__":
    main()