Initial commit
This commit is contained in:
12
.claude-plugin/plugin.json
Normal file
12
.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"name": "serverless-eda",
|
||||
"description": "AWS serverless and event-driven architecture best practices based on Well-Architected Framework with MCP servers for SAM, Lambda, Step Functions, and messaging",
|
||||
"version": "0.0.0-2025.11.28",
|
||||
"author": {
|
||||
"name": "Kane Zhu",
|
||||
"email": "me@kane.mx"
|
||||
},
|
||||
"skills": [
|
||||
"./skills/aws-serverless-eda"
|
||||
]
|
||||
}
|
||||
3
README.md
Normal file
3
README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# serverless-eda
|
||||
|
||||
AWS serverless and event-driven architecture best practices based on Well-Architected Framework with MCP servers for SAM, Lambda, Step Functions, and messaging
|
||||
68
plugin.lock.json
Normal file
68
plugin.lock.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"$schema": "internal://schemas/plugin.lock.v1.json",
|
||||
"pluginId": "gh:zxkane/aws-skills:serverless-eda",
|
||||
"normalized": {
|
||||
"repo": null,
|
||||
"ref": "refs/tags/v20251128.0",
|
||||
"commit": "ff0ed7dd84ee38c5963e2be6ddfd74065c81521b",
|
||||
"treeHash": "9375638efaf61fa4e3870fbb415c277069d5ddac44feeb5c77f78f30ed9c22f7",
|
||||
"generatedAt": "2025-11-28T10:29:14.139105Z",
|
||||
"toolVersion": "publish_plugins.py@0.2.0"
|
||||
},
|
||||
"origin": {
|
||||
"remote": "git@github.com:zhongweili/42plugin-data.git",
|
||||
"branch": "master",
|
||||
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
|
||||
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
|
||||
},
|
||||
"manifest": {
|
||||
"name": "serverless-eda",
|
||||
"description": "AWS serverless and event-driven architecture best practices based on Well-Architected Framework with MCP servers for SAM, Lambda, Step Functions, and messaging"
|
||||
},
|
||||
"content": {
|
||||
"files": [
|
||||
{
|
||||
"path": "README.md",
|
||||
"sha256": "5a7d0d76f54cbae89f1e7147bd50b25ba333fb16f79afb0442f11d7673b1cf2a"
|
||||
},
|
||||
{
|
||||
"path": ".claude-plugin/plugin.json",
|
||||
"sha256": "18793e20fa1c2d078881402c0942a85186cd8e8c0604ce11318466a35e2d5292"
|
||||
},
|
||||
{
|
||||
"path": "skills/aws-serverless-eda/SKILL.md",
|
||||
"sha256": "f008433c085a85dc0da9063a7e69545f91eebc1253a9018fe0950473b024a3cd"
|
||||
},
|
||||
{
|
||||
"path": "skills/aws-serverless-eda/references/observability-best-practices.md",
|
||||
"sha256": "531977c659a774fec3dabd4c369789d5979bceb83cd4656bfe818929569c9e7a"
|
||||
},
|
||||
{
|
||||
"path": "skills/aws-serverless-eda/references/deployment-best-practices.md",
|
||||
"sha256": "f94b18c62732f950a29b3e4cd11134ba397c2cfce4b3bb777ee729c17c9f1268"
|
||||
},
|
||||
{
|
||||
"path": "skills/aws-serverless-eda/references/security-best-practices.md",
|
||||
"sha256": "511f6e0921f852947db893ca64c093e15f728f6b4e35c40423a50f1398278261"
|
||||
},
|
||||
{
|
||||
"path": "skills/aws-serverless-eda/references/serverless-patterns.md",
|
||||
"sha256": "8b7408d9c98f8224290093acb831468b9a01bd8d17436e301d4383c18c556f2c"
|
||||
},
|
||||
{
|
||||
"path": "skills/aws-serverless-eda/references/eda-patterns.md",
|
||||
"sha256": "c3518448e773c0e93d1a1f518d3d8d67475995bf3211ae1cf57cac46447ea6e1"
|
||||
},
|
||||
{
|
||||
"path": "skills/aws-serverless-eda/references/performance-optimization.md",
|
||||
"sha256": "5086493fbeb4c97c1bc891484d6deecbfbc6d02a9422f6854dd8c7274320bfa6"
|
||||
}
|
||||
],
|
||||
"dirSha256": "9375638efaf61fa4e3870fbb415c277069d5ddac44feeb5c77f78f30ed9c22f7"
|
||||
},
|
||||
"security": {
|
||||
"scannedAt": null,
|
||||
"scannerVersion": null,
|
||||
"flags": []
|
||||
}
|
||||
}
|
||||
747
skills/aws-serverless-eda/SKILL.md
Normal file
747
skills/aws-serverless-eda/SKILL.md
Normal file
@@ -0,0 +1,747 @@
|
||||
---
|
||||
name: aws-serverless-eda
|
||||
description: AWS serverless and event-driven architecture expert based on Well-Architected Framework. Use when building serverless APIs, Lambda functions, REST APIs, microservices, or async workflows. Covers Lambda with TypeScript/Python, API Gateway (REST/HTTP), DynamoDB, Step Functions, EventBridge, SQS, SNS, and serverless patterns. Essential when user mentions serverless, Lambda, API Gateway, event-driven, async processing, queues, pub/sub, or wants to build scalable serverless applications with AWS best practices.
|
||||
---
|
||||
|
||||
# AWS Serverless & Event-Driven Architecture
|
||||
|
||||
This skill provides comprehensive guidance for building serverless applications and event-driven architectures on AWS based on Well-Architected Framework principles.
|
||||
|
||||
## Integrated MCP Servers
|
||||
|
||||
This skill includes 5 MCP servers for serverless development:
|
||||
|
||||
### AWS Documentation MCP Server
|
||||
**When to use**: Always verify AWS service information before implementation
|
||||
- Search AWS documentation for latest features and best practices
|
||||
- Check regional availability of AWS services
|
||||
- Verify service limits and quotas
|
||||
- Confirm API specifications and parameters
|
||||
- Access up-to-date AWS service information
|
||||
|
||||
### AWS Serverless MCP Server
|
||||
**Purpose**: Complete serverless application lifecycle with SAM CLI
|
||||
- Initialize new serverless applications
|
||||
- Deploy serverless applications
|
||||
- Test Lambda functions locally
|
||||
- Generate SAM templates
|
||||
- Manage serverless application lifecycle
|
||||
|
||||
### AWS Lambda Tool MCP Server
|
||||
**Purpose**: Execute Lambda functions as tools
|
||||
- Invoke Lambda functions directly
|
||||
- Test Lambda integrations
|
||||
- Execute workflows requiring private resource access
|
||||
- Run Lambda-based automation
|
||||
|
||||
### AWS Step Functions MCP Server
|
||||
**Purpose**: Execute complex workflows and orchestration
|
||||
- Create and manage state machines
|
||||
- Execute workflow orchestrations
|
||||
- Handle distributed transactions
|
||||
- Implement saga patterns
|
||||
- Coordinate microservices
|
||||
|
||||
### Amazon SNS/SQS MCP Server
|
||||
**Purpose**: Event-driven messaging and queue management
|
||||
- Publish messages to SNS topics
|
||||
- Send/receive messages from SQS queues
|
||||
- Manage event-driven communication
|
||||
- Implement pub/sub patterns
|
||||
- Handle asynchronous processing
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
Use this skill when:
|
||||
- Building serverless applications with Lambda
|
||||
- Designing event-driven architectures
|
||||
- Implementing microservices patterns
|
||||
- Creating asynchronous processing workflows
|
||||
- Orchestrating multi-service transactions
|
||||
- Building real-time data processing pipelines
|
||||
- Implementing saga patterns for distributed transactions
|
||||
- Designing for scale and resilience
|
||||
|
||||
## AWS Well-Architected Serverless Design Principles
|
||||
|
||||
### 1. Speedy, Simple, Singular
|
||||
|
||||
**Functions should be concise and single-purpose**
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Single purpose, focused function
|
||||
export const processOrder = async (event: OrderEvent) => {
|
||||
// Only handles order processing
|
||||
const order = await validateOrder(event);
|
||||
await saveOrder(order);
|
||||
await publishOrderCreatedEvent(order);
|
||||
return { statusCode: 200, body: JSON.stringify({ orderId: order.id }) };
|
||||
};
|
||||
|
||||
// ❌ BAD - Function does too much
|
||||
export const handleEverything = async (event: any) => {
|
||||
// Handles orders, inventory, payments, shipping...
|
||||
// Too many responsibilities
|
||||
};
|
||||
```
|
||||
|
||||
**Keep functions environmentally efficient and cost-aware**:
|
||||
- Minimize cold start times
|
||||
- Optimize memory allocation
|
||||
- Use provisioned concurrency only when needed
|
||||
- Leverage connection reuse
|
||||
|
||||
### 2. Think Concurrent Requests, Not Total Requests
|
||||
|
||||
**Design for concurrency, not volume**
|
||||
|
||||
Lambda scales horizontally - design considerations should focus on:
|
||||
- Concurrent execution limits
|
||||
- Downstream service throttling
|
||||
- Shared resource contention
|
||||
- Connection pool sizing
|
||||
|
||||
```typescript
|
||||
// Consider concurrent Lambda executions accessing DynamoDB
|
||||
const table = new dynamodb.Table(this, 'Table', {
|
||||
billingMode: dynamodb.BillingMode.PAY_PER_REQUEST, // Auto-scales with load
|
||||
});
|
||||
|
||||
// Or with provisioned capacity + auto-scaling
|
||||
const table = new dynamodb.Table(this, 'Table', {
|
||||
billingMode: dynamodb.BillingMode.PROVISIONED,
|
||||
readCapacity: 5,
|
||||
writeCapacity: 5,
|
||||
});
|
||||
|
||||
// Enable auto-scaling for concurrent load
|
||||
table.autoScaleReadCapacity({ minCapacity: 5, maxCapacity: 100 });
|
||||
table.autoScaleWriteCapacity({ minCapacity: 5, maxCapacity: 100 });
|
||||
```
|
||||
|
||||
### 3. Share Nothing
|
||||
|
||||
**Function runtime environments are short-lived**
|
||||
|
||||
```typescript
|
||||
// ❌ BAD - Relying on local file system
|
||||
export const handler = async (event: any) => {
|
||||
fs.writeFileSync('/tmp/data.json', JSON.stringify(data)); // Lost after execution
|
||||
};
|
||||
|
||||
// ✅ GOOD - Use persistent storage
|
||||
export const handler = async (event: any) => {
|
||||
await s3.putObject({
|
||||
Bucket: process.env.BUCKET_NAME,
|
||||
Key: 'data.json',
|
||||
Body: JSON.stringify(data),
|
||||
});
|
||||
};
|
||||
```
|
||||
|
||||
**State management**:
|
||||
- Use DynamoDB for persistent state
|
||||
- Use Step Functions for workflow state
|
||||
- Use ElastiCache for session state
|
||||
- Use S3 for file storage
|
||||
|
||||
### 4. Assume No Hardware Affinity
|
||||
|
||||
**Applications must be hardware-agnostic**
|
||||
|
||||
Infrastructure can change without notice:
|
||||
- Lambda functions can run on different hardware
|
||||
- Container instances can be replaced
|
||||
- No assumption about underlying infrastructure
|
||||
|
||||
**Design for portability**:
|
||||
- Use environment variables for configuration
|
||||
- Avoid hardware-specific optimizations
|
||||
- Test across different environments
|
||||
|
||||
### 5. Orchestrate with State Machines, Not Function Chaining
|
||||
|
||||
**Use Step Functions for orchestration**
|
||||
|
||||
```typescript
|
||||
// ❌ BAD - Lambda function chaining
|
||||
export const handler1 = async (event: any) => {
|
||||
const result = await processStep1(event);
|
||||
await lambda.invoke({
|
||||
FunctionName: 'handler2',
|
||||
Payload: JSON.stringify(result),
|
||||
});
|
||||
};
|
||||
|
||||
// ✅ GOOD - Step Functions orchestration
|
||||
const stateMachine = new stepfunctions.StateMachine(this, 'OrderWorkflow', {
|
||||
definition: stepfunctions.Chain
|
||||
.start(validateOrder)
|
||||
.next(processPayment)
|
||||
.next(shipOrder)
|
||||
.next(sendConfirmation),
|
||||
});
|
||||
```
|
||||
|
||||
**Benefits of Step Functions**:
|
||||
- Visual workflow representation
|
||||
- Built-in error handling and retries
|
||||
- Execution history and debugging
|
||||
- Parallel and sequential execution
|
||||
- Service integrations without code
|
||||
|
||||
### 6. Use Events to Trigger Transactions
|
||||
|
||||
**Event-driven over synchronous request/response**
|
||||
|
||||
```typescript
|
||||
// Pattern: Event-driven processing
|
||||
const bucket = new s3.Bucket(this, 'DataBucket');
|
||||
|
||||
bucket.addEventNotification(
|
||||
s3.EventType.OBJECT_CREATED,
|
||||
new s3n.LambdaDestination(processFunction),
|
||||
{ prefix: 'uploads/' }
|
||||
);
|
||||
|
||||
// Pattern: EventBridge integration
|
||||
const rule = new events.Rule(this, 'OrderRule', {
|
||||
eventPattern: {
|
||||
source: ['orders'],
|
||||
detailType: ['OrderPlaced'],
|
||||
},
|
||||
});
|
||||
|
||||
rule.addTarget(new targets.LambdaFunction(processOrderFunction));
|
||||
```
|
||||
|
||||
**Benefits**:
|
||||
- Loose coupling between services
|
||||
- Asynchronous processing
|
||||
- Better fault tolerance
|
||||
- Independent scaling
|
||||
|
||||
### 7. Design for Failures and Duplicates
|
||||
|
||||
**Operations must be idempotent**
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Idempotent operation
|
||||
export const handler = async (event: SQSEvent) => {
|
||||
for (const record of event.Records) {
|
||||
const orderId = JSON.parse(record.body).orderId;
|
||||
|
||||
// Check if already processed (idempotency)
|
||||
const existing = await dynamodb.getItem({
|
||||
TableName: process.env.TABLE_NAME,
|
||||
Key: { orderId },
|
||||
});
|
||||
|
||||
if (existing.Item) {
|
||||
console.log('Order already processed:', orderId);
|
||||
continue; // Skip duplicate
|
||||
}
|
||||
|
||||
// Process order
|
||||
await processOrder(orderId);
|
||||
|
||||
// Mark as processed
|
||||
await dynamodb.putItem({
|
||||
TableName: process.env.TABLE_NAME,
|
||||
Item: { orderId, processedAt: Date.now() },
|
||||
});
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
**Implement retry logic with exponential backoff**:
|
||||
```typescript
|
||||
async function withRetry<T>(fn: () => Promise<T>, maxRetries = 3): Promise<T> {
|
||||
for (let i = 0; i < maxRetries; i++) {
|
||||
try {
|
||||
return await fn();
|
||||
} catch (error) {
|
||||
if (i === maxRetries - 1) throw error;
|
||||
await new Promise(resolve => setTimeout(resolve, Math.pow(2, i) * 1000));
|
||||
}
|
||||
}
|
||||
throw new Error('Max retries exceeded');
|
||||
}
|
||||
```
|
||||
|
||||
## Event-Driven Architecture Patterns
|
||||
|
||||
### Pattern 1: Event Router (EventBridge)
|
||||
|
||||
Use EventBridge for event routing and filtering:
|
||||
|
||||
```typescript
|
||||
// Create custom event bus
|
||||
const eventBus = new events.EventBus(this, 'AppEventBus', {
|
||||
eventBusName: 'application-events',
|
||||
});
|
||||
|
||||
// Define event schema
|
||||
const schema = new events.Schema(this, 'OrderSchema', {
|
||||
schemaName: 'OrderPlaced',
|
||||
definition: events.SchemaDefinition.fromInline({
|
||||
openapi: '3.0.0',
|
||||
info: { version: '1.0.0', title: 'Order Events' },
|
||||
paths: {},
|
||||
components: {
|
||||
schemas: {
|
||||
OrderPlaced: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
orderId: { type: 'string' },
|
||||
customerId: { type: 'string' },
|
||||
amount: { type: 'number' },
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
});
|
||||
|
||||
// Create rules for different consumers
|
||||
new events.Rule(this, 'ProcessOrderRule', {
|
||||
eventBus,
|
||||
eventPattern: {
|
||||
source: ['orders'],
|
||||
detailType: ['OrderPlaced'],
|
||||
},
|
||||
targets: [new targets.LambdaFunction(processOrderFunction)],
|
||||
});
|
||||
|
||||
new events.Rule(this, 'NotifyCustomerRule', {
|
||||
eventBus,
|
||||
eventPattern: {
|
||||
source: ['orders'],
|
||||
detailType: ['OrderPlaced'],
|
||||
},
|
||||
targets: [new targets.LambdaFunction(notifyCustomerFunction)],
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern 2: Queue-Based Processing (SQS)
|
||||
|
||||
Use SQS for reliable asynchronous processing:
|
||||
|
||||
```typescript
|
||||
// Standard queue for at-least-once delivery
|
||||
const queue = new sqs.Queue(this, 'ProcessingQueue', {
|
||||
visibilityTimeout: Duration.seconds(300),
|
||||
retentionPeriod: Duration.days(14),
|
||||
deadLetterQueue: {
|
||||
queue: dlq,
|
||||
maxReceiveCount: 3,
|
||||
},
|
||||
});
|
||||
|
||||
// FIFO queue for ordered processing
|
||||
const fifoQueue = new sqs.Queue(this, 'OrderedQueue', {
|
||||
fifo: true,
|
||||
contentBasedDeduplication: true,
|
||||
deduplicationScope: sqs.DeduplicationScope.MESSAGE_GROUP,
|
||||
});
|
||||
|
||||
// Lambda consumer
|
||||
new lambda.EventSourceMapping(this, 'QueueConsumer', {
|
||||
target: processingFunction,
|
||||
eventSourceArn: queue.queueArn,
|
||||
batchSize: 10,
|
||||
maxBatchingWindow: Duration.seconds(5),
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern 3: Pub/Sub (SNS + SQS Fan-Out)
|
||||
|
||||
Implement fan-out pattern for multiple consumers:
|
||||
|
||||
```typescript
|
||||
// Create SNS topic
|
||||
const topic = new sns.Topic(this, 'OrderTopic', {
|
||||
displayName: 'Order Events',
|
||||
});
|
||||
|
||||
// Multiple SQS queues subscribe to topic
|
||||
const inventoryQueue = new sqs.Queue(this, 'InventoryQueue');
|
||||
const shippingQueue = new sqs.Queue(this, 'ShippingQueue');
|
||||
const analyticsQueue = new sqs.Queue(this, 'AnalyticsQueue');
|
||||
|
||||
topic.addSubscription(new subscriptions.SqsSubscription(inventoryQueue));
|
||||
topic.addSubscription(new subscriptions.SqsSubscription(shippingQueue));
|
||||
topic.addSubscription(new subscriptions.SqsSubscription(analyticsQueue));
|
||||
|
||||
// Each queue has its own Lambda consumer
|
||||
new lambda.EventSourceMapping(this, 'InventoryConsumer', {
|
||||
target: inventoryFunction,
|
||||
eventSourceArn: inventoryQueue.queueArn,
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern 4: Saga Pattern with Step Functions
|
||||
|
||||
Implement distributed transactions:
|
||||
|
||||
```typescript
|
||||
const reserveFlight = new tasks.LambdaInvoke(this, 'ReserveFlight', {
|
||||
lambdaFunction: reserveFlightFunction,
|
||||
outputPath: '$.Payload',
|
||||
});
|
||||
|
||||
const reserveHotel = new tasks.LambdaInvoke(this, 'ReserveHotel', {
|
||||
lambdaFunction: reserveHotelFunction,
|
||||
outputPath: '$.Payload',
|
||||
});
|
||||
|
||||
const processPayment = new tasks.LambdaInvoke(this, 'ProcessPayment', {
|
||||
lambdaFunction: processPaymentFunction,
|
||||
outputPath: '$.Payload',
|
||||
});
|
||||
|
||||
// Compensating transactions
|
||||
const cancelFlight = new tasks.LambdaInvoke(this, 'CancelFlight', {
|
||||
lambdaFunction: cancelFlightFunction,
|
||||
});
|
||||
|
||||
const cancelHotel = new tasks.LambdaInvoke(this, 'CancelHotel', {
|
||||
lambdaFunction: cancelHotelFunction,
|
||||
});
|
||||
|
||||
// Define saga with compensation
|
||||
const definition = reserveFlight
|
||||
.next(reserveHotel)
|
||||
.next(processPayment)
|
||||
.addCatch(cancelHotel.next(cancelFlight), {
|
||||
resultPath: '$.error',
|
||||
});
|
||||
|
||||
new stepfunctions.StateMachine(this, 'BookingStateMachine', {
|
||||
definition,
|
||||
timeout: Duration.minutes(5),
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern 5: Event Sourcing
|
||||
|
||||
Store events as source of truth:
|
||||
|
||||
```typescript
|
||||
// Event store with DynamoDB
|
||||
const eventStore = new dynamodb.Table(this, 'EventStore', {
|
||||
partitionKey: { name: 'aggregateId', type: dynamodb.AttributeType.STRING },
|
||||
sortKey: { name: 'version', type: dynamodb.AttributeType.NUMBER },
|
||||
stream: dynamodb.StreamViewType.NEW_IMAGE,
|
||||
});
|
||||
|
||||
// Lambda function stores events
|
||||
export const handleCommand = async (event: any) => {
|
||||
const { aggregateId, eventType, eventData } = event;
|
||||
|
||||
// Get current version
|
||||
const items = await dynamodb.query({
|
||||
TableName: process.env.EVENT_STORE,
|
||||
KeyConditionExpression: 'aggregateId = :id',
|
||||
ExpressionAttributeValues: { ':id': aggregateId },
|
||||
ScanIndexForward: false,
|
||||
Limit: 1,
|
||||
});
|
||||
|
||||
const nextVersion = items.Items?.[0]?.version + 1 || 1;
|
||||
|
||||
// Append new event
|
||||
await dynamodb.putItem({
|
||||
TableName: process.env.EVENT_STORE,
|
||||
Item: {
|
||||
aggregateId,
|
||||
version: nextVersion,
|
||||
eventType,
|
||||
eventData,
|
||||
timestamp: Date.now(),
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
// Projections read from event stream
|
||||
eventStore.grantStreamRead(projectionFunction);
|
||||
```
|
||||
|
||||
## Serverless Architecture Patterns
|
||||
|
||||
### Pattern 1: API-Driven Microservices
|
||||
|
||||
REST APIs with Lambda backend:
|
||||
|
||||
```typescript
|
||||
const api = new apigateway.RestApi(this, 'Api', {
|
||||
restApiName: 'microservices-api',
|
||||
deployOptions: {
|
||||
throttlingRateLimit: 1000,
|
||||
throttlingBurstLimit: 2000,
|
||||
tracingEnabled: true,
|
||||
},
|
||||
});
|
||||
|
||||
// User service
|
||||
const users = api.root.addResource('users');
|
||||
users.addMethod('GET', new apigateway.LambdaIntegration(getUsersFunction));
|
||||
users.addMethod('POST', new apigateway.LambdaIntegration(createUserFunction));
|
||||
|
||||
// Order service
|
||||
const orders = api.root.addResource('orders');
|
||||
orders.addMethod('GET', new apigateway.LambdaIntegration(getOrdersFunction));
|
||||
orders.addMethod('POST', new apigateway.LambdaIntegration(createOrderFunction));
|
||||
```
|
||||
|
||||
### Pattern 2: Stream Processing
|
||||
|
||||
Real-time data processing with Kinesis:
|
||||
|
||||
```typescript
|
||||
const stream = new kinesis.Stream(this, 'DataStream', {
|
||||
shardCount: 2,
|
||||
retentionPeriod: Duration.days(7),
|
||||
});
|
||||
|
||||
// Lambda processes stream records
|
||||
new lambda.EventSourceMapping(this, 'StreamProcessor', {
|
||||
target: processFunction,
|
||||
eventSourceArn: stream.streamArn,
|
||||
batchSize: 100,
|
||||
maxBatchingWindow: Duration.seconds(5),
|
||||
parallelizationFactor: 10,
|
||||
startingPosition: lambda.StartingPosition.LATEST,
|
||||
retryAttempts: 3,
|
||||
bisectBatchOnError: true,
|
||||
onFailure: new lambdaDestinations.SqsDestination(dlq),
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern 3: Async Task Processing
|
||||
|
||||
Background job processing:
|
||||
|
||||
```typescript
|
||||
// SQS queue for tasks
|
||||
const taskQueue = new sqs.Queue(this, 'TaskQueue', {
|
||||
visibilityTimeout: Duration.minutes(5),
|
||||
receiveMessageWaitTime: Duration.seconds(20), // Long polling
|
||||
deadLetterQueue: {
|
||||
queue: dlq,
|
||||
maxReceiveCount: 3,
|
||||
},
|
||||
});
|
||||
|
||||
// Lambda worker processes tasks
|
||||
const worker = new lambda.Function(this, 'TaskWorker', {
|
||||
// ... configuration
|
||||
reservedConcurrentExecutions: 10, // Control concurrency
|
||||
});
|
||||
|
||||
new lambda.EventSourceMapping(this, 'TaskConsumer', {
|
||||
target: worker,
|
||||
eventSourceArn: taskQueue.queueArn,
|
||||
batchSize: 10,
|
||||
reportBatchItemFailures: true, // Partial batch failure handling
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern 4: Scheduled Jobs
|
||||
|
||||
Periodic processing with EventBridge:
|
||||
|
||||
```typescript
|
||||
// Daily cleanup job
|
||||
new events.Rule(this, 'DailyCleanup', {
|
||||
schedule: events.Schedule.cron({ hour: '2', minute: '0' }),
|
||||
targets: [new targets.LambdaFunction(cleanupFunction)],
|
||||
});
|
||||
|
||||
// Process every 5 minutes
|
||||
new events.Rule(this, 'FrequentProcessing', {
|
||||
schedule: events.Schedule.rate(Duration.minutes(5)),
|
||||
targets: [new targets.LambdaFunction(processFunction)],
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern 5: Webhook Processing
|
||||
|
||||
Handle external webhooks:
|
||||
|
||||
```typescript
|
||||
// API Gateway endpoint for webhooks
|
||||
const webhookApi = new apigateway.RestApi(this, 'WebhookApi', {
|
||||
restApiName: 'webhooks',
|
||||
});
|
||||
|
||||
const webhook = webhookApi.root.addResource('webhook');
|
||||
webhook.addMethod('POST', new apigateway.LambdaIntegration(webhookFunction, {
|
||||
proxy: true,
|
||||
timeout: Duration.seconds(29), // API Gateway max
|
||||
}));
|
||||
|
||||
// Lambda handler validates and queues webhook
|
||||
export const handler = async (event: APIGatewayProxyEvent) => {
|
||||
// Validate webhook signature
|
||||
const isValid = validateSignature(event.headers, event.body);
|
||||
if (!isValid) {
|
||||
return { statusCode: 401, body: 'Invalid signature' };
|
||||
}
|
||||
|
||||
// Queue for async processing
|
||||
await sqs.sendMessage({
|
||||
QueueUrl: process.env.QUEUE_URL,
|
||||
MessageBody: event.body,
|
||||
});
|
||||
|
||||
// Return immediately
|
||||
return { statusCode: 202, body: 'Accepted' };
|
||||
};
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Error Handling
|
||||
|
||||
**Implement comprehensive error handling**:
|
||||
|
||||
```typescript
|
||||
export const handler = async (event: SQSEvent) => {
|
||||
const failures: SQSBatchItemFailure[] = [];
|
||||
|
||||
for (const record of event.Records) {
|
||||
try {
|
||||
await processRecord(record);
|
||||
} catch (error) {
|
||||
console.error('Failed to process record:', record.messageId, error);
|
||||
failures.push({ itemIdentifier: record.messageId });
|
||||
}
|
||||
}
|
||||
|
||||
// Return partial batch failures for retry
|
||||
return { batchItemFailures: failures };
|
||||
};
|
||||
```
|
||||
|
||||
### Dead Letter Queues
|
||||
|
||||
**Always configure DLQs for error handling**:
|
||||
|
||||
```typescript
|
||||
const dlq = new sqs.Queue(this, 'DLQ', {
|
||||
retentionPeriod: Duration.days(14),
|
||||
});
|
||||
|
||||
const queue = new sqs.Queue(this, 'Queue', {
|
||||
deadLetterQueue: {
|
||||
queue: dlq,
|
||||
maxReceiveCount: 3,
|
||||
},
|
||||
});
|
||||
|
||||
// Monitor DLQ depth
|
||||
new cloudwatch.Alarm(this, 'DLQAlarm', {
|
||||
metric: dlq.metricApproximateNumberOfMessagesVisible(),
|
||||
threshold: 1,
|
||||
evaluationPeriods: 1,
|
||||
alarmDescription: 'Messages in DLQ require attention',
|
||||
});
|
||||
```
|
||||
|
||||
### Observability
|
||||
|
||||
**Enable tracing and monitoring**:
|
||||
|
||||
```typescript
|
||||
new NodejsFunction(this, 'Function', {
|
||||
entry: 'src/handler.ts',
|
||||
tracing: lambda.Tracing.ACTIVE, // X-Ray tracing
|
||||
environment: {
|
||||
POWERTOOLS_SERVICE_NAME: 'order-service',
|
||||
POWERTOOLS_METRICS_NAMESPACE: 'MyApp',
|
||||
LOG_LEVEL: 'INFO',
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
## Using MCP Servers Effectively
|
||||
|
||||
### AWS Serverless MCP Usage
|
||||
|
||||
**Lifecycle management**:
|
||||
- Initialize new serverless projects
|
||||
- Generate SAM templates
|
||||
- Deploy applications
|
||||
- Test locally before deployment
|
||||
|
||||
### Lambda Tool MCP Usage
|
||||
|
||||
**Function execution**:
|
||||
- Test Lambda functions directly
|
||||
- Execute automation workflows
|
||||
- Access private resources
|
||||
- Validate integrations
|
||||
|
||||
### Step Functions MCP Usage
|
||||
|
||||
**Workflow orchestration**:
|
||||
- Create state machines for complex workflows
|
||||
- Execute distributed transactions
|
||||
- Implement saga patterns
|
||||
- Coordinate microservices
|
||||
|
||||
### SNS/SQS MCP Usage
|
||||
|
||||
**Messaging operations**:
|
||||
- Test pub/sub patterns
|
||||
- Send test messages to queues
|
||||
- Validate event routing
|
||||
- Debug message processing
|
||||
|
||||
## Additional Resources
|
||||
|
||||
This skill includes comprehensive reference documentation based on AWS best practices:
|
||||
|
||||
- **Serverless Patterns**: `references/serverless-patterns.md`
|
||||
- Core serverless architectures and API patterns
|
||||
- Data processing and integration patterns
|
||||
- Orchestration with Step Functions
|
||||
- Anti-patterns to avoid
|
||||
|
||||
- **Event-Driven Architecture Patterns**: `references/eda-patterns.md`
|
||||
- Event routing and processing patterns
|
||||
- Event sourcing and saga patterns
|
||||
- Idempotency and error handling
|
||||
- Message ordering and deduplication
|
||||
|
||||
- **Security Best Practices**: `references/security-best-practices.md`
|
||||
- Shared responsibility model
|
||||
- IAM least privilege patterns
|
||||
- Data protection and encryption
|
||||
- Network security with VPC
|
||||
|
||||
- **Observability Best Practices**: `references/observability-best-practices.md`
|
||||
- Three pillars: metrics, logs, traces
|
||||
- Structured logging with Lambda Powertools
|
||||
- X-Ray distributed tracing
|
||||
- CloudWatch alarms and dashboards
|
||||
|
||||
- **Performance Optimization**: `references/performance-optimization.md`
|
||||
- Cold start optimization techniques
|
||||
- Memory and CPU optimization
|
||||
- Package size reduction
|
||||
- Provisioned concurrency patterns
|
||||
|
||||
- **Deployment Best Practices**: `references/deployment-best-practices.md`
|
||||
- CI/CD pipeline design
|
||||
- Testing strategies (unit, integration, load)
|
||||
- Deployment strategies (canary, blue/green)
|
||||
- Rollback and safety mechanisms
|
||||
|
||||
**External Resources**:
|
||||
- **AWS Well-Architected Serverless Lens**: https://docs.aws.amazon.com/wellarchitected/latest/serverless-applications-lens/
|
||||
- **ServerlessLand.com**: Pre-built serverless patterns
|
||||
- **AWS Serverless Workshops**: https://serverlessland.com/learn?type=Workshops
|
||||
|
||||
For detailed implementation patterns, anti-patterns, and code examples, refer to the comprehensive references in the skill directory.
|
||||
@@ -0,0 +1,830 @@
|
||||
# Serverless Deployment Best Practices
|
||||
|
||||
Deployment best practices for serverless applications including CI/CD, testing, and deployment strategies.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Software Release Process](#software-release-process)
|
||||
- [Infrastructure as Code](#infrastructure-as-code)
|
||||
- [CI/CD Pipeline Design](#cicd-pipeline-design)
|
||||
- [Testing Strategies](#testing-strategies)
|
||||
- [Deployment Strategies](#deployment-strategies)
|
||||
- [Rollback and Safety](#rollback-and-safety)
|
||||
|
||||
## Software Release Process
|
||||
|
||||
### Four Stages of Release
|
||||
|
||||
**1. Source Phase**:
|
||||
- Developers commit code changes
|
||||
- Code review (peer review)
|
||||
- Version control (Git)
|
||||
|
||||
**2. Build Phase**:
|
||||
- Compile code
|
||||
- Run unit tests
|
||||
- Style checking and linting
|
||||
- Create deployment packages
|
||||
- Build container images
|
||||
|
||||
**3. Test Phase**:
|
||||
- Integration tests with other systems
|
||||
- Load testing
|
||||
- UI testing
|
||||
- Security testing (penetration testing)
|
||||
- Acceptance testing
|
||||
|
||||
**4. Production Phase**:
|
||||
- Deploy to production environment
|
||||
- Monitor for errors
|
||||
- Validate deployment success
|
||||
- Rollback if needed
|
||||
|
||||
### CI/CD Maturity Levels
|
||||
|
||||
**Continuous Integration (CI)**:
|
||||
- Automated build on code commit
|
||||
- Automated unit testing
|
||||
- Manual deployment to test/production
|
||||
|
||||
**Continuous Delivery (CD)**:
|
||||
- Automated deployment to test environments
|
||||
- Manual approval for production
|
||||
- Automated testing in non-prod
|
||||
|
||||
**Continuous Deployment**:
|
||||
- Fully automated pipeline
|
||||
- Automated deployment to production
|
||||
- No manual intervention after code commit
|
||||
|
||||
## Infrastructure as Code
|
||||
|
||||
### Framework Selection
|
||||
|
||||
**AWS SAM (Serverless Application Model)**:
|
||||
|
||||
```yaml
|
||||
# template.yaml
|
||||
AWSTemplateFormatVersion: '2010-09-09'
|
||||
Transform: AWS::Serverless-2016-10-31
|
||||
|
||||
Resources:
|
||||
OrderFunction:
|
||||
Type: AWS::Serverless::Function
|
||||
Properties:
|
||||
Handler: app.handler
|
||||
Runtime: nodejs20.x
|
||||
CodeUri: src/
|
||||
Events:
|
||||
Api:
|
||||
Type: Api
|
||||
Properties:
|
||||
Path: /orders
|
||||
Method: post
|
||||
```
|
||||
|
||||
**Benefits**:
|
||||
- Simple, serverless-focused syntax
|
||||
- Built-in best practices
|
||||
- SAM CLI for local testing
|
||||
- Integrates with CodeDeploy
|
||||
|
||||
**AWS CDK**:
|
||||
|
||||
```typescript
|
||||
new NodejsFunction(this, 'OrderFunction', {
|
||||
entry: 'src/orders/handler.ts',
|
||||
environment: {
|
||||
TABLE_NAME: ordersTable.tableName,
|
||||
},
|
||||
});
|
||||
|
||||
ordersTable.grantReadWriteData(orderFunction);
|
||||
```
|
||||
|
||||
**Benefits**:
|
||||
- Type-safe, programmatic
|
||||
- Reusable constructs
|
||||
- Rich AWS service support
|
||||
- Better for complex infrastructure
|
||||
|
||||
**When to use**:
|
||||
- **SAM**: Serverless-only applications, simpler projects
|
||||
- **CDK**: Complex infrastructure, multiple services, reusable patterns
|
||||
|
||||
### Environment Management
|
||||
|
||||
**Separate environments**:
|
||||
|
||||
```typescript
|
||||
// CDK App
|
||||
const app = new cdk.App();
|
||||
|
||||
new ServerlessStack(app, 'DevStack', {
|
||||
env: { account: '111111111111', region: 'us-east-1' },
|
||||
environment: 'dev',
|
||||
logLevel: 'DEBUG',
|
||||
});
|
||||
|
||||
new ServerlessStack(app, 'ProdStack', {
|
||||
env: { account: '222222222222', region: 'us-east-1' },
|
||||
environment: 'prod',
|
||||
logLevel: 'INFO',
|
||||
});
|
||||
```
|
||||
|
||||
**SAM with parameters**:
|
||||
|
||||
```yaml
|
||||
Parameters:
|
||||
Environment:
|
||||
Type: String
|
||||
Default: dev
|
||||
AllowedValues:
|
||||
- dev
|
||||
- staging
|
||||
- prod
|
||||
Conditions:
IsProd: !Equals [!Ref Environment, prod]
||||
Resources:
|
||||
Function:
|
||||
Type: AWS::Serverless::Function
|
||||
Properties:
|
||||
Environment:
|
||||
Variables:
|
||||
ENVIRONMENT: !Ref Environment
|
||||
LOG_LEVEL: !If [IsProd, INFO, DEBUG]
|
||||
```
|
||||
|
||||
## CI/CD Pipeline Design
|
||||
|
||||
### AWS CodePipeline
|
||||
|
||||
**Comprehensive pipeline**:
|
||||
|
||||
```typescript
|
||||
import * as codepipeline from 'aws-cdk-lib/aws-codepipeline';
|
||||
import * as codepipeline_actions from 'aws-cdk-lib/aws-codepipeline-actions';
|
||||
|
||||
const sourceOutput = new codepipeline.Artifact();
|
||||
const buildOutput = new codepipeline.Artifact();
|
||||
|
||||
const pipeline = new codepipeline.Pipeline(this, 'Pipeline', {
|
||||
pipelineName: 'serverless-pipeline',
|
||||
});
|
||||
|
||||
// Source stage
|
||||
pipeline.addStage({
|
||||
stageName: 'Source',
|
||||
actions: [
|
||||
new codepipeline_actions.CodeStarConnectionsSourceAction({
|
||||
actionName: 'GitHub_Source',
|
||||
owner: 'myorg',
|
||||
repo: 'myrepo',
|
||||
branch: 'main',
|
||||
output: sourceOutput,
|
||||
connectionArn: githubConnection.connectionArn,
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
// Build stage
|
||||
pipeline.addStage({
|
||||
stageName: 'Build',
|
||||
actions: [
|
||||
new codepipeline_actions.CodeBuildAction({
|
||||
actionName: 'Build',
|
||||
project: buildProject,
|
||||
input: sourceOutput,
|
||||
outputs: [buildOutput],
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
// Test stage
|
||||
pipeline.addStage({
|
||||
stageName: 'Test',
|
||||
actions: [
|
||||
new codepipeline_actions.CloudFormationCreateUpdateStackAction({
|
||||
actionName: 'Deploy_Test',
|
||||
templatePath: buildOutput.atPath('packaged.yaml'),
|
||||
stackName: 'test-stack',
|
||||
adminPermissions: true,
|
||||
}),
|
||||
new codepipeline_actions.CodeBuildAction({
|
||||
actionName: 'Integration_Tests',
|
||||
project: testProject,
|
||||
input: buildOutput,
|
||||
runOrder: 2,
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
// Production stage (with manual approval)
|
||||
pipeline.addStage({
|
||||
stageName: 'Production',
|
||||
actions: [
|
||||
new codepipeline_actions.ManualApprovalAction({
|
||||
actionName: 'Approve',
|
||||
}),
|
||||
new codepipeline_actions.CloudFormationCreateUpdateStackAction({
|
||||
actionName: 'Deploy_Prod',
|
||||
templatePath: buildOutput.atPath('packaged.yaml'),
|
||||
stackName: 'prod-stack',
|
||||
adminPermissions: true,
|
||||
runOrder: 2,
|
||||
}),
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
### GitHub Actions
|
||||
|
||||
**Serverless deployment workflow**:
|
||||
|
||||
```yaml
|
||||
# .github/workflows/deploy.yml
|
||||
name: Deploy Serverless Application
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ['**'] # build every branch; the production deploy step below is gated to main
|
||||
|
||||
jobs:
|
||||
build-and-deploy:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Setup Node.js
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
node-version: '20'
|
||||
|
||||
- name: Install dependencies
|
||||
run: npm ci
|
||||
|
||||
- name: Run tests
|
||||
run: npm test
|
||||
|
||||
- name: Setup SAM CLI
|
||||
uses: aws-actions/setup-sam@v2
|
||||
|
||||
- name: Build SAM application
|
||||
run: sam build
|
||||
|
||||
- name: Deploy to Dev
|
||||
if: github.ref != 'refs/heads/main'
|
||||
run: |
|
||||
sam deploy \
|
||||
--no-confirm-changeset \
|
||||
--no-fail-on-empty-changeset \
|
||||
--stack-name dev-stack \
|
||||
--parameter-overrides Environment=dev
|
||||
|
||||
- name: Run integration tests
|
||||
run: npm run test:integration
|
||||
|
||||
- name: Deploy to Prod
|
||||
if: github.ref == 'refs/heads/main'
|
||||
run: |
|
||||
sam deploy \
|
||||
--no-confirm-changeset \
|
||||
--no-fail-on-empty-changeset \
|
||||
--stack-name prod-stack \
|
||||
--parameter-overrides Environment=prod
|
||||
```
|
||||
|
||||
## Testing Strategies
|
||||
|
||||
### Unit Testing
|
||||
|
||||
**Test business logic independently**:
|
||||
|
||||
```typescript
|
||||
// handler.ts
|
||||
export const processOrder = (order: Order): ProcessedOrder => {
|
||||
// Pure business logic (easily testable)
|
||||
validateOrder(order);
|
||||
calculateTotal(order);
|
||||
return transformOrder(order);
|
||||
};
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
const order = parseEvent(event);
|
||||
const processed = processOrder(order); // Testable function
|
||||
await saveToDatabase(processed);
|
||||
return formatResponse(processed);
|
||||
};
|
||||
|
||||
// handler.test.ts
|
||||
import { processOrder } from './handler';
|
||||
|
||||
describe('processOrder', () => {
|
||||
it('calculates total correctly', () => {
|
||||
const order = {
|
||||
items: [
|
||||
{ price: 10, quantity: 2 },
|
||||
{ price: 5, quantity: 3 },
|
||||
],
|
||||
};
|
||||
|
||||
const result = processOrder(order);
|
||||
|
||||
expect(result.total).toBe(35);
|
||||
});
|
||||
|
||||
it('throws on invalid order', () => {
|
||||
const invalid = { items: [] };
|
||||
expect(() => processOrder(invalid)).toThrow();
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### Integration Testing
|
||||
|
||||
**Test in actual AWS environment**:
|
||||
|
||||
```typescript
|
||||
// integration.test.ts
|
||||
import { LambdaClient, InvokeCommand } from '@aws-sdk/client-lambda';
|
||||
import { DynamoDBClient, GetItemCommand } from '@aws-sdk/client-dynamodb';
|
||||
|
||||
describe('Order Processing Integration', () => {
|
||||
const lambda = new LambdaClient({});
|
||||
const dynamodb = new DynamoDBClient({});
|
||||
|
||||
it('processes order end-to-end', async () => {
|
||||
// Invoke Lambda
|
||||
const response = await lambda.send(new InvokeCommand({
|
||||
FunctionName: process.env.FUNCTION_NAME,
|
||||
Payload: JSON.stringify({
|
||||
orderId: 'test-123',
|
||||
items: [{ productId: 'prod-1', quantity: 2 }],
|
||||
}),
|
||||
}));
|
||||
|
||||
const result = JSON.parse(Buffer.from(response.Payload!).toString());
|
||||
|
||||
expect(result.statusCode).toBe(200);
|
||||
|
||||
// Verify database write
|
||||
const dbResult = await dynamodb.send(new GetItemCommand({
|
||||
TableName: process.env.TABLE_NAME,
|
||||
Key: { orderId: { S: 'test-123' } },
|
||||
}));
|
||||
|
||||
expect(dbResult.Item).toBeDefined();
|
||||
expect(dbResult.Item?.status?.S).toBe('PROCESSED');
|
||||
});
|
||||
});
|
||||
```
|
||||
|
||||
### Local Testing with SAM
|
||||
|
||||
**Test locally before deployment**:
|
||||
|
||||
```bash
|
||||
# Start local API
|
||||
sam local start-api
|
||||
|
||||
# Invoke function locally
|
||||
sam local invoke OrderFunction -e events/create-order.json
|
||||
|
||||
# Generate sample events
|
||||
sam local generate-event apigateway aws-proxy > event.json
|
||||
|
||||
# Debug locally
|
||||
sam local invoke OrderFunction -d 5858
|
||||
|
||||
# Test with Docker
|
||||
sam local start-api --docker-network my-network
|
||||
```
|
||||
|
||||
### Load Testing
|
||||
|
||||
**Test under production load**:
|
||||
|
||||
```bash
|
||||
# Install Artillery
|
||||
npm install -g artillery
|
||||
|
||||
# Create load test
|
||||
cat > load-test.yml <<EOF
|
||||
config:
|
||||
target: https://api.example.com
|
||||
phases:
|
||||
- duration: 300 # 5 minutes
|
||||
arrivalRate: 50 # 50 requests/second
|
||||
rampTo: 200 # Ramp to 200 req/sec
|
||||
scenarios:
|
||||
- flow:
|
||||
- post:
|
||||
url: /orders
|
||||
json:
|
||||
orderId: "{{ $randomString() }}"
|
||||
EOF
|
||||
|
||||
# Run load test
|
||||
artillery run load-test.yml --output report.json
|
||||
|
||||
# Generate HTML report
|
||||
artillery report report.json
|
||||
```
|
||||
|
||||
## Deployment Strategies
|
||||
|
||||
### All-at-Once Deployment
|
||||
|
||||
**Simple, fast, risky**:
|
||||
|
||||
```yaml
|
||||
# SAM template
|
||||
Resources:
|
||||
OrderFunction:
|
||||
Type: AWS::Serverless::Function
|
||||
Properties:
|
||||
DeploymentPreference:
|
||||
Type: AllAtOnce # Deploy immediately
|
||||
```
|
||||
|
||||
**Use for**:
|
||||
- Development environments
|
||||
- Non-critical applications
|
||||
- Quick hotfixes (with caution)
|
||||
|
||||
### Blue/Green Deployment
|
||||
|
||||
**Zero-downtime deployment**:
|
||||
|
||||
```yaml
|
||||
Resources:
|
||||
OrderFunction:
|
||||
Type: AWS::Serverless::Function
|
||||
Properties:
|
||||
AutoPublishAlias: live
|
||||
DeploymentPreference:
|
||||
Type: Linear10PercentEvery1Minute
|
||||
Alarms:
|
||||
- !Ref ErrorAlarm
|
||||
- !Ref LatencyAlarm
|
||||
```
|
||||
|
||||
**Deployment types**:
|
||||
- **Linear10PercentEvery1Minute**: 10% traffic shift every minute
|
||||
- **Linear10PercentEvery2Minutes**: Slower, more conservative
|
||||
- **Linear10PercentEvery3Minutes**: Even slower
|
||||
- **Linear10PercentEvery10Minutes**: Very gradual
|
||||
- **Canary10Percent5Minutes**: 10% for 5 min, then 100%
|
||||
- **Canary10Percent10Minutes**: 10% for 10 min, then 100%
|
||||
- **Canary10Percent30Minutes**: 10% for 30 min, then 100%
|
||||
|
||||
### Canary Deployment
|
||||
|
||||
**Test with subset of traffic**:
|
||||
|
||||
```yaml
|
||||
Resources:
|
||||
OrderFunction:
|
||||
Type: AWS::Serverless::Function
|
||||
Properties:
|
||||
AutoPublishAlias: live
|
||||
DeploymentPreference:
|
||||
Type: Canary10Percent10Minutes
|
||||
Alarms:
|
||||
- !Ref ErrorAlarm
|
||||
- !Ref LatencyAlarm
|
||||
Hooks:
|
||||
PreTraffic: !Ref PreTrafficHook
|
||||
PostTraffic: !Ref PostTrafficHook
|
||||
|
||||
PreTrafficHook:
|
||||
Type: AWS::Serverless::Function
|
||||
Properties:
|
||||
Handler: hooks.pre_traffic
|
||||
Runtime: python3.12
|
||||
# Runs before traffic shift
|
||||
# Validates new version
|
||||
|
||||
PostTrafficHook:
|
||||
Type: AWS::Serverless::Function
|
||||
Properties:
|
||||
Handler: hooks.post_traffic
|
||||
Runtime: python3.12
|
||||
# Runs after traffic shift
|
||||
# Validates deployment success
|
||||
```
|
||||
|
||||
**CDK with CodeDeploy**:
|
||||
|
||||
```typescript
|
||||
import * as codedeploy from 'aws-cdk-lib/aws-codedeploy';
|
||||
|
||||
const alias = fn.currentVersion.addAlias('live');
|
||||
|
||||
new codedeploy.LambdaDeploymentGroup(this, 'DeploymentGroup', {
|
||||
alias,
|
||||
deploymentConfig: codedeploy.LambdaDeploymentConfig.CANARY_10PERCENT_10MINUTES,
|
||||
alarms: [errorAlarm, latencyAlarm],
|
||||
autoRollback: {
|
||||
failedDeployment: true,
|
||||
stoppedDeployment: true,
|
||||
deploymentInAlarm: true,
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
### Deployment Hooks
|
||||
|
||||
**Pre-traffic hook (validation)**:
|
||||
|
||||
```python
|
||||
# hooks.py
|
||||
import json
import os
from datetime import datetime

import boto3
|
||||
|
||||
lambda_client = boto3.client('lambda')
|
||||
codedeploy = boto3.client('codedeploy')
|
||||
|
||||
def pre_traffic(event, context):
|
||||
"""
|
||||
Validate new version before traffic shift
|
||||
"""
|
||||
# CodeDeploy lifecycle hook events only contain DeploymentId and
# LifecycleEventHookExecutionId; pass the target function name and
# version to the hook through environment variables
function_name = os.environ['TARGET_FUNCTION_NAME']
version = os.environ['TARGET_FUNCTION_VERSION']
|
||||
|
||||
try:
|
||||
# Invoke new version with test payload
|
||||
response = lambda_client.invoke(
|
||||
FunctionName=f"{function_name}:{version}",
|
||||
InvocationType='RequestResponse',
|
||||
Payload=json.dumps({'test': True})
|
||||
)
|
||||
|
||||
# Validate response
|
||||
if response['StatusCode'] == 200:
|
||||
codedeploy.put_lifecycle_event_hook_execution_status(
|
||||
deploymentId=event['DeploymentId'],
|
||||
lifecycleEventHookExecutionId=event['LifecycleEventHookExecutionId'],
|
||||
status='Succeeded'
|
||||
)
|
||||
else:
|
||||
raise Exception('Validation failed')
|
||||
|
||||
except Exception as e:
|
||||
print(f'Pre-traffic validation failed: {e}')
|
||||
codedeploy.put_lifecycle_event_hook_execution_status(
|
||||
deploymentId=event['DeploymentId'],
|
||||
lifecycleEventHookExecutionId=event['LifecycleEventHookExecutionId'],
|
||||
status='Failed'
|
||||
)
|
||||
```
|
||||
|
||||
**Post-traffic hook (verification)**:
|
||||
|
||||
```python
|
||||
def post_traffic(event, context):
|
||||
"""
|
||||
Verify deployment success after traffic shift
|
||||
"""
|
||||
try:
|
||||
# Check CloudWatch metrics
|
||||
cloudwatch = boto3.client('cloudwatch')
|
||||
|
||||
metrics = cloudwatch.get_metric_statistics(
|
||||
Namespace='AWS/Lambda',
|
||||
MetricName='Errors',
|
||||
Dimensions=[{'Name': 'FunctionName', 'Value': function_name}],
|
||||
StartTime=deployment_start_time,
|
||||
EndTime=datetime.utcnow(),
|
||||
Period=300,
|
||||
Statistics=['Sum']
|
||||
)
|
||||
|
||||
# Validate no errors
|
||||
total_errors = sum(point['Sum'] for point in metrics['Datapoints'])
|
||||
|
||||
if total_errors == 0:
|
||||
codedeploy.put_lifecycle_event_hook_execution_status(
|
||||
deploymentId=event['DeploymentId'],
|
||||
lifecycleEventHookExecutionId=event['LifecycleEventHookExecutionId'],
|
||||
status='Succeeded'
|
||||
)
|
||||
else:
|
||||
raise Exception(f'{total_errors} errors detected')
|
||||
|
||||
except Exception as e:
|
||||
print(f'Post-traffic verification failed: {e}')
|
||||
codedeploy.put_lifecycle_event_hook_execution_status(
|
||||
deploymentId=event['DeploymentId'],
|
||||
lifecycleEventHookExecutionId=event['LifecycleEventHookExecutionId'],
|
||||
status='Failed'
|
||||
)
|
||||
```
|
||||
|
||||
## Rollback and Safety
|
||||
|
||||
### Automatic Rollback
|
||||
|
||||
**Configure rollback triggers**:
|
||||
|
||||
```yaml
|
||||
DeploymentPreference:
|
||||
Type: Canary10Percent10Minutes
|
||||
Alarms:
|
||||
- !Ref ErrorAlarm
|
||||
- !Ref LatencyAlarm
|
||||
# Automatically rolls back if alarms trigger
|
||||
```
|
||||
|
||||
**Rollback scenarios**:
|
||||
- CloudWatch alarm triggers during deployment
|
||||
- Pre-traffic hook fails
|
||||
- Post-traffic hook fails
|
||||
- Deployment manually stopped
|
||||
|
||||
### CloudWatch Alarms for Deployment
|
||||
|
||||
**Critical alarms during deployment**:
|
||||
|
||||
```typescript
|
||||
// Error rate alarm
|
||||
const errorAlarm = new cloudwatch.Alarm(this, 'ErrorAlarm', {
|
||||
metric: fn.metricErrors({
|
||||
statistic: 'Sum',
|
||||
period: Duration.minutes(1),
|
||||
}),
|
||||
threshold: 5,
|
||||
evaluationPeriods: 2,
|
||||
treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING,
|
||||
});
|
||||
|
||||
// Duration alarm (regression)
|
||||
const durationAlarm = new cloudwatch.Alarm(this, 'DurationAlarm', {
|
||||
metric: fn.metricDuration({
|
||||
statistic: 'Average',
|
||||
period: Duration.minutes(1),
|
||||
}),
|
||||
threshold: previousAvgDuration * 1.2, // 20% increase
|
||||
evaluationPeriods: 2,
|
||||
comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
|
||||
});
|
||||
|
||||
// Throttle alarm
|
||||
const throttleAlarm = new cloudwatch.Alarm(this, 'ThrottleAlarm', {
|
||||
metric: fn.metricThrottles({
|
||||
statistic: 'Sum',
|
||||
period: Duration.minutes(1),
|
||||
}),
|
||||
threshold: 1,
|
||||
evaluationPeriods: 1,
|
||||
});
|
||||
```
|
||||
|
||||
### Version Management
|
||||
|
||||
**Use Lambda versions and aliases**:
|
||||
|
||||
```typescript
|
||||
const version = fn.currentVersion;
|
||||
|
||||
const prodAlias = version.addAlias('prod');
|
||||
const devAlias = version.addAlias('dev');
|
||||
|
||||
// Gradual rollout with weighted aliases
|
||||
new lambda.Alias(this, 'LiveAlias', {
|
||||
aliasName: 'live',
|
||||
version: newVersion,
|
||||
additionalVersions: [
|
||||
{ version: oldVersion, weight: 0.9 }, // 90% old
|
||||
// 10% automatically goes to main version (new)
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
## Best Practices Checklist
|
||||
|
||||
### Pre-Deployment
|
||||
|
||||
- [ ] Code review completed
|
||||
- [ ] Unit tests passing
|
||||
- [ ] Integration tests passing
|
||||
- [ ] Security scan completed
|
||||
- [ ] Dependencies updated
|
||||
- [ ] Infrastructure validated (CDK synth, SAM validate)
|
||||
- [ ] Environment variables configured
|
||||
|
||||
### Deployment
|
||||
|
||||
- [ ] Use IaC (SAM, CDK, Terraform)
|
||||
- [ ] Separate environments (dev, staging, prod)
|
||||
- [ ] Automate deployments via CI/CD
|
||||
- [ ] Use gradual deployment (canary or linear)
|
||||
- [ ] Configure CloudWatch alarms
|
||||
- [ ] Enable automatic rollback
|
||||
- [ ] Use deployment hooks for validation
|
||||
|
||||
### Post-Deployment
|
||||
|
||||
- [ ] Monitor CloudWatch metrics
|
||||
- [ ] Check CloudWatch Logs for errors
|
||||
- [ ] Verify X-Ray traces
|
||||
- [ ] Validate business metrics
|
||||
- [ ] Check alarm status
|
||||
- [ ] Review deployment logs
|
||||
- [ ] Document any issues
|
||||
|
||||
### Rollback Preparation
|
||||
|
||||
- [ ] Keep previous version available
|
||||
- [ ] Document rollback procedure
|
||||
- [ ] Test rollback in non-prod
|
||||
- [ ] Configure automatic rollback
|
||||
- [ ] Monitor during rollback
|
||||
- [ ] Communication plan for rollback
|
||||
|
||||
## Deployment Patterns
|
||||
|
||||
### Multi-Region Deployment
|
||||
|
||||
**Active-Passive**:
|
||||
|
||||
```typescript
|
||||
// Primary region
|
||||
new ServerlessStack(app, 'PrimaryStack', {
|
||||
env: { region: 'us-east-1' },
|
||||
isPrimary: true,
|
||||
});
|
||||
|
||||
// Secondary region (standby)
|
||||
new ServerlessStack(app, 'SecondaryStack', {
|
||||
env: { region: 'us-west-2' },
|
||||
isPrimary: false,
|
||||
});
|
||||
|
||||
// Route 53 health check and failover
|
||||
const healthCheck = new route53.CfnHealthCheck(this, 'HealthCheck', {
|
||||
type: 'HTTPS',
|
||||
resourcePath: '/health',
|
||||
fullyQualifiedDomainName: 'api.example.com',
|
||||
});
|
||||
```
|
||||
|
||||
**Active-Active**:
|
||||
|
||||
```typescript
|
||||
// Deploy to multiple regions
|
||||
const regions = ['us-east-1', 'us-west-2', 'eu-west-1'];
|
||||
|
||||
for (const region of regions) {
|
||||
new ServerlessStack(app, `Stack-${region}`, {
|
||||
env: { region },
|
||||
});
|
||||
}
|
||||
|
||||
// Route 53 geolocation routing
|
||||
new route53.ARecord(this, 'GeoRecord', {
|
||||
zone: hostedZone,
|
||||
recordName: 'api',
|
||||
target: route53.RecordTarget.fromAlias(
|
||||
new targets.ApiGatewayDomain(domain)
|
||||
),
|
||||
geoLocation: route53.GeoLocation.country('US'),
|
||||
});
|
||||
```
|
||||
|
||||
### Feature Flags with AppConfig
|
||||
|
||||
**Safe feature rollout**:
|
||||
|
||||
```typescript
|
||||
import { AppConfigData } from '@aws-sdk/client-appconfigdata';
|
||||
|
||||
const appconfig = new AppConfigData({});
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
// Fetch feature flags
|
||||
const config = await appconfig.getLatestConfiguration({
|
||||
ConfigurationToken: token,
|
||||
});
|
||||
|
||||
const features = JSON.parse(new TextDecoder().decode(config.Configuration));
|
||||
|
||||
if (features.newFeatureEnabled) {
|
||||
return newFeatureHandler(event);
|
||||
}
|
||||
|
||||
return legacyHandler(event);
|
||||
};
|
||||
```
|
||||
|
||||
## Summary
|
||||
|
||||
- **IaC**: Use SAM or CDK for all deployments
|
||||
- **Environments**: Separate dev, staging, production
|
||||
- **CI/CD**: Automate build, test, and deployment
|
||||
- **Testing**: Unit, integration, and load testing
|
||||
- **Gradual Deployment**: Use canary or linear for production
|
||||
- **Alarms**: Configure and monitor during deployment
|
||||
- **Rollback**: Enable automatic rollback on failures
|
||||
- **Hooks**: Validate before and after traffic shifts
|
||||
- **Versioning**: Use Lambda versions and aliases
|
||||
- **Multi-Region**: Plan for disaster recovery
|
||||
1002
skills/aws-serverless-eda/references/eda-patterns.md
Normal file
1002
skills/aws-serverless-eda/references/eda-patterns.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,770 @@
|
||||
# Serverless Observability Best Practices
|
||||
|
||||
Comprehensive observability patterns for serverless applications based on AWS best practices.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Three Pillars of Observability](#three-pillars-of-observability)
|
||||
- [Metrics](#metrics)
|
||||
- [Logging](#logging)
|
||||
- [Tracing](#tracing)
|
||||
- [Unified Observability](#unified-observability)
|
||||
- [Alerting](#alerting)
|
||||
|
||||
## Three Pillars of Observability
|
||||
|
||||
### Metrics
|
||||
**Numeric data measured at intervals (time series)**
|
||||
- Request rate, error rate, duration
|
||||
- CPU%, memory%, disk%
|
||||
- Custom business metrics
|
||||
- Service Level Indicators (SLIs)
|
||||
|
||||
### Logs
|
||||
**Timestamped records of discrete events**
|
||||
- Application events and errors
|
||||
- State transformations
|
||||
- Debugging information
|
||||
- Audit trails
|
||||
|
||||
### Traces
|
||||
**Single user's journey across services**
|
||||
- Request flow through distributed system
|
||||
- Service dependencies
|
||||
- Latency breakdown
|
||||
- Error propagation
|
||||
|
||||
## Metrics
|
||||
|
||||
### CloudWatch Metrics for Lambda
|
||||
|
||||
**Out-of-the-box metrics** (automatically available):
|
||||
```
|
||||
- Invocations
|
||||
- Errors
|
||||
- Throttles
|
||||
- Duration
|
||||
- ConcurrentExecutions
|
||||
- IteratorAge (for streams)
|
||||
```
|
||||
|
||||
**CDK Configuration**:
|
||||
```typescript
|
||||
const fn = new NodejsFunction(this, 'Function', {
|
||||
entry: 'src/handler.ts',
|
||||
});
|
||||
|
||||
// Create alarms on metrics
|
||||
new cloudwatch.Alarm(this, 'ErrorAlarm', {
|
||||
metric: fn.metricErrors({
|
||||
statistic: 'Sum',
|
||||
period: Duration.minutes(5),
|
||||
}),
|
||||
threshold: 10,
|
||||
evaluationPeriods: 1,
|
||||
});
|
||||
|
||||
new cloudwatch.Alarm(this, 'DurationAlarm', {
|
||||
metric: fn.metricDuration({
|
||||
statistic: 'p99',
|
||||
period: Duration.minutes(5),
|
||||
}),
|
||||
threshold: 1000, // 1 second
|
||||
evaluationPeriods: 2,
|
||||
});
|
||||
```
|
||||
|
||||
### Custom Metrics
|
||||
|
||||
**Use CloudWatch Embedded Metric Format (EMF)**:
|
||||
|
||||
```typescript
|
||||
export const handler = async (event: any) => {
|
||||
const startTime = Date.now();
|
||||
|
||||
try {
|
||||
const result = await processOrder(event);
|
||||
|
||||
// Emit custom metrics
|
||||
console.log(JSON.stringify({
|
||||
_aws: {
|
||||
Timestamp: Date.now(),
|
||||
CloudWatchMetrics: [{
|
||||
Namespace: 'MyApp/Orders',
|
||||
Dimensions: [['ServiceName', 'Operation']],
|
||||
Metrics: [
|
||||
{ Name: 'ProcessingTime', Unit: 'Milliseconds' },
|
||||
{ Name: 'OrderValue', Unit: 'None' },
|
||||
],
|
||||
}],
|
||||
},
|
||||
ServiceName: 'OrderService',
|
||||
Operation: 'ProcessOrder',
|
||||
ProcessingTime: Date.now() - startTime,
|
||||
OrderValue: result.amount,
|
||||
}));
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
// Emit error metric
|
||||
console.log(JSON.stringify({
|
||||
_aws: {
|
||||
CloudWatchMetrics: [{
|
||||
Namespace: 'MyApp/Orders',
|
||||
Dimensions: [['ServiceName']],
|
||||
Metrics: [{ Name: 'Errors', Unit: 'Count' }],
|
||||
}],
|
||||
},
|
||||
ServiceName: 'OrderService',
|
||||
Errors: 1,
|
||||
}));
|
||||
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
**Using Lambda Powertools**:
|
||||
|
||||
```typescript
|
||||
import { Metrics, MetricUnits } from '@aws-lambda-powertools/metrics';
|
||||
|
||||
const metrics = new Metrics({
|
||||
namespace: 'MyApp',
|
||||
serviceName: 'OrderService',
|
||||
});
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
metrics.addMetric('Invocation', MetricUnits.Count, 1);
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
try {
|
||||
const result = await processOrder(event);
|
||||
|
||||
metrics.addMetric('Success', MetricUnits.Count, 1);
|
||||
metrics.addMetric('ProcessingTime', MetricUnits.Milliseconds, Date.now() - startTime);
|
||||
metrics.addMetric('OrderValue', MetricUnits.None, result.amount);
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
metrics.addMetric('Error', MetricUnits.Count, 1);
|
||||
throw error;
|
||||
} finally {
|
||||
metrics.publishStoredMetrics();
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
## Logging
|
||||
|
||||
### Structured Logging
|
||||
|
||||
**Use JSON format for logs**:
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Structured JSON logging
|
||||
export const handler = async (event: any, context: Context) => {
const startTime = Date.now();
|
||||
console.log(JSON.stringify({
|
||||
level: 'INFO',
|
||||
message: 'Processing order',
|
||||
orderId: event.orderId,
|
||||
customerId: event.customerId,
|
||||
timestamp: new Date().toISOString(),
|
||||
requestId: context.awsRequestId,
|
||||
}));
|
||||
|
||||
try {
|
||||
const result = await processOrder(event);
|
||||
|
||||
console.log(JSON.stringify({
|
||||
level: 'INFO',
|
||||
message: 'Order processed successfully',
|
||||
orderId: event.orderId,
|
||||
duration: Date.now() - startTime,
|
||||
timestamp: new Date().toISOString(),
|
||||
}));
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
console.error(JSON.stringify({
|
||||
level: 'ERROR',
|
||||
message: 'Order processing failed',
|
||||
orderId: event.orderId,
|
||||
error: {
|
||||
name: error.name,
|
||||
message: error.message,
|
||||
stack: error.stack,
|
||||
},
|
||||
timestamp: new Date().toISOString(),
|
||||
}));
|
||||
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
|
||||
// ❌ BAD - Unstructured logging
|
||||
console.log('Processing order ' + orderId + ' for customer ' + customerId);
|
||||
```
|
||||
|
||||
**Using Lambda Powertools Logger**:
|
||||
|
||||
```typescript
|
||||
import { Logger } from '@aws-lambda-powertools/logger';
|
||||
|
||||
const logger = new Logger({
|
||||
serviceName: 'OrderService',
|
||||
logLevel: 'INFO',
|
||||
});
|
||||
|
||||
export const handler = async (event: any, context: Context) => {
|
||||
logger.addContext(context);
|
||||
|
||||
logger.info('Processing order', {
|
||||
orderId: event.orderId,
|
||||
customerId: event.customerId,
|
||||
});
|
||||
|
||||
try {
|
||||
const result = await processOrder(event);
|
||||
|
||||
logger.info('Order processed', {
|
||||
orderId: event.orderId,
|
||||
amount: result.amount,
|
||||
});
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
logger.error('Order processing failed', {
|
||||
orderId: event.orderId,
|
||||
error,
|
||||
});
|
||||
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
### Log Levels
|
||||
|
||||
**Use appropriate log levels**:
|
||||
- **ERROR**: Errors requiring immediate attention
|
||||
- **WARN**: Warnings or recoverable errors
|
||||
- **INFO**: Important business events
|
||||
- **DEBUG**: Detailed debugging information (disable in production)
|
||||
|
||||
```typescript
|
||||
const logger = new Logger({
|
||||
serviceName: 'OrderService',
|
||||
logLevel: process.env.LOG_LEVEL || 'INFO',
|
||||
});
|
||||
|
||||
logger.debug('Detailed processing info', { data });
|
||||
logger.info('Business event occurred', { event });
|
||||
logger.warn('Recoverable error', { error });
|
||||
logger.error('Critical failure', { error });
|
||||
```
|
||||
|
||||
### Log Insights Queries
|
||||
|
||||
**Common CloudWatch Logs Insights queries**:
|
||||
|
||||
```
|
||||
# Find errors in last hour
|
||||
fields @timestamp, @message, level, error.message
|
||||
| filter level = "ERROR"
|
||||
| sort @timestamp desc
|
||||
| limit 100
|
||||
|
||||
# Count errors by type
|
||||
stats count(*) as errorCount by error.name
| sort errorCount desc
|
||||
|
||||
# Calculate p99 latency
|
||||
stats pct(duration, 99) by serviceName
|
||||
|
||||
# Find slow requests
|
||||
fields @timestamp, orderId, duration
|
||||
| filter duration > 1000
|
||||
| sort duration desc
|
||||
| limit 50
|
||||
|
||||
# Track specific customer requests
|
||||
fields @timestamp, @message, orderId
|
||||
| filter customerId = "customer-123"
|
||||
| sort @timestamp desc
|
||||
```
|
||||
|
||||
## Tracing
|
||||
|
||||
### Enable X-Ray Tracing
|
||||
|
||||
**Configure X-Ray for Lambda**:
|
||||
|
||||
```typescript
|
||||
const fn = new NodejsFunction(this, 'Function', {
|
||||
entry: 'src/handler.ts',
|
||||
tracing: lambda.Tracing.ACTIVE, // Enable X-Ray
|
||||
});
|
||||
|
||||
// API Gateway tracing
|
||||
const api = new apigateway.RestApi(this, 'Api', {
|
||||
deployOptions: {
|
||||
tracingEnabled: true,
|
||||
},
|
||||
});
|
||||
|
||||
// Step Functions tracing
|
||||
new stepfunctions.StateMachine(this, 'StateMachine', {
|
||||
definition,
|
||||
tracingEnabled: true,
|
||||
});
|
||||
```
|
||||
|
||||
**Instrument application code**:
|
||||
|
||||
```typescript
|
||||
import { captureAWSv3Client } from 'aws-xray-sdk-core';
|
||||
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
|
||||
|
||||
// Wrap AWS SDK clients
|
||||
const client = captureAWSv3Client(new DynamoDBClient({}));
|
||||
|
||||
// Custom segments
|
||||
import AWSXRay from 'aws-xray-sdk-core';
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
const segment = AWSXRay.getSegment();
|
||||
|
||||
// Custom subsegment
|
||||
const subsegment = segment.addNewSubsegment('ProcessOrder');
|
||||
|
||||
try {
|
||||
// Add annotations (indexed for filtering)
|
||||
subsegment.addAnnotation('orderId', event.orderId);
|
||||
subsegment.addAnnotation('customerId', event.customerId);
|
||||
|
||||
// Add metadata (not indexed, detailed info)
|
||||
subsegment.addMetadata('orderDetails', event);
|
||||
|
||||
const result = await processOrder(event);
|
||||
|
||||
subsegment.addAnnotation('status', 'success');
|
||||
subsegment.close();
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
subsegment.addError(error);
|
||||
subsegment.close();
|
||||
throw error;
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
**Using Lambda Powertools Tracer**:
|
||||
|
||||
```typescript
|
||||
import { Tracer } from '@aws-lambda-powertools/tracer';
|
||||
|
||||
const tracer = new Tracer({ serviceName: 'OrderService' });
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
const segment = tracer.getSegment();
|
||||
|
||||
// Automatically captures and traces
|
||||
const result = await tracer.captureAWSv3Client(dynamodb).getItem({
|
||||
TableName: process.env.TABLE_NAME,
|
||||
Key: { orderId: event.orderId },
|
||||
});
|
||||
|
||||
// Custom annotation
|
||||
tracer.putAnnotation('orderId', event.orderId);
|
||||
tracer.putMetadata('orderDetails', event);
|
||||
|
||||
return result;
|
||||
};
|
||||
```
|
||||
|
||||
### Service Map
|
||||
|
||||
**Visualize service dependencies** with X-Ray:
|
||||
- Shows service-to-service communication
|
||||
- Identifies latency bottlenecks
|
||||
- Highlights error rates between services
|
||||
- Tracks downstream dependencies
|
||||
|
||||
### Distributed Tracing Best Practices
|
||||
|
||||
1. **Enable tracing everywhere**: Lambda, API Gateway, Step Functions
|
||||
2. **Use annotations for filtering**: Indexed fields for queries
|
||||
3. **Use metadata for details**: Non-indexed detailed information
|
||||
4. **Sample appropriately**: 100% for low traffic, sampled for high traffic
|
||||
5. **Correlate with logs**: Include trace ID in log entries
|
||||
|
||||
## Unified Observability
|
||||
|
||||
### Correlation Between Pillars
|
||||
|
||||
**Include trace ID in logs**:
|
||||
|
||||
```typescript
|
||||
export const handler = async (event: any, context: Context) => {
|
||||
const traceId = process.env._X_AMZN_TRACE_ID;
|
||||
|
||||
console.log(JSON.stringify({
|
||||
level: 'INFO',
|
||||
message: 'Processing order',
|
||||
traceId,
|
||||
    requestId: context.awsRequestId,
|
||||
orderId: event.orderId,
|
||||
}));
|
||||
};
|
||||
```
|
||||
|
||||
### CloudWatch ServiceLens
|
||||
|
||||
**Unified view of traces and metrics**:
|
||||
- Automatically correlates X-Ray traces with CloudWatch metrics
|
||||
- Shows service map with metrics overlay
|
||||
- Identifies performance and availability issues
|
||||
- Provides end-to-end request view
|
||||
|
||||
### Lambda Powertools Integration
|
||||
|
||||
**All three pillars in one**:
|
||||
|
||||
```typescript
|
||||
import { Logger } from '@aws-lambda-powertools/logger';
|
||||
import { Tracer } from '@aws-lambda-powertools/tracer';
|
||||
import { Metrics, MetricUnits } from '@aws-lambda-powertools/metrics';
|
||||
|
||||
const logger = new Logger({ serviceName: 'OrderService' });
|
||||
const tracer = new Tracer({ serviceName: 'OrderService' });
|
||||
const metrics = new Metrics({ namespace: 'MyApp', serviceName: 'OrderService' });
|
||||
|
||||
export const handler = async (event: any, context: Context) => {
|
||||
// Automatically adds trace context to logs
|
||||
logger.addContext(context);
|
||||
|
||||
logger.info('Processing order', { orderId: event.orderId });
|
||||
|
||||
// Add trace annotations
|
||||
tracer.putAnnotation('orderId', event.orderId);
|
||||
|
||||
// Add metrics
|
||||
metrics.addMetric('Invocation', MetricUnits.Count, 1);
|
||||
|
||||
const startTime = Date.now();
|
||||
|
||||
try {
|
||||
const result = await processOrder(event);
|
||||
|
||||
metrics.addMetric('Success', MetricUnits.Count, 1);
|
||||
metrics.addMetric('Duration', MetricUnits.Milliseconds, Date.now() - startTime);
|
||||
|
||||
logger.info('Order processed', { orderId: event.orderId });
|
||||
|
||||
return result;
|
||||
} catch (error) {
|
||||
metrics.addMetric('Error', MetricUnits.Count, 1);
|
||||
logger.error('Processing failed', { orderId: event.orderId, error });
|
||||
throw error;
|
||||
} finally {
|
||||
metrics.publishStoredMetrics();
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
## Alerting
|
||||
|
||||
### Effective Alerting Strategy
|
||||
|
||||
**Alert on what matters**:
|
||||
- **Critical**: Customer-impacting issues (errors, high latency)
|
||||
- **Warning**: Approaching thresholds (80% capacity)
|
||||
- **Info**: Trends and anomalies (cost spikes)
|
||||
|
||||
**Alarm fatigue prevention**:
|
||||
- Tune thresholds based on actual patterns
|
||||
- Use composite alarms to reduce noise
|
||||
- Set appropriate evaluation periods
|
||||
- Include clear remediation steps
|
||||
|
||||
### CloudWatch Alarms
|
||||
|
||||
**Common alarm patterns**:
|
||||
|
||||
```typescript
|
||||
// Error rate alarm
|
||||
new cloudwatch.Alarm(this, 'ErrorRateAlarm', {
|
||||
metric: new cloudwatch.MathExpression({
|
||||
expression: 'errors / invocations * 100',
|
||||
usingMetrics: {
|
||||
errors: fn.metricErrors({ statistic: 'Sum' }),
|
||||
invocations: fn.metricInvocations({ statistic: 'Sum' }),
|
||||
},
|
||||
}),
|
||||
threshold: 1, // 1% error rate
|
||||
evaluationPeriods: 2,
|
||||
alarmDescription: 'Error rate exceeded 1%',
|
||||
});
|
||||
|
||||
// Latency alarm (p99)
|
||||
new cloudwatch.Alarm(this, 'LatencyAlarm', {
|
||||
metric: fn.metricDuration({
|
||||
statistic: 'p99',
|
||||
period: Duration.minutes(5),
|
||||
}),
|
||||
threshold: 1000, // 1 second
|
||||
evaluationPeriods: 2,
|
||||
alarmDescription: 'p99 latency exceeded 1 second',
|
||||
});
|
||||
|
||||
// Concurrent executions approaching limit
|
||||
new cloudwatch.Alarm(this, 'ConcurrencyAlarm', {
|
||||
metric: fn.metricConcurrentExecutions({
|
||||
statistic: 'Maximum',
|
||||
}),
|
||||
threshold: 800, // 80% of 1000 default limit
|
||||
evaluationPeriods: 1,
|
||||
alarmDescription: 'Approaching concurrency limit',
|
||||
});
|
||||
```
|
||||
|
||||
### Composite Alarms
|
||||
|
||||
**Reduce alert noise**:
|
||||
|
||||
```typescript
|
||||
const errorAlarm = new cloudwatch.Alarm(this, 'Errors', {
|
||||
metric: fn.metricErrors(),
|
||||
threshold: 10,
|
||||
evaluationPeriods: 1,
|
||||
});
|
||||
|
||||
const throttleAlarm = new cloudwatch.Alarm(this, 'Throttles', {
|
||||
metric: fn.metricThrottles(),
|
||||
threshold: 5,
|
||||
evaluationPeriods: 1,
|
||||
});
|
||||
|
||||
const latencyAlarm = new cloudwatch.Alarm(this, 'Latency', {
|
||||
metric: fn.metricDuration({ statistic: 'p99' }),
|
||||
threshold: 2000,
|
||||
evaluationPeriods: 2,
|
||||
});
|
||||
|
||||
// Composite alarm (any of the above)
|
||||
new cloudwatch.CompositeAlarm(this, 'ServiceHealthAlarm', {
|
||||
compositeAlarmName: 'order-service-health',
|
||||
alarmRule: cloudwatch.AlarmRule.anyOf(
|
||||
errorAlarm,
|
||||
throttleAlarm,
|
||||
latencyAlarm
|
||||
),
|
||||
alarmDescription: 'Overall service health degraded',
|
||||
});
|
||||
```
|
||||
|
||||
## Dashboard Best Practices
|
||||
|
||||
### Service Dashboard Layout
|
||||
|
||||
**Recommended sections**:
|
||||
|
||||
1. **Overview**:
|
||||
- Total invocations
|
||||
- Error rate percentage
|
||||
- P50, P95, P99 latency
|
||||
- Availability percentage
|
||||
|
||||
2. **Resource Utilization**:
|
||||
- Concurrent executions
|
||||
- Memory utilization
|
||||
- Duration distribution
|
||||
- Throttles
|
||||
|
||||
3. **Business Metrics**:
|
||||
- Orders processed
|
||||
- Revenue per minute
|
||||
- Customer activity
|
||||
- Feature usage
|
||||
|
||||
4. **Errors and Alerts**:
|
||||
- Error count by type
|
||||
- Active alarms
|
||||
- DLQ message count
|
||||
- Failed transactions
|
||||
|
||||
### CloudWatch Dashboard CDK
|
||||
|
||||
```typescript
|
||||
const dashboard = new cloudwatch.Dashboard(this, 'ServiceDashboard', {
|
||||
dashboardName: 'order-service',
|
||||
});
|
||||
|
||||
dashboard.addWidgets(
|
||||
// Row 1: Overview
|
||||
new cloudwatch.GraphWidget({
|
||||
title: 'Invocations',
|
||||
left: [fn.metricInvocations()],
|
||||
}),
|
||||
new cloudwatch.SingleValueWidget({
|
||||
title: 'Error Rate',
|
||||
metrics: [
|
||||
new cloudwatch.MathExpression({
|
||||
expression: 'errors / invocations * 100',
|
||||
usingMetrics: {
|
||||
errors: fn.metricErrors({ statistic: 'Sum' }),
|
||||
invocations: fn.metricInvocations({ statistic: 'Sum' }),
|
||||
},
|
||||
}),
|
||||
],
|
||||
}),
|
||||
new cloudwatch.GraphWidget({
|
||||
title: 'Latency (p50, p95, p99)',
|
||||
left: [
|
||||
fn.metricDuration({ statistic: 'p50', label: 'p50' }),
|
||||
fn.metricDuration({ statistic: 'p95', label: 'p95' }),
|
||||
fn.metricDuration({ statistic: 'p99', label: 'p99' }),
|
||||
],
|
||||
})
|
||||
);
|
||||
|
||||
// Row 2: Errors
|
||||
dashboard.addWidgets(
|
||||
new cloudwatch.LogQueryWidget({
|
||||
title: 'Recent Errors',
|
||||
logGroupNames: [fn.logGroup.logGroupName],
|
||||
queryLines: [
|
||||
'fields @timestamp, @message',
|
||||
'filter level = "ERROR"',
|
||||
'sort @timestamp desc',
|
||||
'limit 20',
|
||||
],
|
||||
})
|
||||
);
|
||||
```
|
||||
|
||||
## Monitoring Serverless Architectures
|
||||
|
||||
### End-to-End Monitoring
|
||||
|
||||
**Monitor the entire flow**:
|
||||
|
||||
```
|
||||
API Gateway → Lambda → DynamoDB → EventBridge → Lambda
|
||||
↓ ↓ ↓ ↓ ↓
|
||||
Metrics Traces Metrics Metrics Logs
|
||||
```
|
||||
|
||||
**Key metrics per service**:
|
||||
|
||||
| Service | Key Metrics |
|
||||
|---------|-------------|
|
||||
| API Gateway | Count, 4XXError, 5XXError, Latency, CacheHitCount |
|
||||
| Lambda | Invocations, Errors, Duration, Throttles, ConcurrentExecutions |
|
||||
| DynamoDB | ConsumedReadCapacity, ConsumedWriteCapacity, UserErrors, SystemErrors |
|
||||
| SQS | NumberOfMessagesSent, NumberOfMessagesReceived, ApproximateAgeOfOldestMessage |
|
||||
| EventBridge | Invocations, FailedInvocations, TriggeredRules |
|
||||
| Step Functions | ExecutionsStarted, ExecutionsFailed, ExecutionTime |
|
||||
|
||||
### Synthetic Monitoring
|
||||
|
||||
**Use CloudWatch Synthetics for API monitoring**:
|
||||
|
||||
```typescript
|
||||
import * as synthetics from '@aws-cdk/aws-synthetics-alpha';
const { Canary, Test, Code, Schedule } = synthetics;
|
||||
|
||||
new Canary(this, 'ApiCanary', {
|
||||
canaryName: 'api-health-check',
|
||||
schedule: Schedule.rate(Duration.minutes(5)),
|
||||
test: Test.custom({
|
||||
code: Code.fromInline(`
|
||||
const synthetics = require('Synthetics');
|
||||
|
||||
const apiCanaryBlueprint = async function () {
|
||||
const response = await synthetics.executeHttpStep('Verify API', {
|
||||
url: 'https://api.example.com/health',
|
||||
method: 'GET',
|
||||
});
|
||||
|
||||
return response.statusCode === 200 ? 'success' : 'failure';
|
||||
};
|
||||
|
||||
exports.handler = async () => {
|
||||
return await apiCanaryBlueprint();
|
||||
};
|
||||
`),
|
||||
handler: 'index.handler',
|
||||
}),
|
||||
runtime: synthetics.Runtime.SYNTHETICS_NODEJS_PUPPETEER_6_2,
|
||||
});
|
||||
```
|
||||
|
||||
## OpenTelemetry Integration
|
||||
|
||||
### Amazon Distro for OpenTelemetry (ADOT)
|
||||
|
||||
**Use ADOT for vendor-neutral observability**:
|
||||
|
||||
```typescript
|
||||
// Lambda Layer with ADOT
|
||||
const adotLayer = lambda.LayerVersion.fromLayerVersionArn(
|
||||
this,
|
||||
'AdotLayer',
|
||||
`arn:aws:lambda:${this.region}:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:4`
|
||||
);
|
||||
|
||||
new NodejsFunction(this, 'Function', {
|
||||
entry: 'src/handler.ts',
|
||||
layers: [adotLayer],
|
||||
tracing: lambda.Tracing.ACTIVE,
|
||||
environment: {
|
||||
AWS_LAMBDA_EXEC_WRAPPER: '/opt/otel-handler',
|
||||
OPENTELEMETRY_COLLECTOR_CONFIG_FILE: '/var/task/collector.yaml',
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
**Benefits of ADOT**:
|
||||
- Vendor-neutral (works with Datadog, New Relic, Honeycomb, etc.)
|
||||
- Automatic instrumentation
|
||||
- Consistent format across services
|
||||
- Export to multiple backends
|
||||
|
||||
## Best Practices Summary
|
||||
|
||||
### Metrics
|
||||
- ✅ Use CloudWatch Embedded Metric Format (EMF)
|
||||
- ✅ Track business metrics, not just technical metrics
|
||||
- ✅ Set alarms on error rate, latency, and throughput
|
||||
- ✅ Use p99 for latency, not average
|
||||
- ✅ Create dashboards for key services
|
||||
|
||||
### Logging
|
||||
- ✅ Use structured JSON logging
|
||||
- ✅ Include correlation IDs (request ID, trace ID)
|
||||
- ✅ Use appropriate log levels
|
||||
- ✅ Never log sensitive data (PII, secrets)
|
||||
- ✅ Use CloudWatch Logs Insights for analysis
|
||||
|
||||
### Tracing
|
||||
- ✅ Enable X-Ray tracing on all services
|
||||
- ✅ Instrument AWS SDK calls
|
||||
- ✅ Add custom annotations for business context
|
||||
- ✅ Use service map to understand dependencies
|
||||
- ✅ Correlate traces with logs and metrics
|
||||
|
||||
### Alerting
|
||||
- ✅ Alert on customer-impacting issues
|
||||
- ✅ Tune thresholds to reduce false positives
|
||||
- ✅ Use composite alarms to reduce noise
|
||||
- ✅ Include clear remediation steps
|
||||
- ✅ Escalate critical alarms appropriately
|
||||
|
||||
### Tools
|
||||
- ✅ Use Lambda Powertools for unified observability
|
||||
- ✅ Use CloudWatch ServiceLens for service view
|
||||
- ✅ Use Synthetics for proactive monitoring
|
||||
- ✅ Consider ADOT for vendor-neutral observability
|
||||
671
skills/aws-serverless-eda/references/performance-optimization.md
Normal file
671
skills/aws-serverless-eda/references/performance-optimization.md
Normal file
@@ -0,0 +1,671 @@
|
||||
# Serverless Performance Optimization
|
||||
|
||||
Performance optimization best practices for AWS Lambda and serverless architectures.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Lambda Execution Lifecycle](#lambda-execution-lifecycle)
|
||||
- [Cold Start Optimization](#cold-start-optimization)
|
||||
- [Memory and CPU Optimization](#memory-and-cpu-optimization)
|
||||
- [Package Size Optimization](#package-size-optimization)
|
||||
- [Initialization Optimization](#initialization-optimization)
|
||||
- [Runtime Performance](#runtime-performance)
|
||||
|
||||
## Lambda Execution Lifecycle
|
||||
|
||||
### Execution Environment Phases
|
||||
|
||||
**Three phases of Lambda execution**:
|
||||
|
||||
1. **Init Phase** (Cold Start):
|
||||
- Download and unpack function package
|
||||
- Create execution environment
|
||||
- Initialize runtime
|
||||
- Execute initialization code (outside handler)
|
||||
|
||||
2. **Invoke Phase**:
|
||||
- Execute handler code
|
||||
- Return response
|
||||
- Freeze execution environment
|
||||
|
||||
3. **Shutdown Phase**:
|
||||
- Runtime shutdown (after period of inactivity)
|
||||
- Execution environment destroyed
|
||||
|
||||
### Concurrency and Scaling
|
||||
|
||||
**Key concepts**:
|
||||
- **Concurrency**: Number of execution environments serving requests simultaneously
|
||||
- **One event per environment**: Each environment processes one event at a time
|
||||
- **Automatic scaling**: Lambda creates new environments as needed
|
||||
- **Environment reuse**: Warm starts reuse existing environments
|
||||
|
||||
**Example**:
|
||||
- Function takes 100ms to execute
|
||||
- Single environment can handle 10 requests/second
|
||||
- 100 concurrent requests = 10 environments needed
|
||||
- Default account limit: 1,000 concurrent executions (can be raised)
|
||||
|
||||
## Cold Start Optimization
|
||||
|
||||
### Understanding Cold Starts
|
||||
|
||||
**Cold start components**:
|
||||
```
|
||||
Total Cold Start = Download Package + Init Environment + Init Code + Handler
|
||||
```
|
||||
|
||||
**Cold start frequency**:
|
||||
- Development: Every code change creates new environments (frequent)
|
||||
- Production: Typically < 1% of invocations
|
||||
- Optimize for p95/p99 latency, not average
|
||||
|
||||
### Package Size Optimization
|
||||
|
||||
**Minimize deployment package**:
|
||||
|
||||
```typescript
|
||||
new NodejsFunction(this, 'Function', {
|
||||
entry: 'src/handler.ts',
|
||||
bundling: {
|
||||
minify: true, // Minify production code
|
||||
sourceMap: false, // Disable in production
|
||||
externalModules: [
|
||||
'@aws-sdk/*', // Use AWS SDK from runtime
|
||||
],
|
||||
// Tree-shaking removes unused code
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
**Tools for optimization**:
|
||||
- **esbuild**: Automatic tree-shaking and minification
|
||||
- **Webpack**: Bundle optimization
|
||||
- **Maven**: Dependency analysis
|
||||
- **Gradle**: Unused dependency detection
|
||||
|
||||
**Best practices**:
|
||||
1. Avoid monolithic functions
|
||||
2. Bundle only required dependencies
|
||||
3. Use tree-shaking to remove unused code
|
||||
4. Minify production code
|
||||
5. Exclude AWS SDK (provided by runtime)
|
||||
|
||||
### Provisioned Concurrency
|
||||
|
||||
**Pre-initialize environments for predictable latency**:
|
||||
|
||||
```typescript
|
||||
const fn = new NodejsFunction(this, 'Function', {
|
||||
entry: 'src/handler.ts',
|
||||
});
|
||||
|
||||
// Static provisioned concurrency
|
||||
fn.currentVersion.addAlias('live', {
|
||||
provisionedConcurrentExecutions: 10,
|
||||
});
|
||||
|
||||
// Auto-scaling provisioned concurrency
|
||||
const alias = fn.currentVersion.addAlias('prod');
|
||||
|
||||
const target = new applicationautoscaling.ScalableTarget(this, 'ScalableTarget', {
|
||||
serviceNamespace: applicationautoscaling.ServiceNamespace.LAMBDA,
|
||||
maxCapacity: 100,
|
||||
minCapacity: 10,
|
||||
resourceId: `function:${fn.functionName}:${alias.aliasName}`,
|
||||
scalableDimension: 'lambda:function:ProvisionedConcurrentExecutions',
|
||||
});
|
||||
|
||||
target.scaleOnUtilization({
|
||||
utilizationTarget: 0.7, // 70% utilization
|
||||
});
|
||||
```
|
||||
|
||||
**When to use**:
|
||||
- **Consistent traffic patterns**: Predictable load
|
||||
- **Latency-sensitive APIs**: Sub-100ms requirements
|
||||
- **Cost consideration**: Compare cold start frequency vs. provisioned cost
|
||||
|
||||
**Cost comparison**:
|
||||
- **On-demand**: Pay only for actual usage
|
||||
- **Provisioned**: Pay for provisioned capacity + invocations
|
||||
- **Breakeven**: When cold starts > ~20% of invocations
|
||||
|
||||
### Lambda SnapStart (Java)
|
||||
|
||||
**Instant cold starts for Java**:
|
||||
|
||||
```typescript
|
||||
new lambda.Function(this, 'JavaFunction', {
|
||||
runtime: lambda.Runtime.JAVA_17,
|
||||
code: lambda.Code.fromAsset('target/function.jar'),
|
||||
handler: 'com.example.Handler::handleRequest',
|
||||
snapStart: lambda.SnapStartConf.ON_PUBLISHED_VERSIONS,
|
||||
});
|
||||
```
|
||||
|
||||
**Benefits**:
|
||||
- Up to 10x faster cold starts for Java
|
||||
- No code changes required
|
||||
- Works with published versions
|
||||
- No additional cost
|
||||
|
||||
## Memory and CPU Optimization
|
||||
|
||||
### Memory = CPU Allocation
|
||||
|
||||
**Key principle**: Memory and CPU are proportionally allocated
|
||||
|
||||
| Memory | vCPU |
|
||||
|--------|------|
|
||||
| 128 MB | 0.07 vCPU |
|
||||
| 512 MB | 0.28 vCPU |
|
||||
| 1,024 MB | 0.57 vCPU |
|
||||
| 1,769 MB | 1.00 vCPU |
|
||||
| 3,538 MB | 2.00 vCPU |
|
||||
| 10,240 MB | 6.00 vCPU |
|
||||
|
||||
### Cost vs. Performance Balancing
|
||||
|
||||
**Example - Compute-intensive function**:
|
||||
|
||||
| Memory | Duration | Cost |
|
||||
|--------|----------|------|
|
||||
| 128 MB | 11.72s | $0.0246 |
|
||||
| 256 MB | 6.68s | $0.0280 |
|
||||
| 512 MB | 3.19s | $0.0268 |
|
||||
| 1024 MB | 1.46s | $0.0246 |
|
||||
|
||||
**Key insight**: More memory = faster execution = similar or lower cost
|
||||
|
||||
**Formula**:
|
||||
```
|
||||
GB-seconds = Allocated Memory (GB) × Execution Time (seconds)
|
||||
Cost = GB-seconds × Number of Invocations × Price per GB-second
|
||||
```
|
||||
|
||||
### Finding Optimal Memory
|
||||
|
||||
**Use Lambda Power Tuning**:
|
||||
|
||||
```bash
|
||||
# Deploy power tuning state machine
|
||||
sam deploy --template-file template.yml --stack-name lambda-power-tuning
|
||||
|
||||
# Run power tuning
|
||||
aws lambda invoke \
|
||||
--function-name powerTuningFunction \
|
||||
--payload '{"lambdaARN": "arn:aws:lambda:...", "powerValues": [128, 256, 512, 1024, 1536, 3008]}' \
|
||||
response.json
|
||||
```
|
||||
|
||||
**Manual testing approach**:
|
||||
1. Test function at different memory levels
|
||||
2. Measure execution time at each level
|
||||
3. Calculate cost for each configuration
|
||||
4. Choose optimal balance for your use case
|
||||
|
||||
### Multi-Core Optimization
|
||||
|
||||
**Leverage multiple vCPUs** (at 1,769 MB+):
|
||||
|
||||
```typescript
|
||||
// Use Worker Threads for parallel processing
|
||||
import { Worker } from 'worker_threads';
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
const items = event.items;
|
||||
|
||||
// Process in parallel using multiple cores
|
||||
const workers = items.map(item =>
|
||||
new Promise((resolve, reject) => {
|
||||
const worker = new Worker('./worker.js', {
|
||||
workerData: item,
|
||||
});
|
||||
|
||||
worker.on('message', resolve);
|
||||
worker.on('error', reject);
|
||||
})
|
||||
);
|
||||
|
||||
const results = await Promise.all(workers);
|
||||
return results;
|
||||
};
|
||||
```
|
||||
|
||||
**Python multiprocessing**:
|
||||
|
||||
```python
|
||||
import multiprocessing as mp
|
||||
|
||||
def handler(event, context):
|
||||
items = event['items']
|
||||
|
||||
# Use multiple cores for CPU-bound work
|
||||
with mp.Pool(mp.cpu_count()) as pool:
|
||||
results = pool.map(process_item, items)
|
||||
|
||||
return {'results': results}
|
||||
```
|
||||
|
||||
## Initialization Optimization
|
||||
|
||||
### Code Outside Handler
|
||||
|
||||
**Initialize once, reuse across invocations**:
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Initialize outside handler
|
||||
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
|
||||
import { S3Client } from '@aws-sdk/client-s3';
|
||||
|
||||
// Initialized once per execution environment
|
||||
const dynamodb = new DynamoDBClient({});
|
||||
const s3 = new S3Client({});
|
||||
|
||||
// Connection pool initialized once
|
||||
const pool = createConnectionPool({
|
||||
host: process.env.DB_HOST,
|
||||
max: 1, // One connection per execution environment
|
||||
});
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
// Reuse connections across invocations
|
||||
const data = await dynamodb.getItem({ /* ... */ });
|
||||
const file = await s3.getObject({ /* ... */ });
|
||||
return processData(data, file);
|
||||
};
|
||||
|
||||
// ❌ BAD - Initialize in handler
|
||||
export const handler = async (event: any) => {
|
||||
const dynamodb = new DynamoDBClient({}); // Created every invocation
|
||||
const s3 = new S3Client({}); // Created every invocation
|
||||
// ...
|
||||
};
|
||||
```
|
||||
|
||||
### Lazy Loading
|
||||
|
||||
**Load dependencies only when needed**:
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Conditional loading
|
||||
export const handler = async (event: any) => {
|
||||
if (event.operation === 'generatePDF') {
|
||||
// Load heavy PDF library only when needed
|
||||
const pdfLib = await import('./pdf-generator');
|
||||
return pdfLib.generatePDF(event.data);
|
||||
}
|
||||
|
||||
if (event.operation === 'processImage') {
|
||||
const sharp = await import('sharp');
|
||||
return processImage(sharp, event.data);
|
||||
}
|
||||
|
||||
// Default operation (no heavy dependencies)
|
||||
return processDefault(event);
|
||||
};
|
||||
|
||||
// ❌ BAD - Load everything upfront
|
||||
import pdfLib from './pdf-generator'; // 50MB
|
||||
import sharp from 'sharp'; // 20MB
|
||||
// Even if not used!
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
if (event.operation === 'generatePDF') {
|
||||
return pdfLib.generatePDF(event.data);
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
### Connection Reuse
|
||||
|
||||
**Enable connection reuse**:
|
||||
|
||||
```typescript
|
||||
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
|
||||
|
||||
const client = new DynamoDBClient({
|
||||
// Enable keep-alive for connection reuse
|
||||
requestHandler: {
|
||||
connectionTimeout: 3000,
|
||||
socketTimeout: 3000,
|
||||
},
|
||||
});
|
||||
|
||||
// AWS SDK for JavaScript v2 only: set this as a Lambda environment variable
// (setting it at runtime is too late; SDK v3 enables keep-alive by default)
|
||||
process.env.AWS_NODEJS_CONNECTION_REUSE_ENABLED = '1';
|
||||
```
|
||||
|
||||
## Runtime Performance
|
||||
|
||||
### Choose the Right Runtime
|
||||
|
||||
**Runtime comparison**:
|
||||
|
||||
| Runtime | Cold Start | Execution Speed | Ecosystem | Best For |
|
||||
|---------|------------|-----------------|-----------|----------|
|
||||
| Node.js 20 | Fast | Fast | Excellent | APIs, I/O-bound |
|
||||
| Python 3.12 | Fast | Medium | Excellent | Data processing |
|
||||
| Java 17 + SnapStart | Fast (w/SnapStart) | Fast | Good | Enterprise apps |
|
||||
| .NET 8 | Medium | Fast | Good | Enterprise apps |
|
||||
| Go | Very Fast | Very Fast | Good | High performance |
|
||||
| Rust | Very Fast | Very Fast | Growing | High performance |
|
||||
|
||||
### Optimize Handler Code
|
||||
|
||||
**Efficient code patterns**:
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Batch operations
|
||||
const items = ['item1', 'item2', 'item3'];
|
||||
|
||||
// Single batch write
|
||||
await dynamodb.batchWriteItem({
|
||||
RequestItems: {
|
||||
[tableName]: items.map(item => ({
|
||||
PutRequest: { Item: item },
|
||||
})),
|
||||
},
|
||||
});
|
||||
|
||||
// ❌ BAD - Multiple single operations
|
||||
for (const item of items) {
|
||||
await dynamodb.putItem({
|
||||
TableName: tableName,
|
||||
Item: item,
|
||||
}); // Slow, multiple round trips
|
||||
}
|
||||
```
|
||||
|
||||
### Async Processing
|
||||
|
||||
**Use async/await effectively**:
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Parallel async operations
|
||||
const [userData, orderData, inventoryData] = await Promise.all([
|
||||
getUserData(userId),
|
||||
getOrderData(orderId),
|
||||
getInventoryData(productId),
|
||||
]);
|
||||
|
||||
// ❌ BAD - Sequential async operations
|
||||
const userData = await getUserData(userId);
|
||||
const orderData = await getOrderData(orderId); // Waits unnecessarily
|
||||
const inventoryData = await getInventoryData(productId); // Waits unnecessarily
|
||||
```
|
||||
|
||||
### Caching Strategies
|
||||
|
||||
**Cache frequently accessed data**:
|
||||
|
||||
```typescript
|
||||
// In-memory cache (persists in warm environments)
|
||||
const cache = new Map<string, any>();
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
const key = event.key;
|
||||
|
||||
// Check cache first
|
||||
if (cache.has(key)) {
|
||||
console.log('Cache hit');
|
||||
return cache.get(key);
|
||||
}
|
||||
|
||||
// Fetch from database
|
||||
const data = await fetchFromDatabase(key);
|
||||
|
||||
// Store in cache
|
||||
cache.set(key, data);
|
||||
|
||||
return data;
|
||||
};
|
||||
```
|
||||
|
||||
**ElastiCache for shared cache**:
|
||||
|
||||
```typescript
|
||||
import Redis from 'ioredis';
|
||||
|
||||
// Initialize once
|
||||
const redis = new Redis({
|
||||
host: process.env.REDIS_HOST,
|
||||
port: 6379,
|
||||
lazyConnect: true,
|
||||
enableOfflineQueue: false,
|
||||
});
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
const key = `order:${event.orderId}`;
|
||||
|
||||
// Try cache
|
||||
const cached = await redis.get(key);
|
||||
if (cached) {
|
||||
return JSON.parse(cached);
|
||||
}
|
||||
|
||||
// Fetch and cache
|
||||
const data = await fetchOrder(event.orderId);
|
||||
await redis.setex(key, 300, JSON.stringify(data)); // 5 min TTL
|
||||
|
||||
return data;
|
||||
};
|
||||
```
|
||||
|
||||
## Performance Testing
|
||||
|
||||
### Load Testing
|
||||
|
||||
**Use Artillery for load testing**:
|
||||
|
||||
```yaml
|
||||
# load-test.yml
|
||||
config:
|
||||
target: https://api.example.com
|
||||
phases:
|
||||
- duration: 60
|
||||
arrivalRate: 10
|
||||
rampTo: 100 # Ramp from 10 to 100 req/sec
|
||||
scenarios:
|
||||
- flow:
|
||||
- post:
|
||||
url: /orders
|
||||
json:
|
||||
orderId: "{{ $randomString() }}"
|
||||
amount: "{{ $randomNumber(10, 1000) }}"
|
||||
```
|
||||
|
||||
```bash
|
||||
artillery run load-test.yml
|
||||
```
|
||||
|
||||
### Benchmarking
|
||||
|
||||
**Test different configurations**:
|
||||
|
||||
```typescript
|
||||
// benchmark.ts
|
||||
import { Lambda } from '@aws-sdk/client-lambda';
|
||||
|
||||
const lambda = new Lambda({});
|
||||
|
||||
const testConfigurations = [
|
||||
{ memory: 128, name: 'Function-128' },
|
||||
{ memory: 256, name: 'Function-256' },
|
||||
{ memory: 512, name: 'Function-512' },
|
||||
{ memory: 1024, name: 'Function-1024' },
|
||||
];
|
||||
|
||||
for (const config of testConfigurations) {
|
||||
const times: number[] = [];
|
||||
|
||||
// Warm up
|
||||
for (let i = 0; i < 5; i++) {
|
||||
await lambda.invoke({ FunctionName: config.name });
|
||||
}
|
||||
|
||||
// Measure
|
||||
for (let i = 0; i < 100; i++) {
|
||||
const start = Date.now();
|
||||
await lambda.invoke({ FunctionName: config.name });
|
||||
times.push(Date.now() - start);
|
||||
}
|
||||
|
||||
  const p99 = times.sort((a, b) => a - b)[98]; // numeric sort; nearest-rank p99 of 100 samples
|
||||
const avg = times.reduce((a, b) => a + b) / times.length;
|
||||
|
||||
console.log(`${config.memory}MB - Avg: ${avg}ms, p99: ${p99}ms`);
|
||||
}
|
||||
```
|
||||
|
||||
## Cost Optimization
|
||||
|
||||
### Right-Sizing Memory
|
||||
|
||||
**Balance cost and performance**:
|
||||
|
||||
**CPU-bound workloads**:
|
||||
- More memory = more CPU = faster execution
|
||||
- Often results in lower cost overall
|
||||
- Test at 1769MB (1 vCPU) and above
|
||||
|
||||
**I/O-bound workloads**:
|
||||
- Less sensitive to memory allocation
|
||||
- May not benefit from higher memory
|
||||
- Test at lower memory levels (256-512MB)
|
||||
|
||||
**Simple operations**:
|
||||
- Minimal CPU required
|
||||
- Use minimum memory (128-256MB)
|
||||
- Fast execution despite low resources
|
||||
|
||||
### Billing Granularity
|
||||
|
||||
**Lambda bills in 1ms increments**:
|
||||
- Precise billing (7ms execution = 7ms cost)
|
||||
- Optimize even small improvements
|
||||
- Consider trade-offs carefully
|
||||
|
||||
**Cost calculation**:
|
||||
```
|
||||
Cost = (Memory GB) × (Duration seconds) × (Invocations) × ($0.0000166667/GB-second)
|
||||
+ (Invocations) × ($0.20/1M requests)
|
||||
```
|
||||
|
||||
### Cost Reduction Strategies
|
||||
|
||||
1. **Optimize execution time**: Faster = cheaper
|
||||
2. **Right-size memory**: Balance CPU needs with cost
|
||||
3. **Reduce invocations**: Batch processing, caching
|
||||
4. **Use Graviton2**: 20% better price/performance
|
||||
5. **Reserved Concurrency**: Only when needed
|
||||
6. **Compression**: Reduce data transfer costs
|
||||
|
||||
## Advanced Optimization
|
||||
|
||||
### Lambda Extensions
|
||||
|
||||
**Use extensions for cross-cutting concerns**:
|
||||
|
||||
```typescript
|
||||
// Lambda layer with extension
|
||||
const extensionLayer = lambda.LayerVersion.fromLayerVersionArn(
|
||||
this,
|
||||
'Extension',
|
||||
'arn:aws:lambda:us-east-1:123456789:layer:my-extension:1'
|
||||
);
|
||||
|
||||
new NodejsFunction(this, 'Function', {
|
||||
entry: 'src/handler.ts',
|
||||
layers: [extensionLayer],
|
||||
});
|
||||
```
|
||||
|
||||
**Common extensions**:
|
||||
- Secrets caching
|
||||
- Configuration caching
|
||||
- Custom logging
|
||||
- Security scanning
|
||||
- Performance monitoring
|
||||
|
||||
### Graviton2 Architecture
|
||||
|
||||
**20% better price/performance**:
|
||||
|
||||
```typescript
|
||||
new NodejsFunction(this, 'Function', {
|
||||
entry: 'src/handler.ts',
|
||||
architecture: lambda.Architecture.ARM_64, // Graviton2
|
||||
});
|
||||
```
|
||||
|
||||
**Considerations**:
|
||||
- Most runtimes support ARM64
|
||||
- Test thoroughly before migrating
|
||||
- Dependencies must support ARM64
|
||||
- Native extensions may need recompilation
|
||||
|
||||
### VPC Optimization
|
||||
|
||||
**Hyperplane ENIs** (automatic since 2019):
|
||||
- No ENI per function
|
||||
- Faster cold starts in VPC
|
||||
- Scales instantly
|
||||
|
||||
```typescript
|
||||
// Modern VPC configuration (fast)
|
||||
new NodejsFunction(this, 'VpcFunction', {
|
||||
entry: 'src/handler.ts',
|
||||
vpc,
|
||||
vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS },
|
||||
// Fast scaling, no ENI limitations
|
||||
});
|
||||
```
|
||||
|
||||
## Performance Monitoring
|
||||
|
||||
### Key Metrics
|
||||
|
||||
**Monitor these metrics**:
|
||||
- **Duration**: p50, p95, p99, max
|
||||
- **Cold Start %**: ColdStartDuration / TotalDuration
|
||||
- **Error Rate**: Errors / Invocations
|
||||
- **Throttles**: Indicates concurrency limit reached
|
||||
- **Iterator Age**: For stream processing lag
|
||||
|
||||
### Performance Dashboards
|
||||
|
||||
```typescript
|
||||
const dashboard = new cloudwatch.Dashboard(this, 'PerformanceDashboard');
|
||||
|
||||
dashboard.addWidgets(
|
||||
new cloudwatch.GraphWidget({
|
||||
title: 'Latency Distribution',
|
||||
left: [
|
||||
fn.metricDuration({ statistic: 'p50', label: 'p50' }),
|
||||
fn.metricDuration({ statistic: 'p95', label: 'p95' }),
|
||||
fn.metricDuration({ statistic: 'p99', label: 'p99' }),
|
||||
fn.metricDuration({ statistic: 'Maximum', label: 'max' }),
|
||||
],
|
||||
}),
|
||||
new cloudwatch.GraphWidget({
|
||||
title: 'Memory Utilization',
|
||||
left: [fn.metricDuration()],
|
||||
right: [fn.metricErrors()],
|
||||
})
|
||||
);
|
||||
```
|
||||
|
||||
## Summary
|
||||
|
||||
- **Cold Starts**: Optimize package size, use provisioned concurrency for critical paths
|
||||
- **Memory**: More memory often = faster execution = lower cost
|
||||
- **Initialization**: Initialize connections outside handler
|
||||
- **Lazy Loading**: Load dependencies only when needed
|
||||
- **Connection Reuse**: Enable for AWS SDK clients
|
||||
- **Testing**: Test at different memory levels to find optimal configuration
|
||||
- **Monitoring**: Track p99 latency, not average
|
||||
- **Graviton2**: Consider ARM64 for better price/performance
|
||||
- **Batch Operations**: Reduce round trips to services
|
||||
- **Caching**: Cache frequently accessed data
|
||||
625
skills/aws-serverless-eda/references/security-best-practices.md
Normal file
625
skills/aws-serverless-eda/references/security-best-practices.md
Normal file
@@ -0,0 +1,625 @@
|
||||
# Serverless Security Best Practices
|
||||
|
||||
Security best practices for serverless applications based on AWS Well-Architected Framework.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Shared Responsibility Model](#shared-responsibility-model)
|
||||
- [Identity and Access Management](#identity-and-access-management)
|
||||
- [Function Security](#function-security)
|
||||
- [API Security](#api-security)
|
||||
- [Data Protection](#data-protection)
|
||||
- [Network Security](#network-security)
|
||||
|
||||
## Shared Responsibility Model
|
||||
|
||||
### Serverless Shifts Responsibility to AWS
|
||||
|
||||
With serverless, AWS takes on more security responsibilities:
|
||||
|
||||
**AWS Responsibilities**:
|
||||
- Compute infrastructure
|
||||
- Execution environment
|
||||
- Runtime language and patches
|
||||
- Networking infrastructure
|
||||
- Server software and OS
|
||||
- Physical hardware and facilities
|
||||
- Automatic security patches (like Log4Shell mitigation)
|
||||
|
||||
**Customer Responsibilities**:
|
||||
- Function code and dependencies
|
||||
- Resource configuration
|
||||
- Identity and Access Management (IAM)
|
||||
- Data encryption (at rest and in transit)
|
||||
- Application-level security
|
||||
- Secure coding practices
|
||||
|
||||
### Benefits of Shifted Responsibility
|
||||
|
||||
- **Automatic Patching**: AWS applies security patches automatically (e.g., Log4Shell fixed within 3 days)
|
||||
- **Infrastructure Security**: No OS patching, server hardening, or vulnerability scanning
|
||||
- **Operational Agility**: Quick security response at scale
|
||||
- **Focus on Code**: Spend time on business logic, not infrastructure security
|
||||
|
||||
## Identity and Access Management
|
||||
|
||||
### Least Privilege Principle
|
||||
|
||||
**Always use least privilege IAM policies**:
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Specific grant
|
||||
const table = new dynamodb.Table(this, 'Table', {});
|
||||
const myFunction = new lambda.Function(this, 'Function', {});
|
||||
|
||||
table.grantReadData(myFunction); // Only read access
|
||||
|
||||
// ❌ BAD - Overly broad
|
||||
function.addToRolePolicy(new iam.PolicyStatement({
|
||||
actions: ['dynamodb:*'],
|
||||
resources: ['*'],
|
||||
}));
|
||||
```
|
||||
|
||||
### Function Execution Role
|
||||
|
||||
**Separate roles per function**:
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Each function has its own role
|
||||
const readFunction = new NodejsFunction(this, 'ReadFunction', {
|
||||
entry: 'src/read.ts',
|
||||
// Gets its own execution role
|
||||
});
|
||||
|
||||
const writeFunction = new NodejsFunction(this, 'WriteFunction', {
|
||||
entry: 'src/write.ts',
|
||||
// Gets its own execution role
|
||||
});
|
||||
|
||||
table.grantReadData(readFunction);
|
||||
table.grantReadWriteData(writeFunction);
|
||||
|
||||
// ❌ BAD - Shared role with excessive permissions
|
||||
const sharedRole = new iam.Role(this, 'SharedRole', {
|
||||
assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
|
||||
managedPolicies: [
|
||||
iam.ManagedPolicy.fromAwsManagedPolicyName('AdministratorAccess'), // Too broad!
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
### Resource-Based Policies
|
||||
|
||||
Control who can invoke functions:
|
||||
|
||||
```typescript
|
||||
// Allow API Gateway to invoke function
|
||||
myFunction.grantInvoke(new iam.ServicePrincipal('apigateway.amazonaws.com'));
|
||||
|
||||
// Allow specific account
|
||||
myFunction.addPermission('AllowAccountInvoke', {
|
||||
principal: new iam.AccountPrincipal('123456789012'),
|
||||
action: 'lambda:InvokeFunction',
|
||||
});
|
||||
|
||||
// Conditional invoke (only from specific VPC endpoint)
|
||||
myFunction.addPermission('AllowVPCInvoke', {
|
||||
principal: new iam.ServicePrincipal('lambda.amazonaws.com'),
|
||||
action: 'lambda:InvokeFunction',
|
||||
sourceArn: vpcEndpoint.vpcEndpointId,
|
||||
});
|
||||
```
|
||||
|
||||
### IAM Policies Best Practices
|
||||
|
||||
1. **Use grant methods**: Prefer `.grantXxx()` over manual policies
|
||||
2. **Condition keys**: Use IAM conditions for fine-grained control
|
||||
3. **Resource ARNs**: Always specify resource ARNs, avoid wildcards
|
||||
4. **Session policies**: Use for temporary elevated permissions
|
||||
5. **Service Control Policies (SCPs)**: Enforce organization-wide guardrails
|
||||
|
||||
## Function Security
|
||||
|
||||
### Lambda Isolation Model
|
||||
|
||||
**Each function runs in isolated sandbox**:
|
||||
- Built on Firecracker microVMs
|
||||
- Dedicated execution environment per function
|
||||
- No shared memory between functions
|
||||
- Isolated file system and network namespace
|
||||
- Strong workload isolation
|
||||
|
||||
**Execution Environment Security**:
|
||||
- One concurrent invocation per environment
|
||||
- Environment may be reused (warm starts)
|
||||
- `/tmp` storage persists between invocations
|
||||
- Sensitive data in memory may persist
|
||||
|
||||
### Secure Coding Practices
|
||||
|
||||
**Handle sensitive data securely**:
|
||||
|
||||
```typescript
|
||||
// ✅ GOOD - Clean up sensitive data
|
||||
export const handler = async (event: any) => {
|
||||
const apiKey = process.env.API_KEY;
|
||||
|
||||
try {
|
||||
const result = await callApi(apiKey);
|
||||
return result;
|
||||
} finally {
|
||||
// Caution: this only unsets the env-var reference — the secret string may remain in process memory, and the variable stays unset on warm re-invocations
|
||||
delete process.env.API_KEY;
|
||||
}
|
||||
};
|
||||
|
||||
// ✅ GOOD - Use Secrets Manager
|
||||
import { SecretsManagerClient, GetSecretValueCommand } from '@aws-sdk/client-secrets-manager';
|
||||
|
||||
const secretsClient = new SecretsManagerClient({});
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
const secret = await secretsClient.send(
|
||||
new GetSecretValueCommand({ SecretId: process.env.SECRET_ARN })
|
||||
);
|
||||
|
||||
const apiKey = secret.SecretString;
|
||||
// Use apiKey
|
||||
};
|
||||
```
|
||||
|
||||
### Dependency Management
|
||||
|
||||
**Scan dependencies for vulnerabilities**:
|
||||
|
||||
```json
|
||||
// package.json
|
||||
{
|
||||
"scripts": {
|
||||
"audit": "npm audit",
|
||||
"audit:fix": "npm audit fix"
|
||||
},
|
||||
"devDependencies": {
|
||||
"snyk": "^1.0.0"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**Keep dependencies updated**:
|
||||
- Run `npm audit` or `pip-audit` regularly
|
||||
- Use Dependabot or Snyk for automated scanning
|
||||
- Update dependencies promptly when vulnerabilities found
|
||||
- Use minimal dependency sets
|
||||
|
||||
### Environment Variable Security
|
||||
|
||||
**Never store secrets in environment variables**:
|
||||
|
||||
```typescript
|
||||
// ❌ BAD - Secret in environment variable
|
||||
new NodejsFunction(this, 'Function', {
|
||||
environment: {
|
||||
API_KEY: 'sk-1234567890abcdef', // Never do this!
|
||||
},
|
||||
});
|
||||
|
||||
// ✅ GOOD - Reference to secret
|
||||
new NodejsFunction(this, 'Function', {
|
||||
environment: {
|
||||
SECRET_ARN: secret.secretArn,
|
||||
},
|
||||
});
|
||||
|
||||
secret.grantRead(myFunction);
|
||||
```
|
||||
|
||||
## API Security
|
||||
|
||||
### API Gateway Security
|
||||
|
||||
**Authentication and Authorization**:
|
||||
|
||||
```typescript
|
||||
// Cognito User Pool authorizer
|
||||
const authorizer = new apigateway.CognitoUserPoolsAuthorizer(this, 'Authorizer', {
|
||||
cognitoUserPools: [userPool],
|
||||
});
|
||||
|
||||
api.root.addMethod('GET', integration, {
|
||||
authorizer,
|
||||
authorizationType: apigateway.AuthorizationType.COGNITO,
|
||||
});
|
||||
|
||||
// Lambda authorizer for custom auth
|
||||
const customAuthorizer = new apigateway.TokenAuthorizer(this, 'CustomAuth', {
|
||||
handler: authorizerFunction,
|
||||
resultsCacheTtl: Duration.minutes(5),
|
||||
});
|
||||
|
||||
// IAM authorization for service-to-service
|
||||
api.root.addMethod('POST', integration, {
|
||||
authorizationType: apigateway.AuthorizationType.IAM,
|
||||
});
|
||||
```
|
||||
|
||||
### Request Validation
|
||||
|
||||
**Validate requests at API Gateway**:
|
||||
|
||||
```typescript
|
||||
const validator = new apigateway.RequestValidator(this, 'Validator', {
|
||||
api,
|
||||
validateRequestBody: true,
|
||||
validateRequestParameters: true,
|
||||
});
|
||||
|
||||
const model = api.addModel('Model', {
|
||||
schema: {
|
||||
type: apigateway.JsonSchemaType.OBJECT,
|
||||
required: ['email', 'name'],
|
||||
properties: {
|
||||
email: {
|
||||
type: apigateway.JsonSchemaType.STRING,
|
||||
format: 'email',
|
||||
},
|
||||
name: {
|
||||
type: apigateway.JsonSchemaType.STRING,
|
||||
minLength: 1,
|
||||
maxLength: 100,
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
resource.addMethod('POST', integration, {
|
||||
requestValidator: validator,
|
||||
requestModels: {
|
||||
'application/json': model,
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
### Rate Limiting and Throttling
|
||||
|
||||
```typescript
|
||||
const api = new apigateway.RestApi(this, 'Api', {
|
||||
deployOptions: {
|
||||
throttlingRateLimit: 1000, // requests per second
|
||||
throttlingBurstLimit: 2000, // burst capacity
|
||||
},
|
||||
});
|
||||
|
||||
// Per-method throttling
|
||||
resource.addMethod('POST', integration, {
|
||||
methodResponses: [{ statusCode: '200' }],
|
||||
requestParameters: {
|
||||
'method.request.header.Authorization': true,
|
||||
},
|
||||
throttling: {
|
||||
rateLimit: 100,
|
||||
burstLimit: 200,
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
### API Keys and Usage Plans
|
||||
|
||||
```typescript
|
||||
const apiKey = api.addApiKey('ApiKey', {
|
||||
apiKeyName: 'customer-key',
|
||||
});
|
||||
|
||||
const plan = api.addUsagePlan('UsagePlan', {
|
||||
name: 'Standard',
|
||||
throttle: {
|
||||
rateLimit: 100,
|
||||
burstLimit: 200,
|
||||
},
|
||||
quota: {
|
||||
limit: 10000,
|
||||
period: apigateway.Period.MONTH,
|
||||
},
|
||||
});
|
||||
|
||||
plan.addApiKey(apiKey);
|
||||
plan.addApiStage({
|
||||
stage: api.deploymentStage,
|
||||
});
|
||||
```
|
||||
|
||||
## Data Protection
|
||||
|
||||
### Encryption at Rest
|
||||
|
||||
**DynamoDB encryption**:
|
||||
|
||||
```typescript
|
||||
// Default: AWS-owned CMK (no additional cost)
|
||||
const table = new dynamodb.Table(this, 'Table', {
|
||||
encryption: dynamodb.TableEncryption.AWS_MANAGED, // AWS managed CMK
|
||||
});
|
||||
|
||||
// Customer-managed CMK (for compliance)
|
||||
const kmsKey = new kms.Key(this, 'Key', {
|
||||
enableKeyRotation: true,
|
||||
});
|
||||
|
||||
const table = new dynamodb.Table(this, 'Table', {
|
||||
encryption: dynamodb.TableEncryption.CUSTOMER_MANAGED,
|
||||
encryptionKey: kmsKey,
|
||||
});
|
||||
```
|
||||
|
||||
**S3 encryption**:
|
||||
|
||||
```typescript
|
||||
// SSE-S3 (default, no additional cost)
|
||||
const bucket = new s3.Bucket(this, 'Bucket', {
|
||||
encryption: s3.BucketEncryption.S3_MANAGED,
|
||||
});
|
||||
|
||||
// SSE-KMS (for fine-grained access control)
|
||||
const bucket = new s3.Bucket(this, 'Bucket', {
|
||||
encryption: s3.BucketEncryption.KMS,
|
||||
encryptionKey: kmsKey,
|
||||
});
|
||||
```
|
||||
|
||||
**SQS/SNS encryption**:
|
||||
|
||||
```typescript
|
||||
const queue = new sqs.Queue(this, 'Queue', {
|
||||
encryption: sqs.QueueEncryption.KMS,
|
||||
encryptionMasterKey: kmsKey,
|
||||
});
|
||||
|
||||
const topic = new sns.Topic(this, 'Topic', {
|
||||
masterKey: kmsKey,
|
||||
});
|
||||
```
|
||||
|
||||
### Encryption in Transit
|
||||
|
||||
**All AWS service APIs use TLS**:
|
||||
- API Gateway endpoints use HTTPS by default
|
||||
- Lambda to AWS service communication encrypted
|
||||
- EventBridge, SQS, SNS use TLS
|
||||
- Custom domains can use ACM certificates
|
||||
|
||||
```typescript
|
||||
// API Gateway with custom domain
|
||||
const certificate = new acm.Certificate(this, 'Certificate', {
|
||||
domainName: 'api.example.com',
|
||||
validation: acm.CertificateValidation.fromDns(hostedZone),
|
||||
});
|
||||
|
||||
const api = new apigateway.RestApi(this, 'Api', {
|
||||
domainName: {
|
||||
domainName: 'api.example.com',
|
||||
certificate,
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
### Data Sanitization
|
||||
|
||||
**Validate and sanitize inputs**:
|
||||
|
||||
```typescript
|
||||
import DOMPurify from 'isomorphic-dompurify';
|
||||
import { z } from 'zod';
|
||||
|
||||
// Schema validation
|
||||
const OrderSchema = z.object({
|
||||
orderId: z.string().uuid(),
|
||||
amount: z.number().positive(),
|
||||
email: z.string().email(),
|
||||
description: z.string(),
|
||||
});
|
||||
|
||||
export const handler = async (event: any) => {
|
||||
const body = JSON.parse(event.body);
|
||||
|
||||
// Validate schema
|
||||
const result = OrderSchema.safeParse(body);
|
||||
if (!result.success) {
|
||||
return {
|
||||
statusCode: 400,
|
||||
body: JSON.stringify({ error: result.error }),
|
||||
};
|
||||
}
|
||||
|
||||
// Sanitize HTML inputs
|
||||
const sanitized = {
|
||||
...result.data,
|
||||
description: DOMPurify.sanitize(result.data.description),
|
||||
};
|
||||
|
||||
await processOrder(sanitized);
|
||||
};
|
||||
```
|
||||
|
||||
## Network Security
|
||||
|
||||
### VPC Configuration
|
||||
|
||||
**Lambda in VPC for private resources**:
|
||||
|
||||
```typescript
|
||||
const vpc = new ec2.Vpc(this, 'Vpc', {
|
||||
maxAzs: 2,
|
||||
natGateways: 1,
|
||||
});
|
||||
|
||||
// Security group for Lambda (declared before use)
|
||||
const securityGroup = new ec2.SecurityGroup(this, 'LambdaSG', {
|
||||
vpc,
|
||||
description: 'Security group for Lambda function',
|
||||
allowAllOutbound: false, // Restrict outbound
|
||||
});
|
||||
|
||||
// Lambda in private subnet
|
||||
const vpcFunction = new NodejsFunction(this, 'VpcFunction', {
|
||||
entry: 'src/handler.ts',
|
||||
vpc,
|
||||
vpcSubnets: {
|
||||
subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS,
|
||||
},
|
||||
securityGroups: [securityGroup],
|
||||
});
|
||||
|
||||
// Only allow access to RDS
|
||||
securityGroup.addEgressRule(
|
||||
ec2.Peer.securityGroupId(rdsSecurityGroup.securityGroupId),
|
||||
ec2.Port.tcp(3306),
|
||||
'Allow MySQL access'
|
||||
);
|
||||
```
|
||||
|
||||
### VPC Endpoints
|
||||
|
||||
**Use VPC endpoints for AWS services**:
|
||||
|
||||
```typescript
|
||||
// S3 VPC endpoint (gateway endpoint, no cost)
|
||||
vpc.addGatewayEndpoint('S3Endpoint', {
|
||||
service: ec2.GatewayVpcEndpointAwsService.S3,
|
||||
});
|
||||
|
||||
// DynamoDB VPC endpoint (gateway endpoint, no cost)
|
||||
vpc.addGatewayEndpoint('DynamoDBEndpoint', {
|
||||
service: ec2.GatewayVpcEndpointAwsService.DYNAMODB,
|
||||
});
|
||||
|
||||
// Secrets Manager VPC endpoint (interface endpoint, cost applies)
|
||||
vpc.addInterfaceEndpoint('SecretsManagerEndpoint', {
|
||||
service: ec2.InterfaceVpcEndpointAwsService.SECRETS_MANAGER,
|
||||
privateDnsEnabled: true,
|
||||
});
|
||||
```
|
||||
|
||||
### Security Groups
|
||||
|
||||
**Principle of least privilege for network access**:
|
||||
|
||||
```typescript
|
||||
// Lambda security group
|
||||
const lambdaSG = new ec2.SecurityGroup(this, 'LambdaSG', {
|
||||
vpc,
|
||||
allowAllOutbound: false,
|
||||
});
|
||||
|
||||
// RDS security group
|
||||
const rdsSG = new ec2.SecurityGroup(this, 'RDSSG', {
|
||||
vpc,
|
||||
allowAllOutbound: false,
|
||||
});
|
||||
|
||||
// Allow Lambda to access RDS only
|
||||
rdsSG.addIngressRule(
|
||||
ec2.Peer.securityGroupId(lambdaSG.securityGroupId),
|
||||
ec2.Port.tcp(3306),
|
||||
'Allow Lambda access'
|
||||
);
|
||||
|
||||
lambdaSG.addEgressRule(
|
||||
ec2.Peer.securityGroupId(rdsSG.securityGroupId),
|
||||
ec2.Port.tcp(3306),
|
||||
'Allow RDS access'
|
||||
);
|
||||
```
|
||||
|
||||
## Security Monitoring
|
||||
|
||||
### CloudWatch Logs
|
||||
|
||||
**Enable and encrypt logs**:
|
||||
|
||||
```typescript
|
||||
new NodejsFunction(this, 'Function', {
|
||||
entry: 'src/handler.ts',
|
||||
logRetention: logs.RetentionDays.ONE_WEEK,
|
||||
logGroup: new logs.LogGroup(this, 'LogGroup', {
|
||||
encryptionKey: kmsKey, // Encrypt logs
|
||||
retention: logs.RetentionDays.ONE_WEEK,
|
||||
}),
|
||||
});
|
||||
```
|
||||
|
||||
### CloudTrail
|
||||
|
||||
**Enable CloudTrail for audit**:
|
||||
|
||||
```typescript
|
||||
const trail = new cloudtrail.Trail(this, 'Trail', {
|
||||
isMultiRegionTrail: true,
|
||||
includeGlobalServiceEvents: true,
|
||||
managementEvents: cloudtrail.ReadWriteType.ALL,
|
||||
});
|
||||
|
||||
// Log Lambda invocations
|
||||
trail.addLambdaEventSelector([{
|
||||
includeManagementEvents: true,
|
||||
readWriteType: cloudtrail.ReadWriteType.ALL,
|
||||
}]);
|
||||
```
|
||||
|
||||
### GuardDuty
|
||||
|
||||
**Enable GuardDuty for threat detection**:
|
||||
- Analyzes VPC Flow Logs, DNS logs, CloudTrail events
|
||||
- Detects unusual API activity
|
||||
- Identifies compromised credentials
|
||||
- Monitors for cryptocurrency mining
|
||||
|
||||
## Security Best Practices Checklist
|
||||
|
||||
### Development
|
||||
|
||||
- [ ] Validate and sanitize all inputs
|
||||
- [ ] Scan dependencies for vulnerabilities
|
||||
- [ ] Use least privilege IAM permissions
|
||||
- [ ] Store secrets in Secrets Manager or Parameter Store
|
||||
- [ ] Never log sensitive data
|
||||
- [ ] Enable encryption for all data stores
|
||||
- [ ] Use environment variables for configuration, not secrets
|
||||
|
||||
### Deployment
|
||||
|
||||
- [ ] Enable CloudTrail in all regions
|
||||
- [ ] Configure VPC for sensitive workloads
|
||||
- [ ] Use VPC endpoints for AWS service access
|
||||
- [ ] Enable GuardDuty for threat detection
|
||||
- [ ] Implement resource-based policies
|
||||
- [ ] Use AWS WAF for API protection
|
||||
- [ ] Enable access logging for API Gateway
|
||||
|
||||
### Operations
|
||||
|
||||
- [ ] Monitor CloudTrail for unusual activity
|
||||
- [ ] Set up alarms for security events
|
||||
- [ ] Rotate secrets regularly
|
||||
- [ ] Review IAM policies periodically
|
||||
- [ ] Audit function permissions
|
||||
- [ ] Monitor GuardDuty findings
|
||||
- [ ] Implement automated security responses
|
||||
|
||||
### Testing
|
||||
|
||||
- [ ] Test with least privilege policies
|
||||
- [ ] Validate error handling for security failures
|
||||
- [ ] Test input validation and sanitization
|
||||
- [ ] Verify encryption configurations
|
||||
- [ ] Test with malicious payloads
|
||||
- [ ] Audit logs for security events
|
||||
|
||||
## Summary
|
||||
|
||||
- **Shared Responsibility**: AWS handles infrastructure, you handle application security
|
||||
- **Least Privilege**: Use IAM grant methods, avoid wildcards
|
||||
- **Encryption**: Enable encryption at rest and in transit
|
||||
- **Input Validation**: Validate and sanitize all inputs
|
||||
- **Dependency Security**: Scan and update dependencies regularly
|
||||
- **Monitoring**: Enable CloudTrail, GuardDuty, and CloudWatch
|
||||
- **Secrets Management**: Use Secrets Manager, never environment variables
|
||||
- **Network Security**: Use VPC, security groups, and VPC endpoints appropriately
|
||||
838
skills/aws-serverless-eda/references/serverless-patterns.md
Normal file
838
skills/aws-serverless-eda/references/serverless-patterns.md
Normal file
@@ -0,0 +1,838 @@
|
||||
# Serverless Architecture Patterns
|
||||
|
||||
Comprehensive patterns for building serverless applications on AWS based on Well-Architected Framework principles.
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [Core Serverless Patterns](#core-serverless-patterns)
|
||||
- [API Patterns](#api-patterns)
|
||||
- [Data Processing Patterns](#data-processing-patterns)
|
||||
- [Integration Patterns](#integration-patterns)
|
||||
- [Orchestration Patterns](#orchestration-patterns)
|
||||
- [Anti-Patterns](#anti-patterns)
|
||||
|
||||
## Core Serverless Patterns
|
||||
|
||||
### Pattern: Serverless Microservices
|
||||
|
||||
**Use case**: Independent, scalable services with separate databases
|
||||
|
||||
**Architecture**:
|
||||
```
|
||||
API Gateway → Lambda Functions → DynamoDB/RDS
|
||||
↓ (events)
|
||||
EventBridge → Other Services
|
||||
```
|
||||
|
||||
**CDK Implementation**:
|
||||
```typescript
|
||||
// User Service
|
||||
const userTable = new dynamodb.Table(this, 'Users', {
|
||||
partitionKey: { name: 'userId', type: dynamodb.AttributeType.STRING },
|
||||
billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
|
||||
});
|
||||
|
||||
const userFunction = new NodejsFunction(this, 'UserHandler', {
|
||||
entry: 'src/services/users/handler.ts',
|
||||
environment: {
|
||||
TABLE_NAME: userTable.tableName,
|
||||
},
|
||||
});
|
||||
|
||||
userTable.grantReadWriteData(userFunction);
|
||||
|
||||
// Order Service (separate database)
|
||||
const orderTable = new dynamodb.Table(this, 'Orders', {
|
||||
partitionKey: { name: 'orderId', type: dynamodb.AttributeType.STRING },
|
||||
billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
|
||||
});
|
||||
|
||||
const orderFunction = new NodejsFunction(this, 'OrderHandler', {
|
||||
entry: 'src/services/orders/handler.ts',
|
||||
environment: {
|
||||
TABLE_NAME: orderTable.tableName,
|
||||
EVENT_BUS: eventBus.eventBusName,
|
||||
},
|
||||
});
|
||||
|
||||
orderTable.grantReadWriteData(orderFunction);
|
||||
eventBus.grantPutEventsTo(orderFunction);
|
||||
```
|
||||
|
||||
**Benefits**:
|
||||
- Independent deployment and scaling
|
||||
- Database per service (data isolation)
|
||||
- Technology diversity
|
||||
- Fault isolation
|
||||
|
||||
### Pattern: Serverless API Backend
|
||||
|
||||
**Use case**: REST or GraphQL API with serverless compute
|
||||
|
||||
**REST API with API Gateway**:
|
||||
```typescript
|
||||
const api = new apigateway.RestApi(this, 'Api', {
|
||||
restApiName: 'serverless-api',
|
||||
deployOptions: {
|
||||
stageName: 'prod',
|
||||
tracingEnabled: true,
|
||||
loggingLevel: apigateway.MethodLoggingLevel.INFO,
|
||||
dataTraceEnabled: true,
|
||||
metricsEnabled: true,
|
||||
},
|
||||
defaultCorsPreflightOptions: {
|
||||
allowOrigins: apigateway.Cors.ALL_ORIGINS,
|
||||
allowMethods: apigateway.Cors.ALL_METHODS,
|
||||
},
|
||||
});
|
||||
|
||||
// Resource-based routing
|
||||
const items = api.root.addResource('items');
|
||||
items.addMethod('GET', new apigateway.LambdaIntegration(listFunction));
|
||||
items.addMethod('POST', new apigateway.LambdaIntegration(createFunction));
|
||||
|
||||
const item = items.addResource('{id}');
|
||||
item.addMethod('GET', new apigateway.LambdaIntegration(getFunction));
|
||||
item.addMethod('PUT', new apigateway.LambdaIntegration(updateFunction));
|
||||
item.addMethod('DELETE', new apigateway.LambdaIntegration(deleteFunction));
|
||||
```
|
||||
|
||||
**GraphQL API with AppSync**:
|
||||
```typescript
|
||||
const api = new appsync.GraphqlApi(this, 'Api', {
|
||||
name: 'serverless-graphql-api',
|
||||
schema: appsync.SchemaFile.fromAsset('schema.graphql'),
|
||||
authorizationConfig: {
|
||||
defaultAuthorization: {
|
||||
authorizationType: appsync.AuthorizationType.API_KEY,
|
||||
},
|
||||
},
|
||||
xrayEnabled: true,
|
||||
});
|
||||
|
||||
// Lambda resolver
|
||||
const dataSource = api.addLambdaDataSource('lambda-ds', resolverFunction);
|
||||
|
||||
dataSource.createResolver('QueryGetItem', {
|
||||
typeName: 'Query',
|
||||
fieldName: 'getItem',
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern: Serverless Data Lake
|
||||
|
||||
**Use case**: Ingest, process, and analyze large-scale data
|
||||
|
||||
**Architecture**:
|
||||
```
|
||||
S3 (raw data) → Lambda (transform) → S3 (processed)
|
||||
↓ (catalog)
|
||||
AWS Glue → Athena (query)
|
||||
```
|
||||
|
||||
**Implementation**:
|
||||
```typescript
|
||||
const rawBucket = new s3.Bucket(this, 'RawData');
|
||||
const processedBucket = new s3.Bucket(this, 'ProcessedData');
|
||||
|
||||
// Trigger Lambda on file upload
|
||||
rawBucket.addEventNotification(
|
||||
s3.EventType.OBJECT_CREATED,
|
||||
new s3n.LambdaDestination(transformFunction),
|
||||
{ prefix: 'incoming/' }
|
||||
);
|
||||
|
||||
// Transform function
|
||||
export const transform = async (event: S3Event) => {
|
||||
for (const record of event.Records) {
|
||||
const key = record.s3.object.key;
|
||||
|
||||
// Get raw data
|
||||
const raw = await s3.getObject({
|
||||
Bucket: record.s3.bucket.name,
|
||||
Key: key,
|
||||
});
|
||||
|
||||
// Transform data
|
||||
const transformed = await transformData(raw.Body);
|
||||
|
||||
// Write to processed bucket
|
||||
await s3.putObject({
|
||||
Bucket: process.env.PROCESSED_BUCKET,
|
||||
Key: `processed/${key}`,
|
||||
Body: JSON.stringify(transformed),
|
||||
});
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
## API Patterns
|
||||
|
||||
### Pattern: Authorizer Pattern
|
||||
|
||||
**Use case**: Custom authentication and authorization
|
||||
|
||||
```typescript
|
||||
// Lambda authorizer
|
||||
const authorizer = new apigateway.TokenAuthorizer(this, 'Authorizer', {
|
||||
handler: authorizerFunction,
|
||||
identitySource: 'method.request.header.Authorization',
|
||||
resultsCacheTtl: Duration.minutes(5),
|
||||
});
|
||||
|
||||
// Apply to API methods
|
||||
const resource = api.root.addResource('protected');
|
||||
resource.addMethod('GET', new apigateway.LambdaIntegration(protectedFunction), {
|
||||
authorizer,
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern: Request Validation
|
||||
|
||||
**Use case**: Validate requests before Lambda invocation
|
||||
|
||||
```typescript
|
||||
const requestModel = api.addModel('RequestModel', {
|
||||
contentType: 'application/json',
|
||||
schema: {
|
||||
type: apigateway.JsonSchemaType.OBJECT,
|
||||
required: ['name', 'email'],
|
||||
properties: {
|
||||
name: { type: apigateway.JsonSchemaType.STRING, minLength: 1 },
|
||||
email: { type: apigateway.JsonSchemaType.STRING, format: 'email' },
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
resource.addMethod('POST', integration, {
|
||||
requestValidator: new apigateway.RequestValidator(this, 'Validator', {
|
||||
api,
|
||||
validateRequestBody: true,
|
||||
validateRequestParameters: true,
|
||||
}),
|
||||
requestModels: {
|
||||
'application/json': requestModel,
|
||||
},
|
||||
});
|
||||
```
|
||||
|
||||
### Pattern: Response Caching
|
||||
|
||||
**Use case**: Reduce backend load and improve latency
|
||||
|
||||
```typescript
|
||||
const api = new apigateway.RestApi(this, 'Api', {
|
||||
deployOptions: {
|
||||
cachingEnabled: true,
|
||||
cacheTtl: Duration.minutes(5),
|
||||
cacheClusterEnabled: true,
|
||||
cacheClusterSize: '0.5', // GB
|
||||
},
|
||||
});
|
||||
|
||||
// Enable caching per method
|
||||
resource.addMethod('GET', integration, {
|
||||
methodResponses: [{
|
||||
statusCode: '200',
|
||||
responseParameters: {
|
||||
'method.response.header.Cache-Control': true,
|
||||
},
|
||||
}],
|
||||
});
|
||||
```
|
||||
|
||||
## Data Processing Patterns
|
||||
|
||||
### Pattern: S3 Event Processing
|
||||
|
||||
**Use case**: Process files uploaded to S3
|
||||
|
||||
```typescript
|
||||
const bucket = new s3.Bucket(this, 'DataBucket');
|
||||
|
||||
// Process images
|
||||
bucket.addEventNotification(
|
||||
s3.EventType.OBJECT_CREATED,
|
||||
new s3n.LambdaDestination(imageProcessingFunction),
|
||||
{ suffix: '.jpg' }
|
||||
);
|
||||
|
||||
// Process CSV files
|
||||
bucket.addEventNotification(
|
||||
s3.EventType.OBJECT_CREATED,
|
||||
new s3n.LambdaDestination(csvProcessingFunction),
|
||||
{ suffix: '.csv' }
|
||||
);
|
||||
|
||||
// Large file processing with Step Functions
|
||||
bucket.addEventNotification(
|
||||
s3.EventType.OBJECT_CREATED,
|
||||
new s3n.SfnDestination(processingStateMachine),
|
||||
{ prefix: 'large-files/' }
|
||||
);
|
||||
```
|
||||
|
||||
### Pattern: DynamoDB Streams Processing
|
||||
|
||||
**Use case**: React to database changes
|
||||
|
||||
```typescript
|
||||
const table = new dynamodb.Table(this, 'Table', {
|
||||
partitionKey: { name: 'id', type: dynamodb.AttributeType.STRING },
|
||||
stream: dynamodb.StreamViewType.NEW_AND_OLD_IMAGES,
|
||||
});
|
||||
|
||||
// Process stream changes
|
||||
new lambda.EventSourceMapping(this, 'StreamConsumer', {
|
||||
target: streamProcessorFunction,
|
||||
eventSourceArn: table.tableStreamArn,
|
||||
startingPosition: lambda.StartingPosition.LATEST,
|
||||
batchSize: 100,
|
||||
maxBatchingWindow: Duration.seconds(5),
|
||||
bisectBatchOnError: true,
|
||||
retryAttempts: 3,
|
||||
});
|
||||
|
||||
// Example: Sync to search index
|
||||
export const processStream = async (event: DynamoDBStreamEvent) => {
|
||||
for (const record of event.Records) {
|
||||
if (record.eventName === 'INSERT' || record.eventName === 'MODIFY') {
|
||||
const newImage = record.dynamodb?.NewImage;
|
||||
await elasticSearch.index({
|
||||
index: 'items',
|
||||
id: newImage?.id.S,
|
||||
body: unmarshall(newImage),
|
||||
});
|
||||
} else if (record.eventName === 'REMOVE') {
|
||||
await elasticSearch.delete({
|
||||
index: 'items',
|
||||
id: record.dynamodb?.Keys?.id.S,
|
||||
});
|
||||
}
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
### Pattern: Kinesis Stream Processing
|
||||
|
||||
**Use case**: Real-time data streaming and analytics
|
||||
|
||||
```typescript
|
||||
const stream = new kinesis.Stream(this, 'EventStream', {
|
||||
shardCount: 2,
|
||||
streamMode: kinesis.StreamMode.PROVISIONED,
|
||||
});
|
||||
|
||||
// Fan-out with multiple consumers
|
||||
const consumer1 = new lambda.EventSourceMapping(this, 'Analytics', {
|
||||
target: analyticsFunction,
|
||||
eventSourceArn: stream.streamArn,
|
||||
startingPosition: lambda.StartingPosition.LATEST,
|
||||
batchSize: 100,
|
||||
parallelizationFactor: 10, // Process 10 batches per shard in parallel
|
||||
});
|
||||
|
||||
const consumer2 = new lambda.EventSourceMapping(this, 'Alerting', {
|
||||
target: alertingFunction,
|
||||
eventSourceArn: stream.streamArn,
|
||||
startingPosition: lambda.StartingPosition.LATEST,
|
||||
filters: [
|
||||
lambda.FilterCriteria.filter({
|
||||
eventName: lambda.FilterRule.isEqual('CRITICAL_EVENT'),
|
||||
}),
|
||||
],
|
||||
});
|
||||
```
|
||||
|
||||
## Integration Patterns
|
||||
|
||||
### Pattern: Service Integration with EventBridge
|
||||
|
||||
**Use case**: Decouple services with events
|
||||
|
||||
```typescript
|
||||
const eventBus = new events.EventBus(this, 'AppBus');
|
||||
|
||||
// Service A publishes events
|
||||
const serviceA = new NodejsFunction(this, 'ServiceA', {
|
||||
entry: 'src/services/a/handler.ts',
|
||||
environment: {
|
||||
EVENT_BUS: eventBus.eventBusName,
|
||||
},
|
||||
});
|
||||
|
||||
eventBus.grantPutEventsTo(serviceA);
|
||||
|
||||
// Service B subscribes to events
|
||||
new events.Rule(this, 'ServiceBRule', {
|
||||
eventBus,
|
||||
eventPattern: {
|
||||
source: ['service.a'],
|
||||
detailType: ['EntityCreated'],
|
||||
},
|
||||
targets: [new targets.LambdaFunction(serviceBFunction)],
|
||||
});
|
||||
|
||||
// Service C subscribes to same events
|
||||
new events.Rule(this, 'ServiceCRule', {
|
||||
eventBus,
|
||||
eventPattern: {
|
||||
source: ['service.a'],
|
||||
detailType: ['EntityCreated'],
|
||||
},
|
||||
targets: [new targets.LambdaFunction(serviceCFunction)],
|
||||
});
|
||||
```

### Pattern: API Gateway + SQS Integration

**Use case**: Async API requests without Lambda

```typescript
const queue = new sqs.Queue(this, 'RequestQueue');

const api = new apigateway.RestApi(this, 'Api');

// Direct SQS integration (no Lambda)
const sqsIntegration = new apigateway.AwsIntegration({
  service: 'sqs',
  path: `${process.env.AWS_ACCOUNT_ID}/${queue.queueName}`,
  integrationHttpMethod: 'POST',
  options: {
    credentialsRole: sqsRole, // IAM role granting API Gateway sqs:SendMessage
    requestParameters: {
      'integration.request.header.Content-Type': "'application/x-www-form-urlencoded'",
    },
    requestTemplates: {
      'application/json': 'Action=SendMessage&MessageBody=$input.body',
    },
    integrationResponses: [{
      statusCode: '200',
    }],
  },
});

api.root.addMethod('POST', sqsIntegration, {
  methodResponses: [{ statusCode: '200' }],
});
```

### Pattern: EventBridge + Step Functions

**Use case**: Event-triggered workflow orchestration

```typescript
// State machine for order processing
const orderStateMachine = new stepfunctions.StateMachine(this, 'OrderFlow', {
  definition: /* ... */,
});

// EventBridge triggers state machine
new events.Rule(this, 'OrderPlacedRule', {
  eventPattern: {
    source: ['orders'],
    detailType: ['OrderPlaced'],
  },
  targets: [new targets.SfnStateMachine(orderStateMachine)],
});
```

## Orchestration Patterns

### Pattern: Sequential Workflow

**Use case**: Multi-step process with dependencies

```typescript
const definition = new tasks.LambdaInvoke(this, 'Step1', {
  lambdaFunction: step1Function,
  outputPath: '$.Payload',
})
  .next(new tasks.LambdaInvoke(this, 'Step2', {
    lambdaFunction: step2Function,
    outputPath: '$.Payload',
  }))
  .next(new tasks.LambdaInvoke(this, 'Step3', {
    lambdaFunction: step3Function,
    outputPath: '$.Payload',
  }));

new stepfunctions.StateMachine(this, 'Sequential', {
  definition,
});
```

### Pattern: Parallel Execution

**Use case**: Execute independent tasks concurrently

```typescript
const parallel = new stepfunctions.Parallel(this, 'ParallelProcessing');

parallel.branch(new tasks.LambdaInvoke(this, 'ProcessA', {
  lambdaFunction: functionA,
}));

parallel.branch(new tasks.LambdaInvoke(this, 'ProcessB', {
  lambdaFunction: functionB,
}));

parallel.branch(new tasks.LambdaInvoke(this, 'ProcessC', {
  lambdaFunction: functionC,
}));

const definition = parallel.next(new tasks.LambdaInvoke(this, 'Aggregate', {
  lambdaFunction: aggregateFunction,
}));

new stepfunctions.StateMachine(this, 'Parallel', { definition });
```

### Pattern: Map State (Dynamic Parallelism)

**Use case**: Process array of items in parallel

```typescript
const mapState = new stepfunctions.Map(this, 'ProcessItems', {
  maxConcurrency: 10,
  itemsPath: '$.items',
});

mapState.iterator(new tasks.LambdaInvoke(this, 'ProcessItem', {
  lambdaFunction: processItemFunction,
}));

const definition = mapState.next(new tasks.LambdaInvoke(this, 'Finalize', {
  lambdaFunction: finalizeFunction,
}));
```

### Pattern: Choice State (Conditional Logic)

**Use case**: Branching logic based on input

```typescript
const choice = new stepfunctions.Choice(this, 'OrderType');

choice.when(
  stepfunctions.Condition.stringEquals('$.orderType', 'STANDARD'),
  standardProcessing
);

choice.when(
  stepfunctions.Condition.stringEquals('$.orderType', 'EXPRESS'),
  expressProcessing
);

choice.otherwise(defaultProcessing);
```

### Pattern: Wait State

**Use case**: Delay between steps or wait for callbacks

```typescript
// Fixed delay
const wait = new stepfunctions.Wait(this, 'Wait30Seconds', {
  time: stepfunctions.WaitTime.duration(Duration.seconds(30)),
});

// Wait until timestamp
const waitUntil = new stepfunctions.Wait(this, 'WaitUntil', {
  time: stepfunctions.WaitTime.timestampPath('$.expiryTime'),
});

// Wait for callback (.waitForTaskToken)
const waitForCallback = new tasks.LambdaInvoke(this, 'WaitForApproval', {
  lambdaFunction: approvalFunction,
  integrationPattern: stepfunctions.IntegrationPattern.WAIT_FOR_TASK_TOKEN,
  payload: stepfunctions.TaskInput.fromObject({
    token: stepfunctions.JsonPath.taskToken,
    data: stepfunctions.JsonPath.entirePayload,
  }),
});
```

## Anti-Patterns

### ❌ Lambda Monolith

**Problem**: Single Lambda handling all operations

```typescript
// BAD
export const handler = async (event: any) => {
  switch (event.operation) {
    case 'createUser': return createUser(event);
    case 'getUser': return getUser(event);
    case 'updateUser': return updateUser(event);
    case 'deleteUser': return deleteUser(event);
    case 'createOrder': return createOrder(event);
    // ... 20 more operations
  }
};
```

**Solution**: Separate Lambda functions per operation

```typescript
// GOOD - Separate functions
export const createUser = async (event: any) => { /* ... */ };
export const getUser = async (event: any) => { /* ... */ };
export const updateUser = async (event: any) => { /* ... */ };
```

### ❌ Recursive Lambda Pattern

**Problem**: Lambda invoking itself (runaway costs)

```typescript
// BAD
export const handler = async (event: any) => {
  await processItem(event);

  if (hasMoreItems()) {
    await lambda.invoke({
      FunctionName: process.env.AWS_LAMBDA_FUNCTION_NAME,
      InvocationType: 'Event',
      Payload: JSON.stringify({ /* next batch */ }),
    });
  }
};
```

**Solution**: Use SQS or Step Functions

```typescript
// GOOD - Use SQS for iteration
export const handler = async (event: SQSEvent) => {
  for (const record of event.Records) {
    await processItem(record);
  }
  // SQS handles iteration automatically
};
```

### ❌ Lambda Chaining

**Problem**: Lambda directly invoking another Lambda

```typescript
// BAD
export const handler1 = async (event: any) => {
  const result = await processStep1(event);

  // Directly invoking next Lambda
  await lambda.invoke({
    FunctionName: 'handler2',
    Payload: JSON.stringify(result),
  });
};
```

**Solution**: Use EventBridge, SQS, or Step Functions

```typescript
// GOOD - Publish to EventBridge
export const handler1 = async (event: any) => {
  const result = await processStep1(event);

  await eventBridge.putEvents({
    Entries: [{
      Source: 'service.step1',
      DetailType: 'Step1Completed',
      Detail: JSON.stringify(result),
    }],
  });
};
```

### ❌ Synchronous Waiting in Lambda

**Problem**: Lambda waiting for slow operations

```typescript
// BAD - Blocking on slow operation
export const handler = async (event: any) => {
  await startBatchJob(); // Returns immediately

  // Wait for job to complete (wastes Lambda time)
  while (true) {
    const status = await checkJobStatus();
    if (status === 'COMPLETE') break;
    await sleep(1000);
  }
};
```

**Solution**: Use Step Functions with callback pattern

```typescript
// GOOD - Step Functions waits, not Lambda
const waitForJob = new tasks.LambdaInvoke(this, 'StartJob', {
  lambdaFunction: startJobFunction,
  integrationPattern: stepfunctions.IntegrationPattern.WAIT_FOR_TASK_TOKEN,
  payload: stepfunctions.TaskInput.fromObject({
    token: stepfunctions.JsonPath.taskToken,
  }),
});
```

### ❌ Large Deployment Packages

**Problem**: Large Lambda packages increase cold start time

**Solution**:
- Use layers for shared dependencies
- Externalize AWS SDK
- Minimize bundle size

```typescript
new NodejsFunction(this, 'Function', {
  entry: 'src/handler.ts',
  bundling: {
    minify: true,
    externalModules: ['@aws-sdk/*'], // Provided by runtime
    nodeModules: ['only-needed-deps'], // Selective bundling
  },
});
```

## Performance Optimization

### Cold Start Optimization

**Techniques**:
1. Minimize package size
2. Use provisioned concurrency for critical paths
3. Lazy load dependencies
4. Reuse connections outside handler
5. Use Lambda SnapStart (Java)

```typescript
// For latency-sensitive APIs
const apiFunction = new NodejsFunction(this, 'ApiFunction', {
  entry: 'src/api.ts',
  memorySize: 1769, // 1 vCPU for faster initialization
});

const alias = apiFunction.currentVersion.addAlias('live');
alias.addAutoScaling({
  minCapacity: 2,
  maxCapacity: 10,
}).scaleOnUtilization({
  utilizationTarget: 0.7,
});
```

### Right-Sizing Memory

**Test different memory configurations**:

```typescript
// CPU-bound workload
new NodejsFunction(this, 'ComputeFunction', {
  memorySize: 1769, // 1 vCPU
  timeout: Duration.seconds(30),
});

// I/O-bound workload
new NodejsFunction(this, 'IOFunction', {
  memorySize: 512, // Less CPU needed
  timeout: Duration.seconds(60),
});

// Simple operations
new NodejsFunction(this, 'SimpleFunction', {
  memorySize: 256,
  timeout: Duration.seconds(10),
});
```

### Concurrent Execution Control

```typescript
// Protect downstream services
new NodejsFunction(this, 'Function', {
  reservedConcurrentExecutions: 10, // Max 10 concurrent
});

// Unreserved concurrency (shared pool)
new NodejsFunction(this, 'Function', {
  // Uses unreserved account concurrency
});
```

## Testing Strategies

### Unit Testing

Test business logic separate from AWS services:

```typescript
// handler.ts
export const processOrder = async (order: Order): Promise<Result> => {
  // Business logic (easily testable)
  const validated = validateOrder(order);
  const priced = calculatePrice(validated);
  return transformResult(priced);
};

export const handler = async (event: any): Promise<any> => {
  const order = parseEvent(event);
  const result = await processOrder(order);
  await saveToDatabase(result);
  return formatResponse(result);
};

// handler.test.ts
test('processOrder calculates price correctly', async () => {
  const order = { items: [{ price: 10, quantity: 2 }] };
  const result = await processOrder(order);
  expect(result.total).toBe(20);
});
```

### Integration Testing

Test with actual AWS services:

```typescript
// integration.test.ts
import { LambdaClient, InvokeCommand } from '@aws-sdk/client-lambda';

test('Lambda processes order correctly', async () => {
  const lambda = new LambdaClient({});

  const response = await lambda.send(new InvokeCommand({
    FunctionName: process.env.FUNCTION_NAME,
    Payload: JSON.stringify({ orderId: '123' }),
  }));

  const result = JSON.parse(Buffer.from(response.Payload!).toString());
  expect(result.statusCode).toBe(200);
});
```

### Local Testing with SAM

```bash
# Test API locally
sam local start-api

# Invoke function locally
sam local invoke MyFunction -e events/test-event.json

# Generate sample event
sam local generate-event apigateway aws-proxy > event.json
```

## Summary

- **Single Purpose**: One function, one responsibility
- **Concurrent Design**: Think concurrency, not volume
- **Stateless**: Use external storage for state
- **State Machines**: Orchestrate with Step Functions
- **Event-Driven**: Use events over direct calls
- **Idempotent**: Handle failures and duplicates gracefully
- **Observability**: Enable tracing and structured logging