Initial commit
This commit is contained in:
12
.claude-plugin/plugin.json
Normal file
12
.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"name": "serverless-eda",
|
||||||
|
"description": "AWS serverless and event-driven architecture best practices based on Well-Architected Framework with MCP servers for SAM, Lambda, Step Functions, and messaging",
|
||||||
|
"version": "0.0.0-2025.11.28",
|
||||||
|
"author": {
|
||||||
|
"name": "Kane Zhu",
|
||||||
|
"email": "me@kane.mx"
|
||||||
|
},
|
||||||
|
"skills": [
|
||||||
|
"./skills/aws-serverless-eda"
|
||||||
|
]
|
||||||
|
}
|
||||||
3
README.md
Normal file
3
README.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# serverless-eda
|
||||||
|
|
||||||
|
AWS serverless and event-driven architecture best practices based on Well-Architected Framework with MCP servers for SAM, Lambda, Step Functions, and messaging
|
||||||
68
plugin.lock.json
Normal file
68
plugin.lock.json
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
{
|
||||||
|
"$schema": "internal://schemas/plugin.lock.v1.json",
|
||||||
|
"pluginId": "gh:zxkane/aws-skills:serverless-eda",
|
||||||
|
"normalized": {
|
||||||
|
"repo": null,
|
||||||
|
"ref": "refs/tags/v20251128.0",
|
||||||
|
"commit": "ff0ed7dd84ee38c5963e2be6ddfd74065c81521b",
|
||||||
|
"treeHash": "9375638efaf61fa4e3870fbb415c277069d5ddac44feeb5c77f78f30ed9c22f7",
|
||||||
|
"generatedAt": "2025-11-28T10:29:14.139105Z",
|
||||||
|
"toolVersion": "publish_plugins.py@0.2.0"
|
||||||
|
},
|
||||||
|
"origin": {
|
||||||
|
"remote": "git@github.com:zhongweili/42plugin-data.git",
|
||||||
|
"branch": "master",
|
||||||
|
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
|
||||||
|
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
|
||||||
|
},
|
||||||
|
"manifest": {
|
||||||
|
"name": "serverless-eda",
|
||||||
|
"description": "AWS serverless and event-driven architecture best practices based on Well-Architected Framework with MCP servers for SAM, Lambda, Step Functions, and messaging"
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"path": "README.md",
|
||||||
|
"sha256": "5a7d0d76f54cbae89f1e7147bd50b25ba333fb16f79afb0442f11d7673b1cf2a"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": ".claude-plugin/plugin.json",
|
||||||
|
"sha256": "18793e20fa1c2d078881402c0942a85186cd8e8c0604ce11318466a35e2d5292"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/aws-serverless-eda/SKILL.md",
|
||||||
|
"sha256": "f008433c085a85dc0da9063a7e69545f91eebc1253a9018fe0950473b024a3cd"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/aws-serverless-eda/references/observability-best-practices.md",
|
||||||
|
"sha256": "531977c659a774fec3dabd4c369789d5979bceb83cd4656bfe818929569c9e7a"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/aws-serverless-eda/references/deployment-best-practices.md",
|
||||||
|
"sha256": "f94b18c62732f950a29b3e4cd11134ba397c2cfce4b3bb777ee729c17c9f1268"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/aws-serverless-eda/references/security-best-practices.md",
|
||||||
|
"sha256": "511f6e0921f852947db893ca64c093e15f728f6b4e35c40423a50f1398278261"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/aws-serverless-eda/references/serverless-patterns.md",
|
||||||
|
"sha256": "8b7408d9c98f8224290093acb831468b9a01bd8d17436e301d4383c18c556f2c"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/aws-serverless-eda/references/eda-patterns.md",
|
||||||
|
"sha256": "c3518448e773c0e93d1a1f518d3d8d67475995bf3211ae1cf57cac46447ea6e1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/aws-serverless-eda/references/performance-optimization.md",
|
||||||
|
"sha256": "5086493fbeb4c97c1bc891484d6deecbfbc6d02a9422f6854dd8c7274320bfa6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"dirSha256": "9375638efaf61fa4e3870fbb415c277069d5ddac44feeb5c77f78f30ed9c22f7"
|
||||||
|
},
|
||||||
|
"security": {
|
||||||
|
"scannedAt": null,
|
||||||
|
"scannerVersion": null,
|
||||||
|
"flags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
747
skills/aws-serverless-eda/SKILL.md
Normal file
747
skills/aws-serverless-eda/SKILL.md
Normal file
@@ -0,0 +1,747 @@
|
|||||||
|
---
|
||||||
|
name: aws-serverless-eda
|
||||||
|
description: AWS serverless and event-driven architecture expert based on Well-Architected Framework. Use when building serverless APIs, Lambda functions, REST APIs, microservices, or async workflows. Covers Lambda with TypeScript/Python, API Gateway (REST/HTTP), DynamoDB, Step Functions, EventBridge, SQS, SNS, and serverless patterns. Essential when user mentions serverless, Lambda, API Gateway, event-driven, async processing, queues, pub/sub, or wants to build scalable serverless applications with AWS best practices.
|
||||||
|
---
|
||||||
|
|
||||||
|
# AWS Serverless & Event-Driven Architecture
|
||||||
|
|
||||||
|
This skill provides comprehensive guidance for building serverless applications and event-driven architectures on AWS based on Well-Architected Framework principles.
|
||||||
|
|
||||||
|
## Integrated MCP Servers
|
||||||
|
|
||||||
|
This skill includes 5 MCP servers for serverless development:
|
||||||
|
|
||||||
|
### AWS Documentation MCP Server
|
||||||
|
**When to use**: Always verify AWS service information before implementation
|
||||||
|
- Search AWS documentation for latest features and best practices
|
||||||
|
- Check regional availability of AWS services
|
||||||
|
- Verify service limits and quotas
|
||||||
|
- Confirm API specifications and parameters
|
||||||
|
- Access up-to-date AWS service information
|
||||||
|
|
||||||
|
### AWS Serverless MCP Server
|
||||||
|
**Purpose**: Complete serverless application lifecycle with SAM CLI
|
||||||
|
- Initialize new serverless applications
|
||||||
|
- Deploy serverless applications
|
||||||
|
- Test Lambda functions locally
|
||||||
|
- Generate SAM templates
|
||||||
|
- Manage serverless application lifecycle
|
||||||
|
|
||||||
|
### AWS Lambda Tool MCP Server
|
||||||
|
**Purpose**: Execute Lambda functions as tools
|
||||||
|
- Invoke Lambda functions directly
|
||||||
|
- Test Lambda integrations
|
||||||
|
- Execute workflows requiring private resource access
|
||||||
|
- Run Lambda-based automation
|
||||||
|
|
||||||
|
### AWS Step Functions MCP Server
|
||||||
|
**Purpose**: Execute complex workflows and orchestration
|
||||||
|
- Create and manage state machines
|
||||||
|
- Execute workflow orchestrations
|
||||||
|
- Handle distributed transactions
|
||||||
|
- Implement saga patterns
|
||||||
|
- Coordinate microservices
|
||||||
|
|
||||||
|
### Amazon SNS/SQS MCP Server
|
||||||
|
**Purpose**: Event-driven messaging and queue management
|
||||||
|
- Publish messages to SNS topics
|
||||||
|
- Send/receive messages from SQS queues
|
||||||
|
- Manage event-driven communication
|
||||||
|
- Implement pub/sub patterns
|
||||||
|
- Handle asynchronous processing
|
||||||
|
|
||||||
|
## When to Use This Skill
|
||||||
|
|
||||||
|
Use this skill when:
|
||||||
|
- Building serverless applications with Lambda
|
||||||
|
- Designing event-driven architectures
|
||||||
|
- Implementing microservices patterns
|
||||||
|
- Creating asynchronous processing workflows
|
||||||
|
- Orchestrating multi-service transactions
|
||||||
|
- Building real-time data processing pipelines
|
||||||
|
- Implementing saga patterns for distributed transactions
|
||||||
|
- Designing for scale and resilience
|
||||||
|
|
||||||
|
## AWS Well-Architected Serverless Design Principles
|
||||||
|
|
||||||
|
### 1. Speedy, Simple, Singular
|
||||||
|
|
||||||
|
**Functions should be concise and single-purpose**
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Single purpose, focused function
|
||||||
|
export const processOrder = async (event: OrderEvent) => {
|
||||||
|
// Only handles order processing
|
||||||
|
const order = await validateOrder(event);
|
||||||
|
await saveOrder(order);
|
||||||
|
await publishOrderCreatedEvent(order);
|
||||||
|
return { statusCode: 200, body: JSON.stringify({ orderId: order.id }) };
|
||||||
|
};
|
||||||
|
|
||||||
|
// ❌ BAD - Function does too much
|
||||||
|
export const handleEverything = async (event: any) => {
|
||||||
|
// Handles orders, inventory, payments, shipping...
|
||||||
|
// Too many responsibilities
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**Keep functions environmentally efficient and cost-aware**:
|
||||||
|
- Minimize cold start times
|
||||||
|
- Optimize memory allocation
|
||||||
|
- Use provisioned concurrency only when needed
|
||||||
|
- Leverage connection reuse
|
||||||
|
|
||||||
|
### 2. Think Concurrent Requests, Not Total Requests
|
||||||
|
|
||||||
|
**Design for concurrency, not volume**
|
||||||
|
|
||||||
|
Lambda scales horizontally - design considerations should focus on:
|
||||||
|
- Concurrent execution limits
|
||||||
|
- Downstream service throttling
|
||||||
|
- Shared resource contention
|
||||||
|
- Connection pool sizing
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Consider concurrent Lambda executions accessing DynamoDB.
// Option A: on-demand capacity - DynamoDB absorbs changes in concurrency without capacity planning.
const onDemandTable = new dynamodb.Table(this, 'OnDemandTable', {
  partitionKey: { name: 'pk', type: dynamodb.AttributeType.STRING },
  billingMode: dynamodb.BillingMode.PAY_PER_REQUEST, // Auto-scales with load
});

// Option B: provisioned capacity + auto-scaling.
// Uses its own variable name and construct id so both options can live in the same scope.
const provisionedTable = new dynamodb.Table(this, 'ProvisionedTable', {
  partitionKey: { name: 'pk', type: dynamodb.AttributeType.STRING },
  billingMode: dynamodb.BillingMode.PROVISIONED,
  readCapacity: 5,
  writeCapacity: 5,
});

// Enable auto-scaling for concurrent load.
// autoScale*Capacity() only registers the scalable range; a scaling policy
// (here: target utilization) is what actually changes capacity.
provisionedTable
  .autoScaleReadCapacity({ minCapacity: 5, maxCapacity: 100 })
  .scaleOnUtilization({ targetUtilizationPercent: 70 });
provisionedTable
  .autoScaleWriteCapacity({ minCapacity: 5, maxCapacity: 100 })
  .scaleOnUtilization({ targetUtilizationPercent: 70 });
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Share Nothing
|
||||||
|
|
||||||
|
**Function runtime environments are short-lived**
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ❌ BAD - Relying on local file system
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
fs.writeFileSync('/tmp/data.json', JSON.stringify(data)); // /tmp is ephemeral: not shared between execution environments and lost when one is recycled
|
||||||
|
};
|
||||||
|
|
||||||
|
// ✅ GOOD - Use persistent storage
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
await s3.putObject({
|
||||||
|
Bucket: process.env.BUCKET_NAME,
|
||||||
|
Key: 'data.json',
|
||||||
|
Body: JSON.stringify(data),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**State management**:
|
||||||
|
- Use DynamoDB for persistent state
|
||||||
|
- Use Step Functions for workflow state
|
||||||
|
- Use ElastiCache for session state
|
||||||
|
- Use S3 for file storage
|
||||||
|
|
||||||
|
### 4. Assume No Hardware Affinity
|
||||||
|
|
||||||
|
**Applications must be hardware-agnostic**
|
||||||
|
|
||||||
|
Infrastructure can change without notice:
|
||||||
|
- Lambda functions can run on different hardware
|
||||||
|
- Container instances can be replaced
|
||||||
|
- No assumption about underlying infrastructure
|
||||||
|
|
||||||
|
**Design for portability**:
|
||||||
|
- Use environment variables for configuration
|
||||||
|
- Avoid hardware-specific optimizations
|
||||||
|
- Test across different environments
|
||||||
|
|
||||||
|
### 5. Orchestrate with State Machines, Not Function Chaining
|
||||||
|
|
||||||
|
**Use Step Functions for orchestration**
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ❌ BAD - Lambda function chaining
|
||||||
|
export const handler1 = async (event: any) => {
|
||||||
|
const result = await processStep1(event);
|
||||||
|
await lambda.invoke({
|
||||||
|
FunctionName: 'handler2',
|
||||||
|
Payload: JSON.stringify(result),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
|
||||||
|
// ✅ GOOD - Step Functions orchestration
|
||||||
|
const stateMachine = new stepfunctions.StateMachine(this, 'OrderWorkflow', {
|
||||||
|
definition: stepfunctions.Chain
|
||||||
|
.start(validateOrder)
|
||||||
|
.next(processPayment)
|
||||||
|
.next(shipOrder)
|
||||||
|
.next(sendConfirmation),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits of Step Functions**:
|
||||||
|
- Visual workflow representation
|
||||||
|
- Built-in error handling and retries
|
||||||
|
- Execution history and debugging
|
||||||
|
- Parallel and sequential execution
|
||||||
|
- Service integrations without code
|
||||||
|
|
||||||
|
### 6. Use Events to Trigger Transactions
|
||||||
|
|
||||||
|
**Event-driven over synchronous request/response**
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Pattern: Event-driven processing
|
||||||
|
const bucket = new s3.Bucket(this, 'DataBucket');
|
||||||
|
|
||||||
|
bucket.addEventNotification(
|
||||||
|
s3.EventType.OBJECT_CREATED,
|
||||||
|
new s3n.LambdaDestination(processFunction),
|
||||||
|
{ prefix: 'uploads/' }
|
||||||
|
);
|
||||||
|
|
||||||
|
// Pattern: EventBridge integration
|
||||||
|
const rule = new events.Rule(this, 'OrderRule', {
|
||||||
|
eventPattern: {
|
||||||
|
source: ['orders'],
|
||||||
|
detailType: ['OrderPlaced'],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
rule.addTarget(new targets.LambdaFunction(processOrderFunction));
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits**:
|
||||||
|
- Loose coupling between services
|
||||||
|
- Asynchronous processing
|
||||||
|
- Better fault tolerance
|
||||||
|
- Independent scaling
|
||||||
|
|
||||||
|
### 7. Design for Failures and Duplicates
|
||||||
|
|
||||||
|
**Operations must be idempotent**
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Idempotent operation
|
||||||
|
export const handler = async (event: SQSEvent) => {
|
||||||
|
for (const record of event.Records) {
|
||||||
|
const orderId = JSON.parse(record.body).orderId;
|
||||||
|
|
||||||
|
// Check if already processed (idempotency)
|
||||||
|
const existing = await dynamodb.getItem({
|
||||||
|
TableName: process.env.TABLE_NAME,
|
||||||
|
Key: { orderId },
|
||||||
|
});
|
||||||
|
|
||||||
|
if (existing.Item) {
|
||||||
|
console.log('Order already processed:', orderId);
|
||||||
|
continue; // Skip duplicate
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process order
|
||||||
|
await processOrder(orderId);
|
||||||
|
|
||||||
|
// Mark as processed
|
||||||
|
await dynamodb.putItem({
|
||||||
|
TableName: process.env.TABLE_NAME,
|
||||||
|
Item: { orderId, processedAt: Date.now() },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implement retry logic with exponential backoff**:
|
||||||
|
```typescript
|
||||||
|
/**
 * Runs `fn`, retrying failed attempts with exponential backoff and full jitter.
 *
 * @param fn - Async operation to attempt. It may run more than once, so it should be idempotent.
 * @param maxRetries - Maximum number of attempts in total; values below 1 are treated as 1.
 * @param baseDelayMs - Backoff base. The wait before retry `i` is a random value in
 *   [0, baseDelayMs * 2^i), which spreads out retries from concurrent callers.
 * @returns The value resolved by the first successful attempt.
 * @throws The error raised by the final attempt once every attempt has failed.
 */
async function withRetry<T>(
  fn: () => Promise<T>,
  maxRetries = 3,
  baseDelayMs = 1000,
): Promise<T> {
  // Always make at least one attempt, even for 0, negative or NaN counts.
  const attempts = maxRetries >= 1 ? Math.floor(maxRetries) : 1;
  let lastError: unknown;

  for (let attempt = 0; attempt < attempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;
      if (attempt === attempts - 1) break; // no point sleeping after the final attempt

      // Full jitter: pick a random delay below the exponential ceiling.
      const ceilingMs = baseDelayMs * Math.pow(2, attempt);
      const delayMs = Math.random() * ceilingMs;
      await new Promise(resolve => setTimeout(resolve, delayMs));
    }
  }

  // Surface the real failure instead of a generic "max retries" message.
  throw lastError;
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Event-Driven Architecture Patterns
|
||||||
|
|
||||||
|
### Pattern 1: Event Router (EventBridge)
|
||||||
|
|
||||||
|
Use EventBridge for event routing and filtering:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Create custom event bus
|
||||||
|
const eventBus = new events.EventBus(this, 'AppEventBus', {
|
||||||
|
eventBusName: 'application-events',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Define event schema
|
||||||
|
const schema = new events.Schema(this, 'OrderSchema', {
|
||||||
|
schemaName: 'OrderPlaced',
|
||||||
|
definition: events.SchemaDefinition.fromInline({
|
||||||
|
openapi: '3.0.0',
|
||||||
|
info: { version: '1.0.0', title: 'Order Events' },
|
||||||
|
paths: {},
|
||||||
|
components: {
|
||||||
|
schemas: {
|
||||||
|
OrderPlaced: {
|
||||||
|
type: 'object',
|
||||||
|
properties: {
|
||||||
|
orderId: { type: 'string' },
|
||||||
|
customerId: { type: 'string' },
|
||||||
|
amount: { type: 'number' },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create rules for different consumers
|
||||||
|
new events.Rule(this, 'ProcessOrderRule', {
|
||||||
|
eventBus,
|
||||||
|
eventPattern: {
|
||||||
|
source: ['orders'],
|
||||||
|
detailType: ['OrderPlaced'],
|
||||||
|
},
|
||||||
|
targets: [new targets.LambdaFunction(processOrderFunction)],
|
||||||
|
});
|
||||||
|
|
||||||
|
new events.Rule(this, 'NotifyCustomerRule', {
|
||||||
|
eventBus,
|
||||||
|
eventPattern: {
|
||||||
|
source: ['orders'],
|
||||||
|
detailType: ['OrderPlaced'],
|
||||||
|
},
|
||||||
|
targets: [new targets.LambdaFunction(notifyCustomerFunction)],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern 2: Queue-Based Processing (SQS)
|
||||||
|
|
||||||
|
Use SQS for reliable asynchronous processing:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Standard queue for at-least-once delivery
|
||||||
|
const queue = new sqs.Queue(this, 'ProcessingQueue', {
|
||||||
|
visibilityTimeout: Duration.seconds(300),
|
||||||
|
retentionPeriod: Duration.days(14),
|
||||||
|
deadLetterQueue: {
|
||||||
|
queue: dlq,
|
||||||
|
maxReceiveCount: 3,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// FIFO queue for ordered processing
|
||||||
|
const fifoQueue = new sqs.Queue(this, 'OrderedQueue', {
|
||||||
|
fifo: true,
|
||||||
|
contentBasedDeduplication: true,
|
||||||
|
deduplicationScope: sqs.DeduplicationScope.MESSAGE_GROUP,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Lambda consumer
|
||||||
|
new lambda.EventSourceMapping(this, 'QueueConsumer', {
|
||||||
|
target: processingFunction,
|
||||||
|
eventSourceArn: queue.queueArn,
|
||||||
|
batchSize: 10,
|
||||||
|
maxBatchingWindow: Duration.seconds(5),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern 3: Pub/Sub (SNS + SQS Fan-Out)
|
||||||
|
|
||||||
|
Implement fan-out pattern for multiple consumers:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Create SNS topic
|
||||||
|
const topic = new sns.Topic(this, 'OrderTopic', {
|
||||||
|
displayName: 'Order Events',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Multiple SQS queues subscribe to topic
|
||||||
|
const inventoryQueue = new sqs.Queue(this, 'InventoryQueue');
|
||||||
|
const shippingQueue = new sqs.Queue(this, 'ShippingQueue');
|
||||||
|
const analyticsQueue = new sqs.Queue(this, 'AnalyticsQueue');
|
||||||
|
|
||||||
|
topic.addSubscription(new subscriptions.SqsSubscription(inventoryQueue));
|
||||||
|
topic.addSubscription(new subscriptions.SqsSubscription(shippingQueue));
|
||||||
|
topic.addSubscription(new subscriptions.SqsSubscription(analyticsQueue));
|
||||||
|
|
||||||
|
// Each queue has its own Lambda consumer
|
||||||
|
new lambda.EventSourceMapping(this, 'InventoryConsumer', {
|
||||||
|
target: inventoryFunction,
|
||||||
|
eventSourceArn: inventoryQueue.queueArn,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern 4: Saga Pattern with Step Functions
|
||||||
|
|
||||||
|
Implement distributed transactions:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const reserveFlight = new tasks.LambdaInvoke(this, 'ReserveFlight', {
|
||||||
|
lambdaFunction: reserveFlightFunction,
|
||||||
|
outputPath: '$.Payload',
|
||||||
|
});
|
||||||
|
|
||||||
|
const reserveHotel = new tasks.LambdaInvoke(this, 'ReserveHotel', {
|
||||||
|
lambdaFunction: reserveHotelFunction,
|
||||||
|
outputPath: '$.Payload',
|
||||||
|
});
|
||||||
|
|
||||||
|
const processPayment = new tasks.LambdaInvoke(this, 'ProcessPayment', {
|
||||||
|
lambdaFunction: processPaymentFunction,
|
||||||
|
outputPath: '$.Payload',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Compensating transactions
|
||||||
|
const cancelFlight = new tasks.LambdaInvoke(this, 'CancelFlight', {
|
||||||
|
lambdaFunction: cancelFlightFunction,
|
||||||
|
});
|
||||||
|
|
||||||
|
const cancelHotel = new tasks.LambdaInvoke(this, 'CancelHotel', {
|
||||||
|
lambdaFunction: cancelHotelFunction,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Define saga with compensation.
// `.next()` returns a Chain, and a Chain has no addCatch(). Wrap the happy path
// in a single state first, then attach the compensating branch to that state.
// NOTE(review): toSingleState() wraps the chain in a Parallel state, so the
// success output becomes a one-element array - confirm downstream consumers.
const definition = reserveFlight
  .next(reserveHotel)
  .next(processPayment)
  .toSingleState('BookingSaga')
  .addCatch(cancelHotel.next(cancelFlight), {
    resultPath: '$.error', // keep the original input and add the error details
  });
|
||||||
|
|
||||||
|
new stepfunctions.StateMachine(this, 'BookingStateMachine', {
|
||||||
|
definition,
|
||||||
|
timeout: Duration.minutes(5),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern 5: Event Sourcing
|
||||||
|
|
||||||
|
Store events as source of truth:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Event store with DynamoDB
|
||||||
|
const eventStore = new dynamodb.Table(this, 'EventStore', {
|
||||||
|
partitionKey: { name: 'aggregateId', type: dynamodb.AttributeType.STRING },
|
||||||
|
sortKey: { name: 'version', type: dynamodb.AttributeType.NUMBER },
|
||||||
|
stream: dynamodb.StreamViewType.NEW_IMAGE,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Lambda function stores events
|
||||||
|
/**
 * Appends one event to an aggregate's stream in the event store.
 *
 * Reads the highest stored version for `aggregateId`, then writes the new event
 * at the next version using a conditional put, so two concurrent writers can
 * never both claim the same version (optimistic concurrency).
 *
 * @param event - Command payload carrying `aggregateId`, `eventType` and `eventData`.
 * @throws Whatever the `dynamodb` client raises when the condition fails
 *   (ConditionalCheckFailedException in DynamoDB); callers can re-read and retry.
 */
export const handleCommand = async (event: any) => {
  const { aggregateId, eventType, eventData } = event;

  // Get current version: descending sort-key order, first item only.
  const latest = await dynamodb.query({
    TableName: process.env.EVENT_STORE,
    KeyConditionExpression: 'aggregateId = :id',
    ExpressionAttributeValues: { ':id': aggregateId },
    ScanIndexForward: false,
    Limit: 1,
  });

  // An aggregate with no events yet starts at version 1.
  const currentVersion = latest.Items?.[0]?.version ?? 0;
  const nextVersion = currentVersion + 1;

  // Append new event. The condition is evaluated against the item with this
  // exact (aggregateId, version) key, so the put fails instead of overwriting
  // an event that another writer stored at the same version.
  await dynamodb.putItem({
    TableName: process.env.EVENT_STORE,
    Item: {
      aggregateId,
      version: nextVersion,
      eventType,
      eventData,
      timestamp: Date.now(),
    },
    ConditionExpression: 'attribute_not_exists(aggregateId)',
  });
};
|
||||||
|
|
||||||
|
// Projections read from event stream
|
||||||
|
eventStore.grantStreamRead(projectionFunction);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Serverless Architecture Patterns
|
||||||
|
|
||||||
|
### Pattern 1: API-Driven Microservices
|
||||||
|
|
||||||
|
REST APIs with Lambda backend:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const api = new apigateway.RestApi(this, 'Api', {
|
||||||
|
restApiName: 'microservices-api',
|
||||||
|
deployOptions: {
|
||||||
|
throttlingRateLimit: 1000,
|
||||||
|
throttlingBurstLimit: 2000,
|
||||||
|
tracingEnabled: true,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// User service
|
||||||
|
const users = api.root.addResource('users');
|
||||||
|
users.addMethod('GET', new apigateway.LambdaIntegration(getUsersFunction));
|
||||||
|
users.addMethod('POST', new apigateway.LambdaIntegration(createUserFunction));
|
||||||
|
|
||||||
|
// Order service
|
||||||
|
const orders = api.root.addResource('orders');
|
||||||
|
orders.addMethod('GET', new apigateway.LambdaIntegration(getOrdersFunction));
|
||||||
|
orders.addMethod('POST', new apigateway.LambdaIntegration(createOrderFunction));
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern 2: Stream Processing
|
||||||
|
|
||||||
|
Real-time data processing with Kinesis:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const stream = new kinesis.Stream(this, 'DataStream', {
|
||||||
|
shardCount: 2,
|
||||||
|
retentionPeriod: Duration.days(7),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Lambda processes stream records
|
||||||
|
new lambda.EventSourceMapping(this, 'StreamProcessor', {
  target: processFunction,
  eventSourceArn: stream.streamArn,
  batchSize: 100,
  maxBatchingWindow: Duration.seconds(5),
  parallelizationFactor: 10,
  startingPosition: lambda.StartingPosition.LATEST,
  retryAttempts: 3,
  bisectBatchOnError: true, // split a failing batch to isolate the bad record
  // onFailure takes an event-source DLQ (IEventSourceDlq), not an async-invoke destination.
  // import * as lambdaEventSources from 'aws-cdk-lib/aws-lambda-event-sources';
  onFailure: new lambdaEventSources.SqsDlq(dlq),
});

// The low-level EventSourceMapping construct does not grant stream access itself.
stream.grantRead(processFunction);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern 3: Async Task Processing
|
||||||
|
|
||||||
|
Background job processing:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// SQS queue for tasks
|
||||||
|
const taskQueue = new sqs.Queue(this, 'TaskQueue', {
|
||||||
|
visibilityTimeout: Duration.minutes(5),
|
||||||
|
receiveMessageWaitTime: Duration.seconds(20), // Long polling
|
||||||
|
deadLetterQueue: {
|
||||||
|
queue: dlq,
|
||||||
|
maxReceiveCount: 3,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Lambda worker processes tasks
|
||||||
|
const worker = new lambda.Function(this, 'TaskWorker', {
|
||||||
|
// ... configuration
|
||||||
|
reservedConcurrentExecutions: 10, // Control concurrency
|
||||||
|
});
|
||||||
|
|
||||||
|
new lambda.EventSourceMapping(this, 'TaskConsumer', {
|
||||||
|
target: worker,
|
||||||
|
eventSourceArn: taskQueue.queueArn,
|
||||||
|
batchSize: 10,
|
||||||
|
reportBatchItemFailures: true, // Partial batch failure handling
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern 4: Scheduled Jobs
|
||||||
|
|
||||||
|
Periodic processing with EventBridge:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Daily cleanup job
|
||||||
|
new events.Rule(this, 'DailyCleanup', {
|
||||||
|
schedule: events.Schedule.cron({ hour: '2', minute: '0' }),
|
||||||
|
targets: [new targets.LambdaFunction(cleanupFunction)],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Process every 5 minutes
|
||||||
|
new events.Rule(this, 'FrequentProcessing', {
|
||||||
|
schedule: events.Schedule.rate(Duration.minutes(5)),
|
||||||
|
targets: [new targets.LambdaFunction(processFunction)],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern 5: Webhook Processing
|
||||||
|
|
||||||
|
Handle external webhooks:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// API Gateway endpoint for webhooks
|
||||||
|
const webhookApi = new apigateway.RestApi(this, 'WebhookApi', {
|
||||||
|
restApiName: 'webhooks',
|
||||||
|
});
|
||||||
|
|
||||||
|
const webhook = webhookApi.root.addResource('webhook');
|
||||||
|
webhook.addMethod('POST', new apigateway.LambdaIntegration(webhookFunction, {
|
||||||
|
proxy: true,
|
||||||
|
timeout: Duration.seconds(29), // API Gateway max
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Lambda handler validates and queues webhook
|
||||||
|
export const handler = async (event: APIGatewayProxyEvent) => {
|
||||||
|
// Validate webhook signature
|
||||||
|
const isValid = validateSignature(event.headers, event.body);
|
||||||
|
if (!isValid) {
|
||||||
|
return { statusCode: 401, body: 'Invalid signature' };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Queue for async processing
|
||||||
|
await sqs.sendMessage({
|
||||||
|
QueueUrl: process.env.QUEUE_URL,
|
||||||
|
MessageBody: event.body,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Return immediately
|
||||||
|
return { statusCode: 202, body: 'Accepted' };
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
## Best Practices
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
|
||||||
|
**Implement comprehensive error handling**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
export const handler = async (event: SQSEvent) => {
|
||||||
|
const failures: SQSBatchItemFailure[] = [];
|
||||||
|
|
||||||
|
for (const record of event.Records) {
|
||||||
|
try {
|
||||||
|
await processRecord(record);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Failed to process record:', record.messageId, error);
|
||||||
|
failures.push({ itemIdentifier: record.messageId });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return partial batch failures for retry
|
||||||
|
return { batchItemFailures: failures };
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Dead Letter Queues
|
||||||
|
|
||||||
|
**Always configure DLQs for error handling**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const dlq = new sqs.Queue(this, 'DLQ', {
|
||||||
|
retentionPeriod: Duration.days(14),
|
||||||
|
});
|
||||||
|
|
||||||
|
const queue = new sqs.Queue(this, 'Queue', {
|
||||||
|
deadLetterQueue: {
|
||||||
|
queue: dlq,
|
||||||
|
maxReceiveCount: 3,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Monitor DLQ depth
|
||||||
|
new cloudwatch.Alarm(this, 'DLQAlarm', {
|
||||||
|
metric: dlq.metricApproximateNumberOfMessagesVisible(),
|
||||||
|
threshold: 1,
|
||||||
|
evaluationPeriods: 1,
|
||||||
|
alarmDescription: 'Messages in DLQ require attention',
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Observability
|
||||||
|
|
||||||
|
**Enable tracing and monitoring**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
tracing: lambda.Tracing.ACTIVE, // X-Ray tracing
|
||||||
|
environment: {
|
||||||
|
POWERTOOLS_SERVICE_NAME: 'order-service',
|
||||||
|
POWERTOOLS_METRICS_NAMESPACE: 'MyApp',
|
||||||
|
LOG_LEVEL: 'INFO',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Using MCP Servers Effectively
|
||||||
|
|
||||||
|
### AWS Serverless MCP Usage
|
||||||
|
|
||||||
|
**Lifecycle management**:
|
||||||
|
- Initialize new serverless projects
|
||||||
|
- Generate SAM templates
|
||||||
|
- Deploy applications
|
||||||
|
- Test locally before deployment
|
||||||
|
|
||||||
|
### Lambda Tool MCP Usage
|
||||||
|
|
||||||
|
**Function execution**:
|
||||||
|
- Test Lambda functions directly
|
||||||
|
- Execute automation workflows
|
||||||
|
- Access private resources
|
||||||
|
- Validate integrations
|
||||||
|
|
||||||
|
### Step Functions MCP Usage
|
||||||
|
|
||||||
|
**Workflow orchestration**:
|
||||||
|
- Create state machines for complex workflows
|
||||||
|
- Execute distributed transactions
|
||||||
|
- Implement saga patterns
|
||||||
|
- Coordinate microservices
|
||||||
|
|
||||||
|
### SNS/SQS MCP Usage
|
||||||
|
|
||||||
|
**Messaging operations**:
|
||||||
|
- Test pub/sub patterns
|
||||||
|
- Send test messages to queues
|
||||||
|
- Validate event routing
|
||||||
|
- Debug message processing
|
||||||
|
|
||||||
|
## Additional Resources
|
||||||
|
|
||||||
|
This skill includes comprehensive reference documentation based on AWS best practices:
|
||||||
|
|
||||||
|
- **Serverless Patterns**: `references/serverless-patterns.md`
|
||||||
|
- Core serverless architectures and API patterns
|
||||||
|
- Data processing and integration patterns
|
||||||
|
- Orchestration with Step Functions
|
||||||
|
- Anti-patterns to avoid
|
||||||
|
|
||||||
|
- **Event-Driven Architecture Patterns**: `references/eda-patterns.md`
|
||||||
|
- Event routing and processing patterns
|
||||||
|
- Event sourcing and saga patterns
|
||||||
|
- Idempotency and error handling
|
||||||
|
- Message ordering and deduplication
|
||||||
|
|
||||||
|
- **Security Best Practices**: `references/security-best-practices.md`
|
||||||
|
- Shared responsibility model
|
||||||
|
- IAM least privilege patterns
|
||||||
|
- Data protection and encryption
|
||||||
|
- Network security with VPC
|
||||||
|
|
||||||
|
- **Observability Best Practices**: `references/observability-best-practices.md`
|
||||||
|
- Three pillars: metrics, logs, traces
|
||||||
|
- Structured logging with Lambda Powertools
|
||||||
|
- X-Ray distributed tracing
|
||||||
|
- CloudWatch alarms and dashboards
|
||||||
|
|
||||||
|
- **Performance Optimization**: `references/performance-optimization.md`
|
||||||
|
- Cold start optimization techniques
|
||||||
|
- Memory and CPU optimization
|
||||||
|
- Package size reduction
|
||||||
|
- Provisioned concurrency patterns
|
||||||
|
|
||||||
|
- **Deployment Best Practices**: `references/deployment-best-practices.md`
|
||||||
|
- CI/CD pipeline design
|
||||||
|
- Testing strategies (unit, integration, load)
|
||||||
|
- Deployment strategies (canary, blue/green)
|
||||||
|
- Rollback and safety mechanisms
|
||||||
|
|
||||||
|
**External Resources**:
|
||||||
|
- **AWS Well-Architected Serverless Lens**: https://docs.aws.amazon.com/wellarchitected/latest/serverless-applications-lens/
|
||||||
|
- **ServerlessLand.com**: Pre-built serverless patterns - https://serverlessland.com/patterns
|
||||||
|
- **AWS Serverless Workshops**: https://serverlessland.com/learn?type=Workshops
|
||||||
|
|
||||||
|
For detailed implementation patterns, anti-patterns, and code examples, refer to the comprehensive references in the skill directory.
|
||||||
@@ -0,0 +1,830 @@
|
|||||||
|
# Serverless Deployment Best Practices
|
||||||
|
|
||||||
|
Deployment best practices for serverless applications including CI/CD, testing, and deployment strategies.
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
- [Software Release Process](#software-release-process)
|
||||||
|
- [Infrastructure as Code](#infrastructure-as-code)
|
||||||
|
- [CI/CD Pipeline Design](#cicd-pipeline-design)
|
||||||
|
- [Testing Strategies](#testing-strategies)
|
||||||
|
- [Deployment Strategies](#deployment-strategies)
|
||||||
|
- [Rollback and Safety](#rollback-and-safety)
|
||||||
|
|
||||||
|
## Software Release Process
|
||||||
|
|
||||||
|
### Four Stages of Release
|
||||||
|
|
||||||
|
**1. Source Phase**:
|
||||||
|
- Developers commit code changes
|
||||||
|
- Code review (peer review)
|
||||||
|
- Version control (Git)
|
||||||
|
|
||||||
|
**2. Build Phase**:
|
||||||
|
- Compile code
|
||||||
|
- Run unit tests
|
||||||
|
- Style checking and linting
|
||||||
|
- Create deployment packages
|
||||||
|
- Build container images
|
||||||
|
|
||||||
|
**3. Test Phase**:
|
||||||
|
- Integration tests with other systems
|
||||||
|
- Load testing
|
||||||
|
- UI testing
|
||||||
|
- Security testing (penetration testing)
|
||||||
|
- Acceptance testing
|
||||||
|
|
||||||
|
**4. Production Phase**:
|
||||||
|
- Deploy to production environment
|
||||||
|
- Monitor for errors
|
||||||
|
- Validate deployment success
|
||||||
|
- Rollback if needed
|
||||||
|
|
||||||
|
### CI/CD Maturity Levels
|
||||||
|
|
||||||
|
**Continuous Integration (CI)**:
|
||||||
|
- Automated build on code commit
|
||||||
|
- Automated unit testing
|
||||||
|
- Manual deployment to test/production
|
||||||
|
|
||||||
|
**Continuous Delivery (CD)**:
|
||||||
|
- Automated deployment to test environments
|
||||||
|
- Manual approval for production
|
||||||
|
- Automated testing in non-prod
|
||||||
|
|
||||||
|
**Continuous Deployment**:
|
||||||
|
- Fully automated pipeline
|
||||||
|
- Automated deployment to production
|
||||||
|
- No manual intervention after code commit
|
||||||
|
|
||||||
|
## Infrastructure as Code
|
||||||
|
|
||||||
|
### Framework Selection
|
||||||
|
|
||||||
|
**AWS SAM (Serverless Application Model)**:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# template.yaml
|
||||||
|
AWSTemplateFormatVersion: '2010-09-09'
|
||||||
|
Transform: AWS::Serverless-2016-10-31
|
||||||
|
|
||||||
|
Resources:
|
||||||
|
OrderFunction:
|
||||||
|
Type: AWS::Serverless::Function
|
||||||
|
Properties:
|
||||||
|
Handler: app.handler
|
||||||
|
Runtime: nodejs20.x
|
||||||
|
CodeUri: src/
|
||||||
|
Events:
|
||||||
|
Api:
|
||||||
|
Type: Api
|
||||||
|
Properties:
|
||||||
|
Path: /orders
|
||||||
|
Method: post
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits**:
|
||||||
|
- Simple, serverless-focused syntax
|
||||||
|
- Built-in best practices
|
||||||
|
- SAM CLI for local testing
|
||||||
|
- Integrates with CodeDeploy
|
||||||
|
|
||||||
|
**AWS CDK**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const orderFunction = new NodejsFunction(this, 'OrderFunction', {
|
||||||
|
entry: 'src/orders/handler.ts',
|
||||||
|
environment: {
|
||||||
|
TABLE_NAME: ordersTable.tableName,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
ordersTable.grantReadWriteData(orderFunction);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits**:
|
||||||
|
- Type-safe, programmatic
|
||||||
|
- Reusable constructs
|
||||||
|
- Rich AWS service support
|
||||||
|
- Better for complex infrastructure
|
||||||
|
|
||||||
|
**When to use**:
|
||||||
|
- **SAM**: Serverless-only applications, simpler projects
|
||||||
|
- **CDK**: Complex infrastructure, multiple services, reusable patterns
|
||||||
|
|
||||||
|
### Environment Management
|
||||||
|
|
||||||
|
**Separate environments**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// CDK App
|
||||||
|
const app = new cdk.App();
|
||||||
|
|
||||||
|
new ServerlessStack(app, 'DevStack', {
|
||||||
|
env: { account: '111111111111', region: 'us-east-1' },
|
||||||
|
environment: 'dev',
|
||||||
|
logLevel: 'DEBUG',
|
||||||
|
});
|
||||||
|
|
||||||
|
new ServerlessStack(app, 'ProdStack', {
|
||||||
|
env: { account: '222222222222', region: 'us-east-1' },
|
||||||
|
environment: 'prod',
|
||||||
|
logLevel: 'INFO',
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**SAM with parameters**:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
Parameters:
|
||||||
|
Environment:
|
||||||
|
Type: String
|
||||||
|
Default: dev
|
||||||
|
AllowedValues:
|
||||||
|
- dev
|
||||||
|
- staging
|
||||||
|
- prod
|
||||||
|
|
||||||
|
Conditions:
  IsProd: !Equals [!Ref Environment, prod]

Resources:
|
||||||
|
Function:
|
||||||
|
Type: AWS::Serverless::Function
|
||||||
|
Properties:
|
||||||
|
Environment:
|
||||||
|
Variables:
|
||||||
|
ENVIRONMENT: !Ref Environment
|
||||||
|
LOG_LEVEL: !If [IsProd, INFO, DEBUG]
|
||||||
|
```
|
||||||
|
|
||||||
|
## CI/CD Pipeline Design
|
||||||
|
|
||||||
|
### AWS CodePipeline
|
||||||
|
|
||||||
|
**Comprehensive pipeline**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import * as codepipeline from 'aws-cdk-lib/aws-codepipeline';
|
||||||
|
import * as codepipeline_actions from 'aws-cdk-lib/aws-codepipeline-actions';
|
||||||
|
|
||||||
|
const sourceOutput = new codepipeline.Artifact();
|
||||||
|
const buildOutput = new codepipeline.Artifact();
|
||||||
|
|
||||||
|
const pipeline = new codepipeline.Pipeline(this, 'Pipeline', {
|
||||||
|
pipelineName: 'serverless-pipeline',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Source stage
|
||||||
|
pipeline.addStage({
|
||||||
|
stageName: 'Source',
|
||||||
|
actions: [
|
||||||
|
new codepipeline_actions.CodeStarConnectionsSourceAction({
|
||||||
|
actionName: 'GitHub_Source',
|
||||||
|
owner: 'myorg',
|
||||||
|
repo: 'myrepo',
|
||||||
|
branch: 'main',
|
||||||
|
output: sourceOutput,
|
||||||
|
connectionArn: githubConnection.connectionArn,
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Build stage
|
||||||
|
pipeline.addStage({
|
||||||
|
stageName: 'Build',
|
||||||
|
actions: [
|
||||||
|
new codepipeline_actions.CodeBuildAction({
|
||||||
|
actionName: 'Build',
|
||||||
|
project: buildProject,
|
||||||
|
input: sourceOutput,
|
||||||
|
outputs: [buildOutput],
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Test stage
|
||||||
|
pipeline.addStage({
|
||||||
|
stageName: 'Test',
|
||||||
|
actions: [
|
||||||
|
new codepipeline_actions.CloudFormationCreateUpdateStackAction({
|
||||||
|
actionName: 'Deploy_Test',
|
||||||
|
templatePath: buildOutput.atPath('packaged.yaml'),
|
||||||
|
stackName: 'test-stack',
|
||||||
|
adminPermissions: true,
|
||||||
|
}),
|
||||||
|
new codepipeline_actions.CodeBuildAction({
|
||||||
|
actionName: 'Integration_Tests',
|
||||||
|
project: testProject,
|
||||||
|
input: buildOutput,
|
||||||
|
runOrder: 2,
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Production stage (with manual approval)
|
||||||
|
pipeline.addStage({
|
||||||
|
stageName: 'Production',
|
||||||
|
actions: [
|
||||||
|
new codepipeline_actions.ManualApprovalAction({
|
||||||
|
actionName: 'Approve',
|
||||||
|
}),
|
||||||
|
new codepipeline_actions.CloudFormationCreateUpdateStackAction({
|
||||||
|
actionName: 'Deploy_Prod',
|
||||||
|
templatePath: buildOutput.atPath('packaged.yaml'),
|
||||||
|
stackName: 'prod-stack',
|
||||||
|
adminPermissions: true,
|
||||||
|
runOrder: 2,
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### GitHub Actions
|
||||||
|
|
||||||
|
**Serverless deployment workflow**:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# .github/workflows/deploy.yml
|
||||||
|
name: Deploy Serverless Application
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: ['**']  # all branches: non-main refs deploy to dev, main deploys to prod
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-deploy:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
|
||||||
|
- name: Setup Node.js
|
||||||
|
uses: actions/setup-node@v3
|
||||||
|
with:
|
||||||
|
node-version: '20'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: npm ci
|
||||||
|
|
||||||
|
- name: Run tests
|
||||||
|
run: npm test
|
||||||
|
|
||||||
|
- name: Setup SAM CLI
|
||||||
|
uses: aws-actions/setup-sam@v2
|
||||||
|
|
||||||
|
- name: Build SAM application
|
||||||
|
run: sam build
|
||||||
|
|
||||||
|
- name: Deploy to Dev
|
||||||
|
if: github.ref != 'refs/heads/main'
|
||||||
|
run: |
|
||||||
|
sam deploy \
|
||||||
|
--no-confirm-changeset \
|
||||||
|
--no-fail-on-empty-changeset \
|
||||||
|
--stack-name dev-stack \
|
||||||
|
--parameter-overrides Environment=dev
|
||||||
|
|
||||||
|
- name: Run integration tests
|
||||||
|
run: npm run test:integration
|
||||||
|
|
||||||
|
- name: Deploy to Prod
|
||||||
|
if: github.ref == 'refs/heads/main'
|
||||||
|
run: |
|
||||||
|
sam deploy \
|
||||||
|
--no-confirm-changeset \
|
||||||
|
--no-fail-on-empty-changeset \
|
||||||
|
--stack-name prod-stack \
|
||||||
|
--parameter-overrides Environment=prod
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing Strategies
|
||||||
|
|
||||||
|
### Unit Testing
|
||||||
|
|
||||||
|
**Test business logic independently**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// handler.ts
|
||||||
|
export const processOrder = (order: Order): ProcessedOrder => {
|
||||||
|
// Pure business logic (easily testable)
|
||||||
|
validateOrder(order);
|
||||||
|
calculateTotal(order);
|
||||||
|
return transformOrder(order);
|
||||||
|
};
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const order = parseEvent(event);
|
||||||
|
const processed = processOrder(order); // Testable function
|
||||||
|
await saveToDatabase(processed);
|
||||||
|
return formatResponse(processed);
|
||||||
|
};
|
||||||
|
|
||||||
|
// handler.test.ts
|
||||||
|
import { processOrder } from './handler';
|
||||||
|
|
||||||
|
describe('processOrder', () => {
|
||||||
|
it('calculates total correctly', () => {
|
||||||
|
const order = {
|
||||||
|
items: [
|
||||||
|
{ price: 10, quantity: 2 },
|
||||||
|
{ price: 5, quantity: 3 },
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
const result = processOrder(order);
|
||||||
|
|
||||||
|
expect(result.total).toBe(35);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('throws on invalid order', () => {
|
||||||
|
const invalid = { items: [] };
|
||||||
|
expect(() => processOrder(invalid)).toThrow();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Integration Testing
|
||||||
|
|
||||||
|
**Test in actual AWS environment**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// integration.test.ts
|
||||||
|
import { LambdaClient, InvokeCommand } from '@aws-sdk/client-lambda';
|
||||||
|
import { DynamoDBClient, GetItemCommand } from '@aws-sdk/client-dynamodb';
|
||||||
|
|
||||||
|
describe('Order Processing Integration', () => {
|
||||||
|
const lambda = new LambdaClient({});
|
||||||
|
const dynamodb = new DynamoDBClient({});
|
||||||
|
|
||||||
|
it('processes order end-to-end', async () => {
|
||||||
|
// Invoke Lambda
|
||||||
|
const response = await lambda.send(new InvokeCommand({
|
||||||
|
FunctionName: process.env.FUNCTION_NAME,
|
||||||
|
Payload: JSON.stringify({
|
||||||
|
orderId: 'test-123',
|
||||||
|
items: [{ productId: 'prod-1', quantity: 2 }],
|
||||||
|
}),
|
||||||
|
}));
|
||||||
|
|
||||||
|
const result = JSON.parse(Buffer.from(response.Payload!).toString());
|
||||||
|
|
||||||
|
expect(result.statusCode).toBe(200);
|
||||||
|
|
||||||
|
// Verify database write
|
||||||
|
const dbResult = await dynamodb.send(new GetItemCommand({
|
||||||
|
TableName: process.env.TABLE_NAME,
|
||||||
|
Key: { orderId: { S: 'test-123' } },
|
||||||
|
}));
|
||||||
|
|
||||||
|
expect(dbResult.Item).toBeDefined();
|
||||||
|
expect(dbResult.Item?.status.S).toBe('PROCESSED');
|
||||||
|
});
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Local Testing with SAM
|
||||||
|
|
||||||
|
**Test locally before deployment**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Start local API
|
||||||
|
sam local start-api
|
||||||
|
|
||||||
|
# Invoke function locally
|
||||||
|
sam local invoke OrderFunction -e events/create-order.json
|
||||||
|
|
||||||
|
# Generate sample events
|
||||||
|
sam local generate-event apigateway aws-proxy > event.json
|
||||||
|
|
||||||
|
# Debug locally
|
||||||
|
sam local invoke OrderFunction -d 5858
|
||||||
|
|
||||||
|
# Test with Docker
|
||||||
|
sam local start-api --docker-network my-network
|
||||||
|
```
|
||||||
|
|
||||||
|
### Load Testing
|
||||||
|
|
||||||
|
**Test under production load**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install Artillery
|
||||||
|
npm install -g artillery
|
||||||
|
|
||||||
|
# Create load test
|
||||||
|
cat > load-test.yml <<'EOF'
|
||||||
|
config:
|
||||||
|
target: https://api.example.com
|
||||||
|
phases:
|
||||||
|
- duration: 300 # 5 minutes
|
||||||
|
arrivalRate: 50 # 50 requests/second
|
||||||
|
rampTo: 200 # Ramp to 200 req/sec
|
||||||
|
scenarios:
|
||||||
|
- flow:
|
||||||
|
- post:
|
||||||
|
url: /orders
|
||||||
|
json:
|
||||||
|
orderId: "{{ $randomString() }}"
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# Run load test
|
||||||
|
artillery run load-test.yml --output report.json
|
||||||
|
|
||||||
|
# Generate HTML report
|
||||||
|
artillery report report.json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Deployment Strategies
|
||||||
|
|
||||||
|
### All-at-Once Deployment
|
||||||
|
|
||||||
|
**Simple, fast, risky**:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# SAM template
|
||||||
|
Resources:
|
||||||
|
OrderFunction:
|
||||||
|
Type: AWS::Serverless::Function
|
||||||
|
Properties:
|
||||||
|
DeploymentPreference:
|
||||||
|
Type: AllAtOnce # Deploy immediately
|
||||||
|
```
|
||||||
|
|
||||||
|
**Use for**:
|
||||||
|
- Development environments
|
||||||
|
- Non-critical applications
|
||||||
|
- Quick hotfixes (with caution)
|
||||||
|
|
||||||
|
### Blue/Green Deployment
|
||||||
|
|
||||||
|
**Zero-downtime deployment**:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
Resources:
|
||||||
|
OrderFunction:
|
||||||
|
Type: AWS::Serverless::Function
|
||||||
|
Properties:
|
||||||
|
AutoPublishAlias: live
|
||||||
|
DeploymentPreference:
|
||||||
|
Type: Linear10PercentEvery1Minute
|
||||||
|
Alarms:
|
||||||
|
- !Ref ErrorAlarm
|
||||||
|
- !Ref LatencyAlarm
|
||||||
|
```
|
||||||
|
|
||||||
|
**Deployment types**:
|
||||||
|
- **Linear10PercentEvery1Minute**: 10% traffic shift every minute
|
||||||
|
- **Linear10PercentEvery2Minutes**: Slower, more conservative
|
||||||
|
- **Linear10PercentEvery3Minutes**: Even slower
|
||||||
|
- **Linear10PercentEvery10Minutes**: Very gradual
|
||||||
|
- **Canary10Percent5Minutes**: 10% for 5 min, then 100%
|
||||||
|
- **Canary10Percent10Minutes**: 10% for 10 min, then 100%
|
||||||
|
- **Canary10Percent30Minutes**: 10% for 30 min, then 100%
|
||||||
|
|
||||||
|
### Canary Deployment
|
||||||
|
|
||||||
|
**Test with subset of traffic**:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
Resources:
|
||||||
|
OrderFunction:
|
||||||
|
Type: AWS::Serverless::Function
|
||||||
|
Properties:
|
||||||
|
AutoPublishAlias: live
|
||||||
|
DeploymentPreference:
|
||||||
|
Type: Canary10Percent10Minutes
|
||||||
|
Alarms:
|
||||||
|
- !Ref ErrorAlarm
|
||||||
|
- !Ref LatencyAlarm
|
||||||
|
Hooks:
|
||||||
|
PreTraffic: !Ref PreTrafficHook
|
||||||
|
PostTraffic: !Ref PostTrafficHook
|
||||||
|
|
||||||
|
PreTrafficHook:
|
||||||
|
Type: AWS::Serverless::Function
|
||||||
|
Properties:
|
||||||
|
Handler: hooks.pre_traffic
|
||||||
|
Runtime: python3.12
|
||||||
|
# Runs before traffic shift
|
||||||
|
# Validates new version
|
||||||
|
|
||||||
|
PostTrafficHook:
|
||||||
|
Type: AWS::Serverless::Function
|
||||||
|
Properties:
|
||||||
|
Handler: hooks.post_traffic
|
||||||
|
Runtime: python3.12
|
||||||
|
# Runs after traffic shift
|
||||||
|
# Validates deployment success
|
||||||
|
```
|
||||||
|
|
||||||
|
**CDK with CodeDeploy**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import * as codedeploy from 'aws-cdk-lib/aws-codedeploy';
|
||||||
|
|
||||||
|
const alias = fn.currentVersion.addAlias('live');
|
||||||
|
|
||||||
|
new codedeploy.LambdaDeploymentGroup(this, 'DeploymentGroup', {
|
||||||
|
alias,
|
||||||
|
deploymentConfig: codedeploy.LambdaDeploymentConfig.CANARY_10PERCENT_10MINUTES,
|
||||||
|
alarms: [errorAlarm, latencyAlarm],
|
||||||
|
autoRollback: {
|
||||||
|
failedDeployment: true,
|
||||||
|
stoppedDeployment: true,
|
||||||
|
deploymentInAlarm: true,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Deployment Hooks
|
||||||
|
|
||||||
|
**Pre-traffic hook (validation)**:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# hooks.py
|
||||||
|
import json
import boto3
|
||||||
|
|
||||||
|
lambda_client = boto3.client('lambda')
|
||||||
|
codedeploy = boto3.client('codedeploy')
|
||||||
|
|
||||||
|
def pre_traffic(event, context):
|
||||||
|
"""
|
||||||
|
Validate new version before traffic shift
|
||||||
|
"""
|
||||||
|
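import os  # local import keeps this snippet self-contained

# The CodeDeploy hook event only carries DeploymentId and
# LifecycleEventHookExecutionId, so the function and version under test
# must be supplied through the hook function's environment variables.
function_name = os.environ['TARGET_FUNCTION_NAME']
version = os.environ['NEW_VERSION']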
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Invoke new version with test payload
|
||||||
|
response = lambda_client.invoke(
|
||||||
|
FunctionName=f"{function_name}:{version}",
|
||||||
|
InvocationType='RequestResponse',
|
||||||
|
Payload=json.dumps({'test': True})
|
||||||
|
)
|
||||||
|
|
||||||
|
# Validate response
|
||||||
|
if response['StatusCode'] == 200:
|
||||||
|
codedeploy.put_lifecycle_event_hook_execution_status(
|
||||||
|
deploymentId=event['DeploymentId'],
|
||||||
|
lifecycleEventHookExecutionId=event['LifecycleEventHookExecutionId'],
|
||||||
|
status='Succeeded'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise Exception('Validation failed')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f'Pre-traffic validation failed: {e}')
|
||||||
|
codedeploy.put_lifecycle_event_hook_execution_status(
|
||||||
|
deploymentId=event['DeploymentId'],
|
||||||
|
lifecycleEventHookExecutionId=event['LifecycleEventHookExecutionId'],
|
||||||
|
status='Failed'
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Post-traffic hook (verification)**:
|
||||||
|
|
||||||
|
```python
|
||||||
|
def post_traffic(event, context):
|
||||||
|
"""
|
||||||
|
Verify deployment success after traffic shift
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Check CloudWatch metrics
|
||||||
|
cloudwatch = boto3.client('cloudwatch')
|
||||||
|
|
||||||
|
metrics = cloudwatch.get_metric_statistics(
|
||||||
|
Namespace='AWS/Lambda',
|
||||||
|
MetricName='Errors',
|
||||||
|
Dimensions=[{'Name': 'FunctionName', 'Value': function_name}],
|
||||||
|
StartTime=deployment_start_time,
|
||||||
|
EndTime=datetime.utcnow(),
|
||||||
|
Period=300,
|
||||||
|
Statistics=['Sum']
|
||||||
|
)
|
||||||
|
|
||||||
|
# Validate no errors
|
||||||
|
total_errors = sum(point['Sum'] for point in metrics['Datapoints'])
|
||||||
|
|
||||||
|
if total_errors == 0:
|
||||||
|
codedeploy.put_lifecycle_event_hook_execution_status(
|
||||||
|
deploymentId=event['DeploymentId'],
|
||||||
|
lifecycleEventHookExecutionId=event['LifecycleEventHookExecutionId'],
|
||||||
|
status='Succeeded'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
raise Exception(f'{total_errors} errors detected')
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f'Post-traffic verification failed: {e}')
|
||||||
|
codedeploy.put_lifecycle_event_hook_execution_status(
|
||||||
|
deploymentId=event['DeploymentId'],
|
||||||
|
lifecycleEventHookExecutionId=event['LifecycleEventHookExecutionId'],
|
||||||
|
status='Failed'
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Rollback and Safety
|
||||||
|
|
||||||
|
### Automatic Rollback
|
||||||
|
|
||||||
|
**Configure rollback triggers**:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
DeploymentPreference:
|
||||||
|
Type: Canary10Percent10Minutes
|
||||||
|
Alarms:
|
||||||
|
- !Ref ErrorAlarm
|
||||||
|
- !Ref LatencyAlarm
|
||||||
|
# Automatically rolls back if alarms trigger
|
||||||
|
```
|
||||||
|
|
||||||
|
**Rollback scenarios**:
|
||||||
|
- CloudWatch alarm triggers during deployment
|
||||||
|
- Pre-traffic hook fails
|
||||||
|
- Post-traffic hook fails
|
||||||
|
- Deployment manually stopped
|
||||||
|
|
||||||
|
### CloudWatch Alarms for Deployment
|
||||||
|
|
||||||
|
**Critical alarms during deployment**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Error rate alarm
|
||||||
|
const errorAlarm = new cloudwatch.Alarm(this, 'ErrorAlarm', {
|
||||||
|
metric: fn.metricErrors({
|
||||||
|
statistic: 'Sum',
|
||||||
|
period: Duration.minutes(1),
|
||||||
|
}),
|
||||||
|
threshold: 5,
|
||||||
|
evaluationPeriods: 2,
|
||||||
|
treatMissingData: cloudwatch.TreatMissingData.NOT_BREACHING,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Duration alarm (regression)
|
||||||
|
const durationAlarm = new cloudwatch.Alarm(this, 'DurationAlarm', {
|
||||||
|
metric: fn.metricDuration({
|
||||||
|
statistic: 'Average',
|
||||||
|
period: Duration.minutes(1),
|
||||||
|
}),
|
||||||
|
threshold: previousAvgDuration * 1.2, // 20% increase
|
||||||
|
evaluationPeriods: 2,
|
||||||
|
comparisonOperator: cloudwatch.ComparisonOperator.GREATER_THAN_THRESHOLD,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Throttle alarm
|
||||||
|
const throttleAlarm = new cloudwatch.Alarm(this, 'ThrottleAlarm', {
|
||||||
|
metric: fn.metricThrottles({
|
||||||
|
statistic: 'Sum',
|
||||||
|
period: Duration.minutes(1),
|
||||||
|
}),
|
||||||
|
threshold: 1,
|
||||||
|
evaluationPeriods: 1,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Version Management
|
||||||
|
|
||||||
|
**Use Lambda versions and aliases**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const version = fn.currentVersion;
|
||||||
|
|
||||||
|
const prodAlias = version.addAlias('prod');
|
||||||
|
const devAlias = version.addAlias('dev');
|
||||||
|
|
||||||
|
// Gradual rollout with weighted aliases
|
||||||
|
new lambda.Alias(this, 'LiveAlias', {
|
||||||
|
aliasName: 'live',
|
||||||
|
version: newVersion,
|
||||||
|
additionalVersions: [
|
||||||
|
{ version: oldVersion, weight: 0.9 }, // 90% old
|
||||||
|
// 10% automatically goes to main version (new)
|
||||||
|
],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Best Practices Checklist
|
||||||
|
|
||||||
|
### Pre-Deployment
|
||||||
|
|
||||||
|
- [ ] Code review completed
|
||||||
|
- [ ] Unit tests passing
|
||||||
|
- [ ] Integration tests passing
|
||||||
|
- [ ] Security scan completed
|
||||||
|
- [ ] Dependencies updated
|
||||||
|
- [ ] Infrastructure validated (CDK synth, SAM validate)
|
||||||
|
- [ ] Environment variables configured
|
||||||
|
|
||||||
|
### Deployment
|
||||||
|
|
||||||
|
- [ ] Use IaC (SAM, CDK, Terraform)
|
||||||
|
- [ ] Separate environments (dev, staging, prod)
|
||||||
|
- [ ] Automate deployments via CI/CD
|
||||||
|
- [ ] Use gradual deployment (canary or linear)
|
||||||
|
- [ ] Configure CloudWatch alarms
|
||||||
|
- [ ] Enable automatic rollback
|
||||||
|
- [ ] Use deployment hooks for validation
|
||||||
|
|
||||||
|
### Post-Deployment
|
||||||
|
|
||||||
|
- [ ] Monitor CloudWatch metrics
|
||||||
|
- [ ] Check CloudWatch Logs for errors
|
||||||
|
- [ ] Verify X-Ray traces
|
||||||
|
- [ ] Validate business metrics
|
||||||
|
- [ ] Check alarm status
|
||||||
|
- [ ] Review deployment logs
|
||||||
|
- [ ] Document any issues
|
||||||
|
|
||||||
|
### Rollback Preparation
|
||||||
|
|
||||||
|
- [ ] Keep previous version available
|
||||||
|
- [ ] Document rollback procedure
|
||||||
|
- [ ] Test rollback in non-prod
|
||||||
|
- [ ] Configure automatic rollback
|
||||||
|
- [ ] Monitor during rollback
|
||||||
|
- [ ] Communication plan for rollback
|
||||||
|
|
||||||
|
## Deployment Patterns
|
||||||
|
|
||||||
|
### Multi-Region Deployment
|
||||||
|
|
||||||
|
**Active-Passive**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Primary region
|
||||||
|
new ServerlessStack(app, 'PrimaryStack', {
|
||||||
|
env: { region: 'us-east-1' },
|
||||||
|
isPrimary: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Secondary region (standby)
|
||||||
|
new ServerlessStack(app, 'SecondaryStack', {
|
||||||
|
env: { region: 'us-west-2' },
|
||||||
|
isPrimary: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Route 53 health check and failover
|
||||||
|
const healthCheck = new route53.CfnHealthCheck(this, 'HealthCheck', {
|
||||||
|
type: 'HTTPS',
|
||||||
|
resourcePath: '/health',
|
||||||
|
fullyQualifiedDomainName: 'api.example.com',
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Active-Active**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Deploy to multiple regions
|
||||||
|
const regions = ['us-east-1', 'us-west-2', 'eu-west-1'];
|
||||||
|
|
||||||
|
for (const region of regions) {
|
||||||
|
new ServerlessStack(app, `Stack-${region}`, {
|
||||||
|
env: { region },
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Route 53 geolocation routing
|
||||||
|
new route53.ARecord(this, 'GeoRecord', {
|
||||||
|
zone: hostedZone,
|
||||||
|
recordName: 'api',
|
||||||
|
target: route53.RecordTarget.fromAlias(
|
||||||
|
new targets.ApiGatewayDomain(domain)
|
||||||
|
),
|
||||||
|
geoLocation: route53.GeoLocation.country('US'),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Feature Flags with AppConfig
|
||||||
|
|
||||||
|
**Safe feature rollout**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { AppConfigData } from '@aws-sdk/client-appconfigdata';
|
||||||
|
|
||||||
|
const appconfig = new AppConfigData({});
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
// Fetch feature flags
|
||||||
|
const config = await appconfig.getLatestConfiguration({
|
||||||
|
ConfigurationToken: token,
|
||||||
|
});
|
||||||
|
|
||||||
|
const features = JSON.parse(Buffer.from(config.Configuration!).toString());
|
||||||
|
|
||||||
|
if (features.newFeatureEnabled) {
|
||||||
|
return newFeatureHandler(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
return legacyHandler(event);
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
- **IaC**: Use SAM or CDK for all deployments
|
||||||
|
- **Environments**: Separate dev, staging, production
|
||||||
|
- **CI/CD**: Automate build, test, and deployment
|
||||||
|
- **Testing**: Unit, integration, and load testing
|
||||||
|
- **Gradual Deployment**: Use canary or linear for production
|
||||||
|
- **Alarms**: Configure and monitor during deployment
|
||||||
|
- **Rollback**: Enable automatic rollback on failures
|
||||||
|
- **Hooks**: Validate before and after traffic shifts
|
||||||
|
- **Versioning**: Use Lambda versions and aliases
|
||||||
|
- **Multi-Region**: Plan for disaster recovery
|
||||||
1002
skills/aws-serverless-eda/references/eda-patterns.md
Normal file
1002
skills/aws-serverless-eda/references/eda-patterns.md
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,770 @@
|
|||||||
|
# Serverless Observability Best Practices
|
||||||
|
|
||||||
|
Comprehensive observability patterns for serverless applications based on AWS best practices.
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
- [Three Pillars of Observability](#three-pillars-of-observability)
|
||||||
|
- [Metrics](#metrics)
|
||||||
|
- [Logging](#logging)
|
||||||
|
- [Tracing](#tracing)
|
||||||
|
- [Unified Observability](#unified-observability)
|
||||||
|
- [Alerting](#alerting)
|
||||||
|
|
||||||
|
## Three Pillars of Observability
|
||||||
|
|
||||||
|
### Metrics
|
||||||
|
**Numeric data measured at intervals (time series)**
|
||||||
|
- Request rate, error rate, duration
|
||||||
|
- CPU%, memory%, disk%
|
||||||
|
- Custom business metrics
|
||||||
|
- Service Level Indicators (SLIs)
|
||||||
|
|
||||||
|
### Logs
|
||||||
|
**Timestamped records of discrete events**
|
||||||
|
- Application events and errors
|
||||||
|
- State transformations
|
||||||
|
- Debugging information
|
||||||
|
- Audit trails
|
||||||
|
|
||||||
|
### Traces
|
||||||
|
**Single user's journey across services**
|
||||||
|
- Request flow through distributed system
|
||||||
|
- Service dependencies
|
||||||
|
- Latency breakdown
|
||||||
|
- Error propagation
|
||||||
|
|
||||||
|
## Metrics
|
||||||
|
|
||||||
|
### CloudWatch Metrics for Lambda
|
||||||
|
|
||||||
|
**Out-of-the-box metrics** (automatically available):
|
||||||
|
```
|
||||||
|
- Invocations
|
||||||
|
- Errors
|
||||||
|
- Throttles
|
||||||
|
- Duration
|
||||||
|
- ConcurrentExecutions
|
||||||
|
- IteratorAge (for streams)
|
||||||
|
```
|
||||||
|
|
||||||
|
**CDK Configuration**:
|
||||||
|
```typescript
|
||||||
|
const fn = new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Create alarms on metrics
|
||||||
|
new cloudwatch.Alarm(this, 'ErrorAlarm', {
|
||||||
|
metric: fn.metricErrors({
|
||||||
|
statistic: 'Sum',
|
||||||
|
period: Duration.minutes(5),
|
||||||
|
}),
|
||||||
|
threshold: 10,
|
||||||
|
evaluationPeriods: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
new cloudwatch.Alarm(this, 'DurationAlarm', {
|
||||||
|
metric: fn.metricDuration({
|
||||||
|
statistic: 'p99',
|
||||||
|
period: Duration.minutes(5),
|
||||||
|
}),
|
||||||
|
threshold: 1000, // 1 second
|
||||||
|
evaluationPeriods: 2,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Custom Metrics
|
||||||
|
|
||||||
|
**Use CloudWatch Embedded Metric Format (EMF)**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await processOrder(event);
|
||||||
|
|
||||||
|
// Emit custom metrics
|
||||||
|
console.log(JSON.stringify({
|
||||||
|
_aws: {
|
||||||
|
Timestamp: Date.now(),
|
||||||
|
CloudWatchMetrics: [{
|
||||||
|
Namespace: 'MyApp/Orders',
|
||||||
|
Dimensions: [['ServiceName', 'Operation']],
|
||||||
|
Metrics: [
|
||||||
|
{ Name: 'ProcessingTime', Unit: 'Milliseconds' },
|
||||||
|
{ Name: 'OrderValue', Unit: 'None' },
|
||||||
|
],
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
ServiceName: 'OrderService',
|
||||||
|
Operation: 'ProcessOrder',
|
||||||
|
ProcessingTime: Date.now() - startTime,
|
||||||
|
OrderValue: result.amount,
|
||||||
|
}));
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
// Emit error metric
|
||||||
|
console.log(JSON.stringify({
|
||||||
|
_aws: {
Timestamp: Date.now(),
|
||||||
|
CloudWatchMetrics: [{
|
||||||
|
Namespace: 'MyApp/Orders',
|
||||||
|
Dimensions: [['ServiceName']],
|
||||||
|
Metrics: [{ Name: 'Errors', Unit: 'Count' }],
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
ServiceName: 'OrderService',
|
||||||
|
Errors: 1,
|
||||||
|
}));
|
||||||
|
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**Using Lambda Powertools**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { Metrics, MetricUnits } from '@aws-lambda-powertools/metrics';
|
||||||
|
|
||||||
|
const metrics = new Metrics({
|
||||||
|
namespace: 'MyApp',
|
||||||
|
serviceName: 'OrderService',
|
||||||
|
});
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
metrics.addMetric('Invocation', MetricUnits.Count, 1);
|
||||||
|
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await processOrder(event);
|
||||||
|
|
||||||
|
metrics.addMetric('Success', MetricUnits.Count, 1);
|
||||||
|
metrics.addMetric('ProcessingTime', MetricUnits.Milliseconds, Date.now() - startTime);
|
||||||
|
metrics.addMetric('OrderValue', MetricUnits.None, result.amount);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
metrics.addMetric('Error', MetricUnits.Count, 1);
|
||||||
|
throw error;
|
||||||
|
} finally {
|
||||||
|
metrics.publishStoredMetrics();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
## Logging
|
||||||
|
|
||||||
|
### Structured Logging
|
||||||
|
|
||||||
|
**Use JSON format for logs**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Structured JSON logging
|
||||||
|
export const handler = async (event: any, context: Context) => {
const startTime = Date.now();
|
||||||
|
console.log(JSON.stringify({
|
||||||
|
level: 'INFO',
|
||||||
|
message: 'Processing order',
|
||||||
|
orderId: event.orderId,
|
||||||
|
customerId: event.customerId,
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
requestId: context.awsRequestId,
|
||||||
|
}));
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await processOrder(event);
|
||||||
|
|
||||||
|
console.log(JSON.stringify({
|
||||||
|
level: 'INFO',
|
||||||
|
message: 'Order processed successfully',
|
||||||
|
orderId: event.orderId,
|
||||||
|
duration: Date.now() - startTime,
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
console.error(JSON.stringify({
|
||||||
|
level: 'ERROR',
|
||||||
|
message: 'Order processing failed',
|
||||||
|
orderId: event.orderId,
|
||||||
|
error: {
|
||||||
|
name: error.name,
|
||||||
|
message: error.message,
|
||||||
|
stack: error.stack,
|
||||||
|
},
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
}));
|
||||||
|
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// ❌ BAD - Unstructured logging
|
||||||
|
console.log('Processing order ' + orderId + ' for customer ' + customerId);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Using Lambda Powertools Logger**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { Logger } from '@aws-lambda-powertools/logger';
|
||||||
|
|
||||||
|
const logger = new Logger({
|
||||||
|
serviceName: 'OrderService',
|
||||||
|
logLevel: 'INFO',
|
||||||
|
});
|
||||||
|
|
||||||
|
export const handler = async (event: any, context: Context) => {
|
||||||
|
logger.addContext(context);
|
||||||
|
|
||||||
|
logger.info('Processing order', {
|
||||||
|
orderId: event.orderId,
|
||||||
|
customerId: event.customerId,
|
||||||
|
});
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await processOrder(event);
|
||||||
|
|
||||||
|
logger.info('Order processed', {
|
||||||
|
orderId: event.orderId,
|
||||||
|
amount: result.amount,
|
||||||
|
});
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('Order processing failed', {
|
||||||
|
orderId: event.orderId,
|
||||||
|
error,
|
||||||
|
});
|
||||||
|
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Log Levels
|
||||||
|
|
||||||
|
**Use appropriate log levels**:
|
||||||
|
- **ERROR**: Errors requiring immediate attention
|
||||||
|
- **WARN**: Warnings or recoverable errors
|
||||||
|
- **INFO**: Important business events
|
||||||
|
- **DEBUG**: Detailed debugging information (disable in production)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const logger = new Logger({
|
||||||
|
serviceName: 'OrderService',
|
||||||
|
logLevel: process.env.LOG_LEVEL || 'INFO',
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.debug('Detailed processing info', { data });
|
||||||
|
logger.info('Business event occurred', { event });
|
||||||
|
logger.warn('Recoverable error', { error });
|
||||||
|
logger.error('Critical failure', { error });
|
||||||
|
```
|
||||||
|
|
||||||
|
### Log Insights Queries
|
||||||
|
|
||||||
|
**Common CloudWatch Logs Insights queries**:
|
||||||
|
|
||||||
|
```
|
||||||
|
# Find errors in last hour
|
||||||
|
fields @timestamp, @message, level, error.message
|
||||||
|
| filter level = "ERROR"
|
||||||
|
| sort @timestamp desc
|
||||||
|
| limit 100
|
||||||
|
|
||||||
|
# Count errors by type
|
||||||
|
stats count(*) as errorCount by error.name
|
||||||
|
| sort errorCount desc
|
||||||
|
|
||||||
|
# Calculate p99 latency
|
||||||
|
stats pct(duration, 99) by serviceName
|
||||||
|
|
||||||
|
# Find slow requests
|
||||||
|
fields @timestamp, orderId, duration
|
||||||
|
| filter duration > 1000
|
||||||
|
| sort duration desc
|
||||||
|
| limit 50
|
||||||
|
|
||||||
|
# Track specific customer requests
|
||||||
|
fields @timestamp, @message, orderId
|
||||||
|
| filter customerId = "customer-123"
|
||||||
|
| sort @timestamp desc
|
||||||
|
```
|
||||||
|
|
||||||
|
## Tracing
|
||||||
|
|
||||||
|
### Enable X-Ray Tracing
|
||||||
|
|
||||||
|
**Configure X-Ray for Lambda**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const fn = new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
tracing: lambda.Tracing.ACTIVE, // Enable X-Ray
|
||||||
|
});
|
||||||
|
|
||||||
|
// API Gateway tracing
|
||||||
|
const api = new apigateway.RestApi(this, 'Api', {
|
||||||
|
deployOptions: {
|
||||||
|
tracingEnabled: true,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Step Functions tracing
|
||||||
|
new stepfunctions.StateMachine(this, 'StateMachine', {
|
||||||
|
definition,
|
||||||
|
tracingEnabled: true,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Instrument application code**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { captureAWSv3Client } from 'aws-xray-sdk-core';
|
||||||
|
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
|
||||||
|
|
||||||
|
// Wrap AWS SDK clients
|
||||||
|
const client = captureAWSv3Client(new DynamoDBClient({}));
|
||||||
|
|
||||||
|
// Custom segments
|
||||||
|
import AWSXRay from 'aws-xray-sdk-core';
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const segment = AWSXRay.getSegment();
|
||||||
|
|
||||||
|
// Custom subsegment
|
||||||
|
const subsegment = segment.addNewSubsegment('ProcessOrder');
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Add annotations (indexed for filtering)
|
||||||
|
subsegment.addAnnotation('orderId', event.orderId);
|
||||||
|
subsegment.addAnnotation('customerId', event.customerId);
|
||||||
|
|
||||||
|
// Add metadata (not indexed, detailed info)
|
||||||
|
subsegment.addMetadata('orderDetails', event);
|
||||||
|
|
||||||
|
const result = await processOrder(event);
|
||||||
|
|
||||||
|
subsegment.addAnnotation('status', 'success');
|
||||||
|
subsegment.close();
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
subsegment.addError(error);
|
||||||
|
subsegment.close();
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**Using Lambda Powertools Tracer**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { Tracer } from '@aws-lambda-powertools/tracer';
|
||||||
|
|
||||||
|
const tracer = new Tracer({ serviceName: 'OrderService' });
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const segment = tracer.getSegment();
|
||||||
|
|
||||||
|
// Automatically captures and traces
|
||||||
|
const result = await tracer.captureAWSv3Client(dynamodb).getItem({
|
||||||
|
TableName: process.env.TABLE_NAME,
|
||||||
|
Key: { orderId: event.orderId },
|
||||||
|
});
|
||||||
|
|
||||||
|
// Custom annotation
|
||||||
|
tracer.putAnnotation('orderId', event.orderId);
|
||||||
|
tracer.putMetadata('orderDetails', event);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Service Map
|
||||||
|
|
||||||
|
**Visualize service dependencies** with X-Ray:
|
||||||
|
- Shows service-to-service communication
|
||||||
|
- Identifies latency bottlenecks
|
||||||
|
- Highlights error rates between services
|
||||||
|
- Tracks downstream dependencies
|
||||||
|
|
||||||
|
### Distributed Tracing Best Practices
|
||||||
|
|
||||||
|
1. **Enable tracing everywhere**: Lambda, API Gateway, Step Functions
|
||||||
|
2. **Use annotations for filtering**: Indexed fields for queries
|
||||||
|
3. **Use metadata for details**: Non-indexed detailed information
|
||||||
|
4. **Sample appropriately**: 100% for low traffic, sampled for high traffic
|
||||||
|
5. **Correlate with logs**: Include trace ID in log entries
|
||||||
|
|
||||||
|
## Unified Observability
|
||||||
|
|
||||||
|
### Correlation Between Pillars
|
||||||
|
|
||||||
|
**Include trace ID in logs**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
export const handler = async (event: any, context: Context) => {
|
||||||
|
const traceId = process.env._X_AMZN_TRACE_ID;
|
||||||
|
|
||||||
|
console.log(JSON.stringify({
|
||||||
|
level: 'INFO',
|
||||||
|
message: 'Processing order',
|
||||||
|
traceId,
|
||||||
|
requestId: context.awsRequestId,
|
||||||
|
orderId: event.orderId,
|
||||||
|
}));
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### CloudWatch ServiceLens
|
||||||
|
|
||||||
|
**Unified view of traces and metrics**:
|
||||||
|
- Automatically correlates X-Ray traces with CloudWatch metrics
|
||||||
|
- Shows service map with metrics overlay
|
||||||
|
- Identifies performance and availability issues
|
||||||
|
- Provides end-to-end request view
|
||||||
|
|
||||||
|
### Lambda Powertools Integration
|
||||||
|
|
||||||
|
**All three pillars in one**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { Logger } from '@aws-lambda-powertools/logger';
|
||||||
|
import { Tracer } from '@aws-lambda-powertools/tracer';
|
||||||
|
import { Metrics, MetricUnits } from '@aws-lambda-powertools/metrics';
|
||||||
|
|
||||||
|
const logger = new Logger({ serviceName: 'OrderService' });
|
||||||
|
const tracer = new Tracer({ serviceName: 'OrderService' });
|
||||||
|
const metrics = new Metrics({ namespace: 'MyApp', serviceName: 'OrderService' });
|
||||||
|
|
||||||
|
export const handler = async (event: any, context: Context) => {
|
||||||
|
// Automatically adds trace context to logs
|
||||||
|
logger.addContext(context);
|
||||||
|
|
||||||
|
logger.info('Processing order', { orderId: event.orderId });
|
||||||
|
|
||||||
|
// Add trace annotations
|
||||||
|
tracer.putAnnotation('orderId', event.orderId);
|
||||||
|
|
||||||
|
// Add metrics
|
||||||
|
metrics.addMetric('Invocation', MetricUnits.Count, 1);
|
||||||
|
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await processOrder(event);
|
||||||
|
|
||||||
|
metrics.addMetric('Success', MetricUnits.Count, 1);
|
||||||
|
metrics.addMetric('Duration', MetricUnits.Milliseconds, Date.now() - startTime);
|
||||||
|
|
||||||
|
logger.info('Order processed', { orderId: event.orderId });
|
||||||
|
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
metrics.addMetric('Error', MetricUnits.Count, 1);
|
||||||
|
logger.error('Processing failed', { orderId: event.orderId, error });
|
||||||
|
throw error;
|
||||||
|
} finally {
|
||||||
|
metrics.publishStoredMetrics();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
## Alerting
|
||||||
|
|
||||||
|
### Effective Alerting Strategy
|
||||||
|
|
||||||
|
**Alert on what matters**:
|
||||||
|
- **Critical**: Customer-impacting issues (errors, high latency)
|
||||||
|
- **Warning**: Approaching thresholds (80% capacity)
|
||||||
|
- **Info**: Trends and anomalies (cost spikes)
|
||||||
|
|
||||||
|
**Alarm fatigue prevention**:
|
||||||
|
- Tune thresholds based on actual patterns
|
||||||
|
- Use composite alarms to reduce noise
|
||||||
|
- Set appropriate evaluation periods
|
||||||
|
- Include clear remediation steps
|
||||||
|
|
||||||
|
### CloudWatch Alarms
|
||||||
|
|
||||||
|
**Common alarm patterns**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Error rate alarm
|
||||||
|
new cloudwatch.Alarm(this, 'ErrorRateAlarm', {
|
||||||
|
metric: new cloudwatch.MathExpression({
|
||||||
|
expression: 'errors / invocations * 100',
|
||||||
|
usingMetrics: {
|
||||||
|
errors: fn.metricErrors({ statistic: 'Sum' }),
|
||||||
|
invocations: fn.metricInvocations({ statistic: 'Sum' }),
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
threshold: 1, // 1% error rate
|
||||||
|
evaluationPeriods: 2,
|
||||||
|
alarmDescription: 'Error rate exceeded 1%',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Latency alarm (p99)
|
||||||
|
new cloudwatch.Alarm(this, 'LatencyAlarm', {
|
||||||
|
metric: fn.metricDuration({
|
||||||
|
statistic: 'p99',
|
||||||
|
period: Duration.minutes(5),
|
||||||
|
}),
|
||||||
|
threshold: 1000, // 1 second
|
||||||
|
evaluationPeriods: 2,
|
||||||
|
alarmDescription: 'p99 latency exceeded 1 second',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Concurrent executions approaching limit
|
||||||
|
new cloudwatch.Alarm(this, 'ConcurrencyAlarm', {
|
||||||
|
metric: fn.metricConcurrentExecutions({
|
||||||
|
statistic: 'Maximum',
|
||||||
|
}),
|
||||||
|
threshold: 800, // 80% of 1000 default limit
|
||||||
|
evaluationPeriods: 1,
|
||||||
|
alarmDescription: 'Approaching concurrency limit',
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Composite Alarms
|
||||||
|
|
||||||
|
**Reduce alert noise**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const errorAlarm = new cloudwatch.Alarm(this, 'Errors', {
|
||||||
|
metric: fn.metricErrors(),
|
||||||
|
threshold: 10,
|
||||||
|
evaluationPeriods: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
const throttleAlarm = new cloudwatch.Alarm(this, 'Throttles', {
|
||||||
|
metric: fn.metricThrottles(),
|
||||||
|
threshold: 5,
|
||||||
|
evaluationPeriods: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
const latencyAlarm = new cloudwatch.Alarm(this, 'Latency', {
|
||||||
|
metric: fn.metricDuration({ statistic: 'p99' }),
|
||||||
|
threshold: 2000,
|
||||||
|
evaluationPeriods: 2,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Composite alarm (any of the above)
|
||||||
|
new cloudwatch.CompositeAlarm(this, 'ServiceHealthAlarm', {
|
||||||
|
compositeAlarmName: 'order-service-health',
|
||||||
|
alarmRule: cloudwatch.AlarmRule.anyOf(
|
||||||
|
errorAlarm,
|
||||||
|
throttleAlarm,
|
||||||
|
latencyAlarm
|
||||||
|
),
|
||||||
|
alarmDescription: 'Overall service health degraded',
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Dashboard Best Practices
|
||||||
|
|
||||||
|
### Service Dashboard Layout
|
||||||
|
|
||||||
|
**Recommended sections**:
|
||||||
|
|
||||||
|
1. **Overview**:
|
||||||
|
- Total invocations
|
||||||
|
- Error rate percentage
|
||||||
|
- P50, P95, P99 latency
|
||||||
|
- Availability percentage
|
||||||
|
|
||||||
|
2. **Resource Utilization**:
|
||||||
|
- Concurrent executions
|
||||||
|
- Memory utilization
|
||||||
|
- Duration distribution
|
||||||
|
- Throttles
|
||||||
|
|
||||||
|
3. **Business Metrics**:
|
||||||
|
- Orders processed
|
||||||
|
- Revenue per minute
|
||||||
|
- Customer activity
|
||||||
|
- Feature usage
|
||||||
|
|
||||||
|
4. **Errors and Alerts**:
|
||||||
|
- Error count by type
|
||||||
|
- Active alarms
|
||||||
|
- DLQ message count
|
||||||
|
- Failed transactions
|
||||||
|
|
||||||
|
### CloudWatch Dashboard CDK
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const dashboard = new cloudwatch.Dashboard(this, 'ServiceDashboard', {
|
||||||
|
dashboardName: 'order-service',
|
||||||
|
});
|
||||||
|
|
||||||
|
dashboard.addWidgets(
|
||||||
|
// Row 1: Overview
|
||||||
|
new cloudwatch.GraphWidget({
|
||||||
|
title: 'Invocations',
|
||||||
|
left: [fn.metricInvocations()],
|
||||||
|
}),
|
||||||
|
new cloudwatch.SingleValueWidget({
|
||||||
|
title: 'Error Rate',
|
||||||
|
metrics: [
|
||||||
|
new cloudwatch.MathExpression({
|
||||||
|
expression: 'errors / invocations * 100',
|
||||||
|
usingMetrics: {
|
||||||
|
errors: fn.metricErrors({ statistic: 'Sum' }),
|
||||||
|
invocations: fn.metricInvocations({ statistic: 'Sum' }),
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
new cloudwatch.GraphWidget({
|
||||||
|
title: 'Latency (p50, p95, p99)',
|
||||||
|
left: [
|
||||||
|
fn.metricDuration({ statistic: 'p50', label: 'p50' }),
|
||||||
|
fn.metricDuration({ statistic: 'p95', label: 'p95' }),
|
||||||
|
fn.metricDuration({ statistic: 'p99', label: 'p99' }),
|
||||||
|
],
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
// Row 2: Errors
|
||||||
|
dashboard.addWidgets(
|
||||||
|
new cloudwatch.LogQueryWidget({
|
||||||
|
title: 'Recent Errors',
|
||||||
|
logGroupNames: [fn.logGroup.logGroupName],
|
||||||
|
queryLines: [
|
||||||
|
'fields @timestamp, @message',
|
||||||
|
'filter level = "ERROR"',
|
||||||
|
'sort @timestamp desc',
|
||||||
|
'limit 20',
|
||||||
|
],
|
||||||
|
})
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring Serverless Architectures
|
||||||
|
|
||||||
|
### End-to-End Monitoring
|
||||||
|
|
||||||
|
**Monitor the entire flow**:
|
||||||
|
|
||||||
|
```
|
||||||
|
API Gateway → Lambda → DynamoDB → EventBridge → Lambda
|
||||||
|
↓ ↓ ↓ ↓ ↓
|
||||||
|
Metrics Traces Metrics Metrics Logs
|
||||||
|
```
|
||||||
|
|
||||||
|
**Key metrics per service**:
|
||||||
|
|
||||||
|
| Service | Key Metrics |
|
||||||
|
|---------|-------------|
|
||||||
|
| API Gateway | Count, 4XXError, 5XXError, Latency, CacheHitCount |
|
||||||
|
| Lambda | Invocations, Errors, Duration, Throttles, ConcurrentExecutions |
|
||||||
|
| DynamoDB | ConsumedReadCapacityUnits, ConsumedWriteCapacityUnits, UserErrors, SystemErrors |
|
||||||
|
| SQS | NumberOfMessagesSent, NumberOfMessagesReceived, ApproximateAgeOfOldestMessage |
|
||||||
|
| EventBridge | Invocations, FailedInvocations, TriggeredRules |
|
||||||
|
| Step Functions | ExecutionsStarted, ExecutionsFailed, ExecutionTime |
|
||||||
|
|
||||||
|
### Synthetic Monitoring
|
||||||
|
|
||||||
|
**Use CloudWatch Synthetics for API monitoring**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import * as synthetics from '@aws-cdk/aws-synthetics-alpha';
import { Canary, Test, Code, Schedule } from '@aws-cdk/aws-synthetics-alpha';
|
||||||
|
|
||||||
|
new Canary(this, 'ApiCanary', {
|
||||||
|
canaryName: 'api-health-check',
|
||||||
|
schedule: Schedule.rate(Duration.minutes(5)),
|
||||||
|
test: Test.custom({
|
||||||
|
code: Code.fromInline(`
|
||||||
|
const synthetics = require('Synthetics');
|
||||||
|
|
||||||
|
const apiCanaryBlueprint = async function () {
|
||||||
|
const response = await synthetics.executeHttpStep('Verify API', {
|
||||||
|
url: 'https://api.example.com/health',
|
||||||
|
method: 'GET',
|
||||||
|
});
|
||||||
|
|
||||||
|
return response.statusCode === 200 ? 'success' : 'failure';
|
||||||
|
};
|
||||||
|
|
||||||
|
exports.handler = async () => {
|
||||||
|
return await apiCanaryBlueprint();
|
||||||
|
};
|
||||||
|
`),
|
||||||
|
handler: 'index.handler',
|
||||||
|
}),
|
||||||
|
runtime: synthetics.Runtime.SYNTHETICS_NODEJS_PUPPETEER_6_2,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## OpenTelemetry Integration
|
||||||
|
|
||||||
|
### Amazon Distro for OpenTelemetry (ADOT)
|
||||||
|
|
||||||
|
**Use ADOT for vendor-neutral observability**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Lambda Layer with ADOT
|
||||||
|
const adotLayer = lambda.LayerVersion.fromLayerVersionArn(
|
||||||
|
this,
|
||||||
|
'AdotLayer',
|
||||||
|
`arn:aws:lambda:${this.region}:901920570463:layer:aws-otel-nodejs-amd64-ver-1-18-1:4`
|
||||||
|
);
|
||||||
|
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
layers: [adotLayer],
|
||||||
|
tracing: lambda.Tracing.ACTIVE,
|
||||||
|
environment: {
|
||||||
|
AWS_LAMBDA_EXEC_WRAPPER: '/opt/otel-handler',
|
||||||
|
OPENTELEMETRY_COLLECTOR_CONFIG_FILE: '/var/task/collector.yaml',
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits of ADOT**:
|
||||||
|
- Vendor-neutral (works with Datadog, New Relic, Honeycomb, etc.)
|
||||||
|
- Automatic instrumentation
|
||||||
|
- Consistent format across services
|
||||||
|
- Export to multiple backends
|
||||||
|
|
||||||
|
## Best Practices Summary
|
||||||
|
|
||||||
|
### Metrics
|
||||||
|
- ✅ Use CloudWatch Embedded Metric Format (EMF)
|
||||||
|
- ✅ Track business metrics, not just technical metrics
|
||||||
|
- ✅ Set alarms on error rate, latency, and throughput
|
||||||
|
- ✅ Use p99 for latency, not average
|
||||||
|
- ✅ Create dashboards for key services
|
||||||
|
|
||||||
|
### Logging
|
||||||
|
- ✅ Use structured JSON logging
|
||||||
|
- ✅ Include correlation IDs (request ID, trace ID)
|
||||||
|
- ✅ Use appropriate log levels
|
||||||
|
- ✅ Never log sensitive data (PII, secrets)
|
||||||
|
- ✅ Use CloudWatch Logs Insights for analysis
|
||||||
|
|
||||||
|
### Tracing
|
||||||
|
- ✅ Enable X-Ray tracing on all services
|
||||||
|
- ✅ Instrument AWS SDK calls
|
||||||
|
- ✅ Add custom annotations for business context
|
||||||
|
- ✅ Use service map to understand dependencies
|
||||||
|
- ✅ Correlate traces with logs and metrics
|
||||||
|
|
||||||
|
### Alerting
|
||||||
|
- ✅ Alert on customer-impacting issues
|
||||||
|
- ✅ Tune thresholds to reduce false positives
|
||||||
|
- ✅ Use composite alarms to reduce noise
|
||||||
|
- ✅ Include clear remediation steps
|
||||||
|
- ✅ Escalate critical alarms appropriately
|
||||||
|
|
||||||
|
### Tools
|
||||||
|
- ✅ Use Lambda Powertools for unified observability
|
||||||
|
- ✅ Use CloudWatch ServiceLens for service view
|
||||||
|
- ✅ Use Synthetics for proactive monitoring
|
||||||
|
- ✅ Consider ADOT for vendor-neutral observability
|
||||||
671
skills/aws-serverless-eda/references/performance-optimization.md
Normal file
671
skills/aws-serverless-eda/references/performance-optimization.md
Normal file
@@ -0,0 +1,671 @@
|
|||||||
|
# Serverless Performance Optimization
|
||||||
|
|
||||||
|
Performance optimization best practices for AWS Lambda and serverless architectures.
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
- [Lambda Execution Lifecycle](#lambda-execution-lifecycle)
|
||||||
|
- [Cold Start Optimization](#cold-start-optimization)
|
||||||
|
- [Memory and CPU Optimization](#memory-and-cpu-optimization)
|
||||||
|
- [Package Size Optimization](#package-size-optimization)
|
||||||
|
- [Initialization Optimization](#initialization-optimization)
|
||||||
|
- [Runtime Performance](#runtime-performance)
|
||||||
|
|
||||||
|
## Lambda Execution Lifecycle
|
||||||
|
|
||||||
|
### Execution Environment Phases
|
||||||
|
|
||||||
|
**Three phases of Lambda execution**:
|
||||||
|
|
||||||
|
1. **Init Phase** (Cold Start):
|
||||||
|
- Download and unpack function package
|
||||||
|
- Create execution environment
|
||||||
|
- Initialize runtime
|
||||||
|
- Execute initialization code (outside handler)
|
||||||
|
|
||||||
|
2. **Invoke Phase**:
|
||||||
|
- Execute handler code
|
||||||
|
- Return response
|
||||||
|
- Freeze execution environment
|
||||||
|
|
||||||
|
3. **Shutdown Phase**:
|
||||||
|
- Runtime shutdown (after period of inactivity)
|
||||||
|
- Execution environment destroyed
|
||||||
|
|
||||||
|
### Concurrency and Scaling
|
||||||
|
|
||||||
|
**Key concepts**:
|
||||||
|
- **Concurrency**: Number of execution environments serving requests simultaneously
|
||||||
|
- **One event per environment**: Each environment processes one event at a time
|
||||||
|
- **Automatic scaling**: Lambda creates new environments as needed
|
||||||
|
- **Environment reuse**: Warm starts reuse existing environments
|
||||||
|
|
||||||
|
**Example**:
|
||||||
|
- Function takes 100ms to execute
|
||||||
|
- Single environment can handle 10 requests/second
|
||||||
|
- 100 requests per second = 10 environments needed (10 concurrent executions)
|
||||||
|
- Default account limit: 1,000 concurrent executions (can be raised)
|
||||||
|
|
||||||
|
## Cold Start Optimization
|
||||||
|
|
||||||
|
### Understanding Cold Starts
|
||||||
|
|
||||||
|
**Cold start components**:
|
||||||
|
```
|
||||||
|
Total Cold Start = Download Package + Init Environment + Init Code + Handler
|
||||||
|
```
|
||||||
|
|
||||||
|
**Cold start frequency**:
|
||||||
|
- Development: Every code change creates new environments (frequent)
|
||||||
|
- Production: Typically < 1% of invocations
|
||||||
|
- Optimize for p95/p99 latency, not average
|
||||||
|
|
||||||
|
### Package Size Optimization
|
||||||
|
|
||||||
|
**Minimize deployment package**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
bundling: {
|
||||||
|
minify: true, // Minify production code
|
||||||
|
sourceMap: false, // Disable in production
|
||||||
|
externalModules: [
|
||||||
|
'@aws-sdk/*', // Use AWS SDK from runtime
|
||||||
|
],
|
||||||
|
// Tree-shaking removes unused code
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Tools for optimization**:
|
||||||
|
- **esbuild**: Automatic tree-shaking and minification
|
||||||
|
- **Webpack**: Bundle optimization
|
||||||
|
- **Maven**: Dependency analysis
|
||||||
|
- **Gradle**: Unused dependency detection
|
||||||
|
|
||||||
|
**Best practices**:
|
||||||
|
1. Avoid monolithic functions
|
||||||
|
2. Bundle only required dependencies
|
||||||
|
3. Use tree-shaking to remove unused code
|
||||||
|
4. Minify production code
|
||||||
|
5. Exclude AWS SDK (provided by runtime)
|
||||||
|
|
||||||
|
### Provisioned Concurrency
|
||||||
|
|
||||||
|
**Pre-initialize environments for predictable latency**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const fn = new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Static provisioned concurrency
|
||||||
|
fn.addAlias('live', {
|
||||||
|
provisionedConcurrentExecutions: 10,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Auto-scaling provisioned concurrency
|
||||||
|
const alias = fn.addAlias('prod');
|
||||||
|
|
||||||
|
const target = new applicationautoscaling.ScalableTarget(this, 'ScalableTarget', {
|
||||||
|
serviceNamespace: applicationautoscaling.ServiceNamespace.LAMBDA,
|
||||||
|
maxCapacity: 100,
|
||||||
|
minCapacity: 10,
|
||||||
|
resourceId: `function:${fn.functionName}:${alias.aliasName}`,
|
||||||
|
scalableDimension: 'lambda:function:ProvisionedConcurrency',
|
||||||
|
});
|
||||||
|
|
||||||
|
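target.scaleToTrackMetric('ProvisionedConcurrencyUtilization', {
|
||||||
|
targetValue: 0.7, // 70% utilization
|
||||||
|
predefinedMetric: applicationautoscaling.PredefinedMetric.LAMBDA_PROVISIONED_CONCURRENCY_UTILIZATION,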
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**When to use**:
|
||||||
|
- **Consistent traffic patterns**: Predictable load
|
||||||
|
- **Latency-sensitive APIs**: Sub-100ms requirements
|
||||||
|
- **Cost consideration**: Compare cold start frequency vs. provisioned cost
|
||||||
|
|
||||||
|
**Cost comparison**:
|
||||||
|
- **On-demand**: Pay only for actual usage
|
||||||
|
- **Provisioned**: Pay for provisioned capacity + invocations
|
||||||
|
- **Breakeven**: When cold starts > ~20% of invocations
|
||||||
|
|
||||||
|
### Lambda SnapStart (Java)
|
||||||
|
|
||||||
|
**Instant cold starts for Java**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
new lambda.Function(this, 'JavaFunction', {
|
||||||
|
runtime: lambda.Runtime.JAVA_17,
|
||||||
|
code: lambda.Code.fromAsset('target/function.jar'),
|
||||||
|
handler: 'com.example.Handler::handleRequest',
|
||||||
|
snapStart: lambda.SnapStartConf.ON_PUBLISHED_VERSIONS,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits**:
|
||||||
|
- Up to 10x faster cold starts for Java
|
||||||
|
- No code changes required
|
||||||
|
- Works with published versions
|
||||||
|
- No additional cost
|
||||||
|
|
||||||
|
## Memory and CPU Optimization
|
||||||
|
|
||||||
|
### Memory = CPU Allocation
|
||||||
|
|
||||||
|
**Key principle**: Memory and CPU are proportionally allocated
|
||||||
|
|
||||||
|
| Memory | vCPU |
|
||||||
|
|--------|------|
|
||||||
|
| 128 MB | 0.07 vCPU |
|
||||||
|
| 512 MB | 0.28 vCPU |
|
||||||
|
| 1,024 MB | 0.57 vCPU |
|
||||||
|
| 1,769 MB | 1.00 vCPU |
|
||||||
|
| 3,538 MB | 2.00 vCPU |
|
||||||
|
| 10,240 MB | 6.00 vCPU |
|
||||||
|
|
||||||
|
### Cost vs. Performance Balancing
|
||||||
|
|
||||||
|
**Example - Compute-intensive function**:
|
||||||
|
|
||||||
|
| Memory | Duration | Cost (per 1,000 invocations) |
|
||||||
|
|--------|----------|------|
|
||||||
|
| 128 MB | 11.72s | $0.0246 |
|
||||||
|
| 256 MB | 6.68s | $0.0280 |
|
||||||
|
| 512 MB | 3.19s | $0.0268 |
|
||||||
|
| 1024 MB | 1.46s | $0.0246 |
|
||||||
|
|
||||||
|
**Key insight**: More memory = faster execution = similar or lower cost
|
||||||
|
|
||||||
|
**Formula**:
|
||||||
|
```
|
||||||
|
GB-seconds = Allocated Memory (GB) × Execution Time (seconds)
|
||||||
|
Cost = GB-seconds × Number of Invocations × Price per GB-second
|
||||||
|
```
|
||||||
|
|
||||||
|
### Finding Optimal Memory
|
||||||
|
|
||||||
|
**Use Lambda Power Tuning**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Deploy power tuning state machine
|
||||||
|
sam deploy --template-file template.yml --stack-name lambda-power-tuning
|
||||||
|
|
||||||
|
# Run power tuning
|
||||||
|
aws stepfunctions start-execution \
|
||||||
|
--state-machine-arn "arn:aws:states:..." \
|
||||||
|
--input '{"lambdaARN": "arn:aws:lambda:...", "powerValues": [128, 256, 512, 1024, 1536, 3008], "num": 50}'
|
||||||
|
```
|
||||||
|
|
||||||
|
**Manual testing approach**:
|
||||||
|
1. Test function at different memory levels
|
||||||
|
2. Measure execution time at each level
|
||||||
|
3. Calculate cost for each configuration
|
||||||
|
4. Choose optimal balance for your use case
|
||||||
|
|
||||||
|
### Multi-Core Optimization
|
||||||
|
|
||||||
|
**Leverage multiple vCPUs** (at 1,769 MB+):
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Use Worker Threads for parallel processing
|
||||||
|
import { Worker } from 'worker_threads';
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const items = event.items;
|
||||||
|
|
||||||
|
// Process in parallel using multiple cores
|
||||||
|
const workers = items.map(item =>
|
||||||
|
new Promise((resolve, reject) => {
|
||||||
|
const worker = new Worker('./worker.js', {
|
||||||
|
workerData: item,
|
||||||
|
});
|
||||||
|
|
||||||
|
worker.on('message', resolve);
|
||||||
|
worker.on('error', reject);
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
const results = await Promise.all(workers);
|
||||||
|
return results;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**Python multiprocessing**:
|
||||||
|
|
||||||
|
```python
|
||||||
|
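import multiprocessing as mp


def _process_chunk(chunk, conn):
    # Child process: compute results and send them back over the pipe
    conn.send([process_item(item) for item in chunk])
    conn.close()


def handler(event, context):
    items = event['items']

    # mp.Pool and mp.Queue need /dev/shm, which Lambda does not provide,
    # so fan out with Process + Pipe (at most one process per vCPU)
    workers = max(1, min(mp.cpu_count(), len(items)))
    size = max(1, -(-len(items) // workers))  # ceiling division
    jobs = []
    for start in range(0, len(items), size):
        parent_conn, child_conn = mp.Pipe(duplex=False)
        proc = mp.Process(
            target=_process_chunk,
            args=(items[start:start + size], child_conn),
        )
        proc.start()
        jobs.append((proc, parent_conn))

    results = []
    for proc, parent_conn in jobs:
        # Receive before join so a child never blocks on a full pipe
        results.extend(parent_conn.recv())
        proc.join()

    return {'results': results}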
|
||||||
|
```
|
||||||
|
|
||||||
|
## Initialization Optimization
|
||||||
|
|
||||||
|
### Code Outside Handler
|
||||||
|
|
||||||
|
**Initialize once, reuse across invocations**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Initialize outside handler
|
||||||
|
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
|
||||||
|
import { S3Client } from '@aws-sdk/client-s3';
|
||||||
|
|
||||||
|
// Initialized once per execution environment
|
||||||
|
const dynamodb = new DynamoDBClient({});
|
||||||
|
const s3 = new S3Client({});
|
||||||
|
|
||||||
|
// Connection pool initialized once
|
||||||
|
const pool = createConnectionPool({
|
||||||
|
host: process.env.DB_HOST,
|
||||||
|
max: 1, // One connection per execution environment
|
||||||
|
});
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
// Reuse connections across invocations
|
||||||
|
const data = await dynamodb.getItem({ /* ... */ });
|
||||||
|
const file = await s3.getObject({ /* ... */ });
|
||||||
|
return processData(data, file);
|
||||||
|
};
|
||||||
|
|
||||||
|
// ❌ BAD - Initialize in handler
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const dynamodb = new DynamoDBClient({}); // Created every invocation
|
||||||
|
const s3 = new S3Client({}); // Created every invocation
|
||||||
|
// ...
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Lazy Loading
|
||||||
|
|
||||||
|
**Load dependencies only when needed**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Conditional loading
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
if (event.operation === 'generatePDF') {
|
||||||
|
// Load heavy PDF library only when needed
|
||||||
|
const pdfLib = await import('./pdf-generator');
|
||||||
|
return pdfLib.generatePDF(event.data);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (event.operation === 'processImage') {
|
||||||
|
const sharp = await import('sharp');
|
||||||
|
return processImage(sharp, event.data);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default operation (no heavy dependencies)
|
||||||
|
return processDefault(event);
|
||||||
|
};
|
||||||
|
|
||||||
|
// ❌ BAD - Load everything upfront
|
||||||
|
import pdfLib from './pdf-generator'; // 50MB
|
||||||
|
import sharp from 'sharp'; // 20MB
|
||||||
|
// Even if not used!
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
if (event.operation === 'generatePDF') {
|
||||||
|
return pdfLib.generatePDF(event.data);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Connection Reuse
|
||||||
|
|
||||||
|
**Enable connection reuse**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
|
||||||
|
|
||||||
|
const client = new DynamoDBClient({
|
||||||
|
// Bound how long to wait for a connection and for socket inactivity (ms); these options do not themselves toggle keep-alive
|
||||||
|
requestHandler: {
|
||||||
|
connectionTimeout: 3000,
|
||||||
|
socketTimeout: 3000,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// For the Node.js AWS SDK v2 (NOTE(review): v3 clients are understood to reuse connections by default — confirm against the SDK docs)
|
||||||
|
process.env.AWS_NODEJS_CONNECTION_REUSE_ENABLED = '1';
|
||||||
|
```
|
||||||
|
|
||||||
|
## Runtime Performance
|
||||||
|
|
||||||
|
### Choose the Right Runtime
|
||||||
|
|
||||||
|
**Runtime comparison**:
|
||||||
|
|
||||||
|
| Runtime | Cold Start | Execution Speed | Ecosystem | Best For |
|
||||||
|
|---------|------------|-----------------|-----------|----------|
|
||||||
|
| Node.js 20 | Fast | Fast | Excellent | APIs, I/O-bound |
|
||||||
|
| Python 3.12 | Fast | Medium | Excellent | Data processing |
|
||||||
|
| Java 17 + SnapStart | Fast (w/SnapStart) | Fast | Good | Enterprise apps |
|
||||||
|
| .NET 8 | Medium | Fast | Good | Enterprise apps |
|
||||||
|
| Go | Very Fast | Very Fast | Good | High performance |
|
||||||
|
| Rust | Very Fast | Very Fast | Growing | High performance |
|
||||||
|
|
||||||
|
### Optimize Handler Code
|
||||||
|
|
||||||
|
**Efficient code patterns**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Batch operations
|
||||||
|
const items = ['item1', 'item2', 'item3'];
|
||||||
|
|
||||||
|
// Single batch write
|
||||||
|
await dynamodb.batchWriteItem({
|
||||||
|
RequestItems: {
|
||||||
|
[tableName]: items.map(item => ({
|
||||||
|
PutRequest: { Item: item },
|
||||||
|
})),
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// ❌ BAD - Multiple single operations
|
||||||
|
for (const item of items) {
|
||||||
|
await dynamodb.putItem({
|
||||||
|
TableName: tableName,
|
||||||
|
Item: item,
|
||||||
|
}); // Slow, multiple round trips
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Async Processing
|
||||||
|
|
||||||
|
**Use async/await effectively**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Parallel async operations
|
||||||
|
const [userData, orderData, inventoryData] = await Promise.all([
|
||||||
|
getUserData(userId),
|
||||||
|
getOrderData(orderId),
|
||||||
|
getInventoryData(productId),
|
||||||
|
]);
|
||||||
|
|
||||||
|
// ❌ BAD - Sequential async operations
|
||||||
|
const userData = await getUserData(userId);
|
||||||
|
const orderData = await getOrderData(orderId); // Waits unnecessarily
|
||||||
|
const inventoryData = await getInventoryData(productId); // Waits unnecessarily
|
||||||
|
```
|
||||||
|
|
||||||
|
### Caching Strategies
|
||||||
|
|
||||||
|
**Cache frequently accessed data**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// In-memory cache (persists in warm environments)
|
||||||
|
const cache = new Map<string, any>();
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const key = event.key;
|
||||||
|
|
||||||
|
// Check cache first
|
||||||
|
if (cache.has(key)) {
|
||||||
|
console.log('Cache hit');
|
||||||
|
return cache.get(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch from database
|
||||||
|
const data = await fetchFromDatabase(key);
|
||||||
|
|
||||||
|
// Store in cache
|
||||||
|
cache.set(key, data);
|
||||||
|
|
||||||
|
return data;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**ElastiCache for shared cache**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import Redis from 'ioredis';
|
||||||
|
|
||||||
|
// Initialize once
|
||||||
|
const redis = new Redis({
|
||||||
|
host: process.env.REDIS_HOST,
|
||||||
|
port: 6379,
|
||||||
|
lazyConnect: true,
|
||||||
|
enableOfflineQueue: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const key = `order:${event.orderId}`;
|
||||||
|
|
||||||
|
// Try cache
|
||||||
|
const cached = await redis.get(key);
|
||||||
|
if (cached) {
|
||||||
|
return JSON.parse(cached);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch and cache
|
||||||
|
const data = await fetchOrder(event.orderId);
|
||||||
|
await redis.setex(key, 300, JSON.stringify(data)); // 5 min TTL
|
||||||
|
|
||||||
|
return data;
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Testing
|
||||||
|
|
||||||
|
### Load Testing
|
||||||
|
|
||||||
|
**Use Artillery for load testing**:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
# load-test.yml
|
||||||
|
config:
|
||||||
|
target: https://api.example.com
|
||||||
|
phases:
|
||||||
|
- duration: 60
|
||||||
|
arrivalRate: 10
|
||||||
|
rampTo: 100 # Ramp from 10 to 100 req/sec
|
||||||
|
scenarios:
|
||||||
|
- flow:
|
||||||
|
- post:
|
||||||
|
url: /orders
|
||||||
|
json:
|
||||||
|
orderId: "{{ $randomString() }}"
|
||||||
|
amount: "{{ $randomNumber(10, 1000) }}"
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
artillery run load-test.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
### Benchmarking
|
||||||
|
|
||||||
|
**Test different configurations**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// benchmark.ts
|
||||||
|
import { Lambda } from '@aws-sdk/client-lambda';
|
||||||
|
|
||||||
|
const lambda = new Lambda({});
|
||||||
|
|
||||||
|
const testConfigurations = [
|
||||||
|
{ memory: 128, name: 'Function-128' },
|
||||||
|
{ memory: 256, name: 'Function-256' },
|
||||||
|
{ memory: 512, name: 'Function-512' },
|
||||||
|
{ memory: 1024, name: 'Function-1024' },
|
||||||
|
];
|
||||||
|
|
||||||
|
for (const config of testConfigurations) {
|
||||||
|
const times: number[] = [];
|
||||||
|
|
||||||
|
// Warm up
|
||||||
|
for (let i = 0; i < 5; i++) {
|
||||||
|
await lambda.invoke({ FunctionName: config.name });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Measure
|
||||||
|
for (let i = 0; i < 100; i++) {
|
||||||
|
const start = Date.now();
|
||||||
|
await lambda.invoke({ FunctionName: config.name });
|
||||||
|
times.push(Date.now() - start);
|
||||||
|
}
|
||||||
|
|
||||||
|
const p99 = times.sort((a, b) => a - b)[Math.ceil(times.length * 0.99) - 1]; // numeric sort (default sort compares as strings); nearest-rank p99
|
||||||
|
const avg = times.reduce((a, b) => a + b) / times.length;
|
||||||
|
|
||||||
|
console.log(`${config.memory}MB - Avg: ${avg}ms, p99: ${p99}ms`);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Cost Optimization
|
||||||
|
|
||||||
|
### Right-Sizing Memory
|
||||||
|
|
||||||
|
**Balance cost and performance**:
|
||||||
|
|
||||||
|
**CPU-bound workloads**:
|
||||||
|
- More memory = more CPU = faster execution
|
||||||
|
- Often results in lower cost overall
|
||||||
|
- Test at 1769MB (1 vCPU) and above
|
||||||
|
|
||||||
|
**I/O-bound workloads**:
|
||||||
|
- Less sensitive to memory allocation
|
||||||
|
- May not benefit from higher memory
|
||||||
|
- Test at lower memory levels (256-512MB)
|
||||||
|
|
||||||
|
**Simple operations**:
|
||||||
|
- Minimal CPU required
|
||||||
|
- Use minimum memory (128-256MB)
|
||||||
|
- Fast execution despite low resources
|
||||||
|
|
||||||
|
### Billing Granularity
|
||||||
|
|
||||||
|
**Lambda bills in 1ms increments**:
|
||||||
|
- Precise billing (7ms execution = 7ms cost)
|
||||||
|
- Optimize even small improvements
|
||||||
|
- Consider trade-offs carefully
|
||||||
|
|
||||||
|
**Cost calculation**:
|
||||||
|
```
|
||||||
|
Cost = (Memory GB) × (Duration seconds) × (Invocations) × ($0.0000166667/GB-second)
|
||||||
|
+ (Invocations) × ($0.20/1M requests)
|
||||||
|
```
|
||||||
|
|
||||||
|
### Cost Reduction Strategies
|
||||||
|
|
||||||
|
1. **Optimize execution time**: Faster = cheaper
|
||||||
|
2. **Right-size memory**: Balance CPU needs with cost
|
||||||
|
3. **Reduce invocations**: Batch processing, caching
|
||||||
|
4. **Use Graviton2**: 20% better price/performance
|
||||||
|
5. **Reserved Concurrency**: Only when needed
|
||||||
|
6. **Compression**: Reduce data transfer costs
|
||||||
|
|
||||||
|
## Advanced Optimization
|
||||||
|
|
||||||
|
### Lambda Extensions
|
||||||
|
|
||||||
|
**Use extensions for cross-cutting concerns**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Lambda layer with extension
|
||||||
|
const extensionLayer = lambda.LayerVersion.fromLayerVersionArn(
|
||||||
|
this,
|
||||||
|
'Extension',
|
||||||
|
'arn:aws:lambda:us-east-1:123456789:layer:my-extension:1'
|
||||||
|
);
|
||||||
|
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
layers: [extensionLayer],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Common extensions**:
|
||||||
|
- Secrets caching
|
||||||
|
- Configuration caching
|
||||||
|
- Custom logging
|
||||||
|
- Security scanning
|
||||||
|
- Performance monitoring
|
||||||
|
|
||||||
|
### Graviton2 Architecture
|
||||||
|
|
||||||
|
**20% better price/performance**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
architecture: lambda.Architecture.ARM_64, // Graviton2
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**Considerations**:
|
||||||
|
- Most runtimes support ARM64
|
||||||
|
- Test thoroughly before migrating
|
||||||
|
- Dependencies must support ARM64
|
||||||
|
- Native extensions may need recompilation
|
||||||
|
|
||||||
|
### VPC Optimization
|
||||||
|
|
||||||
|
**Hyperplane ENIs** (automatic since 2019):
|
||||||
|
- No ENI per function
|
||||||
|
- Faster cold starts in VPC
|
||||||
|
- Scales instantly
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Modern VPC configuration (fast)
|
||||||
|
new NodejsFunction(this, 'VpcFunction', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
vpc,
|
||||||
|
vpcSubnets: { subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS },
|
||||||
|
// Fast scaling, no ENI limitations
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Monitoring
|
||||||
|
|
||||||
|
### Key Metrics
|
||||||
|
|
||||||
|
**Monitor these metrics**:
|
||||||
|
- **Duration**: p50, p95, p99, max
|
||||||
|
- **Cold Start %**: Cold-start invocations / total invocations (cold starts are the invocations whose REPORT log line includes an `Init Duration`)
|
||||||
|
- **Error Rate**: Errors / Invocations
|
||||||
|
- **Throttles**: Indicates concurrency limit reached
|
||||||
|
- **Iterator Age**: For stream processing lag
|
||||||
|
|
||||||
|
### Performance Dashboards
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const dashboard = new cloudwatch.Dashboard(this, 'PerformanceDashboard');
|
||||||
|
|
||||||
|
dashboard.addWidgets(
|
||||||
|
new cloudwatch.GraphWidget({
|
||||||
|
title: 'Latency Distribution',
|
||||||
|
left: [
|
||||||
|
fn.metricDuration({ statistic: 'p50', label: 'p50' }),
|
||||||
|
fn.metricDuration({ statistic: 'p95', label: 'p95' }),
|
||||||
|
fn.metricDuration({ statistic: 'p99', label: 'p99' }),
|
||||||
|
fn.metricDuration({ statistic: 'Maximum', label: 'max' }),
|
||||||
|
],
|
||||||
|
}),
|
||||||
|
new cloudwatch.GraphWidget({
|
||||||
|
title: 'Duration and Errors', // matches the metrics plotted below (duration on the left axis, errors on the right)
|
||||||
|
left: [fn.metricDuration()],
|
||||||
|
right: [fn.metricErrors()],
|
||||||
|
})
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
- **Cold Starts**: Optimize package size, use provisioned concurrency for critical paths
|
||||||
|
- **Memory**: More memory often = faster execution = lower cost
|
||||||
|
- **Initialization**: Initialize connections outside handler
|
||||||
|
- **Lazy Loading**: Load dependencies only when needed
|
||||||
|
- **Connection Reuse**: Enable for AWS SDK clients
|
||||||
|
- **Testing**: Test at different memory levels to find optimal configuration
|
||||||
|
- **Monitoring**: Track p99 latency, not average
|
||||||
|
- **Graviton2**: Consider ARM64 for better price/performance
|
||||||
|
- **Batch Operations**: Reduce round trips to services
|
||||||
|
- **Caching**: Cache frequently accessed data
|
||||||
625
skills/aws-serverless-eda/references/security-best-practices.md
Normal file
625
skills/aws-serverless-eda/references/security-best-practices.md
Normal file
@@ -0,0 +1,625 @@
|
|||||||
|
# Serverless Security Best Practices
|
||||||
|
|
||||||
|
Security best practices for serverless applications based on AWS Well-Architected Framework.
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
- [Shared Responsibility Model](#shared-responsibility-model)
|
||||||
|
- [Identity and Access Management](#identity-and-access-management)
|
||||||
|
- [Function Security](#function-security)
|
||||||
|
- [API Security](#api-security)
|
||||||
|
- [Data Protection](#data-protection)
|
||||||
|
- [Network Security](#network-security)
|
||||||
|
|
||||||
|
## Shared Responsibility Model
|
||||||
|
|
||||||
|
### Serverless Shifts Responsibility to AWS
|
||||||
|
|
||||||
|
With serverless, AWS takes on more security responsibilities:
|
||||||
|
|
||||||
|
**AWS Responsibilities**:
|
||||||
|
- Compute infrastructure
|
||||||
|
- Execution environment
|
||||||
|
- Runtime language and patches
|
||||||
|
- Networking infrastructure
|
||||||
|
- Server software and OS
|
||||||
|
- Physical hardware and facilities
|
||||||
|
- Automatic security patches (like Log4Shell mitigation)
|
||||||
|
|
||||||
|
**Customer Responsibilities**:
|
||||||
|
- Function code and dependencies
|
||||||
|
- Resource configuration
|
||||||
|
- Identity and Access Management (IAM)
|
||||||
|
- Data encryption (at rest and in transit)
|
||||||
|
- Application-level security
|
||||||
|
- Secure coding practices
|
||||||
|
|
||||||
|
### Benefits of Shifted Responsibility
|
||||||
|
|
||||||
|
- **Automatic Patching**: AWS applies security patches automatically (e.g., Log4Shell fixed within 3 days)
|
||||||
|
- **Infrastructure Security**: No OS patching, server hardening, or vulnerability scanning
|
||||||
|
- **Operational Agility**: Quick security response at scale
|
||||||
|
- **Focus on Code**: Spend time on business logic, not infrastructure security
|
||||||
|
|
||||||
|
## Identity and Access Management
|
||||||
|
|
||||||
|
### Least Privilege Principle
|
||||||
|
|
||||||
|
**Always use least privilege IAM policies**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Specific grant
|
||||||
|
const table = new dynamodb.Table(this, 'Table', {});
|
||||||
|
const fn = new lambda.Function(this, 'Function', {}); // `function` is a reserved word and cannot be used as an identifier
|
||||||
|
|
||||||
|
table.grantReadData(fn); // Only read access
|
||||||
|
|
||||||
|
// ❌ BAD - Overly broad
|
||||||
|
fn.addToRolePolicy(new iam.PolicyStatement({
|
||||||
|
actions: ['dynamodb:*'],
|
||||||
|
resources: ['*'],
|
||||||
|
}));
|
||||||
|
```
|
||||||
|
|
||||||
|
### Function Execution Role
|
||||||
|
|
||||||
|
**Separate roles per function**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Each function has its own role
|
||||||
|
const readFunction = new NodejsFunction(this, 'ReadFunction', {
|
||||||
|
entry: 'src/read.ts',
|
||||||
|
// Gets its own execution role
|
||||||
|
});
|
||||||
|
|
||||||
|
const writeFunction = new NodejsFunction(this, 'WriteFunction', {
|
||||||
|
entry: 'src/write.ts',
|
||||||
|
// Gets its own execution role
|
||||||
|
});
|
||||||
|
|
||||||
|
table.grantReadData(readFunction);
|
||||||
|
table.grantReadWriteData(writeFunction);
|
||||||
|
|
||||||
|
// ❌ BAD - Shared role with excessive permissions
|
||||||
|
const sharedRole = new iam.Role(this, 'SharedRole', {
|
||||||
|
assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
|
||||||
|
managedPolicies: [
|
||||||
|
iam.ManagedPolicy.fromAwsManagedPolicyName('AdministratorAccess'), // Too broad!
|
||||||
|
],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Resource-Based Policies
|
||||||
|
|
||||||
|
Control who can invoke functions:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Allow API Gateway to invoke function
|
||||||
|
myFunction.grantInvoke(new iam.ServicePrincipal('apigateway.amazonaws.com'));
|
||||||
|
|
||||||
|
// Allow specific account
|
||||||
|
myFunction.addPermission('AllowAccountInvoke', {
|
||||||
|
principal: new iam.AccountPrincipal('123456789012'),
|
||||||
|
action: 'lambda:InvokeFunction',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Conditional invoke (only from specific VPC endpoint)
|
||||||
|
myFunction.addPermission('AllowVPCInvoke', {
|
||||||
|
principal: new iam.ServicePrincipal('lambda.amazonaws.com'),
|
||||||
|
action: 'lambda:InvokeFunction',
|
||||||
|
sourceArn: vpcEndpoint.vpcEndpointId,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### IAM Policies Best Practices
|
||||||
|
|
||||||
|
1. **Use grant methods**: Prefer `.grantXxx()` over manual policies
|
||||||
|
2. **Condition keys**: Use IAM conditions for fine-grained control
|
||||||
|
3. **Resource ARNs**: Always specify resource ARNs, avoid wildcards
|
||||||
|
4. **Session policies**: Use for temporary elevated permissions
|
||||||
|
5. **Service Control Policies (SCPs)**: Enforce organization-wide guardrails
|
||||||
|
|
||||||
|
## Function Security
|
||||||
|
|
||||||
|
### Lambda Isolation Model
|
||||||
|
|
||||||
|
**Each function runs in isolated sandbox**:
|
||||||
|
- Built on Firecracker microVMs
|
||||||
|
- Dedicated execution environment per function
|
||||||
|
- No shared memory between functions
|
||||||
|
- Isolated file system and network namespace
|
||||||
|
- Strong workload isolation
|
||||||
|
|
||||||
|
**Execution Environment Security**:
|
||||||
|
- One concurrent invocation per environment
|
||||||
|
- Environment may be reused (warm starts)
|
||||||
|
- `/tmp` storage persists between invocations
|
||||||
|
- Sensitive data in memory may persist
|
||||||
|
|
||||||
|
### Secure Coding Practices
|
||||||
|
|
||||||
|
**Handle sensitive data securely**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ✅ GOOD - Clean up sensitive data
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const apiKey = process.env.API_KEY;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await callApi(apiKey);
|
||||||
|
return result;
|
||||||
|
} finally {
|
||||||
|
// Clear sensitive data from memory. NOTE(review): this also removes API_KEY for later invocations that reuse this warm environment, so they will read undefined — confirm this is intended
|
||||||
|
delete process.env.API_KEY;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// ✅ GOOD - Use Secrets Manager
|
||||||
|
import { SecretsManagerClient, GetSecretValueCommand } from '@aws-sdk/client-secrets-manager';
|
||||||
|
|
||||||
|
const secretsClient = new SecretsManagerClient({});
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const secret = await secretsClient.send(
|
||||||
|
new GetSecretValueCommand({ SecretId: process.env.SECRET_ARN })
|
||||||
|
);
|
||||||
|
|
||||||
|
const apiKey = secret.SecretString;
|
||||||
|
// Use apiKey
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Dependency Management
|
||||||
|
|
||||||
|
**Scan dependencies for vulnerabilities**:
|
||||||
|
|
||||||
|
```json
|
||||||
|
// package.json
|
||||||
|
{
|
||||||
|
"scripts": {
|
||||||
|
"audit": "npm audit",
|
||||||
|
"audit:fix": "npm audit fix"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"snyk": "^1.0.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Keep dependencies updated**:
|
||||||
|
- Run `npm audit` or `pip-audit` regularly
|
||||||
|
- Use Dependabot or Snyk for automated scanning
|
||||||
|
- Update dependencies promptly when vulnerabilities found
|
||||||
|
- Use minimal dependency sets
|
||||||
|
|
||||||
|
### Environment Variable Security
|
||||||
|
|
||||||
|
**Never store secrets in environment variables**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// ❌ BAD - Secret in environment variable
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
environment: {
|
||||||
|
API_KEY: 'sk-1234567890abcdef', // Never do this!
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// ✅ GOOD - Reference to secret
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
environment: {
|
||||||
|
SECRET_ARN: secret.secretArn,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
secret.grantRead(myFunction);
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Security
|
||||||
|
|
||||||
|
### API Gateway Security
|
||||||
|
|
||||||
|
**Authentication and Authorization**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Cognito User Pool authorizer
|
||||||
|
const authorizer = new apigateway.CognitoUserPoolsAuthorizer(this, 'Authorizer', {
|
||||||
|
cognitoUserPools: [userPool],
|
||||||
|
});
|
||||||
|
|
||||||
|
api.root.addMethod('GET', integration, {
|
||||||
|
authorizer,
|
||||||
|
authorizationType: apigateway.AuthorizationType.COGNITO,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Lambda authorizer for custom auth
|
||||||
|
const customAuthorizer = new apigateway.TokenAuthorizer(this, 'CustomAuth', {
|
||||||
|
handler: authorizerFunction,
|
||||||
|
resultsCacheTtl: Duration.minutes(5),
|
||||||
|
});
|
||||||
|
|
||||||
|
// IAM authorization for service-to-service
|
||||||
|
api.root.addMethod('POST', integration, {
|
||||||
|
authorizationType: apigateway.AuthorizationType.IAM,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Request Validation
|
||||||
|
|
||||||
|
**Validate requests at API Gateway**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const validator = new apigateway.RequestValidator(this, 'Validator', {
|
||||||
|
api,
|
||||||
|
validateRequestBody: true,
|
||||||
|
validateRequestParameters: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
const model = api.addModel('Model', {
|
||||||
|
schema: {
|
||||||
|
type: apigateway.JsonSchemaType.OBJECT,
|
||||||
|
required: ['email', 'name'],
|
||||||
|
properties: {
|
||||||
|
email: {
|
||||||
|
type: apigateway.JsonSchemaType.STRING,
|
||||||
|
format: 'email',
|
||||||
|
},
|
||||||
|
name: {
|
||||||
|
type: apigateway.JsonSchemaType.STRING,
|
||||||
|
minLength: 1,
|
||||||
|
maxLength: 100,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
resource.addMethod('POST', integration, {
|
||||||
|
requestValidator: validator,
|
||||||
|
requestModels: {
|
||||||
|
'application/json': model,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Rate Limiting and Throttling
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const api = new apigateway.RestApi(this, 'Api', {
|
||||||
|
deployOptions: {
|
||||||
|
throttlingRateLimit: 1000, // requests per second
|
||||||
|
throttlingBurstLimit: 2000, // burst capacity
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Per-method throttling
|
||||||
|
resource.addMethod('POST', integration, {
|
||||||
|
methodResponses: [{ statusCode: '200' }],
|
||||||
|
requestParameters: {
|
||||||
|
'method.request.header.Authorization': true,
|
||||||
|
},
|
||||||
|
throttling: {
|
||||||
|
rateLimit: 100,
|
||||||
|
burstLimit: 200,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### API Keys and Usage Plans
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const apiKey = api.addApiKey('ApiKey', {
|
||||||
|
apiKeyName: 'customer-key',
|
||||||
|
});
|
||||||
|
|
||||||
|
const plan = api.addUsagePlan('UsagePlan', {
|
||||||
|
name: 'Standard',
|
||||||
|
throttle: {
|
||||||
|
rateLimit: 100,
|
||||||
|
burstLimit: 200,
|
||||||
|
},
|
||||||
|
quota: {
|
||||||
|
limit: 10000,
|
||||||
|
period: apigateway.Period.MONTH,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
plan.addApiKey(apiKey);
|
||||||
|
plan.addApiStage({
|
||||||
|
stage: api.deploymentStage,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Protection
|
||||||
|
|
||||||
|
### Encryption at Rest
|
||||||
|
|
||||||
|
**DynamoDB encryption**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Default: AWS-owned CMK (no additional cost)
|
||||||
|
const table = new dynamodb.Table(this, 'Table', {
|
||||||
|
encryption: dynamodb.TableEncryption.DEFAULT, // AWS-owned key (the default); AWS_MANAGED would select the AWS managed KMS key instead
|
||||||
|
});
|
||||||
|
|
||||||
|
// Customer-managed CMK (for compliance)
|
||||||
|
const kmsKey = new kms.Key(this, 'Key', {
|
||||||
|
enableKeyRotation: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
const table = new dynamodb.Table(this, 'Table', {
|
||||||
|
encryption: dynamodb.TableEncryption.CUSTOMER_MANAGED,
|
||||||
|
encryptionKey: kmsKey,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**S3 encryption**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// SSE-S3 (default, no additional cost)
|
||||||
|
const bucket = new s3.Bucket(this, 'Bucket', {
|
||||||
|
encryption: s3.BucketEncryption.S3_MANAGED,
|
||||||
|
});
|
||||||
|
|
||||||
|
// SSE-KMS (for fine-grained access control)
|
||||||
|
const bucket = new s3.Bucket(this, 'Bucket', {
|
||||||
|
encryption: s3.BucketEncryption.KMS,
|
||||||
|
encryptionKey: kmsKey,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
**SQS/SNS encryption**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const queue = new sqs.Queue(this, 'Queue', {
|
||||||
|
encryption: sqs.QueueEncryption.KMS,
|
||||||
|
encryptionMasterKey: kmsKey,
|
||||||
|
});
|
||||||
|
|
||||||
|
const topic = new sns.Topic(this, 'Topic', {
|
||||||
|
masterKey: kmsKey,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Encryption in Transit
|
||||||
|
|
||||||
|
**All AWS service APIs use TLS**:
|
||||||
|
- API Gateway endpoints use HTTPS by default
|
||||||
|
- Lambda to AWS service communication encrypted
|
||||||
|
- EventBridge, SQS, SNS use TLS
|
||||||
|
- Custom domains can use ACM certificates
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// API Gateway with custom domain
|
||||||
|
const certificate = new acm.Certificate(this, 'Certificate', {
|
||||||
|
domainName: 'api.example.com',
|
||||||
|
validation: acm.CertificateValidation.fromDns(hostedZone),
|
||||||
|
});
|
||||||
|
|
||||||
|
const api = new apigateway.RestApi(this, 'Api', {
|
||||||
|
domainName: {
|
||||||
|
domainName: 'api.example.com',
|
||||||
|
certificate,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Data Sanitization
|
||||||
|
|
||||||
|
**Validate and sanitize inputs**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import DOMPurify from 'isomorphic-dompurify';
|
||||||
|
import { z } from 'zod';
|
||||||
|
|
||||||
|
// Schema validation
|
||||||
|
const OrderSchema = z.object({
|
||||||
|
orderId: z.string().uuid(),
|
||||||
|
amount: z.number().positive(),
|
||||||
|
email: z.string().email(),
|
||||||
|
description: z.string().default(''), // declared so the handler can read and sanitize result.data.description
|
||||||
|
});
|
||||||
|
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
const body = JSON.parse(event.body);
|
||||||
|
|
||||||
|
// Validate schema
|
||||||
|
const result = OrderSchema.safeParse(body);
|
||||||
|
if (!result.success) {
|
||||||
|
return {
|
||||||
|
statusCode: 400,
|
||||||
|
body: JSON.stringify({ error: result.error }),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sanitize HTML inputs
|
||||||
|
const sanitized = {
|
||||||
|
...result.data,
|
||||||
|
description: DOMPurify.sanitize(result.data.description),
|
||||||
|
};
|
||||||
|
|
||||||
|
await processOrder(sanitized);
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
## Network Security
|
||||||
|
|
||||||
|
### VPC Configuration
|
||||||
|
|
||||||
|
**Lambda in VPC for private resources**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const vpc = new ec2.Vpc(this, 'Vpc', {
|
||||||
|
maxAzs: 2,
|
||||||
|
natGateways: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Security group for Lambda (declared before the function that references it)
|
||||||
|
const securityGroup = new ec2.SecurityGroup(this, 'LambdaSG', {
|
||||||
|
vpc,
|
||||||
|
description: 'Security group for Lambda function',
|
||||||
|
allowAllOutbound: false, // Restrict outbound
|
||||||
|
});
|
||||||
|
|
||||||
|
// Lambda in private subnet
|
||||||
|
const vpcFunction = new NodejsFunction(this, 'VpcFunction', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
vpc,
|
||||||
|
vpcSubnets: {
|
||||||
|
subnetType: ec2.SubnetType.PRIVATE_WITH_EGRESS,
|
||||||
|
},
|
||||||
|
securityGroups: [securityGroup],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Only allow access to RDS
|
||||||
|
securityGroup.addEgressRule(
|
||||||
|
ec2.Peer.securityGroupId(rdsSecurityGroup.securityGroupId),
|
||||||
|
ec2.Port.tcp(3306),
|
||||||
|
'Allow MySQL access'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
### VPC Endpoints
|
||||||
|
|
||||||
|
**Use VPC endpoints for AWS services**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// S3 VPC endpoint (gateway endpoint, no cost)
|
||||||
|
vpc.addGatewayEndpoint('S3Endpoint', {
|
||||||
|
service: ec2.GatewayVpcEndpointAwsService.S3,
|
||||||
|
});
|
||||||
|
|
||||||
|
// DynamoDB VPC endpoint (gateway endpoint, no cost)
|
||||||
|
vpc.addGatewayEndpoint('DynamoDBEndpoint', {
|
||||||
|
service: ec2.GatewayVpcEndpointAwsService.DYNAMODB,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Secrets Manager VPC endpoint (interface endpoint, cost applies)
|
||||||
|
vpc.addInterfaceEndpoint('SecretsManagerEndpoint', {
|
||||||
|
service: ec2.InterfaceVpcEndpointAwsService.SECRETS_MANAGER,
|
||||||
|
privateDnsEnabled: true,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Security Groups
|
||||||
|
|
||||||
|
**Principle of least privilege for network access**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Lambda security group
|
||||||
|
const lambdaSG = new ec2.SecurityGroup(this, 'LambdaSG', {
|
||||||
|
vpc,
|
||||||
|
allowAllOutbound: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
// RDS security group
|
||||||
|
const rdsSG = new ec2.SecurityGroup(this, 'RDSSG', {
|
||||||
|
vpc,
|
||||||
|
allowAllOutbound: false,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Allow Lambda to access RDS only
|
||||||
|
rdsSG.addIngressRule(
|
||||||
|
ec2.Peer.securityGroupId(lambdaSG.securityGroupId),
|
||||||
|
ec2.Port.tcp(3306),
|
||||||
|
'Allow Lambda access'
|
||||||
|
);
|
||||||
|
|
||||||
|
lambdaSG.addEgressRule(
|
||||||
|
ec2.Peer.securityGroupId(rdsSG.securityGroupId),
|
||||||
|
ec2.Port.tcp(3306),
|
||||||
|
'Allow RDS access'
|
||||||
|
);
|
||||||
|
```
|
||||||
|
|
||||||
|
## Security Monitoring
|
||||||
|
|
||||||
|
### CloudWatch Logs
|
||||||
|
|
||||||
|
**Enable and encrypt logs**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
// Retention is configured on the log group below; CDK rejects `logRetention` combined with `logGroup`
|
||||||
|
logGroup: new logs.LogGroup(this, 'LogGroup', {
|
||||||
|
encryptionKey: kmsKey, // Encrypt logs
|
||||||
|
retention: logs.RetentionDays.ONE_WEEK,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### CloudTrail
|
||||||
|
|
||||||
|
**Enable CloudTrail for audit**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const trail = new cloudtrail.Trail(this, 'Trail', {
|
||||||
|
isMultiRegionTrail: true,
|
||||||
|
includeGlobalServiceEvents: true,
|
||||||
|
managementEvents: cloudtrail.ReadWriteType.ALL,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Log Lambda invocations
|
||||||
|
trail.logAllLambdaDataEvents({
|
||||||
|
includeManagementEvents: true,
|
||||||
|
readWriteType: cloudtrail.ReadWriteType.ALL,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### GuardDuty
|
||||||
|
|
||||||
|
**Enable GuardDuty for threat detection**:
|
||||||
|
- Analyzes VPC Flow Logs, DNS logs, CloudTrail events
|
||||||
|
- Detects unusual API activity
|
||||||
|
- Identifies compromised credentials
|
||||||
|
- Monitors for cryptocurrency mining
|
||||||
|
|
||||||
|
## Security Best Practices Checklist
|
||||||
|
|
||||||
|
### Development
|
||||||
|
|
||||||
|
- [ ] Validate and sanitize all inputs
|
||||||
|
- [ ] Scan dependencies for vulnerabilities
|
||||||
|
- [ ] Use least privilege IAM permissions
|
||||||
|
- [ ] Store secrets in Secrets Manager or Parameter Store
|
||||||
|
- [ ] Never log sensitive data
|
||||||
|
- [ ] Enable encryption for all data stores
|
||||||
|
- [ ] Use environment variables for configuration, not secrets
|
||||||
|
|
||||||
|
### Deployment
|
||||||
|
|
||||||
|
- [ ] Enable CloudTrail in all regions
|
||||||
|
- [ ] Configure VPC for sensitive workloads
|
||||||
|
- [ ] Use VPC endpoints for AWS service access
|
||||||
|
- [ ] Enable GuardDuty for threat detection
|
||||||
|
- [ ] Implement resource-based policies
|
||||||
|
- [ ] Use AWS WAF for API protection
|
||||||
|
- [ ] Enable access logging for API Gateway
|
||||||
|
|
||||||
|
### Operations
|
||||||
|
|
||||||
|
- [ ] Monitor CloudTrail for unusual activity
|
||||||
|
- [ ] Set up alarms for security events
|
||||||
|
- [ ] Rotate secrets regularly
|
||||||
|
- [ ] Review IAM policies periodically
|
||||||
|
- [ ] Audit function permissions
|
||||||
|
- [ ] Monitor GuardDuty findings
|
||||||
|
- [ ] Implement automated security responses
|
||||||
|
|
||||||
|
### Testing
|
||||||
|
|
||||||
|
- [ ] Test with least privilege policies
|
||||||
|
- [ ] Validate error handling for security failures
|
||||||
|
- [ ] Test input validation and sanitization
|
||||||
|
- [ ] Verify encryption configurations
|
||||||
|
- [ ] Test with malicious payloads
|
||||||
|
- [ ] Audit logs for security events
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
- **Shared Responsibility**: AWS handles infrastructure, you handle application security
|
||||||
|
- **Least Privilege**: Use IAM grant methods, avoid wildcards
|
||||||
|
- **Encryption**: Enable encryption at rest and in transit
|
||||||
|
- **Input Validation**: Validate and sanitize all inputs
|
||||||
|
- **Dependency Security**: Scan and update dependencies regularly
|
||||||
|
- **Monitoring**: Enable CloudTrail, GuardDuty, and CloudWatch
|
||||||
|
- **Secrets Management**: Use Secrets Manager, never environment variables
|
||||||
|
- **Network Security**: Use VPC, security groups, and VPC endpoints appropriately
|
||||||
838
skills/aws-serverless-eda/references/serverless-patterns.md
Normal file
838
skills/aws-serverless-eda/references/serverless-patterns.md
Normal file
@@ -0,0 +1,838 @@
|
|||||||
|
# Serverless Architecture Patterns
|
||||||
|
|
||||||
|
Comprehensive patterns for building serverless applications on AWS based on Well-Architected Framework principles.
|
||||||
|
|
||||||
|
## Table of Contents
|
||||||
|
|
||||||
|
- [Core Serverless Patterns](#core-serverless-patterns)
|
||||||
|
- [API Patterns](#api-patterns)
|
||||||
|
- [Data Processing Patterns](#data-processing-patterns)
|
||||||
|
- [Integration Patterns](#integration-patterns)
|
||||||
|
- [Orchestration Patterns](#orchestration-patterns)
|
||||||
|
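- [Anti-Patterns](#anti-patterns)
|
||||||
|
- [Performance Optimization](#performance-optimization)
|
||||||
|
- [Testing Strategies](#testing-strategies)
|
||||||
|
- [Summary](#summary)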
|
||||||
|
|
||||||
|
## Core Serverless Patterns
|
||||||
|
|
||||||
|
### Pattern: Serverless Microservices
|
||||||
|
|
||||||
|
**Use case**: Independent, scalable services with separate databases
|
||||||
|
|
||||||
|
**Architecture**:
|
||||||
|
```
|
||||||
|
API Gateway → Lambda Functions → DynamoDB/RDS
|
||||||
|
↓ (events)
|
||||||
|
EventBridge → Other Services
|
||||||
|
```
|
||||||
|
|
||||||
|
**CDK Implementation**:
|
||||||
|
```typescript
|
||||||
|
// User Service
|
||||||
|
const userTable = new dynamodb.Table(this, 'Users', {
|
||||||
|
partitionKey: { name: 'userId', type: dynamodb.AttributeType.STRING },
|
||||||
|
billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
|
||||||
|
});
|
||||||
|
|
||||||
|
const userFunction = new NodejsFunction(this, 'UserHandler', {
|
||||||
|
entry: 'src/services/users/handler.ts',
|
||||||
|
environment: {
|
||||||
|
TABLE_NAME: userTable.tableName,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
userTable.grantReadWriteData(userFunction);
|
||||||
|
|
||||||
|
// Order Service (separate database)
|
||||||
|
const orderTable = new dynamodb.Table(this, 'Orders', {
|
||||||
|
partitionKey: { name: 'orderId', type: dynamodb.AttributeType.STRING },
|
||||||
|
billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
|
||||||
|
});
|
||||||
|
|
||||||
|
const orderFunction = new NodejsFunction(this, 'OrderHandler', {
|
||||||
|
entry: 'src/services/orders/handler.ts',
|
||||||
|
environment: {
|
||||||
|
TABLE_NAME: orderTable.tableName,
|
||||||
|
EVENT_BUS: eventBus.eventBusName,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
orderTable.grantReadWriteData(orderFunction);
|
||||||
|
eventBus.grantPutEventsTo(orderFunction);
|
||||||
|
```
|
||||||
|
|
||||||
|
**Benefits**:
|
||||||
|
- Independent deployment and scaling
|
||||||
|
- Database per service (data isolation)
|
||||||
|
- Technology diversity
|
||||||
|
- Fault isolation
|
||||||
|
|
||||||
|
### Pattern: Serverless API Backend
|
||||||
|
|
||||||
|
**Use case**: REST or GraphQL API with serverless compute
|
||||||
|
|
||||||
|
**REST API with API Gateway**:
|
||||||
|
```typescript
|
||||||
|
const api = new apigateway.RestApi(this, 'Api', {
|
||||||
|
restApiName: 'serverless-api',
|
||||||
|
deployOptions: {
|
||||||
|
stageName: 'prod',
|
||||||
|
tracingEnabled: true,
|
||||||
|
loggingLevel: apigateway.MethodLoggingLevel.INFO,
|
||||||
|
dataTraceEnabled: false, // Full request/response logging can leak sensitive data; enable only while debugging
|
||||||
|
metricsEnabled: true,
|
||||||
|
},
|
||||||
|
defaultCorsPreflightOptions: {
|
||||||
|
allowOrigins: apigateway.Cors.ALL_ORIGINS,
|
||||||
|
allowMethods: apigateway.Cors.ALL_METHODS,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Resource-based routing
|
||||||
|
const items = api.root.addResource('items');
|
||||||
|
items.addMethod('GET', new apigateway.LambdaIntegration(listFunction));
|
||||||
|
items.addMethod('POST', new apigateway.LambdaIntegration(createFunction));
|
||||||
|
|
||||||
|
const item = items.addResource('{id}');
|
||||||
|
item.addMethod('GET', new apigateway.LambdaIntegration(getFunction));
|
||||||
|
item.addMethod('PUT', new apigateway.LambdaIntegration(updateFunction));
|
||||||
|
item.addMethod('DELETE', new apigateway.LambdaIntegration(deleteFunction));
|
||||||
|
```
|
||||||
|
|
||||||
|
**GraphQL API with AppSync**:
|
||||||
|
```typescript
|
||||||
|
const api = new appsync.GraphqlApi(this, 'Api', {
|
||||||
|
name: 'serverless-graphql-api',
|
||||||
|
schema: appsync.SchemaFile.fromAsset('schema.graphql'),
|
||||||
|
authorizationConfig: {
|
||||||
|
defaultAuthorization: {
|
||||||
|
authorizationType: appsync.AuthorizationType.API_KEY,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
xrayEnabled: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Lambda resolver
|
||||||
|
const dataSource = api.addLambdaDataSource('lambda-ds', resolverFunction);
|
||||||
|
|
||||||
|
dataSource.createResolver('QueryGetItem', {
|
||||||
|
typeName: 'Query',
|
||||||
|
fieldName: 'getItem',
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: Serverless Data Lake
|
||||||
|
|
||||||
|
**Use case**: Ingest, process, and analyze large-scale data
|
||||||
|
|
||||||
|
**Architecture**:
|
||||||
|
```
|
||||||
|
S3 (raw data) → Lambda (transform) → S3 (processed)
|
||||||
|
↓ (catalog)
|
||||||
|
AWS Glue → Athena (query)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Implementation**:
|
||||||
|
```typescript
|
||||||
|
const rawBucket = new s3.Bucket(this, 'RawData');
|
||||||
|
const processedBucket = new s3.Bucket(this, 'ProcessedData');
|
||||||
|
|
||||||
|
// Trigger Lambda on file upload
|
||||||
|
rawBucket.addEventNotification(
|
||||||
|
s3.EventType.OBJECT_CREATED,
|
||||||
|
new s3n.LambdaDestination(transformFunction),
|
||||||
|
{ prefix: 'incoming/' }
|
||||||
|
);
|
||||||
|
|
||||||
|
// Transform function
|
||||||
|
export const transform = async (event: S3Event) => {
|
||||||
|
for (const record of event.Records) {
|
||||||
|
const key = record.s3.object.key;
|
||||||
|
|
||||||
|
// Get raw data
|
||||||
|
const raw = await s3.getObject({
|
||||||
|
Bucket: record.s3.bucket.name,
|
||||||
|
Key: key,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Transform data
|
||||||
|
const transformed = await transformData(raw.Body);
|
||||||
|
|
||||||
|
// Write to processed bucket
|
||||||
|
await s3.putObject({
|
||||||
|
Bucket: process.env.PROCESSED_BUCKET,
|
||||||
|
Key: `processed/${key}`,
|
||||||
|
Body: JSON.stringify(transformed),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
## API Patterns
|
||||||
|
|
||||||
|
### Pattern: Authorizer Pattern
|
||||||
|
|
||||||
|
**Use case**: Custom authentication and authorization
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Lambda authorizer
|
||||||
|
const authorizer = new apigateway.TokenAuthorizer(this, 'Authorizer', {
|
||||||
|
handler: authorizerFunction,
|
||||||
|
identitySource: 'method.request.header.Authorization',
|
||||||
|
resultsCacheTtl: Duration.minutes(5),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Apply to API methods
|
||||||
|
const resource = api.root.addResource('protected');
|
||||||
|
resource.addMethod('GET', new apigateway.LambdaIntegration(protectedFunction), {
|
||||||
|
authorizer,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: Request Validation
|
||||||
|
|
||||||
|
**Use case**: Validate requests before Lambda invocation
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const requestModel = api.addModel('RequestModel', {
|
||||||
|
contentType: 'application/json',
|
||||||
|
schema: {
|
||||||
|
type: apigateway.JsonSchemaType.OBJECT,
|
||||||
|
required: ['name', 'email'],
|
||||||
|
properties: {
|
||||||
|
name: { type: apigateway.JsonSchemaType.STRING, minLength: 1 },
|
||||||
|
email: { type: apigateway.JsonSchemaType.STRING, format: 'email' },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
resource.addMethod('POST', integration, {
|
||||||
|
requestValidator: new apigateway.RequestValidator(this, 'Validator', {
|
||||||
|
restApi: api,
|
||||||
|
validateRequestBody: true,
|
||||||
|
validateRequestParameters: true,
|
||||||
|
}),
|
||||||
|
requestModels: {
|
||||||
|
'application/json': requestModel,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: Response Caching
|
||||||
|
|
||||||
|
**Use case**: Reduce backend load and improve latency
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const api = new apigateway.RestApi(this, 'Api', {
|
||||||
|
deployOptions: {
|
||||||
|
cachingEnabled: true,
|
||||||
|
cacheTtl: Duration.minutes(5),
|
||||||
|
cacheClusterEnabled: true,
|
||||||
|
cacheClusterSize: '0.5', // GB
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Caching is already enabled for the stage via deployOptions; this declares the Cache-Control response header on the method
|
||||||
|
resource.addMethod('GET', integration, {
|
||||||
|
methodResponses: [{
|
||||||
|
statusCode: '200',
|
||||||
|
responseParameters: {
|
||||||
|
'method.response.header.Cache-Control': true,
|
||||||
|
},
|
||||||
|
}],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Data Processing Patterns
|
||||||
|
|
||||||
|
### Pattern: S3 Event Processing
|
||||||
|
|
||||||
|
**Use case**: Process files uploaded to S3
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const bucket = new s3.Bucket(this, 'DataBucket');
|
||||||
|
|
||||||
|
// Process images
|
||||||
|
bucket.addEventNotification(
|
||||||
|
s3.EventType.OBJECT_CREATED,
|
||||||
|
new s3n.LambdaDestination(imageProcessingFunction),
|
||||||
|
{ suffix: '.jpg' }
|
||||||
|
);
|
||||||
|
|
||||||
|
// Process CSV files
|
||||||
|
bucket.addEventNotification(
|
||||||
|
s3.EventType.OBJECT_CREATED,
|
||||||
|
new s3n.LambdaDestination(csvProcessingFunction),
|
||||||
|
{ suffix: '.csv' }
|
||||||
|
);
|
||||||
|
|
||||||
|
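// Large file processing with Step Functions
|
||||||
|
// S3 cannot notify Step Functions directly, so route the event through EventBridge
|
||||||
|
bucket.enableEventBridgeNotification();
|
||||||
|
|
||||||
|
new events.Rule(this, 'LargeFileUploadedRule', {
|
||||||
|
eventPattern: {
|
||||||
|
source: ['aws.s3'],
|
||||||
|
detailType: ['Object Created'],
|
||||||
|
detail: {
|
||||||
|
bucket: { name: [bucket.bucketName] },
|
||||||
|
object: { key: events.Match.prefix('large-files/') },
|
||||||
|
},
|
||||||
|
},
|
||||||
|
targets: [new targets.SfnStateMachine(processingStateMachine)],
|
||||||
|
});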
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: DynamoDB Streams Processing
|
||||||
|
|
||||||
|
**Use case**: React to database changes
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const table = new dynamodb.Table(this, 'Table', {
|
||||||
|
partitionKey: { name: 'id', type: dynamodb.AttributeType.STRING },
|
||||||
|
stream: dynamodb.StreamViewType.NEW_AND_OLD_IMAGES,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Process stream changes
|
||||||
|
new lambda.EventSourceMapping(this, 'StreamConsumer', {
|
||||||
|
target: streamProcessorFunction,
|
||||||
|
eventSourceArn: table.tableStreamArn,
|
||||||
|
startingPosition: lambda.StartingPosition.LATEST,
|
||||||
|
batchSize: 100,
|
||||||
|
maxBatchingWindow: Duration.seconds(5),
|
||||||
|
bisectBatchOnError: true,
|
||||||
|
retryAttempts: 3,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Example: Sync to search index
|
||||||
|
export const processStream = async (event: DynamoDBStreamEvent) => {
|
||||||
|
for (const record of event.Records) {
|
||||||
|
if (record.eventName === 'INSERT' || record.eventName === 'MODIFY') {
|
||||||
|
const newImage = record.dynamodb?.NewImage;
|
||||||
|
await elasticSearch.index({
|
||||||
|
index: 'items',
|
||||||
|
id: newImage?.id.S,
|
||||||
|
body: unmarshall(newImage),
|
||||||
|
});
|
||||||
|
} else if (record.eventName === 'REMOVE') {
|
||||||
|
await elasticSearch.delete({
|
||||||
|
index: 'items',
|
||||||
|
id: record.dynamodb?.Keys?.id.S,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: Kinesis Stream Processing
|
||||||
|
|
||||||
|
**Use case**: Real-time data streaming and analytics
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const stream = new kinesis.Stream(this, 'EventStream', {
|
||||||
|
shardCount: 2,
|
||||||
|
streamMode: kinesis.StreamMode.PROVISIONED,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Fan-out with multiple consumers
|
||||||
|
const consumer1 = new lambda.EventSourceMapping(this, 'Analytics', {
|
||||||
|
target: analyticsFunction,
|
||||||
|
eventSourceArn: stream.streamArn,
|
||||||
|
startingPosition: lambda.StartingPosition.LATEST,
|
||||||
|
batchSize: 100,
|
||||||
|
parallelizationFactor: 10, // Process 10 batches per shard in parallel
|
||||||
|
});
|
||||||
|
|
||||||
|
const consumer2 = new lambda.EventSourceMapping(this, 'Alerting', {
|
||||||
|
target: alertingFunction,
|
||||||
|
eventSourceArn: stream.streamArn,
|
||||||
|
startingPosition: lambda.StartingPosition.LATEST,
|
||||||
|
filters: [
|
||||||
|
lambda.FilterCriteria.filter({
|
||||||
|
data: { eventName: lambda.FilterRule.isEqual('CRITICAL_EVENT') }, // Kinesis filters match fields of the JSON payload under "data"
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Integration Patterns
|
||||||
|
|
||||||
|
### Pattern: Service Integration with EventBridge
|
||||||
|
|
||||||
|
**Use case**: Decouple services with events
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const eventBus = new events.EventBus(this, 'AppBus');
|
||||||
|
|
||||||
|
// Service A publishes events
|
||||||
|
const serviceA = new NodejsFunction(this, 'ServiceA', {
|
||||||
|
entry: 'src/services/a/handler.ts',
|
||||||
|
environment: {
|
||||||
|
EVENT_BUS: eventBus.eventBusName,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
eventBus.grantPutEventsTo(serviceA);
|
||||||
|
|
||||||
|
// Service B subscribes to events
|
||||||
|
new events.Rule(this, 'ServiceBRule', {
|
||||||
|
eventBus,
|
||||||
|
eventPattern: {
|
||||||
|
source: ['service.a'],
|
||||||
|
detailType: ['EntityCreated'],
|
||||||
|
},
|
||||||
|
targets: [new targets.LambdaFunction(serviceBFunction)],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Service C subscribes to same events
|
||||||
|
new events.Rule(this, 'ServiceCRule', {
|
||||||
|
eventBus,
|
||||||
|
eventPattern: {
|
||||||
|
source: ['service.a'],
|
||||||
|
detailType: ['EntityCreated'],
|
||||||
|
},
|
||||||
|
targets: [new targets.LambdaFunction(serviceCFunction)],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: API Gateway + SQS Integration
|
||||||
|
|
||||||
|
**Use case**: Async API requests without Lambda
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const queue = new sqs.Queue(this, 'RequestQueue');
|
||||||
|
|
||||||
|
const api = new apigateway.RestApi(this, 'Api');
|
||||||
|
|
||||||
|
// Direct SQS integration (no Lambda)
|
||||||
|
const sqsIntegration = new apigateway.AwsIntegration({
|
||||||
|
service: 'sqs',
|
||||||
|
path: `${Stack.of(this).account}/${queue.queueName}`, // Resolve the account from the stack, not from a shell environment variable
|
||||||
|
integrationHttpMethod: 'POST',
|
||||||
|
options: {
|
||||||
|
credentialsRole: sqsRole,
|
||||||
|
requestParameters: {
|
||||||
|
'integration.request.header.Content-Type': "'application/x-www-form-urlencoded'",
|
||||||
|
},
|
||||||
|
requestTemplates: {
|
||||||
|
'application/json': 'Action=SendMessage&MessageBody=$util.urlEncode($input.body)',
|
||||||
|
},
|
||||||
|
integrationResponses: [{
|
||||||
|
statusCode: '200',
|
||||||
|
}],
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
api.root.addMethod('POST', sqsIntegration, {
|
||||||
|
methodResponses: [{ statusCode: '200' }],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: EventBridge + Step Functions
|
||||||
|
|
||||||
|
**Use case**: Event-triggered workflow orchestration
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// State machine for order processing
|
||||||
|
const orderStateMachine = new stepfunctions.StateMachine(this, 'OrderFlow', {
|
||||||
|
definition: /* ... */,
|
||||||
|
});
|
||||||
|
|
||||||
|
// EventBridge triggers state machine
|
||||||
|
new events.Rule(this, 'OrderPlacedRule', {
|
||||||
|
eventPattern: {
|
||||||
|
source: ['orders'],
|
||||||
|
detailType: ['OrderPlaced'],
|
||||||
|
},
|
||||||
|
targets: [new targets.SfnStateMachine(orderStateMachine)],
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Orchestration Patterns
|
||||||
|
|
||||||
|
### Pattern: Sequential Workflow
|
||||||
|
|
||||||
|
**Use case**: Multi-step process with dependencies
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const definition = new tasks.LambdaInvoke(this, 'Step1', {
|
||||||
|
lambdaFunction: step1Function,
|
||||||
|
outputPath: '$.Payload',
|
||||||
|
})
|
||||||
|
.next(new tasks.LambdaInvoke(this, 'Step2', {
|
||||||
|
lambdaFunction: step2Function,
|
||||||
|
outputPath: '$.Payload',
|
||||||
|
}))
|
||||||
|
.next(new tasks.LambdaInvoke(this, 'Step3', {
|
||||||
|
lambdaFunction: step3Function,
|
||||||
|
outputPath: '$.Payload',
|
||||||
|
}));
|
||||||
|
|
||||||
|
new stepfunctions.StateMachine(this, 'Sequential', {
|
||||||
|
definition,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: Parallel Execution
|
||||||
|
|
||||||
|
**Use case**: Execute independent tasks concurrently
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const parallel = new stepfunctions.Parallel(this, 'ParallelProcessing');
|
||||||
|
|
||||||
|
parallel.branch(new tasks.LambdaInvoke(this, 'ProcessA', {
|
||||||
|
lambdaFunction: functionA,
|
||||||
|
}));
|
||||||
|
|
||||||
|
parallel.branch(new tasks.LambdaInvoke(this, 'ProcessB', {
|
||||||
|
lambdaFunction: functionB,
|
||||||
|
}));
|
||||||
|
|
||||||
|
parallel.branch(new tasks.LambdaInvoke(this, 'ProcessC', {
|
||||||
|
lambdaFunction: functionC,
|
||||||
|
}));
|
||||||
|
|
||||||
|
const definition = parallel.next(new tasks.LambdaInvoke(this, 'Aggregate', {
|
||||||
|
lambdaFunction: aggregateFunction,
|
||||||
|
}));
|
||||||
|
|
||||||
|
new stepfunctions.StateMachine(this, 'Parallel', { definition });
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: Map State (Dynamic Parallelism)
|
||||||
|
|
||||||
|
**Use case**: Process array of items in parallel
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const mapState = new stepfunctions.Map(this, 'ProcessItems', {
|
||||||
|
maxConcurrency: 10,
|
||||||
|
itemsPath: '$.items',
|
||||||
|
});
|
||||||
|
|
||||||
|
mapState.iterator(new tasks.LambdaInvoke(this, 'ProcessItem', {
|
||||||
|
lambdaFunction: processItemFunction,
|
||||||
|
}));
|
||||||
|
|
||||||
|
const definition = mapState.next(new tasks.LambdaInvoke(this, 'Finalize', {
|
||||||
|
lambdaFunction: finalizeFunction,
|
||||||
|
}));
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: Choice State (Conditional Logic)
|
||||||
|
|
||||||
|
**Use case**: Branching logic based on input
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
const choice = new stepfunctions.Choice(this, 'OrderType');
|
||||||
|
|
||||||
|
choice.when(
|
||||||
|
stepfunctions.Condition.stringEquals('$.orderType', 'STANDARD'),
|
||||||
|
standardProcessing
|
||||||
|
);
|
||||||
|
|
||||||
|
choice.when(
|
||||||
|
stepfunctions.Condition.stringEquals('$.orderType', 'EXPRESS'),
|
||||||
|
expressProcessing
|
||||||
|
);
|
||||||
|
|
||||||
|
choice.otherwise(defaultProcessing);
|
||||||
|
```
|
||||||
|
|
||||||
|
### Pattern: Wait State
|
||||||
|
|
||||||
|
**Use case**: Delay between steps or wait for callbacks
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Fixed delay
|
||||||
|
const wait = new stepfunctions.Wait(this, 'Wait30Seconds', {
|
||||||
|
time: stepfunctions.WaitTime.duration(Duration.seconds(30)),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait until timestamp
|
||||||
|
const waitUntil = new stepfunctions.Wait(this, 'WaitUntil', {
|
||||||
|
time: stepfunctions.WaitTime.timestampPath('$.expiryTime'),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Wait for callback (.waitForTaskToken)
|
||||||
|
const waitForCallback = new tasks.LambdaInvoke(this, 'WaitForApproval', {
|
||||||
|
lambdaFunction: approvalFunction,
|
||||||
|
integrationPattern: stepfunctions.IntegrationPattern.WAIT_FOR_TASK_TOKEN,
|
||||||
|
payload: stepfunctions.TaskInput.fromObject({
|
||||||
|
token: stepfunctions.JsonPath.taskToken,
|
||||||
|
data: stepfunctions.JsonPath.entirePayload,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Anti-Patterns
|
||||||
|
|
||||||
|
### ❌ Lambda Monolith
|
||||||
|
|
||||||
|
**Problem**: Single Lambda handling all operations
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// BAD
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
switch (event.operation) {
|
||||||
|
case 'createUser': return createUser(event);
|
||||||
|
case 'getUser': return getUser(event);
|
||||||
|
case 'updateUser': return updateUser(event);
|
||||||
|
case 'deleteUser': return deleteUser(event);
|
||||||
|
case 'createOrder': return createOrder(event);
|
||||||
|
// ... 20 more operations
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**Solution**: Separate Lambda functions per operation
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// GOOD - Separate functions
|
||||||
|
export const createUser = async (event: any) => { /* ... */ };
|
||||||
|
export const getUser = async (event: any) => { /* ... */ };
|
||||||
|
export const updateUser = async (event: any) => { /* ... */ };
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ Recursive Lambda Pattern
|
||||||
|
|
||||||
|
**Problem**: Lambda invoking itself (runaway costs)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// BAD
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
await processItem(event);
|
||||||
|
|
||||||
|
if (hasMoreItems()) {
|
||||||
|
await lambda.invoke({
|
||||||
|
FunctionName: process.env.AWS_LAMBDA_FUNCTION_NAME,
|
||||||
|
InvocationType: 'Event',
|
||||||
|
Payload: JSON.stringify({ /* next batch */ }),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**Solution**: Use SQS or Step Functions
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// GOOD - Use SQS for iteration
|
||||||
|
export const handler = async (event: SQSEvent) => {
|
||||||
|
for (const record of event.Records) {
|
||||||
|
await processItem(record);
|
||||||
|
}
|
||||||
|
// SQS handles iteration automatically
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ Lambda Chaining
|
||||||
|
|
||||||
|
**Problem**: Lambda directly invoking another Lambda
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// BAD
|
||||||
|
export const handler1 = async (event: any) => {
|
||||||
|
const result = await processStep1(event);
|
||||||
|
|
||||||
|
// Directly invoking next Lambda
|
||||||
|
await lambda.invoke({
|
||||||
|
FunctionName: 'handler2',
|
||||||
|
Payload: JSON.stringify(result),
|
||||||
|
});
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**Solution**: Use EventBridge, SQS, or Step Functions
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// GOOD - Publish to EventBridge
|
||||||
|
export const handler1 = async (event: any) => {
|
||||||
|
const result = await processStep1(event);
|
||||||
|
|
||||||
|
await eventBridge.putEvents({
|
||||||
|
Entries: [{
|
||||||
|
Source: 'service.step1',
|
||||||
|
DetailType: 'Step1Completed',
|
||||||
|
Detail: JSON.stringify(result),
|
||||||
|
}],
|
||||||
|
});
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ Synchronous Waiting in Lambda
|
||||||
|
|
||||||
|
**Problem**: Lambda waiting for slow operations
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// BAD - Blocking on slow operation
|
||||||
|
export const handler = async (event: any) => {
|
||||||
|
await startBatchJob(); // Returns immediately
|
||||||
|
|
||||||
|
// Wait for job to complete (wastes Lambda time)
|
||||||
|
while (true) {
|
||||||
|
const status = await checkJobStatus();
|
||||||
|
if (status === 'COMPLETE') break;
|
||||||
|
await sleep(1000);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
```
|
||||||
|
|
||||||
|
**Solution**: Use Step Functions with callback pattern
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// GOOD - Step Functions waits, not Lambda
|
||||||
|
const waitForJob = new tasks.LambdaInvoke(this, 'StartJob', {
|
||||||
|
lambdaFunction: startJobFunction,
|
||||||
|
integrationPattern: stepfunctions.IntegrationPattern.WAIT_FOR_TASK_TOKEN,
|
||||||
|
payload: stepfunctions.TaskInput.fromObject({
|
||||||
|
token: stepfunctions.JsonPath.taskToken,
|
||||||
|
}),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### ❌ Large Deployment Packages
|
||||||
|
|
||||||
|
**Problem**: Large Lambda packages increase cold start time
|
||||||
|
|
||||||
|
**Solution**:
|
||||||
|
- Use layers for shared dependencies
|
||||||
|
- Externalize AWS SDK
|
||||||
|
- Minimize bundle size
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
entry: 'src/handler.ts',
|
||||||
|
bundling: {
|
||||||
|
minify: true,
|
||||||
|
externalModules: ['@aws-sdk/*'], // Provided by runtime
|
||||||
|
nodeModules: ['only-needed-deps'], // Installed into node_modules instead of being bundled (e.g. native modules)
|
||||||
|
},
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Performance Optimization
|
||||||
|
|
||||||
|
### Cold Start Optimization
|
||||||
|
|
||||||
|
**Techniques**:
|
||||||
|
1. Minimize package size
|
||||||
|
2. Use provisioned concurrency for critical paths
|
||||||
|
3. Lazy load dependencies
|
||||||
|
4. Reuse connections outside handler
|
||||||
|
5. Use Lambda SnapStart (Java)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// For latency-sensitive APIs
|
||||||
|
const apiFunction = new NodejsFunction(this, 'ApiFunction', {
|
||||||
|
entry: 'src/api.ts',
|
||||||
|
memorySize: 1769, // 1 vCPU for faster initialization
|
||||||
|
});
|
||||||
|
|
||||||
|
const alias = apiFunction.currentVersion.addAlias('live');
|
||||||
|
alias.addAutoScaling({
|
||||||
|
minCapacity: 2,
|
||||||
|
maxCapacity: 10,
|
||||||
|
}).scaleOnUtilization({
|
||||||
|
utilizationTarget: 0.7,
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Right-Sizing Memory
|
||||||
|
|
||||||
|
**Test different memory configurations**:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// CPU-bound workload
|
||||||
|
new NodejsFunction(this, 'ComputeFunction', {
|
||||||
|
memorySize: 1769, // 1 vCPU
|
||||||
|
timeout: Duration.seconds(30),
|
||||||
|
});
|
||||||
|
|
||||||
|
// I/O-bound workload
|
||||||
|
new NodejsFunction(this, 'IOFunction', {
|
||||||
|
memorySize: 512, // Less CPU needed
|
||||||
|
timeout: Duration.seconds(60),
|
||||||
|
});
|
||||||
|
|
||||||
|
// Simple operations
|
||||||
|
new NodejsFunction(this, 'SimpleFunction', {
|
||||||
|
memorySize: 256,
|
||||||
|
timeout: Duration.seconds(10),
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Concurrent Execution Control
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// Protect downstream services
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
reservedConcurrentExecutions: 10, // Max 10 concurrent
|
||||||
|
});
|
||||||
|
|
||||||
|
// Unreserved concurrency (shared pool)
|
||||||
|
new NodejsFunction(this, 'Function', {
|
||||||
|
// Uses unreserved account concurrency
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing Strategies
|
||||||
|
|
||||||
|
### Unit Testing
|
||||||
|
|
||||||
|
Test business logic separate from AWS services:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// handler.ts
|
||||||
|
export const processOrder = async (order: Order): Promise<Result> => {
|
||||||
|
// Business logic (easily testable)
|
||||||
|
const validated = validateOrder(order);
|
||||||
|
const priced = calculatePrice(validated);
|
||||||
|
return transformResult(priced);
|
||||||
|
};
|
||||||
|
|
||||||
|
export const handler = async (event: any): Promise<any> => {
|
||||||
|
const order = parseEvent(event);
|
||||||
|
const result = await processOrder(order);
|
||||||
|
await saveToDatabase(result);
|
||||||
|
return formatResponse(result);
|
||||||
|
};
|
||||||
|
|
||||||
|
// handler.test.ts
|
||||||
|
test('processOrder calculates price correctly', async () => {
|
||||||
|
const order = { items: [{ price: 10, quantity: 2 }] };
|
||||||
|
const result = await processOrder(order);
|
||||||
|
expect(result.total).toBe(20);
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Integration Testing
|
||||||
|
|
||||||
|
Test with actual AWS services:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
// integration.test.ts
|
||||||
|
import { LambdaClient, InvokeCommand } from '@aws-sdk/client-lambda';
|
||||||
|
|
||||||
|
test('Lambda processes order correctly', async () => {
|
||||||
|
const lambda = new LambdaClient({});
|
||||||
|
|
||||||
|
const response = await lambda.send(new InvokeCommand({
|
||||||
|
FunctionName: process.env.FUNCTION_NAME,
|
||||||
|
Payload: JSON.stringify({ orderId: '123' }),
|
||||||
|
}));
|
||||||
|
|
||||||
|
const result = JSON.parse(Buffer.from(response.Payload!).toString());
|
||||||
|
expect(result.statusCode).toBe(200);
|
||||||
|
});
|
||||||
|
```
|
||||||
|
|
||||||
|
### Local Testing with SAM
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Test API locally
|
||||||
|
sam local start-api
|
||||||
|
|
||||||
|
# Invoke function locally
|
||||||
|
sam local invoke MyFunction -e events/test-event.json
|
||||||
|
|
||||||
|
# Generate sample event
|
||||||
|
sam local generate-event apigateway aws-proxy > event.json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Summary
|
||||||
|
|
||||||
|
- **Single Purpose**: One function, one responsibility
|
||||||
|
- **Concurrent Design**: Think concurrency, not volume
|
||||||
|
- **Stateless**: Use external storage for state
|
||||||
|
- **State Machines**: Orchestrate with Step Functions
|
||||||
|
- **Event-Driven**: Use events over direct calls
|
||||||
|
- **Idempotent**: Handle failures and duplicates gracefully
|
||||||
|
- **Observability**: Enable tracing and structured logging
|
||||||
Reference in New Issue
Block a user