Initial commit
This commit is contained in:
@@ -0,0 +1,372 @@
|
||||
# Advanced Amazon Bedrock Topics
|
||||
|
||||
This document covers advanced patterns and topics for working with Amazon Bedrock using AWS SDK for Java 2.x.
|
||||
|
||||
## Multi-Model Service Pattern
|
||||
|
||||
Create a service that can handle multiple foundation models with unified interfaces.
|
||||
|
||||
```java
|
||||
@Service
|
||||
public class MultiModelAIService {
|
||||
|
||||
private final BedrockRuntimeClient bedrockRuntimeClient;
|
||||
|
||||
public MultiModelAIService(BedrockRuntimeClient bedrockRuntimeClient) {
|
||||
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
||||
}
|
||||
|
||||
public GenerationResult generate(GenerationRequest request) {
|
||||
String modelId = request.getModelId();
|
||||
String prompt = request.getPrompt();
|
||||
|
||||
switch (getModelProvider(modelId)) {
|
||||
case ANTHROPIC:
|
||||
return generateWithAnthropic(modelId, prompt, request.getConfig());
|
||||
case AMAZON:
|
||||
return generateWithAmazon(modelId, prompt, request.getConfig());
|
||||
case META:
|
||||
return generateWithMeta(modelId, prompt, request.getConfig());
|
||||
default:
|
||||
throw new IllegalArgumentException("Unsupported model provider: " + modelId);
|
||||
}
|
||||
}
|
||||
|
||||
private GenerationProvider getModelProvider(String modelId) {
|
||||
if (modelId.startsWith("anthropic.")) return GenerationProvider.ANTHROPIC;
|
||||
if (modelId.startsWith("amazon.")) return GenerationProvider.AMazon;
|
||||
if (modelId.startsWith("meta.")) return GenerationProvider.META;
|
||||
throw new IllegalArgumentException("Unknown provider for model: " + modelId);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced Error Handling with Retries
|
||||
|
||||
Implement robust error handling with exponential backoff:
|
||||
|
||||
```java
|
||||
import software.amazon.awssdk.core.retry.RetryPolicy;
|
||||
import software.amazon.awssdk.core.retry.backoff.BackoffStrategy;
|
||||
import software.amazon.awssdk.core.retry.conditions.RetryCondition;
|
||||
import software.amazon.awssdk.core.retry.predicates.RetryExceptionPredicates;
|
||||
|
||||
public class BedrockWithRetry {
|
||||
|
||||
private final BedrockRuntimeClient client;
|
||||
private final RetryPolicy retryPolicy;
|
||||
|
||||
public BedrockWithRetry(BedrockRuntimeClient client) {
|
||||
this.client = client;
|
||||
this.retryPolicy = RetryPolicy.builder()
|
||||
.numRetries(3)
|
||||
.retryCondition(RetryExceptionPredicates.equalTo(
|
||||
ThrottlingException.class))
|
||||
.backoffStrategy(BackoffStrategy.defaultStrategy())
|
||||
.build();
|
||||
}
|
||||
|
||||
public String invokeModelWithRetry(String modelId, String payload) {
|
||||
try {
|
||||
InvokeModelRequest request = InvokeModelRequest.builder()
|
||||
.modelId(modelId)
|
||||
.body(SdkBytes.fromUtf8String(payload))
|
||||
.build();
|
||||
|
||||
InvokeModelResponse response = client.invokeModel(request);
|
||||
return response.body().asUtf8String();
|
||||
|
||||
} catch (ThrottlingException e) {
|
||||
throw new BedrockThrottledException("Rate limit exceeded for model: " + modelId, e);
|
||||
} catch (ValidationException e) {
|
||||
throw new BedrockValidationException("Invalid request for model: " + modelId, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Batch Processing Strategies
|
||||
|
||||
Process multiple requests efficiently:
|
||||
|
||||
```java
|
||||
@Service
|
||||
public class BatchGenerationService {
|
||||
|
||||
private final BedrockRuntimeClient bedrockRuntimeClient;
|
||||
|
||||
public BatchGenerationService(BedrockRuntimeClient bedrockRuntimeClient) {
|
||||
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
||||
}
|
||||
|
||||
public List<BatchResult> processBatch(List<BatchRequest> requests) {
|
||||
// Process in parallel
|
||||
return requests.parallelStream()
|
||||
.map(this::processSingleRequest)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
private BatchResult processSingleRequest(BatchRequest request) {
|
||||
try {
|
||||
InvokeModelRequest modelRequest = InvokeModelRequest.builder()
|
||||
.modelId(request.getModelId())
|
||||
.body(SdkBytes.fromUtf8String(request.getPayload()))
|
||||
.build();
|
||||
|
||||
InvokeModelResponse response = bedrockRuntimeClient.invokeModel(modelRequest);
|
||||
|
||||
return BatchResult.success(
|
||||
request.getRequestId(),
|
||||
response.body().asUtf8String()
|
||||
);
|
||||
|
||||
} catch (Exception e) {
|
||||
return BatchResult.failure(request.getRequestId(), e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Performance Optimization Techniques
|
||||
|
||||
### Connection Pooling
|
||||
|
||||
```java
|
||||
import software.amazon.awssdk.http.nio.apache.ApacheHttpClient;
|
||||
import software.amazon.awssdk.http.apache.ProxyConfiguration;
|
||||
import software.amazon.awssdk.regions.Region;
|
||||
|
||||
public class BedrockClientFactory {
|
||||
|
||||
public static BedrockRuntimeClient createOptimizedClient() {
|
||||
ApacheHttpClient httpClient = ApacheHttpClient.builder()
|
||||
.connectionPoolMaxConnections(50)
|
||||
.socketTimeout(Duration.ofSeconds(30))
|
||||
.connectionTimeout(Duration.ofSeconds(30))
|
||||
.build();
|
||||
|
||||
return BedrockRuntimeClient.builder()
|
||||
.region(Region.US_EAST_1)
|
||||
.httpClient(httpClient)
|
||||
.build();
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Response Caching
|
||||
|
||||
```java
|
||||
import com.github.benmanes.caffeine.cache.Cache;
|
||||
import com.github.benmanes.caffeine.cache.Caffeine;
|
||||
|
||||
@Service
|
||||
public class CachedAIService {
|
||||
|
||||
private final BedrockRuntimeClient bedrockRuntimeClient;
|
||||
private final Cache<String, String> responseCache;
|
||||
|
||||
public CachedAIService(BedrockRuntimeClient bedrockRuntimeClient) {
|
||||
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
||||
this.responseCache = Caffeine.newBuilder()
|
||||
.maximumSize(1000)
|
||||
.expireAfterWrite(1, TimeUnit.HOURS)
|
||||
.build();
|
||||
}
|
||||
|
||||
public String generateText(String prompt, String modelId) {
|
||||
String cacheKey = modelId + ":" + prompt.hashCode();
|
||||
|
||||
return responseCache.get(cacheKey, key -> {
|
||||
String payload = createPayload(modelId, prompt);
|
||||
InvokeModelRequest request = InvokeModelRequest.builder()
|
||||
.modelId(modelId)
|
||||
.body(SdkBytes.fromUtf8String(payload))
|
||||
.build();
|
||||
|
||||
InvokeModelResponse response = bedrockRuntimeClient.invokeModel(request);
|
||||
return response.body().asUtf8String();
|
||||
});
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Custom Response Parsing
|
||||
|
||||
Create specialized parsers for different model responses:
|
||||
|
||||
```java
|
||||
public interface ResponseParser {
|
||||
String parse(String responseJson);
|
||||
}
|
||||
|
||||
public class AnthropicResponseParser implements ResponseParser {
|
||||
@Override
|
||||
public String parse(String responseJson) {
|
||||
try {
|
||||
JSONObject jsonResponse = new JSONObject(responseJson);
|
||||
return jsonResponse.getJSONArray("content")
|
||||
.getJSONObject(0)
|
||||
.getString("text");
|
||||
} catch (Exception e) {
|
||||
throw new ResponseParsingException("Failed to parse Anthropic response", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public class AmazonTitanResponseParser implements ResponseParser {
|
||||
@Override
|
||||
public String parse(String responseJson) {
|
||||
try {
|
||||
JSONObject jsonResponse = new JSONObject(responseJson);
|
||||
return jsonResponse.getJSONArray("results")
|
||||
.getJSONObject(0)
|
||||
.getString("outputText");
|
||||
} catch (Exception e) {
|
||||
throw new ResponseParsingException("Failed to parse Amazon Titan response", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public class LlamaResponseParser implements ResponseParser {
|
||||
@Override
|
||||
public String parse(String responseJson) {
|
||||
try {
|
||||
JSONObject jsonResponse = new JSONObject(responseJson);
|
||||
return jsonResponse.getString("generation");
|
||||
} catch (Exception e) {
|
||||
throw new ResponseParsingException("Failed to parse Llama response", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Metrics and Monitoring
|
||||
|
||||
Implement comprehensive monitoring:
|
||||
|
||||
```java
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import io.micrometer.core.instrument.Timer;
|
||||
|
||||
@Service
|
||||
public class MonitoredAIService {
|
||||
|
||||
private final BedrockRuntimeClient bedrockRuntimeClient;
|
||||
private final Timer generationTimer;
|
||||
private final Counter errorCounter;
|
||||
|
||||
public MonitoredAIService(BedrockRuntimeClient bedrockRuntimeClient,
|
||||
MeterRegistry meterRegistry) {
|
||||
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
||||
this.generationTimer = Timer.builder("bedrock.generation.time")
|
||||
.description("Time spent generating text with Bedrock")
|
||||
.register(meterRegistry);
|
||||
this.errorCounter = Counter.builder("bedrock.generation.errors")
|
||||
.description("Number of generation errors")
|
||||
.register(meterRegistry);
|
||||
}
|
||||
|
||||
public String generateText(String prompt, String modelId) {
|
||||
return generationTimer.record(() -> {
|
||||
try {
|
||||
String payload = createPayload(modelId, prompt);
|
||||
InvokeModelRequest request = InvokeModelRequest.builder()
|
||||
.modelId(modelId)
|
||||
.body(SdkBytes.fromUtf8String(payload))
|
||||
.build();
|
||||
|
||||
InvokeModelResponse response = bedrockRuntimeClient.invokeModel(request);
|
||||
return response.body().asUtf8String();
|
||||
|
||||
} catch (Exception e) {
|
||||
errorCounter.increment();
|
||||
throw new GenerationException("Failed to generate text", e);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Advanced Configuration Management
|
||||
|
||||
```java
|
||||
@Configuration
|
||||
@ConfigurationProperties(prefix = "bedrock")
|
||||
public class AdvancedBedrockConfiguration {
|
||||
|
||||
private String defaultRegion = "us-east-1";
|
||||
private int maxRetries = 3;
|
||||
private Duration timeout = Duration.ofSeconds(30);
|
||||
private boolean enableMetrics = true;
|
||||
private int maxCacheSize = 1000;
|
||||
private Duration cacheExpireAfter = Duration.ofHours(1);
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
public BedrockRuntimeClient bedrockRuntimeClient() {
|
||||
BedrockRuntimeClient.Builder builder = BedrockRuntimeClient.builder()
|
||||
.region(Region.of(defaultRegion));
|
||||
|
||||
if (enableMetrics) {
|
||||
builder.overrideConfiguration(c -> c.putAdvancedProperty(
|
||||
"metrics.enabled", "true"));
|
||||
}
|
||||
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
// Getters and setters
|
||||
}
|
||||
```
|
||||
|
||||
## Streaming Response Handling
|
||||
|
||||
Advanced streaming with proper backpressure handling:
|
||||
|
||||
```java
|
||||
@Service
|
||||
public class StreamingAIService {
|
||||
|
||||
private final BedrockRuntimeClient bedrockRuntimeClient;
|
||||
|
||||
public StreamingAIService(BedrockRuntimeClient bedrockRuntimeClient) {
|
||||
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
||||
}
|
||||
|
||||
public Flux<String> streamResponse(String modelId, String prompt) {
|
||||
InvokeModelWithResponseStreamRequest request =
|
||||
InvokeModelWithResponseStreamRequest.builder()
|
||||
.modelId(modelId)
|
||||
.body(SdkBytes.fromUtf8String(createPayload(modelId, prompt)))
|
||||
.build();
|
||||
|
||||
return Mono.fromCallable(() ->
|
||||
bedrockRuntimeClient.invokeModelWithResponseStream(request))
|
||||
.flatMapMany(responseStream -> Flux.defer(() ->
|
||||
Flux.create(sink -> {
|
||||
responseStream.stream().forEach(event -> {
|
||||
if (event instanceof PayloadPart) {
|
||||
PayloadPart payloadPart = (PayloadPart) event;
|
||||
String chunk = payloadPart.bytes().asUtf8String();
|
||||
processChunk(chunk, sink);
|
||||
}
|
||||
});
|
||||
sink.complete();
|
||||
}))
|
||||
)
|
||||
.onErrorResume(e -> Flux.error(new StreamingException("Stream failed", e)));
|
||||
}
|
||||
|
||||
private void processChunk(String chunk, FluxSink<String> sink) {
|
||||
try {
|
||||
JSONObject chunkJson = new JSONObject(chunk);
|
||||
if (chunkJson.getString("type").equals("content_block_delta")) {
|
||||
String text = chunkJson.getJSONObject("delta").getString("text");
|
||||
sink.next(text);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
sink.error(new ChunkProcessingException("Failed to process chunk", e));
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
Reference in New Issue
Block a user