372 lines
12 KiB
Markdown
372 lines
12 KiB
Markdown
# Advanced Amazon Bedrock Topics
|
|
|
|
This document covers advanced patterns and topics for working with Amazon Bedrock using AWS SDK for Java 2.x.
|
|
|
|
## Multi-Model Service Pattern
|
|
|
|
Create a service that can handle multiple foundation models with unified interfaces.
|
|
|
|
```java
|
|
@Service
|
|
public class MultiModelAIService {
|
|
|
|
private final BedrockRuntimeClient bedrockRuntimeClient;
|
|
|
|
public MultiModelAIService(BedrockRuntimeClient bedrockRuntimeClient) {
|
|
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
|
}
|
|
|
|
public GenerationResult generate(GenerationRequest request) {
|
|
String modelId = request.getModelId();
|
|
String prompt = request.getPrompt();
|
|
|
|
switch (getModelProvider(modelId)) {
|
|
case ANTHROPIC:
|
|
return generateWithAnthropic(modelId, prompt, request.getConfig());
|
|
case AMAZON:
|
|
return generateWithAmazon(modelId, prompt, request.getConfig());
|
|
case META:
|
|
return generateWithMeta(modelId, prompt, request.getConfig());
|
|
default:
|
|
throw new IllegalArgumentException("Unsupported model provider: " + modelId);
|
|
}
|
|
}
|
|
|
|
private GenerationProvider getModelProvider(String modelId) {
|
|
if (modelId.startsWith("anthropic.")) return GenerationProvider.ANTHROPIC;
|
|
if (modelId.startsWith("amazon.")) return GenerationProvider.AMazon;
|
|
if (modelId.startsWith("meta.")) return GenerationProvider.META;
|
|
throw new IllegalArgumentException("Unknown provider for model: " + modelId);
|
|
}
|
|
}
|
|
```
|
|
|
|
## Advanced Error Handling with Retries
|
|
|
|
Implement robust error handling with exponential backoff:
|
|
|
|
```java
|
|
import software.amazon.awssdk.core.retry.RetryPolicy;
|
|
import software.amazon.awssdk.core.retry.backoff.BackoffStrategy;
|
|
import software.amazon.awssdk.core.retry.conditions.RetryCondition;
|
|
import software.amazon.awssdk.core.retry.predicates.RetryExceptionPredicates;
|
|
|
|
public class BedrockWithRetry {
|
|
|
|
private final BedrockRuntimeClient client;
|
|
private final RetryPolicy retryPolicy;
|
|
|
|
public BedrockWithRetry(BedrockRuntimeClient client) {
|
|
this.client = client;
|
|
this.retryPolicy = RetryPolicy.builder()
|
|
.numRetries(3)
|
|
.retryCondition(RetryExceptionPredicates.equalTo(
|
|
ThrottlingException.class))
|
|
.backoffStrategy(BackoffStrategy.defaultStrategy())
|
|
.build();
|
|
}
|
|
|
|
public String invokeModelWithRetry(String modelId, String payload) {
|
|
try {
|
|
InvokeModelRequest request = InvokeModelRequest.builder()
|
|
.modelId(modelId)
|
|
.body(SdkBytes.fromUtf8String(payload))
|
|
.build();
|
|
|
|
InvokeModelResponse response = client.invokeModel(request);
|
|
return response.body().asUtf8String();
|
|
|
|
} catch (ThrottlingException e) {
|
|
throw new BedrockThrottledException("Rate limit exceeded for model: " + modelId, e);
|
|
} catch (ValidationException e) {
|
|
throw new BedrockValidationException("Invalid request for model: " + modelId, e);
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
## Batch Processing Strategies
|
|
|
|
Process multiple requests efficiently:
|
|
|
|
```java
|
|
@Service
|
|
public class BatchGenerationService {
|
|
|
|
private final BedrockRuntimeClient bedrockRuntimeClient;
|
|
|
|
public BatchGenerationService(BedrockRuntimeClient bedrockRuntimeClient) {
|
|
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
|
}
|
|
|
|
public List<BatchResult> processBatch(List<BatchRequest> requests) {
|
|
// Process in parallel
|
|
return requests.parallelStream()
|
|
.map(this::processSingleRequest)
|
|
.collect(Collectors.toList());
|
|
}
|
|
|
|
private BatchResult processSingleRequest(BatchRequest request) {
|
|
try {
|
|
InvokeModelRequest modelRequest = InvokeModelRequest.builder()
|
|
.modelId(request.getModelId())
|
|
.body(SdkBytes.fromUtf8String(request.getPayload()))
|
|
.build();
|
|
|
|
InvokeModelResponse response = bedrockRuntimeClient.invokeModel(modelRequest);
|
|
|
|
return BatchResult.success(
|
|
request.getRequestId(),
|
|
response.body().asUtf8String()
|
|
);
|
|
|
|
} catch (Exception e) {
|
|
return BatchResult.failure(request.getRequestId(), e.getMessage());
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
## Performance Optimization Techniques
|
|
|
|
### Connection Pooling
|
|
|
|
```java
|
|
import software.amazon.awssdk.http.nio.apache.ApacheHttpClient;
|
|
import software.amazon.awssdk.http.apache.ProxyConfiguration;
|
|
import software.amazon.awssdk.regions.Region;
|
|
|
|
public class BedrockClientFactory {
|
|
|
|
public static BedrockRuntimeClient createOptimizedClient() {
|
|
ApacheHttpClient httpClient = ApacheHttpClient.builder()
|
|
.connectionPoolMaxConnections(50)
|
|
.socketTimeout(Duration.ofSeconds(30))
|
|
.connectionTimeout(Duration.ofSeconds(30))
|
|
.build();
|
|
|
|
return BedrockRuntimeClient.builder()
|
|
.region(Region.US_EAST_1)
|
|
.httpClient(httpClient)
|
|
.build();
|
|
}
|
|
}
|
|
```
|
|
|
|
### Response Caching
|
|
|
|
```java
|
|
import com.github.benmanes.caffeine.cache.Cache;
|
|
import com.github.benmanes.caffeine.cache.Caffeine;
|
|
|
|
@Service
|
|
public class CachedAIService {
|
|
|
|
private final BedrockRuntimeClient bedrockRuntimeClient;
|
|
private final Cache<String, String> responseCache;
|
|
|
|
public CachedAIService(BedrockRuntimeClient bedrockRuntimeClient) {
|
|
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
|
this.responseCache = Caffeine.newBuilder()
|
|
.maximumSize(1000)
|
|
.expireAfterWrite(1, TimeUnit.HOURS)
|
|
.build();
|
|
}
|
|
|
|
public String generateText(String prompt, String modelId) {
|
|
String cacheKey = modelId + ":" + prompt.hashCode();
|
|
|
|
return responseCache.get(cacheKey, key -> {
|
|
String payload = createPayload(modelId, prompt);
|
|
InvokeModelRequest request = InvokeModelRequest.builder()
|
|
.modelId(modelId)
|
|
.body(SdkBytes.fromUtf8String(payload))
|
|
.build();
|
|
|
|
InvokeModelResponse response = bedrockRuntimeClient.invokeModel(request);
|
|
return response.body().asUtf8String();
|
|
});
|
|
}
|
|
}
|
|
```
|
|
|
|
## Custom Response Parsing
|
|
|
|
Create specialized parsers for different model responses:
|
|
|
|
```java
|
|
public interface ResponseParser {
|
|
String parse(String responseJson);
|
|
}
|
|
|
|
public class AnthropicResponseParser implements ResponseParser {
|
|
@Override
|
|
public String parse(String responseJson) {
|
|
try {
|
|
JSONObject jsonResponse = new JSONObject(responseJson);
|
|
return jsonResponse.getJSONArray("content")
|
|
.getJSONObject(0)
|
|
.getString("text");
|
|
} catch (Exception e) {
|
|
throw new ResponseParsingException("Failed to parse Anthropic response", e);
|
|
}
|
|
}
|
|
}
|
|
|
|
public class AmazonTitanResponseParser implements ResponseParser {
|
|
@Override
|
|
public String parse(String responseJson) {
|
|
try {
|
|
JSONObject jsonResponse = new JSONObject(responseJson);
|
|
return jsonResponse.getJSONArray("results")
|
|
.getJSONObject(0)
|
|
.getString("outputText");
|
|
} catch (Exception e) {
|
|
throw new ResponseParsingException("Failed to parse Amazon Titan response", e);
|
|
}
|
|
}
|
|
}
|
|
|
|
public class LlamaResponseParser implements ResponseParser {
|
|
@Override
|
|
public String parse(String responseJson) {
|
|
try {
|
|
JSONObject jsonResponse = new JSONObject(responseJson);
|
|
return jsonResponse.getString("generation");
|
|
} catch (Exception e) {
|
|
throw new ResponseParsingException("Failed to parse Llama response", e);
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
## Metrics and Monitoring
|
|
|
|
Implement comprehensive monitoring:
|
|
|
|
```java
|
|
import io.micrometer.core.instrument.MeterRegistry;
|
|
import io.micrometer.core.instrument.Timer;
|
|
|
|
@Service
|
|
public class MonitoredAIService {
|
|
|
|
private final BedrockRuntimeClient bedrockRuntimeClient;
|
|
private final Timer generationTimer;
|
|
private final Counter errorCounter;
|
|
|
|
public MonitoredAIService(BedrockRuntimeClient bedrockRuntimeClient,
|
|
MeterRegistry meterRegistry) {
|
|
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
|
this.generationTimer = Timer.builder("bedrock.generation.time")
|
|
.description("Time spent generating text with Bedrock")
|
|
.register(meterRegistry);
|
|
this.errorCounter = Counter.builder("bedrock.generation.errors")
|
|
.description("Number of generation errors")
|
|
.register(meterRegistry);
|
|
}
|
|
|
|
public String generateText(String prompt, String modelId) {
|
|
return generationTimer.record(() -> {
|
|
try {
|
|
String payload = createPayload(modelId, prompt);
|
|
InvokeModelRequest request = InvokeModelRequest.builder()
|
|
.modelId(modelId)
|
|
.body(SdkBytes.fromUtf8String(payload))
|
|
.build();
|
|
|
|
InvokeModelResponse response = bedrockRuntimeClient.invokeModel(request);
|
|
return response.body().asUtf8String();
|
|
|
|
} catch (Exception e) {
|
|
errorCounter.increment();
|
|
throw new GenerationException("Failed to generate text", e);
|
|
}
|
|
});
|
|
}
|
|
}
|
|
```
|
|
|
|
## Advanced Configuration Management
|
|
|
|
```java
|
|
@Configuration
|
|
@ConfigurationProperties(prefix = "bedrock")
|
|
public class AdvancedBedrockConfiguration {
|
|
|
|
private String defaultRegion = "us-east-1";
|
|
private int maxRetries = 3;
|
|
private Duration timeout = Duration.ofSeconds(30);
|
|
private boolean enableMetrics = true;
|
|
private int maxCacheSize = 1000;
|
|
private Duration cacheExpireAfter = Duration.ofHours(1);
|
|
|
|
@Bean
|
|
@Primary
|
|
public BedrockRuntimeClient bedrockRuntimeClient() {
|
|
BedrockRuntimeClient.Builder builder = BedrockRuntimeClient.builder()
|
|
.region(Region.of(defaultRegion));
|
|
|
|
if (enableMetrics) {
|
|
builder.overrideConfiguration(c -> c.putAdvancedProperty(
|
|
"metrics.enabled", "true"));
|
|
}
|
|
|
|
return builder.build();
|
|
}
|
|
|
|
// Getters and setters
|
|
}
|
|
```
|
|
|
|
## Streaming Response Handling
|
|
|
|
Advanced streaming with proper backpressure handling:
|
|
|
|
```java
|
|
@Service
|
|
public class StreamingAIService {
|
|
|
|
private final BedrockRuntimeClient bedrockRuntimeClient;
|
|
|
|
public StreamingAIService(BedrockRuntimeClient bedrockRuntimeClient) {
|
|
this.bedrockRuntimeClient = bedrockRuntimeClient;
|
|
}
|
|
|
|
public Flux<String> streamResponse(String modelId, String prompt) {
|
|
InvokeModelWithResponseStreamRequest request =
|
|
InvokeModelWithResponseStreamRequest.builder()
|
|
.modelId(modelId)
|
|
.body(SdkBytes.fromUtf8String(createPayload(modelId, prompt)))
|
|
.build();
|
|
|
|
return Mono.fromCallable(() ->
|
|
bedrockRuntimeClient.invokeModelWithResponseStream(request))
|
|
.flatMapMany(responseStream -> Flux.defer(() ->
|
|
Flux.create(sink -> {
|
|
responseStream.stream().forEach(event -> {
|
|
if (event instanceof PayloadPart) {
|
|
PayloadPart payloadPart = (PayloadPart) event;
|
|
String chunk = payloadPart.bytes().asUtf8String();
|
|
processChunk(chunk, sink);
|
|
}
|
|
});
|
|
sink.complete();
|
|
}))
|
|
)
|
|
.onErrorResume(e -> Flux.error(new StreamingException("Stream failed", e)));
|
|
}
|
|
|
|
private void processChunk(String chunk, FluxSink<String> sink) {
|
|
try {
|
|
JSONObject chunkJson = new JSONObject(chunk);
|
|
if (chunkJson.getString("type").equals("content_block_delta")) {
|
|
String text = chunkJson.getJSONObject("delta").getString("text");
|
|
sink.next(text);
|
|
}
|
|
} catch (Exception e) {
|
|
sink.error(new ChunkProcessingException("Failed to process chunk", e));
|
|
}
|
|
}
|
|
}
|
|
``` |