Advanced Amazon Bedrock Topics
This document covers advanced patterns and topics for working with Amazon Bedrock using the AWS SDK for Java 2.x.
Multi-Model Service Pattern
Create a service that handles multiple foundation models behind a unified interface.
@Service
public class MultiModelAIService {

    private final BedrockRuntimeClient bedrockRuntimeClient;

    public MultiModelAIService(BedrockRuntimeClient bedrockRuntimeClient) {
        this.bedrockRuntimeClient = bedrockRuntimeClient;
    }

    public GenerationResult generate(GenerationRequest request) {
        String modelId = request.getModelId();
        String prompt = request.getPrompt();

        switch (getModelProvider(modelId)) {
            case ANTHROPIC:
                return generateWithAnthropic(modelId, prompt, request.getConfig());
            case AMAZON:
                return generateWithAmazon(modelId, prompt, request.getConfig());
            case META:
                return generateWithMeta(modelId, prompt, request.getConfig());
            default:
                throw new IllegalArgumentException("Unsupported model provider: " + modelId);
        }
    }

    private GenerationProvider getModelProvider(String modelId) {
        if (modelId.startsWith("anthropic.")) return GenerationProvider.ANTHROPIC;
        if (modelId.startsWith("amazon.")) return GenerationProvider.AMAZON;
        if (modelId.startsWith("meta.")) return GenerationProvider.META;
        throw new IllegalArgumentException("Unknown provider for model: " + modelId);
    }
}
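The provider-specific helpers are not shown here. A minimal sketch of what generateWithAnthropic might look like, assuming the Anthropic Messages request format on Bedrock, org.json for the payload, and hypothetical GenerationConfig.getMaxTokens() and GenerationResult.of(...) accessors:

    private GenerationResult generateWithAnthropic(String modelId, String prompt, GenerationConfig config) {
        // Anthropic models on Bedrock use the Messages API request shape.
        JSONObject payload = new JSONObject()
                .put("anthropic_version", "bedrock-2023-05-31")
                .put("max_tokens", config.getMaxTokens())
                .put("messages", new JSONArray()
                        .put(new JSONObject()
                                .put("role", "user")
                                .put("content", prompt)));

        InvokeModelRequest request = InvokeModelRequest.builder()
                .modelId(modelId)
                .body(SdkBytes.fromUtf8String(payload.toString()))
                .build();

        InvokeModelResponse response = bedrockRuntimeClient.invokeModel(request);
        JSONObject body = new JSONObject(response.body().asUtf8String());
        String text = body.getJSONArray("content").getJSONObject(0).getString("text");
        return GenerationResult.of(text);
    }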
Advanced Error Handling with Retries
Implement robust error handling, with throttling retries and exponential backoff configured on the client:
import java.util.Set;

import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration;
import software.amazon.awssdk.core.retry.RetryPolicy;
import software.amazon.awssdk.core.retry.backoff.BackoffStrategy;
import software.amazon.awssdk.core.retry.conditions.RetryOnExceptionsCondition;

public class BedrockWithRetry {

    private final BedrockRuntimeClient client;

    public BedrockWithRetry() {
        // A RetryPolicy only takes effect when it is attached to the client's
        // override configuration; the SDK then retries with exponential backoff.
        RetryPolicy retryPolicy = RetryPolicy.builder()
                .numRetries(3)
                .retryCondition(RetryOnExceptionsCondition.create(Set.of(ThrottlingException.class)))
                .backoffStrategy(BackoffStrategy.defaultStrategy())
                .build();

        this.client = BedrockRuntimeClient.builder()
                .overrideConfiguration(ClientOverrideConfiguration.builder()
                        .retryPolicy(retryPolicy)
                        .build())
                .build();
    }

    public String invokeModelWithRetry(String modelId, String payload) {
        try {
            InvokeModelRequest request = InvokeModelRequest.builder()
                    .modelId(modelId)
                    .body(SdkBytes.fromUtf8String(payload))
                    .build();

            InvokeModelResponse response = client.invokeModel(request);
            return response.body().asUtf8String();
        } catch (ThrottlingException e) {
            // Reached only after the configured retries are exhausted.
            throw new BedrockThrottledException("Rate limit exceeded for model: " + modelId, e);
        } catch (ValidationException e) {
            throw new BedrockValidationException("Invalid request for model: " + modelId, e);
        }
    }
}
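BedrockThrottledException and BedrockValidationException are application-level exceptions assumed by this example rather than SDK classes; minimal definitions could be:

    // Thin application-level wrappers so callers don't depend on SDK exception types.
    public class BedrockThrottledException extends RuntimeException {
        public BedrockThrottledException(String message, Throwable cause) {
            super(message, cause);
        }
    }

    public class BedrockValidationException extends RuntimeException {
        public BedrockValidationException(String message, Throwable cause) {
            super(message, cause);
        }
    }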
Batch Processing Strategies
Process multiple requests efficiently:
@Service
public class BatchGenerationService {

    private final BedrockRuntimeClient bedrockRuntimeClient;

    public BatchGenerationService(BedrockRuntimeClient bedrockRuntimeClient) {
        this.bedrockRuntimeClient = bedrockRuntimeClient;
    }

    public List<BatchResult> processBatch(List<BatchRequest> requests) {
        // Process in parallel
        return requests.parallelStream()
                .map(this::processSingleRequest)
                .collect(Collectors.toList());
    }

    private BatchResult processSingleRequest(BatchRequest request) {
        try {
            InvokeModelRequest modelRequest = InvokeModelRequest.builder()
                    .modelId(request.getModelId())
                    .body(SdkBytes.fromUtf8String(request.getPayload()))
                    .build();

            InvokeModelResponse response = bedrockRuntimeClient.invokeModel(modelRequest);
            return BatchResult.success(
                    request.getRequestId(),
                    response.body().asUtf8String()
            );
        } catch (Exception e) {
            return BatchResult.failure(request.getRequestId(), e.getMessage());
        }
    }
}
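Note that parallelStream() runs on the JVM-wide common ForkJoinPool, so the number of concurrent Bedrock calls is not under your control and large batches can trigger throttling. A sketch of the same batch on a bounded executor (the pool size of 8 is an arbitrary choice for illustration):

    public List<BatchResult> processBatchBounded(List<BatchRequest> requests) {
        // Cap concurrent Bedrock calls with a dedicated, bounded thread pool.
        ExecutorService executor = Executors.newFixedThreadPool(8);
        try {
            List<CompletableFuture<BatchResult>> futures = requests.stream()
                    .map(r -> CompletableFuture.supplyAsync(() -> processSingleRequest(r), executor))
                    .collect(Collectors.toList());

            return futures.stream()
                    .map(CompletableFuture::join)
                    .collect(Collectors.toList());
        } finally {
            executor.shutdown();
        }
    }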
Performance Optimization Techniques
Connection Pooling
import java.time.Duration;

import software.amazon.awssdk.http.SdkHttpClient;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.regions.Region;

public class BedrockClientFactory {

    public static BedrockRuntimeClient createOptimizedClient() {
        // Pool and reuse HTTP connections instead of opening one per request.
        SdkHttpClient httpClient = ApacheHttpClient.builder()
                .maxConnections(50)
                .socketTimeout(Duration.ofSeconds(30))
                .connectionTimeout(Duration.ofSeconds(30))
                .build();

        return BedrockRuntimeClient.builder()
                .region(Region.US_EAST_1)
                .httpClient(httpClient)
                .build();
    }
}
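If you also use the asynchronous client (required later for streaming), the equivalent tuning is done on the Netty NIO HTTP client; a sketch with the same illustrative limits:

    import java.time.Duration;

    import software.amazon.awssdk.http.async.SdkAsyncHttpClient;
    import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient;
    import software.amazon.awssdk.regions.Region;

    public class BedrockAsyncClientFactory {

        public static BedrockRuntimeAsyncClient createOptimizedAsyncClient() {
            // maxConcurrency bounds the number of simultaneous requests over the pool.
            SdkAsyncHttpClient httpClient = NettyNioAsyncHttpClient.builder()
                    .maxConcurrency(50)
                    .connectionTimeout(Duration.ofSeconds(30))
                    .readTimeout(Duration.ofSeconds(30))
                    .build();

            return BedrockRuntimeAsyncClient.builder()
                    .region(Region.US_EAST_1)
                    .httpClient(httpClient)
                    .build();
        }
    }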
Response Caching
import java.util.concurrent.TimeUnit;

import com.github.benmanes.caffeine.cache.Cache;
import com.github.benmanes.caffeine.cache.Caffeine;

@Service
public class CachedAIService {

    private final BedrockRuntimeClient bedrockRuntimeClient;
    private final Cache<String, String> responseCache;

    public CachedAIService(BedrockRuntimeClient bedrockRuntimeClient) {
        this.bedrockRuntimeClient = bedrockRuntimeClient;
        this.responseCache = Caffeine.newBuilder()
                .maximumSize(1000)
                .expireAfterWrite(1, TimeUnit.HOURS)
                .build();
    }

    public String generateText(String prompt, String modelId) {
        String cacheKey = modelId + ":" + prompt.hashCode();
        return responseCache.get(cacheKey, key -> {
            String payload = createPayload(modelId, prompt);
            InvokeModelRequest request = InvokeModelRequest.builder()
                    .modelId(modelId)
                    .body(SdkBytes.fromUtf8String(payload))
                    .build();

            InvokeModelResponse response = bedrockRuntimeClient.invokeModel(request);
            return response.body().asUtf8String();
        });
    }
}
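Keying the cache on prompt.hashCode() can collide, in which case one prompt would silently receive another prompt's cached response. A collision-resistant key can be derived from a SHA-256 digest instead (a sketch using java.security.MessageDigest and java.util.Base64):

    private String cacheKey(String modelId, String prompt) {
        try {
            // Hash the full prompt so distinct prompts cannot share a cache entry.
            MessageDigest digest = MessageDigest.getInstance("SHA-256");
            byte[] hash = digest.digest(prompt.getBytes(StandardCharsets.UTF_8));
            return modelId + ":" + Base64.getEncoder().encodeToString(hash);
        } catch (NoSuchAlgorithmException e) {
            throw new IllegalStateException("SHA-256 not available", e);
        }
    }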
Custom Response Parsing
Create specialized parsers for different model responses:
import org.json.JSONObject;

public interface ResponseParser {
    String parse(String responseJson);
}

public class AnthropicResponseParser implements ResponseParser {

    @Override
    public String parse(String responseJson) {
        try {
            JSONObject jsonResponse = new JSONObject(responseJson);
            return jsonResponse.getJSONArray("content")
                    .getJSONObject(0)
                    .getString("text");
        } catch (Exception e) {
            throw new ResponseParsingException("Failed to parse Anthropic response", e);
        }
    }
}

public class AmazonTitanResponseParser implements ResponseParser {

    @Override
    public String parse(String responseJson) {
        try {
            JSONObject jsonResponse = new JSONObject(responseJson);
            return jsonResponse.getJSONArray("results")
                    .getJSONObject(0)
                    .getString("outputText");
        } catch (Exception e) {
            throw new ResponseParsingException("Failed to parse Amazon Titan response", e);
        }
    }
}

public class LlamaResponseParser implements ResponseParser {

    @Override
    public String parse(String responseJson) {
        try {
            JSONObject jsonResponse = new JSONObject(responseJson);
            return jsonResponse.getString("generation");
        } catch (Exception e) {
            throw new ResponseParsingException("Failed to parse Llama response", e);
        }
    }
}
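A small registry can tie these parsers back to the provider prefixes used in the multi-model service; the prefix-to-parser mapping here is an illustrative assumption:

    public class ResponseParserRegistry {

        private final Map<String, ResponseParser> parsersByPrefix = Map.of(
                "anthropic.", new AnthropicResponseParser(),
                "amazon.", new AmazonTitanResponseParser(),
                "meta.", new LlamaResponseParser()
        );

        public ResponseParser forModel(String modelId) {
            // Pick the parser whose provider prefix matches the model id.
            return parsersByPrefix.entrySet().stream()
                    .filter(entry -> modelId.startsWith(entry.getKey()))
                    .map(Map.Entry::getValue)
                    .findFirst()
                    .orElseThrow(() -> new IllegalArgumentException("No parser for model: " + modelId));
        }
    }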
Metrics and Monitoring
Implement comprehensive monitoring:
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Timer;

@Service
public class MonitoredAIService {

    private final BedrockRuntimeClient bedrockRuntimeClient;
    private final Timer generationTimer;
    private final Counter errorCounter;

    public MonitoredAIService(BedrockRuntimeClient bedrockRuntimeClient,
                              MeterRegistry meterRegistry) {
        this.bedrockRuntimeClient = bedrockRuntimeClient;
        this.generationTimer = Timer.builder("bedrock.generation.time")
                .description("Time spent generating text with Bedrock")
                .register(meterRegistry);
        this.errorCounter = Counter.builder("bedrock.generation.errors")
                .description("Number of generation errors")
                .register(meterRegistry);
    }

    public String generateText(String prompt, String modelId) {
        return generationTimer.record(() -> {
            try {
                String payload = createPayload(modelId, prompt);
                InvokeModelRequest request = InvokeModelRequest.builder()
                        .modelId(modelId)
                        .body(SdkBytes.fromUtf8String(payload))
                        .build();

                InvokeModelResponse response = bedrockRuntimeClient.invokeModel(request);
                return response.body().asUtf8String();
            } catch (Exception e) {
                errorCounter.increment();
                throw new GenerationException("Failed to generate text", e);
            }
        });
    }
}
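To break latency and error counts down by model, the meters can be resolved per call with a modelId tag; Micrometer returns the same meter for an identical name-and-tags combination, so the lookup is cheap. The sketch below assumes the MeterRegistry is also kept as a field:

    public String generateTextTagged(String prompt, String modelId) {
        // Resolving the timer per call records a separate series per model id.
        Timer timer = Timer.builder("bedrock.generation.time")
                .tag("modelId", modelId)
                .register(meterRegistry);

        return timer.record(() -> {
            try {
                String payload = createPayload(modelId, prompt);
                InvokeModelRequest request = InvokeModelRequest.builder()
                        .modelId(modelId)
                        .body(SdkBytes.fromUtf8String(payload))
                        .build();
                return bedrockRuntimeClient.invokeModel(request).body().asUtf8String();
            } catch (Exception e) {
                meterRegistry.counter("bedrock.generation.errors", "modelId", modelId).increment();
                throw new GenerationException("Failed to generate text", e);
            }
        });
    }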
Advanced Configuration Management
@Configuration
@ConfigurationProperties(prefix = "bedrock")
public class AdvancedBedrockConfiguration {

    private String defaultRegion = "us-east-1";
    private int maxRetries = 3;
    private Duration timeout = Duration.ofSeconds(30);
    private boolean enableMetrics = true;
    private int maxCacheSize = 1000;
    private Duration cacheExpireAfter = Duration.ofHours(1);

    @Bean
    @Primary
    public BedrockRuntimeClient bedrockRuntimeClient() {
        ClientOverrideConfiguration.Builder overrides = ClientOverrideConfiguration.builder()
                .apiCallTimeout(timeout)
                .retryPolicy(RetryPolicy.builder().numRetries(maxRetries).build());

        if (enableMetrics) {
            // Client-side SDK metrics are enabled by registering a MetricPublisher;
            // CloudWatchMetricPublisher comes from the optional
            // software.amazon.awssdk:cloudwatch-metric-publisher module.
            overrides.addMetricPublisher(CloudWatchMetricPublisher.create());
        }

        return BedrockRuntimeClient.builder()
                .region(Region.of(defaultRegion))
                .overrideConfiguration(overrides.build())
                .build();
    }

    // Getters and setters
}
Streaming Response Handling
Streaming responses require the asynchronous client (BedrockRuntimeAsyncClient). The example below bridges the SDK's event-stream callbacks into a Reactor Flux, with backpressure and cancellation handled by the FluxSink:
@Service
public class StreamingAIService {

    // InvokeModelWithResponseStream is an event-streaming operation, so it is
    // exposed on the asynchronous client in the AWS SDK for Java 2.x.
    private final BedrockRuntimeAsyncClient bedrockRuntimeAsyncClient;

    public StreamingAIService(BedrockRuntimeAsyncClient bedrockRuntimeAsyncClient) {
        this.bedrockRuntimeAsyncClient = bedrockRuntimeAsyncClient;
    }

    public Flux<String> streamResponse(String modelId, String prompt) {
        InvokeModelWithResponseStreamRequest request =
                InvokeModelWithResponseStreamRequest.builder()
                        .modelId(modelId)
                        .body(SdkBytes.fromUtf8String(createPayload(modelId, prompt)))
                        .build();

        return Flux.create((FluxSink<String> sink) -> {
            InvokeModelWithResponseStreamResponseHandler handler =
                    InvokeModelWithResponseStreamResponseHandler.builder()
                            .subscriber(event -> event.accept(
                                    InvokeModelWithResponseStreamResponseHandler.Visitor.builder()
                                            .onChunk(chunk -> processChunk(chunk.bytes().asUtf8String(), sink))
                                            .build()))
                            .onComplete(sink::complete)
                            .onError(e -> sink.error(new StreamingException("Stream failed", e)))
                            .build();

            CompletableFuture<Void> future =
                    bedrockRuntimeAsyncClient.invokeModelWithResponseStream(request, handler);
            // Stop the underlying call if the downstream subscriber cancels.
            sink.onCancel(() -> future.cancel(true));
        });
    }

    private void processChunk(String chunk, FluxSink<String> sink) {
        try {
            JSONObject chunkJson = new JSONObject(chunk);
            // Anthropic streaming emits "content_block_delta" events that carry the new text.
            if ("content_block_delta".equals(chunkJson.getString("type"))) {
                String text = chunkJson.getJSONObject("delta").getString("text");
                sink.next(text);
            }
        } catch (Exception e) {
            sink.error(new ChunkProcessingException("Failed to process chunk", e));
        }
    }
}
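A typical consumer of this Flux is a WebFlux endpoint that relays chunks as server-sent events; a minimal sketch (the /ai/stream path and request parameters are illustrative):

    @RestController
    public class StreamingController {

        private final StreamingAIService streamingAIService;

        public StreamingController(StreamingAIService streamingAIService) {
            this.streamingAIService = streamingAIService;
        }

        @GetMapping(value = "/ai/stream", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
        public Flux<String> stream(@RequestParam String modelId, @RequestParam String prompt) {
            // Each emitted chunk is sent to the client as a server-sent event.
            return streamingAIService.streamResponse(modelId, prompt);
        }
    }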