---
name: tokio-concurrency
description: Advanced concurrency patterns for Tokio including fan-out/fan-in, pipeline processing, rate limiting, and coordinated shutdown. Use when building high-concurrency async systems.
---

# Tokio Concurrency Patterns

This skill provides advanced concurrency patterns for building scalable async applications with Tokio.

## Fan-Out/Fan-In Pattern

Distribute work across multiple workers and collect the results:

```rust
use std::future::Future;
use std::pin::Pin;

use futures::stream::{self, StreamExt};

pub async fn fan_out_fan_in<T, R>(
    items: Vec<T>,
    concurrency: usize,
    process: impl Fn(T) -> Pin<Box<dyn Future<Output = R> + Send>> + Send + Sync + 'static,
) -> Vec<R>
where
    T: Send + 'static,
    R: Send + 'static,
{
    stream::iter(items)
        .map(|item| process(item))
        .buffer_unordered(concurrency) // at most `concurrency` futures in flight
        .collect()
        .await
}

// Usage
let results = fan_out_fan_in(
    items,
    10,
    |item| Box::pin(async move { process_item(item).await })
).await;
```

## Pipeline Processing

Chain async processing stages:

```rust
use tokio::sync::mpsc;

pub struct Pipeline<T> {
    stages: Vec<Box<dyn Stage<T>>>,
}

// `Sync` is required so the boxed stages can be shared by the `Send`
// future that the spawned task produces.
#[async_trait::async_trait]
pub trait Stage<T>: Send + Sync {
    async fn process(&self, item: T) -> T;
}

impl<T: Send + 'static> Pipeline<T> {
    pub fn new() -> Self {
        Self { stages: Vec::new() }
    }

    pub fn add_stage<S: Stage<T> + 'static>(mut self, stage: S) -> Self {
        self.stages.push(Box::new(stage));
        self
    }

    pub async fn run(self, mut input: mpsc::Receiver<T>) -> mpsc::Receiver<T> {
        let (tx, rx) = mpsc::channel(100);

        tokio::spawn(async move {
            while let Some(mut item) = input.recv().await {
                // Process through all stages in order
                for stage in &self.stages {
                    item = stage.process(item).await;
                }

                // Stop when the downstream receiver is dropped
                if tx.send(item).await.is_err() {
                    break;
                }
            }
        });

        rx
    }
}

// Usage
let pipeline = Pipeline::new()
    .add_stage(ValidationStage)
    .add_stage(TransformStage)
    .add_stage(EnrichmentStage);

let output = pipeline.run(input_channel).await;
```

## Rate Limiting

Control operation rate using a token bucket (below) or leaky-bucket pacing (see the sketch after this section):

```rust
use std::sync::Arc;
use tokio::sync::Semaphore;
use tokio::time::{interval, Duration};

pub struct RateLimiter {
    semaphore: Arc<Semaphore>,
}

impl RateLimiter {
    pub fn new(rate: usize, period: Duration) -> Self {
        let semaphore = Arc::new(Semaphore::new(rate));

        // Background task refills tokens once per period.
        // (A production version might hold a Weak reference so the task
        // exits when the limiter is dropped.)
        let refill = semaphore.clone();
        tokio::spawn(async move {
            let mut interval = interval(period);
            loop {
                interval.tick().await;
                // Top the bucket back up to `rate` permits.
                let deficit = rate.saturating_sub(refill.available_permits());
                refill.add_permits(deficit);
            }
        });

        Self { semaphore }
    }

    pub async fn acquire(&self) {
        // `forget` consumes the permit so it is only restored by the refill task.
        self.semaphore.acquire().await.unwrap().forget();
    }
}

// Usage
let limiter = RateLimiter::new(100, Duration::from_secs(1));

for _ in 0..1000 {
    limiter.acquire().await;
    make_request().await;
}
```

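The token bucket above admits bursts of up to `rate` operations. For smoother, leaky-bucket style pacing, a minimal sketch is to serialize callers behind a shared interval, one operation per tick (the `Pacer` name is illustrative, not an established API):

```rust
use std::sync::Arc;
use tokio::sync::Mutex;
use tokio::time::{interval, Duration, Interval, MissedTickBehavior};

/// Leaky-bucket style pacing: one operation per tick, no bursts.
pub struct Pacer {
    interval: Arc<Mutex<Interval>>,
}

impl Pacer {
    pub fn new(period: Duration) -> Self {
        let mut interval = interval(period);
        // Don't rush to catch up after a quiet stretch.
        interval.set_missed_tick_behavior(MissedTickBehavior::Delay);
        Self { interval: Arc::new(Mutex::new(interval)) }
    }

    pub async fn acquire(&self) {
        // Holding the lock across `tick` deliberately serializes callers.
        self.interval.lock().await.tick().await;
    }
}
```
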
## Parallel Task Execution with Join

Execute multiple tasks in parallel and wait for all:

```rust
use tokio::try_join;

pub async fn parallel_operations() -> Result<(String, Vec<User>, Config), Error> {
    // Runs all three concurrently; returns early on the first error.
    try_join!(
        fetch_data(),
        fetch_users(),
        load_config()
    )
}

// With manual spawning for CPU-bound work
pub async fn parallel_cpu_work(items: Vec<Item>) -> Vec<Result<Processed, Error>> {
    let handles: Vec<_> = items
        .into_iter()
        .map(|item| {
            tokio::task::spawn_blocking(move || {
                expensive_cpu_work(item)
            })
        })
        .collect();

    let mut results = Vec::new();
    for handle in handles {
        // `unwrap` re-raises a panic from the blocking task here.
        results.push(handle.await.unwrap());
    }
    results
}
```

## Coordinated Shutdown with CancellationToken

Manage hierarchical cancellation:

```rust
use std::future::Future;
use tokio::select;
use tokio_util::sync::CancellationToken;

pub struct Coordinator {
    token: CancellationToken,
    tasks: Vec<tokio::task::JoinHandle<()>>,
}

impl Coordinator {
    pub fn new() -> Self {
        Self {
            token: CancellationToken::new(),
            tasks: Vec::new(),
        }
    }

    pub fn spawn<F>(&mut self, f: F)
    where
        F: Future<Output = ()> + Send + 'static,
    {
        let token = self.token.child_token();
        let handle = tokio::spawn(async move {
            select! {
                _ = token.cancelled() => {}
                _ = f => {}
            }
        });
        self.tasks.push(handle);
    }

    pub async fn shutdown(self) {
        self.token.cancel();

        for task in self.tasks {
            let _ = task.await;
        }
    }
}

// Usage
let mut coordinator = Coordinator::new();

coordinator.spawn(worker1());
coordinator.spawn(worker2());
coordinator.spawn(worker3());

// Later...
coordinator.shutdown().await;
```

## Async Trait Patterns

Work around async trait limitations:

```rust
use std::future::Future;
use std::pin::Pin;

use async_trait::async_trait;

#[async_trait]
pub trait AsyncService {
    async fn process(&self, input: String) -> Result<String, Error>;
}

// Alternative without async-trait: return a boxed future by hand
pub trait AsyncServiceManual {
    fn process<'a>(
        &'a self,
        input: String,
    ) -> Pin<Box<dyn Future<Output = Result<String, Error>> + Send + 'a>>;
}

// Implementation
struct MyService;

#[async_trait]
impl AsyncService for MyService {
    async fn process(&self, input: String) -> Result<String, Error> {
        // async implementation
        Ok(input.to_uppercase())
    }
}
```

## Shared State Management

Safe concurrent access to shared state:

```rust
use std::collections::HashMap;
use std::sync::Arc;
use tokio::sync::RwLock;

pub struct SharedState {
    data: Arc<RwLock<HashMap<String, String>>>,
}

impl SharedState {
    pub fn new() -> Self {
        Self {
            data: Arc::new(RwLock::new(HashMap::new())),
        }
    }

    pub async fn get(&self, key: &str) -> Option<String> {
        let data = self.data.read().await;
        data.get(key).cloned()
    }

    pub async fn set(&self, key: String, value: String) {
        let mut data = self.data.write().await;
        data.insert(key, value);
    }

    // Batch operations: one lock acquisition for many reads
    pub async fn get_many(&self, keys: &[String]) -> Vec<Option<String>> {
        let data = self.data.read().await;
        keys.iter()
            .map(|key| data.get(key).cloned())
            .collect()
    }
}

// Clone is cheap (Arc)
impl Clone for SharedState {
    fn clone(&self) -> Self {
        Self {
            data: self.data.clone(),
        }
    }
}
```

## Work Stealing Queue

Distribute work across per-worker queues for load balancing. This sketch uses round-robin dispatch, which balances load statistically; true work stealing (idle workers taking items from busy peers) needs a deque per worker, as provided by crates like `crossbeam-deque`:

```rust
use std::future::Future;
use std::pin::Pin;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use tokio::sync::mpsc;

pub struct WorkQueue<T> {
    queues: Vec<mpsc::Sender<T>>,
    receivers: Vec<mpsc::Receiver<T>>,
    next: Arc<AtomicUsize>,
}

impl<T: Send + 'static> WorkQueue<T> {
    pub fn new(workers: usize, capacity: usize) -> Self {
        let mut queues = Vec::new();
        let mut receivers = Vec::new();

        for _ in 0..workers {
            let (tx, rx) = mpsc::channel(capacity);
            queues.push(tx);
            receivers.push(rx);
        }

        Self {
            queues,
            receivers,
            next: Arc::new(AtomicUsize::new(0)),
        }
    }

    pub async fn submit(&self, work: T) -> Result<(), mpsc::error::SendError<T>> {
        // Round-robin dispatch across the per-worker queues.
        let idx = self.next.fetch_add(1, Ordering::Relaxed) % self.queues.len();
        self.queues[idx].send(work).await
    }

    // Takes `&mut self` so the senders stay alive for later `submit` calls;
    // consuming `self` here would drop them and shut the workers down.
    pub fn spawn_workers<F>(&mut self, process: F)
    where
        F: Fn(T) -> Pin<Box<dyn Future<Output = ()> + Send>> + Send + Sync + Clone + 'static,
    {
        for mut rx in self.receivers.drain(..) {
            let process = process.clone();
            tokio::spawn(async move {
                while let Some(work) = rx.recv().await {
                    process(work).await;
                }
            });
        }
    }
}
```

## Circuit Breaker for Resilience

Prevent cascading failures:

```rust
use std::future::Future;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use tokio::sync::RwLock;
use tokio::time::{Duration, Instant};

pub enum CircuitState {
    Closed,
    Open(Instant),
    HalfOpen,
}

pub enum CircuitBreakerError<E> {
    /// The circuit is open; the call was not attempted.
    Open,
    /// The underlying operation failed.
    Inner(E),
}

pub struct CircuitBreaker {
    state: Arc<RwLock<CircuitState>>,
    failure_count: AtomicU64,
    threshold: u64,
    timeout: Duration,
}

impl CircuitBreaker {
    pub fn new(threshold: u64, timeout: Duration) -> Self {
        Self {
            state: Arc::new(RwLock::new(CircuitState::Closed)),
            failure_count: AtomicU64::new(0),
            threshold,
            timeout,
        }
    }

    pub async fn call<F, T, E>(&self, f: F) -> Result<T, CircuitBreakerError<E>>
    where
        F: Future<Output = Result<T, E>>,
    {
        // If the circuit is open, reject fast; transition to half-open
        // once the timeout has elapsed.
        let should_half_open = {
            let state = self.state.read().await;
            match *state {
                CircuitState::Open(opened_at) => {
                    if opened_at.elapsed() < self.timeout {
                        return Err(CircuitBreakerError::Open);
                    }
                    true
                }
                _ => false,
            }
        };
        if should_half_open {
            *self.state.write().await = CircuitState::HalfOpen;
        }

        // Execute request
        match f.await {
            Ok(result) => {
                self.on_success().await;
                Ok(result)
            }
            Err(e) => {
                self.on_failure().await;
                Err(CircuitBreakerError::Inner(e))
            }
        }
    }

    async fn on_success(&self) {
        self.failure_count.store(0, Ordering::SeqCst);
        let mut state = self.state.write().await;
        if matches!(*state, CircuitState::HalfOpen) {
            *state = CircuitState::Closed;
        }
    }

    async fn on_failure(&self) {
        let failures = self.failure_count.fetch_add(1, Ordering::SeqCst) + 1;
        if failures >= self.threshold {
            *self.state.write().await = CircuitState::Open(Instant::now());
        }
    }
}
```

## Batching Operations

Batch multiple operations for efficiency:

```rust
use std::future::Future;
use std::pin::Pin;
use tokio::sync::mpsc;
use tokio::time::{interval, Duration};

pub struct Batcher<T> {
    tx: mpsc::Sender<T>,
}

impl<T: Send + 'static> Batcher<T> {
    pub fn new<F>(
        batch_size: usize,
        batch_timeout: Duration,
        process: F,
    ) -> Self
    where
        F: Fn(Vec<T>) -> Pin<Box<dyn Future<Output = ()> + Send>> + Send + 'static,
    {
        let (tx, mut rx) = mpsc::channel(1000);

        tokio::spawn(async move {
            let mut batch = Vec::with_capacity(batch_size);
            let mut interval = interval(batch_timeout);

            loop {
                tokio::select! {
                    item = rx.recv() => {
                        match item {
                            Some(item) => {
                                batch.push(item);
                                // Flush when full
                                if batch.len() >= batch_size {
                                    process(std::mem::replace(&mut batch, Vec::with_capacity(batch_size))).await;
                                }
                            }
                            None => break,
                        }
                    }
                    // Flush a partial batch on the timeout tick
                    _ = interval.tick() => {
                        if !batch.is_empty() {
                            process(std::mem::replace(&mut batch, Vec::with_capacity(batch_size))).await;
                        }
                    }
                }
            }

            // Process remaining items after the sender is dropped
            if !batch.is_empty() {
                process(batch).await;
            }
        });

        Self { tx }
    }

    pub async fn submit(&self, item: T) -> Result<(), mpsc::error::SendError<T>> {
        self.tx.send(item).await
    }
}
```

## Best Practices

1. **Use appropriate concurrency limits** - Don't spawn unbounded tasks
2. **Implement backpressure** - Use bounded channels and semaphores (see the sketch after this list)
3. **Handle cancellation** - Support cooperative cancellation with tokens
4. **Avoid lock contention** - Minimize lock scope, prefer channels
5. **Use rate limiting** - Protect external services
6. **Implement circuit breakers** - Prevent cascading failures
7. **Batch operations** - Reduce overhead for small operations
8. **Profile concurrency** - Use tokio-console to understand behavior
9. **Use appropriate synchronization** - RwLock for read-heavy, Mutex for write-heavy
10. **Design for failure** - Always consider what happens when operations fail

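As a concrete instance of item 2, here is a minimal backpressure sketch with a bounded channel: the producer slows down automatically when the consumer falls behind, because `send` suspends until capacity frees up. The item handling is a placeholder:

```rust
use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    // Capacity 8: at most 8 unprocessed items are buffered at any time.
    let (tx, mut rx) = mpsc::channel::<u64>(8);

    let producer = tokio::spawn(async move {
        for i in 0..1_000 {
            // Suspends here whenever the buffer is full (backpressure).
            if tx.send(i).await.is_err() {
                break; // consumer is gone
            }
        }
    });

    while let Some(item) = rx.recv().await {
        // Simulated slow consumer; replace with real handling.
        tokio::time::sleep(std::time::Duration::from_millis(1)).await;
        let _ = item;
    }

    let _ = producer.await;
}
```
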
---
name: tokio-networking
description: Network programming patterns with Hyper, Tonic, and Tower. Use when building HTTP services, gRPC applications, implementing middleware, connection pooling, or health checks.
---

# Tokio Networking Patterns

This skill provides network programming patterns for building production-grade services with the Tokio ecosystem.

## HTTP Service with Hyper and Axum

Build HTTP services with routing and middleware:

```rust
use axum::{
    Router,
    routing::get,
    extract::{Json, Path, State},
    http::{Request, StatusCode},
    middleware::{self, Next},
    response::IntoResponse,
};
use std::sync::Arc;
use std::time::Instant;

#[derive(Clone)]
struct AppState {
    db: Arc<Database>,
    cache: Arc<Cache>,
}

async fn create_app() -> Router {
    let state = AppState {
        db: Arc::new(Database::new().await),
        cache: Arc::new(Cache::new()),
    };

    Router::new()
        .route("/health", get(health_check))
        .route("/api/v1/users", get(list_users).post(create_user))
        .route("/api/v1/users/:id", get(get_user).delete(delete_user))
        .layer(middleware::from_fn(logging_middleware))
        .layer(middleware::from_fn(auth_middleware))
        .with_state(state)
}

async fn health_check() -> impl IntoResponse {
    "OK"
}

async fn get_user(
    State(state): State<AppState>,
    Path(id): Path<u64>,
) -> Result<Json<User>, StatusCode> {
    state.db.get_user(id)
        .await
        .map(Json)
        .ok_or(StatusCode::NOT_FOUND)
}

// axum 0.6-style middleware signature; axum 0.7 drops the `B` parameter.
async fn logging_middleware<B>(
    req: Request<B>,
    next: Next<B>,
) -> impl IntoResponse {
    let method = req.method().clone();
    let uri = req.uri().clone();

    let start = Instant::now();
    let response = next.run(req).await;
    let duration = start.elapsed();

    tracing::info!(
        method = %method,
        uri = %uri,
        status = %response.status(),
        duration_ms = duration.as_millis() as u64,
        "request completed"
    );

    response
}
```

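To actually serve the router, a minimal sketch using the axum 0.6-style `Server` API (axum 0.7 instead uses `axum::serve` with a `tokio::net::TcpListener`); the bind address is illustrative:

```rust
#[tokio::main]
async fn main() {
    let app = create_app().await;

    axum::Server::bind(&"0.0.0.0:3000".parse().unwrap())
        .serve(app.into_make_service())
        .await
        .unwrap();
}
```
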
## gRPC Service with Tonic

Build type-safe gRPC services:

```rust
use std::sync::Arc;
use tokio::sync::mpsc;
use tokio_stream::{wrappers::ReceiverStream, StreamExt};
use tonic::{transport::Server, Request, Response, Status};

pub mod proto {
    tonic::include_proto!("myservice");
}

use proto::my_service_server::{MyService, MyServiceServer};

pub struct MyServiceImpl {
    db: Arc<Database>, // `Database` is a placeholder for your storage layer
}

#[tonic::async_trait]
impl MyService for MyServiceImpl {
    async fn get_user(
        &self,
        request: Request<proto::GetUserRequest>,
    ) -> Result<Response<proto::User>, Status> {
        let req = request.into_inner();

        let user = self.db.get_user(req.id)
            .await
            .map_err(|e| Status::internal(e.to_string()))?
            .ok_or_else(|| Status::not_found("User not found"))?;

        Ok(Response::new(proto::User {
            id: user.id,
            name: user.name,
            email: user.email,
        }))
    }

    type ListUsersStream = ReceiverStream<Result<proto::User, Status>>;

    async fn list_users(
        &self,
        _request: Request<proto::ListUsersRequest>,
    ) -> Result<Response<Self::ListUsersStream>, Status> {
        let (tx, rx) = mpsc::channel(100);

        let db = self.db.clone();
        tokio::spawn(async move {
            let mut users = db.list_users().await.unwrap();

            while let Some(user) = users.next().await {
                let proto_user = proto::User {
                    id: user.id,
                    name: user.name,
                    email: user.email,
                };

                // Stop streaming when the client disconnects
                if tx.send(Ok(proto_user)).await.is_err() {
                    break;
                }
            }
        });

        Ok(Response::new(ReceiverStream::new(rx)))
    }
}

async fn serve() -> Result<(), Box<dyn std::error::Error>> {
    let addr = "[::1]:50051".parse()?;
    let service = MyServiceImpl { db: Arc::new(Database::connect().await?) };

    Server::builder()
        .add_service(MyServiceServer::new(service))
        .serve(addr)
        .await?;

    Ok(())
}
```

## Tower Middleware Composition

Layer middleware for cross-cutting concerns:

```rust
use std::time::Duration;
use tower::{Service, ServiceBuilder};
use tower::limit::RateLimitLayer; // rate limiting lives in `tower`, not `tower-http`
use tower_http::{
    compression::CompressionLayer,
    timeout::TimeoutLayer,
    trace::TraceLayer,
};

// Illustrative: the exact bounds depend on the request, response, and
// error types of the wrapped service.
fn create_middleware_stack<S, Request>(service: S) -> impl Service<Request>
where
    S: Service<Request> + Clone,
{
    ServiceBuilder::new()
        // Layers added first are outermost (run first on the way in)
        .layer(TraceLayer::new_for_http())
        .layer(CompressionLayer::new())
        .layer(TimeoutLayer::new(Duration::from_secs(30)))
        .layer(RateLimitLayer::new(100, Duration::from_secs(1)))
        // The wrapped service is innermost (runs last)
        .service(service)
}

// Custom middleware
use std::future::Future;
use std::pin::Pin;
use std::sync::Arc;
use std::task::{Context, Poll};
use tower::Layer;

#[derive(Clone)]
struct MetricsLayer {
    metrics: Arc<Metrics>, // e.g. a prometheus registry handle
}

impl<S> Layer<S> for MetricsLayer {
    type Service = MetricsService<S>;

    fn layer(&self, inner: S) -> Self::Service {
        MetricsService {
            inner,
            metrics: self.metrics.clone(),
        }
    }
}

#[derive(Clone)]
struct MetricsService<S> {
    inner: S,
    metrics: Arc<Metrics>,
}

impl<S, Req> Service<Req> for MetricsService<S>
where
    S: Service<Req>,
    S::Future: Send + 'static,
{
    type Response = S::Response;
    type Error = S::Error;
    // Boxed so the future type doesn't have to be named.
    type Future = Pin<Box<dyn Future<Output = Result<S::Response, S::Error>> + Send>>;

    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.inner.poll_ready(cx)
    }

    fn call(&mut self, req: Req) -> Self::Future {
        self.metrics.requests_total.inc();
        let timer = self.metrics.request_duration.start_timer();

        let future = self.inner.call(req);

        Box::pin(async move {
            let result = future.await;
            timer.observe_duration();
            result
        })
    }
}
```

## Connection Pooling

Manage connection pools efficiently:

```rust
use std::future::Future;
use deadpool_postgres::{Client, Config, Pool, Runtime};
use tokio_postgres::NoTls;

pub struct DatabasePool {
    pool: Pool,
}

impl DatabasePool {
    pub async fn new(config: &DatabaseConfig) -> Result<Self, Error> {
        let mut cfg = Config::new();
        cfg.host = Some(config.host.clone());
        cfg.port = Some(config.port);
        cfg.dbname = Some(config.database.clone());
        cfg.user = Some(config.user.clone());
        cfg.password = Some(config.password.clone());

        let pool = cfg.create_pool(Some(Runtime::Tokio1), NoTls)?;

        Ok(Self { pool })
    }

    pub async fn get(&self) -> Result<Client, Error> {
        self.pool.get().await.map_err(Into::into)
    }

    // The pooled client is passed by value so the returned future can own it.
    pub async fn query<T, F, Fut>(&self, f: F) -> Result<T, Error>
    where
        F: FnOnce(Client) -> Fut,
        Fut: Future<Output = Result<T, Error>>,
    {
        let client = self.get().await?;
        f(client).await
    }
}

// Usage
let pool = DatabasePool::new(&config).await?;

let users = pool.query(|client| async move {
    client.query("SELECT * FROM users", &[])
        .await
        .map_err(Into::into)
}).await?;
```

## Health Checks and Readiness Probes

Implement comprehensive health checks:

```rust
use axum::{extract::State, http::StatusCode, routing::get, Json, Router};
use serde::Serialize;
use std::sync::Arc;
use std::time::Instant;

#[derive(Serialize)]
struct HealthResponse {
    status: String,
    version: String,
    dependencies: Vec<DependencyHealth>,
}

#[derive(Serialize)]
struct DependencyHealth {
    name: String,
    status: String,
    latency_ms: Option<u64>,
    message: Option<String>,
}

async fn health_check(State(state): State<Arc<AppState>>) -> Json<HealthResponse> {
    let mut dependencies = Vec::new();

    // Check database
    let db_start = Instant::now();
    let db_status = match state.db.ping().await {
        Ok(_) => DependencyHealth {
            name: "database".into(),
            status: "healthy".into(),
            latency_ms: Some(db_start.elapsed().as_millis() as u64),
            message: None,
        },
        Err(e) => DependencyHealth {
            name: "database".into(),
            status: "unhealthy".into(),
            latency_ms: None,
            message: Some(e.to_string()),
        },
    };
    dependencies.push(db_status);

    // Check cache
    let cache_start = Instant::now();
    let cache_status = match state.cache.ping().await {
        Ok(_) => DependencyHealth {
            name: "cache".into(),
            status: "healthy".into(),
            latency_ms: Some(cache_start.elapsed().as_millis() as u64),
            message: None,
        },
        Err(e) => DependencyHealth {
            name: "cache".into(),
            status: "unhealthy".into(),
            latency_ms: None,
            message: Some(e.to_string()),
        },
    };
    dependencies.push(cache_status);

    let all_healthy = dependencies.iter().all(|d| d.status == "healthy");

    Json(HealthResponse {
        status: if all_healthy { "healthy" } else { "unhealthy" }.into(),
        version: env!("CARGO_PKG_VERSION").into(),
        dependencies,
    })
}

async fn readiness_check(State(state): State<Arc<AppState>>) -> StatusCode {
    if state.is_ready().await {
        StatusCode::OK
    } else {
        StatusCode::SERVICE_UNAVAILABLE
    }
}

pub fn health_routes() -> Router<Arc<AppState>> {
    Router::new()
        .route("/health", get(health_check))
        .route("/ready", get(readiness_check))
        .route("/live", get(|| async { StatusCode::OK }))
}
```

## Circuit Breaker Pattern

Protect against cascading failures by wrapping outbound calls in a circuit breaker (see the tokio-concurrency skill for a full `CircuitBreaker` implementation):

```rust
pub struct ServiceClient {
    client: reqwest::Client,
    circuit_breaker: CircuitBreaker,
}

impl ServiceClient {
    pub async fn call(&self, req: reqwest::Request) -> Result<reqwest::Response, Error> {
        self.circuit_breaker.call(async {
            self.client
                .execute(req)
                .await
                .map_err(Into::into)
        }).await
    }
}
```

## Load Balancing

Distribute requests across multiple backends:

```rust
use tower::balance::p2c::Balance;
use tower::discover::ServiceList;

pub struct LoadBalancer {
    balancer: Balance<ServiceList<Vec<ServiceEndpoint>>, Request>,
}

impl LoadBalancer {
    pub fn new(endpoints: Vec<String>) -> Self {
        // Each endpoint service must implement `tower::load::Load`
        // so power-of-two-choices can compare them.
        let services: Vec<_> = endpoints
            .into_iter()
            .map(|endpoint| create_client(endpoint))
            .collect();

        let balancer = Balance::new(ServiceList::new(services));

        Self { balancer }
    }

    pub async fn call(&mut self, req: Request) -> Result<Response, Error> {
        self.balancer.call(req).await
    }
}
```

## Request Deduplication

Deduplicate concurrent identical requests, so that many callers asking for the same key trigger a single fetch:

```rust
use std::collections::HashMap;
use std::future::Future;
use std::hash::Hash;
use std::sync::Arc;
use tokio::sync::{Mutex, Notify};

pub struct RequestDeduplicator<K, V> {
    in_flight: Arc<Mutex<HashMap<K, Arc<Notify>>>>,
    cache: Arc<Mutex<HashMap<K, Arc<V>>>>,
}

impl<K: Eq + Hash + Clone, V> RequestDeduplicator<K, V> {
    pub fn new() -> Self {
        Self {
            in_flight: Arc::new(Mutex::new(HashMap::new())),
            cache: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    pub async fn get_or_fetch<F, Fut>(
        &self,
        key: K,
        fetch: F,
    ) -> Result<Arc<V>, Error> // `Error` is your application error type
    where
        F: FnOnce() -> Fut,
        Fut: Future<Output = Result<V, Error>>,
    {
        loop {
            // Fast path: already cached.
            if let Some(value) = self.cache.lock().await.get(&key) {
                return Ok(value.clone());
            }

            // Either join an in-flight fetch or become the fetcher.
            let mut in_flight = self.in_flight.lock().await;
            if let Some(existing) = in_flight.get(&key) {
                let notify = existing.clone();
                let notified = notify.notified();
                tokio::pin!(notified);
                // Register interest *before* releasing the lock so a
                // wake-up between unlock and await cannot be lost.
                notified.as_mut().enable();
                drop(in_flight);
                notified.await;
                // Loop and re-check the cache (the fetch may have failed).
                continue;
            }

            let notify = Arc::new(Notify::new());
            in_flight.insert(key.clone(), notify.clone());
            drop(in_flight);

            // We own the fetch for this key.
            let result = fetch().await.map(Arc::new);
            if let Ok(value) = &result {
                self.cache.lock().await.insert(key.clone(), value.clone());
            }

            // Deregister and wake waiters whether we succeeded or failed.
            self.in_flight.lock().await.remove(&key);
            notify.notify_waiters();

            return result;
        }
    }
}
```

## Best Practices

1. **Use connection pooling** for database and HTTP connections
2. **Implement health checks** for all dependencies
3. **Add circuit breakers** for external service calls
4. **Use appropriate timeouts** for all network operations (see the sketch after this list)
5. **Implement retry logic** with exponential backoff
6. **Add comprehensive middleware** for logging, metrics, auth
7. **Use load balancing** for high availability
8. **Deduplicate requests** to reduce load
9. **Monitor latency** and error rates
10. **Design for graceful degradation** when services fail

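For item 4, a minimal sketch of enforcing timeouts on an outbound HTTP client; the durations are illustrative defaults, not recommendations:

```rust
use std::time::Duration;

fn build_client() -> reqwest::Result<reqwest::Client> {
    reqwest::Client::builder()
        // Bound how long establishing a connection may take.
        .connect_timeout(Duration::from_secs(5))
        // Bound the total request time, including body transfer.
        .timeout(Duration::from_secs(30))
        .build()
}
```
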
---
name: tokio-patterns
description: Common Tokio patterns and idioms for async programming. Use when implementing worker pools, request-response patterns, pub/sub, timeouts, retries, or graceful shutdown.
---

# Tokio Patterns

This skill provides common patterns and idioms for building robust async applications with Tokio.

## Worker Pool Pattern

Limit concurrent task execution using a semaphore:

```rust
use std::future::Future;
use std::sync::Arc;
use tokio::sync::Semaphore;

pub struct WorkerPool {
    semaphore: Arc<Semaphore>,
}

impl WorkerPool {
    pub fn new(size: usize) -> Self {
        Self {
            semaphore: Arc::new(Semaphore::new(size)),
        }
    }

    pub async fn execute<F, T>(&self, f: F) -> T
    where
        F: Future<Output = T>,
    {
        // Holding the permit caps how many futures run concurrently.
        let _permit = self.semaphore.acquire().await.unwrap();
        f.await
    }
}

// Usage
let pool = WorkerPool::new(10);
let results = futures::future::join_all(
    (0..100).map(|i| pool.execute(process_item(i)))
).await;
```

## Request-Response Pattern

Use oneshot channels for request-response communication:

```rust
use std::collections::HashMap;
use tokio::sync::{mpsc, oneshot};

pub enum Command {
    Get { key: String, respond_to: oneshot::Sender<Option<String>> },
    Set { key: String, value: String },
}

pub async fn actor(mut rx: mpsc::Receiver<Command>) {
    let mut store = HashMap::new();

    while let Some(cmd) = rx.recv().await {
        match cmd {
            Command::Get { key, respond_to } => {
                let value = store.get(&key).cloned();
                // Ignore the error: the requester may have given up.
                let _ = respond_to.send(value);
            }
            Command::Set { key, value } => {
                store.insert(key, value);
            }
        }
    }
}

// Client usage
let (tx, rx) = mpsc::channel(32);
tokio::spawn(actor(rx));

let (respond_to, response) = oneshot::channel();
tx.send(Command::Get { key: "foo".into(), respond_to }).await.unwrap();
let value = response.await.unwrap();
```

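Callers usually don't juggle `Command` values directly; a sketch of a typed handle over the same `Command` enum (the `StoreHandle` name is illustrative):

```rust
#[derive(Clone)]
pub struct StoreHandle {
    tx: mpsc::Sender<Command>,
}

impl StoreHandle {
    pub fn spawn() -> Self {
        let (tx, rx) = mpsc::channel(32);
        tokio::spawn(actor(rx));
        Self { tx }
    }

    pub async fn get(&self, key: String) -> Option<String> {
        let (respond_to, response) = oneshot::channel();
        self.tx.send(Command::Get { key, respond_to }).await.ok()?;
        // Flatten: channel failure and "key absent" both become None.
        response.await.ok().flatten()
    }

    pub async fn set(&self, key: String, value: String) {
        let _ = self.tx.send(Command::Set { key, value }).await;
    }
}
```
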
## Pub/Sub with Channels

Use broadcast channels for pub/sub messaging:

```rust
use tokio::sync::broadcast;

pub struct PubSub<T: Clone> {
    tx: broadcast::Sender<T>,
}

impl<T: Clone> PubSub<T> {
    pub fn new(capacity: usize) -> Self {
        let (tx, _) = broadcast::channel(capacity);
        Self { tx }
    }

    pub fn subscribe(&self) -> broadcast::Receiver<T> {
        self.tx.subscribe()
    }

    pub fn publish(&self, message: T) -> Result<usize, broadcast::error::SendError<T>> {
        self.tx.send(message)
    }
}

// Usage
let pubsub = PubSub::new(100);

// Subscriber 1
let mut rx1 = pubsub.subscribe();
tokio::spawn(async move {
    // `recv` errors when the channel closes *or* this receiver lags behind;
    // production code may match `RecvError::Lagged` and keep reading.
    while let Ok(msg) = rx1.recv().await {
        println!("Subscriber 1: {:?}", msg);
    }
});

// Subscriber 2
let mut rx2 = pubsub.subscribe();
tokio::spawn(async move {
    while let Ok(msg) = rx2.recv().await {
        println!("Subscriber 2: {:?}", msg);
    }
});

// Publisher
pubsub.publish("Hello".to_string()).unwrap();
```

## Timeout Pattern

Wrap operations with timeouts:

```rust
use std::future::Future;
use tokio::time::{timeout, Duration};

pub enum TimeoutError<E> {
    /// The operation completed but returned an error.
    Inner(E),
    /// The deadline passed before the operation completed.
    Elapsed,
}

pub async fn with_timeout<F, T, E>(duration: Duration, future: F) -> Result<T, TimeoutError<E>>
where
    F: Future<Output = Result<T, E>>,
{
    match timeout(duration, future).await {
        Ok(Ok(result)) => Ok(result),
        Ok(Err(e)) => Err(TimeoutError::Inner(e)),
        Err(_) => Err(TimeoutError::Elapsed),
    }
}

// Usage
let result = with_timeout(
    Duration::from_secs(5),
    fetch_data()
).await?;
```

## Retry with Exponential Backoff

Retry failed operations with backoff:

```rust
use std::future::Future;
use std::pin::Pin;
use tokio::time::{sleep, Duration};

pub async fn retry_with_backoff<F, T, E>(
    mut operation: F,
    max_retries: u32,
    initial_backoff: Duration,
) -> Result<T, E>
where
    F: FnMut() -> Pin<Box<dyn Future<Output = Result<T, E>>>>,
{
    let mut retries = 0;
    let mut backoff = initial_backoff;

    loop {
        match operation().await {
            Ok(result) => return Ok(result),
            Err(_) if retries < max_retries => {
                retries += 1;
                sleep(backoff).await;
                backoff *= 2; // exponential backoff; consider jitter in production
            }
            Err(e) => return Err(e),
        }
    }
}

// Usage
let result = retry_with_backoff(
    || Box::pin(fetch_data()),
    3,
    Duration::from_millis(100)
).await?;
```

## Graceful Shutdown

Coordinate graceful shutdown across components:

```rust
use tokio::select;
use tokio::sync::broadcast;

pub struct ShutdownCoordinator {
    tx: broadcast::Sender<()>,
}

impl ShutdownCoordinator {
    pub fn new() -> Self {
        let (tx, _) = broadcast::channel(1);
        Self { tx }
    }

    pub fn subscribe(&self) -> broadcast::Receiver<()> {
        self.tx.subscribe()
    }

    pub fn shutdown(&self) {
        let _ = self.tx.send(());
    }
}

// Worker pattern
pub async fn worker(mut shutdown: broadcast::Receiver<()>) {
    loop {
        select! {
            _ = shutdown.recv() => {
                // Cleanup
                break;
            }
            result = do_work() => {
                // Process result
            }
        }
    }
}

// Main
let coordinator = ShutdownCoordinator::new();

let shutdown_rx1 = coordinator.subscribe();
let h1 = tokio::spawn(worker(shutdown_rx1));

let shutdown_rx2 = coordinator.subscribe();
let h2 = tokio::spawn(worker(shutdown_rx2));

// Wait for signal
tokio::signal::ctrl_c().await.unwrap();
coordinator.shutdown();

// Wait for workers
let _ = tokio::join!(h1, h2);
```

## Async Initialization

Lazy async initialization with `OnceCell`:

```rust
use tokio::sync::OnceCell;

pub struct Service {
    connection: OnceCell<Connection>,
}

impl Service {
    pub fn new() -> Self {
        Self {
            connection: OnceCell::new(),
        }
    }

    async fn get_connection(&self) -> &Connection {
        self.connection
            .get_or_init(|| async {
                // `unwrap` panics on connection failure; see the fallible
                // `get_or_try_init` variant below.
                Connection::connect().await.unwrap()
            })
            .await
    }

    pub async fn query(&self, sql: &str) -> Result<Vec<Row>> {
        let conn = self.get_connection().await;
        conn.query(sql).await
    }
}
```

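Where initialization can fail, a minimal variant using `get_or_try_init` (same hypothetical `Connection` type; `ConnectError` is a placeholder error type) propagates the error to the caller instead of panicking:

```rust
impl Service {
    async fn try_get_connection(&self) -> Result<&Connection, ConnectError> {
        self.connection
            .get_or_try_init(|| async {
                // On error the cell stays empty, so the next call retries.
                Connection::connect().await
            })
            .await
    }
}
```
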
## Resource Cleanup with Drop

Ensure cleanup even on task cancellation:

```rust
pub struct Resource {
    handle: SomeHandle,
}

impl Resource {
    pub async fn new() -> Self {
        Self {
            handle: acquire_resource().await,
        }
    }

    pub async fn use_resource(&self) -> Result<()> {
        // Use the resource
        Ok(())
    }
}

impl Drop for Resource {
    fn drop(&mut self) {
        // Synchronous cleanup only; Drop cannot await.
        // For async cleanup, use a separate shutdown method.
        self.handle.close();
    }
}

// For async cleanup
impl Resource {
    pub async fn shutdown(self) {
        // Async cleanup
        self.handle.close_async().await;
    }
}
```

## Select Multiple Futures

Use `select!` to race multiple operations:

```rust
use tokio::select;
use tokio::time::Duration;

pub async fn select_example() {
    let mut rx1 = channel1();
    let mut rx2 = channel2();

    loop {
        select! {
            msg = rx1.recv() => {
                if let Some(msg) = msg {
                    handle_channel1(msg).await;
                } else {
                    break;
                }
            }
            msg = rx2.recv() => {
                if let Some(msg) = msg {
                    handle_channel2(msg).await;
                } else {
                    break;
                }
            }
            _ = tokio::time::sleep(Duration::from_secs(60)) => {
                check_timeout().await;
            }
        }
    }
}
```

## Cancellation Token Pattern

Use `tokio_util::sync::CancellationToken` for cooperative cancellation:

```rust
use tokio_util::sync::CancellationToken;

pub async fn worker(token: CancellationToken) {
    loop {
        tokio::select! {
            _ = token.cancelled() => {
                // Cleanup
                break;
            }
            _ = do_work() => {
                // Continue
            }
        }
    }
}

// Hierarchical cancellation
let parent_token = CancellationToken::new();
let child_token = parent_token.child_token();

tokio::spawn(worker(child_token));

// Cancel all
parent_token.cancel();
```

## Best Practices

1. **Use semaphores** for limiting concurrent operations
2. **Implement graceful shutdown** in all long-running tasks
3. **Add timeouts** to external operations
4. **Use channels** for inter-task communication
5. **Handle cancellation** properly in all tasks
6. **Clean up resources** in Drop or explicit shutdown methods
7. **Use appropriate channel types** for different patterns (see the watch sketch after this list)
8. **Implement retries** for transient failures
9. **Use select!** for coordinating multiple async operations
10. **Document lifetime** and ownership patterns clearly

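The skill covers `mpsc`, `oneshot`, and `broadcast` above; the fourth common type, `tokio::sync::watch`, fits "latest value wins" data such as live configuration. A minimal sketch (the `Config` struct is illustrative):

```rust
use tokio::sync::watch;

#[derive(Clone, Debug)]
struct Config {
    max_connections: usize,
}

#[tokio::main]
async fn main() {
    let (tx, mut rx) = watch::channel(Config { max_connections: 100 });

    tokio::spawn(async move {
        // Wakes only when the value actually changes.
        while rx.changed().await.is_ok() {
            let config = rx.borrow_and_update().clone();
            println!("reloaded config: {config:?}");
        }
    });

    // Publisher: intermediate values may be skipped; readers see the latest.
    tx.send(Config { max_connections: 200 }).unwrap();
    tokio::time::sleep(std::time::Duration::from_millis(50)).await;
}
```
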
---
name: tokio-troubleshooting
description: Debugging and troubleshooting Tokio applications using tokio-console, detecting deadlocks, memory leaks, and performance issues. Use when diagnosing async runtime problems.
---

# Tokio Troubleshooting

This skill provides techniques for debugging and troubleshooting async applications built with Tokio.

## Using tokio-console for Runtime Inspection

Monitor the async runtime in real time. `console-subscriber` requires building with the `tokio_unstable` cfg flag:

```toml
# Cargo.toml
[dependencies]
console-subscriber = "0.2"
```

```bash
# Console instrumentation is gated behind tokio_unstable
RUSTFLAGS="--cfg tokio_unstable" cargo build
```

```rust
// main.rs
fn main() {
    console_subscriber::init();

    tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .unwrap()
        .block_on(async {
            run_application().await
        });
}
```

**Run the console in a separate terminal:**
```bash
tokio-console
```

**Key metrics to monitor:**
- Task spawn rate and total tasks
- Poll duration per task
- Idle vs. busy time
- Waker operations
- Resource utilization

**Identifying issues:**
- Long poll durations: CPU-intensive work in async context
- Many wakers: potential contention or inefficient polling
- Growing task count: task leak or unbounded spawning
- High idle time: not enough work, or blocking operations (see the task-naming sketch after this list)

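Unnamed tasks are hard to tell apart in the console. With `tokio_unstable` and tokio's `tracing` feature enabled, tasks can be named via `tokio::task::Builder` (a sketch; `run_worker` is a placeholder):

```rust
// Requires tokio_unstable and tokio's "tracing" feature.
let handle = tokio::task::Builder::new()
    .name("ingest-worker") // shows up as the task name in tokio-console
    .spawn(async {
        run_worker().await;
    })
    .expect("failed to spawn task");
```
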
## Debugging Deadlocks and Hangs

Detect and resolve deadlock situations:

### Common Deadlock Pattern

```rust
use std::sync::Arc;
use tokio::sync::{mpsc, Mutex};
use tokio::time::Duration;

// BAD: potential deadlock — two tasks take the same locks in opposite order
async fn deadlock_example() {
    let mutex1 = Arc::new(Mutex::new(()));
    let mutex2 = Arc::new(Mutex::new(()));

    let m1 = mutex1.clone();
    let m2 = mutex2.clone();
    tokio::spawn(async move {
        let _g1 = m1.lock().await;
        tokio::time::sleep(Duration::from_millis(10)).await;
        let _g2 = m2.lock().await; // May deadlock
    });

    let _g2 = mutex2.lock().await;
    tokio::time::sleep(Duration::from_millis(10)).await;
    let _g1 = mutex1.lock().await; // May deadlock
}

// GOOD: consistent lock ordering
async fn no_deadlock_example() {
    let mutex1 = Arc::new(Mutex::new(()));
    let mutex2 = Arc::new(Mutex::new(()));

    // Always acquire locks in the same order
    let _g1 = mutex1.lock().await;
    let _g2 = mutex2.lock().await;
}

// BETTER: avoid nested locks entirely
async fn best_example() {
    // Use message passing instead
    let (tx, mut rx) = mpsc::channel(10);

    tokio::spawn(async move {
        while let Some(msg) = rx.recv().await {
            process_message(msg).await;
        }
    });

    tx.send(message).await.unwrap();
}
```

### Detecting Hangs with Timeouts

```rust
use tokio::time::{timeout, Duration};

async fn detect_hang() {
    match timeout(Duration::from_secs(5), potentially_hanging_operation()).await {
        Ok(result) => println!("Completed: {:?}", result),
        Err(_) => {
            eprintln!("Operation timed out - potential hang detected");
            // Log stack traces, metrics, etc.
        }
    }
}
```

### Deadlock Detection with try_lock

```rust
use tokio::sync::Mutex;

// Assumes `State: Clone`; the clone goes through the guard's Deref.
async fn try_with_timeout(mutex: &Mutex<State>) -> Option<State> {
    for _ in 0..10 {
        if let Ok(guard) = mutex.try_lock() {
            return Some(guard.clone());
        }
        tokio::time::sleep(Duration::from_millis(10)).await;
    }
    eprintln!("Failed to acquire lock - possible deadlock");
    None
}
```

## Memory Leak Detection

Identify and fix memory leaks:

### Task Leaks

```rust
// BAD: tasks never complete, and new ones spawn forever
async fn leaking_tasks() {
    loop {
        tokio::spawn(async {
            loop {
                // Never exits
                tokio::time::sleep(Duration::from_secs(1)).await;
            }
        });
    }
}

// GOOD: the task has an exit condition
fn spawn_worker(mut shutdown: broadcast::Receiver<()>) -> tokio::task::JoinHandle<()> {
    tokio::spawn(async move {
        loop {
            tokio::select! {
                _ = shutdown.recv() => break,
                _ = tokio::time::sleep(Duration::from_secs(1)) => {
                    // Work
                }
            }
        }
    })
}
```

### Arc Cycles

```rust
// BAD: reference cycle
struct Node {
    next: Option<Arc<Mutex<Node>>>,
    prev: Option<Arc<Mutex<Node>>>, // Creates a cycle!
}

// GOOD: use weak references
use std::sync::Weak;

struct Node {
    next: Option<Arc<Mutex<Node>>>,
    prev: Option<Weak<Mutex<Node>>>, // Weak reference breaks the cycle
}
```

### Monitoring Memory Usage

```rust
use sysinfo::{System, SystemExt};

pub async fn memory_monitor() {
    let mut system = System::new_all();
    let mut interval = tokio::time::interval(Duration::from_secs(60));

    loop {
        interval.tick().await;
        system.refresh_memory();

        let used = system.used_memory();
        let total = system.total_memory();
        let percent = (used as f64 / total as f64) * 100.0;

        tracing::info!(
            used_mb = used / 1024 / 1024,
            total_mb = total / 1024 / 1024,
            percent = %format!("{:.2}", percent),
            "Memory usage"
        );

        if percent > 80.0 {
            tracing::warn!("High memory usage detected");
        }
    }
}
```

## Performance Profiling with Tracing

Instrument code for performance analysis:

```rust
use tracing::{instrument, span, Instrument, Level};

#[instrument]
async fn process_request(id: u64) -> Result<Response, Error> {
    // Use `.instrument(...)` rather than holding `span.enter()` across
    // `.await` points: an entered guard held across an await attaches the
    // span to whatever the thread polls next.
    let data = fetch_from_database(id)
        .instrument(span!(Level::INFO, "database_query"))
        .await?;

    let result = transform_data(data)
        .instrument(span!(Level::INFO, "transformation"))
        .await?;

    Ok(Response { result })
}

// Configure subscriber for flame graphs
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;

fn init_tracing() {
    let fmt_layer = tracing_subscriber::fmt::layer();
    let filter_layer = tracing_subscriber::EnvFilter::from_default_env();

    tracing_subscriber::registry()
        .with(filter_layer)
        .with(fmt_layer)
        .init();
}
```

## Understanding Panic Messages

Common async panic patterns:

### Panics in Spawned Tasks

```rust
// A panic is isolated to the task; it does not crash the program
tokio::spawn(async {
    panic!("This won't crash the program");
});

// To catch panics, inspect the JoinError
let handle = tokio::spawn(async {
    // Work that might panic
});

match handle.await {
    Ok(result) => println!("Success: {:?}", result),
    Err(e) if e.is_panic() => {
        eprintln!("Task panicked: {:?}", e);
        // Handle panic
    }
    Err(e) => eprintln!("Task cancelled: {:?}", e),
}
```

### Send + 'static Errors

```rust
// ERROR: future cannot be sent between threads
async fn bad_example() {
    let rc = Rc::new(5); // Rc is !Send
    tokio::spawn(async move {
        println!("{}", rc); // Error!
    });
}

// FIX: use Arc instead
async fn good_example() {
    let rc = Arc::new(5); // Arc is Send
    tokio::spawn(async move {
        println!("{}", rc); // OK
    });
}

// ERROR: borrowed value does not live long enough
async fn lifetime_error() {
    let data = String::from("hello");
    tokio::spawn(async {
        println!("{}", data); // Error: data might not live long enough
    });
}

// FIX: move ownership into the task
async fn lifetime_fixed() {
    let data = String::from("hello");
    tokio::spawn(async move {
        println!("{}", data); // OK: data is moved
    });
}
```

## Common Error Patterns and Solutions

### Blocking in Async Context

```rust
// PROBLEM: detected with tokio-console (long poll time)
async fn blocking_example() {
    std::thread::sleep(Duration::from_secs(1)); // Blocks the worker thread!
}

// SOLUTION
async fn non_blocking_example() {
    tokio::time::sleep(Duration::from_secs(1)).await; // Yields control
}

// For unavoidable blocking
async fn necessary_blocking() {
    tokio::task::spawn_blocking(|| {
        expensive_cpu_work()
    }).await.unwrap();
}
```

### Channel Closed Errors

```rust
// PROBLEM: SendError because the receiver was dropped
async fn send_error_example() {
    let (tx, rx) = mpsc::channel(10);
    drop(rx); // Receiver dropped

    match tx.send(42).await {
        Ok(_) => println!("Sent"),
        Err(e) => eprintln!("Send failed: {}", e), // Channel closed
    }
}

// SOLUTION: keep a receiver alive, and handle the error when sending
async fn handle_closed_channel() {
    let (tx, mut rx) = mpsc::channel(10);

    tokio::spawn(async move {
        // Receiver keeps the channel open
        while let Some(msg) = rx.recv().await {
            process(msg).await;
        }
    });

    // Or handle the error
    if let Err(e) = tx.send(42).await {
        tracing::warn!("Channel closed: {}", e);
        // Cleanup or alternative action
    }
}
```

### Task Cancellation

```rust
// PROBLEM: task cancelled unexpectedly
let handle = tokio::spawn(async {
    // Long-running work
});

handle.abort(); // Cancels the task at its next await point

// SOLUTION: handle cancellation gracefully
let handle = tokio::spawn(async {
    let result = tokio::select! {
        result = do_work() => result,
        _ = tokio::signal::ctrl_c() => {
            cleanup().await;
            return Err(Error::Cancelled);
        }
    };
    result
});
```

## Testing Async Code Effectively

Write reliable async tests:

```rust
#[tokio::test]
async fn test_with_timeout() {
    tokio::time::timeout(
        Duration::from_secs(5),
        async {
            let result = my_async_function().await;
            assert!(result.is_ok());
        }
    )
    .await
    .expect("Test timed out");
}

#[tokio::test]
async fn test_concurrent_access() {
    let shared = Arc::new(Mutex::new(0));

    let handles: Vec<_> = (0..10)
        .map(|_| {
            let shared = shared.clone();
            tokio::spawn(async move {
                let mut lock = shared.lock().await;
                *lock += 1;
            })
        })
        .collect();

    for handle in handles {
        handle.await.unwrap();
    }

    assert_eq!(*shared.lock().await, 10);
}

// Test with mocked time
#[tokio::test(start_paused = true)]
async fn test_with_time_control() {
    let start = tokio::time::Instant::now();

    // The paused clock auto-advances past sleeps, so this returns
    // immediately in real time while mock time jumps forward 100s.
    tokio::time::sleep(Duration::from_secs(100)).await;

    assert!(start.elapsed() >= Duration::from_secs(100));
}
```

## Debugging Checklist

When troubleshooting async issues:

- [ ] Use tokio-console to monitor runtime behavior
- [ ] Check for blocking operations with tracing
- [ ] Verify all locks are released properly
- [ ] Look for task leaks (growing task count)
- [ ] Monitor memory usage over time
- [ ] Add timeouts to detect hangs
- [ ] Check for channel closure errors
- [ ] Verify Send + 'static bounds are satisfied
- [ ] Use try_lock to detect potential deadlocks
- [ ] Profile with tracing for performance bottlenecks
- [ ] Test with tokio-test for time-based code
- [ ] Check for Arc cycles with weak references

## Helpful Tools

- **tokio-console**: Real-time async runtime monitoring
- **tracing**: Structured logging and profiling
- **cargo-flamegraph**: Generate flame graphs (see the usage sketch below)
- **valgrind/heaptrack**: Memory profiling
- **perf**: CPU profiling on Linux
- **Instruments**: Profiling on macOS

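A typical cargo-flamegraph invocation (the binary name is a placeholder; release builds need debug symbols for readable stacks, e.g. `debug = true` under `[profile.release]`):

```bash
cargo install flamegraph
cargo flamegraph --bin my-service    # writes flamegraph.svg
```
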
## Best Practices

1. **Always use tokio-console** in development
2. **Add tracing spans** to critical code paths
3. **Use timeouts** liberally to detect hangs
4. **Monitor task count** for leaks
5. **Profile before optimizing** - measure first
6. **Test with real concurrency** - don't just test happy paths
7. **Handle cancellation** gracefully in all tasks
8. **Use structured logging** for debugging
9. **Avoid nested locks** - prefer message passing
10. **Document lock ordering** when necessary