Few-Shot Learning Guide
Overview
Few-shot learning steers an LLM toward a task by including a small number of worked examples (typically 1-10) directly in the prompt. It is especially effective for tasks that require a specific format, style, or piece of domain knowledge.
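For instance, a three-shot sentiment prompt embeds the examples immediately before the new input. The layout below is illustrative, not a required format:

Classify the sentiment of each review.

Review: The acting was superb. -> Positive
Review: I want my money back. -> Negative
Review: A solid, forgettable film. -> Neutral
Review: The plot kept me guessing until the end. ->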
Example Selection Strategies
1. Semantic Similarity
Select examples most similar to the input query using embedding-based retrieval.
from sentence_transformers import SentenceTransformer
import numpy as np

class SemanticExampleSelector:
    def __init__(self, examples, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)
        self.examples = examples
        # Normalize so the dot product below equals cosine similarity
        self.example_embeddings = self.model.encode(
            [ex['input'] for ex in examples], normalize_embeddings=True
        )

    def select(self, query, k=3):
        query_embedding = self.model.encode([query], normalize_embeddings=True)
        similarities = np.dot(self.example_embeddings, query_embedding.T).flatten()
        top_indices = np.argsort(similarities)[-k:][::-1]
        return [self.examples[i] for i in top_indices]
Best For: Question answering, text classification, extraction tasks
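A minimal usage sketch; the example pool here is illustrative:

examples = [
    {"input": "What is the capital of France?", "output": "Paris"},
    {"input": "Translate 'hello' to Spanish.", "output": "hola"},
    {"input": "What is the capital of Japan?", "output": "Tokyo"},
]
selector = SemanticExampleSelector(examples)
print(selector.select("What is the capital of Italy?", k=2))
# The two capital-city examples should rank above the translation example.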
2. Diversity Sampling
Maximize coverage of different patterns and edge cases.
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
import numpy as np

class DiversityExampleSelector:
    def __init__(self, examples, model_name='all-MiniLM-L6-v2'):
        self.model = SentenceTransformer(model_name)
        self.examples = examples
        self.embeddings = self.model.encode([ex['input'] for ex in examples])

    def select(self, k=5):
        # Use k-means to find diverse cluster centers
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(self.embeddings)
        # Select the example closest to each cluster center
        diverse_examples = []
        for center in kmeans.cluster_centers_:
            distances = np.linalg.norm(self.embeddings - center, axis=1)
            closest_idx = np.argmin(distances)
            diverse_examples.append(self.examples[closest_idx])
        return diverse_examples
Best For: Demonstrating task variability, edge case handling
3. Difficulty-Based Selection
Gradually increase example complexity to scaffold learning.
class ProgressiveExampleSelector:
    def __init__(self, examples):
        # Examples must carry a 'difficulty' score in [0, 1]
        self.examples = sorted(examples, key=lambda x: x['difficulty'])

    def select(self, k=3):
        # Select examples with linearly increasing difficulty, easiest first
        step = max(1, len(self.examples) // k)
        return [self.examples[min(i * step, len(self.examples) - 1)] for i in range(k)]
Best For: Complex reasoning tasks, code generation
4. Error-Based Selection
Include examples that address common failure modes.
class ErrorGuidedSelector:
    def __init__(self, examples, error_patterns):
        self.examples = examples
        self.error_patterns = error_patterns  # Common mistakes to avoid

    def select(self, query, k=3):
        # Pick one example per error pattern that demonstrates correct handling.
        # Each example is expected to list the patterns it covers in 'demonstrates'.
        # query is unused here but kept for a consistent selector interface.
        selected = []
        for pattern in self.error_patterns[:k]:
            matching = [ex for ex in self.examples if pattern in ex['demonstrates']]
            if matching:
                selected.append(matching[0])
        return selected
Best For: Tasks with known failure patterns, safety-critical applications
Example Construction Best Practices
Format Consistency
All examples should follow identical formatting:
# Good: Consistent format
examples = [
    {
        "input": "What is the capital of France?",
        "output": "Paris"
    },
    {
        "input": "What is the capital of Germany?",
        "output": "Berlin"
    }
]

# Bad: Inconsistent format
examples = [
    "Q: What is the capital of France? A: Paris",
    {"question": "What is the capital of Germany?", "answer": "Berlin"}
]
Input-Output Alignment
Ensure examples demonstrate the exact task you want the model to perform:
# Good: Clear input-output relationship
example = {
    "input": "Sentiment: The movie was terrible and boring.",
    "output": "Negative"
}

# Bad: Ambiguous relationship
example = {
    "input": "The movie was terrible and boring.",
    "output": "This review expresses negative sentiment toward the film."
}
Complexity Balance
Include examples spanning the expected difficulty range:
examples = [
    # Simple case
    {"input": "2 + 2", "output": "4"},
    # Moderate case
    {"input": "15 * 3 + 8", "output": "53"},
    # Complex case
    {"input": "(12 + 8) * 3 - 15 / 5", "output": "57"}
]
Context Window Management
Token Budget Allocation
A typical allocation for a 4K-token context window:

- System prompt: 500 tokens (~12%)
- Few-shot examples: 1,500 tokens (~38%)
- User input: 500 tokens (~12%)
- Response: 1,500 tokens (~38%)
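Enforcing this budget requires counting tokens with a tokenizer that matches the target model. A minimal sketch using tiktoken; the cl100k_base encoding is an assumption, so swap in whichever encoding your model actually uses:

import tiktoken

# Assumption: cl100k_base; use the encoding that matches your model
enc = tiktoken.get_encoding("cl100k_base")

def count_tokens(text: str) -> int:
    return len(enc.encode(text))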
Dynamic Example Truncation
class TokenAwareSelector:
    def __init__(self, examples, tokenizer, max_tokens=1500):
        self.examples = examples
        self.tokenizer = tokenizer
        self.max_tokens = max_tokens

    def select(self, query, k=5):
        selected = []
        total_tokens = 0
        # Start with the most relevant examples; rank_by_relevance is an
        # assumed hook (see the subclass sketch below for one wiring)
        candidates = self.rank_by_relevance(query)
        for example in candidates[:k]:
            example_tokens = len(self.tokenizer.encode(
                f"Input: {example['input']}\nOutput: {example['output']}\n\n"
            ))
            if total_tokens + example_tokens <= self.max_tokens:
                selected.append(example)
                total_tokens += example_tokens
            else:
                break
        return selected
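rank_by_relevance is left abstract above. One way to wire it up, assuming the SemanticExampleSelector and the tiktoken enc defined earlier, is to delegate ranking to the semantic selector:

class SemanticTokenAwareSelector(TokenAwareSelector):
    def __init__(self, examples, tokenizer, max_tokens=1500):
        super().__init__(examples, tokenizer, max_tokens)
        self.semantic = SemanticExampleSelector(examples)

    def rank_by_relevance(self, query):
        # Rank the full pool by similarity; select() then trims to budget
        return self.semantic.select(query, k=len(self.examples))

selector = SemanticTokenAwareSelector(examples, tokenizer=enc, max_tokens=800)
few_shot = selector.select("What is the capital of Italy?")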
Edge Case Handling
Include Boundary Examples
edge_case_examples = [
    # Empty input
    {"input": "", "output": "Please provide input text."},
    # Very long input (truncated in example)
    {"input": "..." + "word " * 1000, "output": "Input exceeds maximum length."},
    # Ambiguous input
    {"input": "bank", "output": "Ambiguous: Could refer to financial institution or river bank."},
    # Invalid input
    {"input": "!@#$%", "output": "Invalid input format. Please provide valid text."}
]
Few-Shot Prompt Templates
Classification Template
def build_classification_prompt(examples, query, labels):
    prompt = f"Classify the text into one of these categories: {', '.join(labels)}\n\n"
    for ex in examples:
        prompt += f"Text: {ex['input']}\nCategory: {ex['output']}\n\n"
    prompt += f"Text: {query}\nCategory:"
    return prompt
Extraction Template
import json

def build_extraction_prompt(examples, query):
    prompt = "Extract structured information from the text.\n\n"
    for ex in examples:
        prompt += f"Text: {ex['input']}\nExtracted: {json.dumps(ex['output'])}\n\n"
    prompt += f"Text: {query}\nExtracted:"
    return prompt
Transformation Template
def build_transformation_prompt(examples, query):
    prompt = "Transform the input according to the pattern shown in examples.\n\n"
    for ex in examples:
        prompt += f"Input: {ex['input']}\nOutput: {ex['output']}\n\n"
    prompt += f"Input: {query}\nOutput:"
    return prompt
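Putting a template together with a selected example set; the data and the printed output here are illustrative:

examples = [
    {"input": "The service was slow and rude.", "output": "negative"},
    {"input": "Absolutely loved the food!", "output": "positive"},
]
prompt = build_classification_prompt(
    examples,
    query="Decent place, nothing special.",
    labels=["positive", "negative", "neutral"],
)
print(prompt)
# Classify the text into one of these categories: positive, negative, neutral
# ...
# Text: Decent place, nothing special.
# Category: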
Evaluation and Optimization
Example Quality Metrics
def evaluate_example_quality(example, validation_set, other_examples):
    # rate_clarity, estimate_difficulty, and calculate_uniqueness are
    # placeholder scorers; one concrete metric is sketched below
    metrics = {
        'clarity': rate_clarity(example),  # 0-1 score
        'representativeness': calculate_similarity_to_validation(example, validation_set),
        'difficulty': estimate_difficulty(example),
        'uniqueness': calculate_uniqueness(example, other_examples)
    }
    return metrics
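As one concrete instance of these placeholders, representativeness can be scored as the mean cosine similarity between the example and the validation inputs. A sketch reusing sentence-transformers; the function name simply mirrors the placeholder above:

import numpy as np
from sentence_transformers import SentenceTransformer

_model = SentenceTransformer('all-MiniLM-L6-v2')

def calculate_similarity_to_validation(example, validation_set):
    # Mean cosine similarity between the example and all validation inputs
    texts = [example['input']] + [v['input'] for v in validation_set]
    embeddings = _model.encode(texts, normalize_embeddings=True)
    return float(np.mean(embeddings[1:] @ embeddings[0]))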
A/B Testing Example Sets
class ExampleSetTester:
    def __init__(self, llm_client):
        self.client = llm_client

    def compare_example_sets(self, set_a, set_b, test_queries):
        results_a = self.evaluate_set(set_a, test_queries)
        results_b = self.evaluate_set(set_b, test_queries)
        return {
            'set_a_accuracy': results_a['accuracy'],
            'set_b_accuracy': results_b['accuracy'],
            'winner': 'A' if results_a['accuracy'] > results_b['accuracy'] else 'B',
            'improvement': abs(results_a['accuracy'] - results_b['accuracy'])
        }

    def evaluate_set(self, examples, test_queries):
        correct = 0
        for query in test_queries:
            prompt = build_prompt(examples, query['input'])
            response = self.client.complete(prompt)
            # Exact match is strict; relax (e.g., normalize case) as needed
            if response.strip() == query['expected_output']:
                correct += 1
        return {'accuracy': correct / len(test_queries)}
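A sketch of running a comparison, assuming you have two candidate sets (examples_v1, examples_v2), an llm_client exposing complete(), and the build_prompt template used inside evaluate_set:

test_queries = [
    {"input": "The plot dragged badly.", "expected_output": "negative"},
    {"input": "A delightful surprise!", "expected_output": "positive"},
]
tester = ExampleSetTester(llm_client)
report = tester.compare_example_sets(examples_v1, examples_v2, test_queries)
print(report)  # e.g. {'set_a_accuracy': 0.5, 'set_b_accuracy': 1.0, 'winner': 'B', ...}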
Advanced Techniques
Meta-Learning (Learning to Select)
Train a small model to predict which examples will be most effective:
from sklearn.ensemble import RandomForestClassifier

class LearnedExampleSelector:
    def __init__(self):
        self.selector_model = RandomForestClassifier()

    def train(self, training_data):
        # training_data: list of (query, example, success) tuples
        features = []
        labels = []
        for query, example, success in training_data:
            features.append(self.extract_features(query, example))
            labels.append(1 if success else 0)
        self.selector_model.fit(features, labels)

    def extract_features(self, query, example):
        # semantic_similarity and keyword_overlap are sketched after this block
        return [
            semantic_similarity(query, example['input']),
            len(example['input']),
            len(example['output']),
            keyword_overlap(query, example['input'])
        ]

    def select(self, query, candidates, k=3):
        scores = []
        for example in candidates:
            features = self.extract_features(query, example)
            score = self.selector_model.predict_proba([features])[0][1]
            scores.append((score, example))
        # Sort on the score alone; (score, dict) tuples are not comparable
        return [ex for _, ex in sorted(scores, key=lambda s: s[0], reverse=True)[:k]]
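The two scoring helpers used by extract_features are not defined above; minimal illustrative versions might look like this:

import numpy as np
from sentence_transformers import SentenceTransformer

_model = SentenceTransformer('all-MiniLM-L6-v2')

def semantic_similarity(a: str, b: str) -> float:
    # Cosine similarity between normalized sentence embeddings
    emb = _model.encode([a, b], normalize_embeddings=True)
    return float(emb[0] @ emb[1])

def keyword_overlap(a: str, b: str) -> float:
    # Jaccard overlap of lowercased word sets
    wa, wb = set(a.lower().split()), set(b.lower().split())
    return len(wa & wb) / max(1, len(wa | wb))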
Adaptive Example Count
Dynamically adjust the number of examples based on task difficulty:
class AdaptiveExampleSelector:
    def __init__(self, examples):
        self.examples = examples

    def select(self, query, max_examples=5):
        # get_top_k and estimated_confidence are assumed hooks: the former
        # ranks examples (e.g., semantically), the latter returns a 0-1
        # confidence estimate from a lightweight model or heuristic.
        # Start with 1 example and grow until confident.
        selected = []
        for k in range(1, max_examples + 1):
            selected = self.get_top_k(query, k)
            if self.estimated_confidence(query, selected) > 0.9:
                return selected
        return selected  # Fall back to max_examples if never confident enough
Common Mistakes
- Too Many Examples: More isn't always better; extra examples can dilute focus and waste tokens
- Irrelevant Examples: Examples should match the target task closely
- Inconsistent Formatting: Confuses the model about output format
- Overfitting to Examples: Model copies example patterns too literally
- Ignoring Token Limits: Running out of space for actual input/output
Resources
- Example dataset repositories
- Pre-built example selectors for common tasks
- Evaluation frameworks for few-shot performance
- Token counting utilities for different models