{
  "_comment": "Example JSON file for AI Experiment Logger plugin. Represents a collection of experiments.",
  "experiments": [
    {
      "_comment": "First experiment - a simple sentiment analysis task",
      "experiment_id": "sentiment-analysis-v1",
      "experiment_name": "Sentiment Analysis with Claude",
      "description": "Experiment to evaluate Claude's sentiment analysis accuracy on customer reviews.",
      "model": "Claude-v2",
      "dataset": "Customer Reviews - Product A",
      "date_created": "2024-01-15",
      "date_completed": "2024-01-16",
      "status": "completed",
      "parameters": {
        "temperature": 0.2,
        "max_tokens": 200,
        "prompt_template": "Analyze the sentiment of the following review: {review}"
      },
      "metrics": {
        "accuracy": 0.92,
        "precision": 0.90,
        "recall": 0.94,
        "f1_score": 0.92
      },
      "artifacts": {
        "results_csv": "sentiment_analysis_results.csv",
        "model_config": "claude_config.json"
      },
      "notes": "High accuracy achieved. Consider testing with more diverse datasets."
    },
    {
      "_comment": "Second experiment - a more complex text summarization task",
      "experiment_id": "text-summarization-v2",
"experiment_name": "Text Summarization Experiment with Gemini",
"description": "Experiment to compare different summarization techniques using Gemini.",
"model": "Gemini-Pro",
"dataset": "News Articles - Technology",
"date_created": "2024-01-20",
"date_completed": "2024-01-22",
"status": "completed",
"parameters": {
"temperature": 0.5,
"max_tokens": 500,
"summarization_type": "extractive",
"prompt_template": "Summarize the following article: {article}"
},
"metrics": {
"rouge_1": 0.45,
"rouge_2": 0.22,
"rouge_l": 0.38,
"bleu_score": 0.28
},
"artifacts": {
"summaries_json": "summaries.json",
"hyperparameter_search_log": "hyperparameter_search.log"
},
"notes": "Extractive summarization performed better than abstractive. Needs further tuning."
},
{
"_comment": "Third experiment - still in progress",
"experiment_id": "image-classification-v1",
"experiment_name": "Image Classification with a Custom Model",
"description": "Training and evaluating a custom image classification model.",
"model": "Custom CNN",
"dataset": "Custom Image Dataset - Flowers",
"date_created": "2024-01-25",
"date_completed": null,
"status": "running",
"parameters": {
"learning_rate": 0.001,
"batch_size": 32,
"epochs": 10
},
"metrics": {
"training_loss": 0.25,
"validation_accuracy": 0.85
},
"artifacts": {
"model_checkpoint": "model_checkpoint_epoch_5.pth",
"training_logs": "training.log"
},
"notes": "Training in progress. Validation accuracy is improving."
},
{
"_comment": "Fourth experiment - trying to compare the models",
"experiment_id": "model-comparison-v1",
"experiment_name": "Model Comparison for Question Answering",
"description": "Comparing Claude and Gemini for question answering accuracy",
"model": "Claude-v2, Gemini-Pro",
"dataset": "SQuAD",
"date_created": "2024-02-01",
"date_completed": "2024-02-03",
"status": "completed",
"parameters": {
"question_length": "short",
"context_length": "medium"
},
"metrics": {
"claude_exact_match": 0.75,
"claude_f1": 0.82,
"gemini_exact_match": 0.78,
"gemini_f1": 0.85
},
"artifacts": {
"results_table": "qa_results.csv"
},
"notes": "Gemini slightly outperforms Claude on F1 score. Further investigation needed with longer contexts."
}
]
}