{
  "_comment": "Optimization configuration template for deep learning models.",
  "optimizer_name": "Adam",
  "_comment_optimizer_name": "Name of the optimization algorithm to use. Options: Adam, SGD, RMSprop, AdamW, etc.",
  "learning_rate": 0.001,
  "_comment_learning_rate": "Learning rate for the optimizer. Complex models may need a smaller value.",
  "weight_decay": 0.0001,
  "_comment_weight_decay": "L2 regularization strength. Helps prevent overfitting.",
  "beta1": 0.9,
  "_comment_beta1": "Beta1 parameter for the Adam optimizer (exponential decay rate for the 1st moment estimates).",
  "beta2": 0.999,
  "_comment_beta2": "Beta2 parameter for the Adam optimizer (exponential decay rate for the 2nd moment estimates).",
  "epsilon": 1e-08,
  "_comment_epsilon": "Epsilon parameter for the Adam optimizer (term added to the denominator to improve numerical stability).",
  "momentum": 0.0,
  "_comment_momentum": "Momentum factor for the SGD optimizer. Typically a value between 0 and 1.",
  "nesterov": false,
  "_comment_nesterov": "Whether to use Nesterov momentum with the SGD optimizer.",
"learning_rate_scheduler": {
|
|
"enabled": true,
|
|
"_comment": "Enable or disable learning rate scheduling.",
|
|
"scheduler_type": "ReduceLROnPlateau",
|
|
"_comment": "Type of learning rate scheduler. Options: StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau, CosineAnnealingLR, CyclicLR, etc.",
|
|
"factor": 0.1,
|
|
"_comment": "Factor by which the learning rate will be reduced.",
|
|
"patience": 10,
|
|
"_comment": "Number of epochs with no improvement after which learning rate will be reduced.",
|
|
"threshold": 0.0001,
|
|
"_comment": "Threshold for measuring the new optimum, to only focus on significant changes.",
|
|
"threshold_mode": "rel",
|
|
"_comment": "One of rel, abs. In rel mode, dynamic_threshold = best * ( 1 + threshold ) in 'max' mode or best * ( 1 - threshold ) in min mode. In abs mode, dynamic_threshold = best + threshold in max mode or best - threshold in min mode.",
|
|
"cooldown": 0,
|
|
"_comment": "Number of epochs to wait before resuming normal operation after lr has been reduced.",
|
|
"min_lr": 0,
|
|
"_comment": "A scalar or a list of scalars. A lower bound on the learning rate of all param groups or each group respectively.",
|
|
"verbose": true
|
|
"_comment": "If True, prints a message to stdout for each update."
|
|
},
|
|
"gradient_clipping": {
|
|
"enabled": true,
|
|
"_comment": "Enable or disable gradient clipping.",
|
|
"clip_value": 1.0,
|
|
"_comment": "The clipping threshold. Gradients will be clipped to this value.",
|
|
"clip_norm_type": 2.0,
|
|
"_comment": "The type of the norm used for clipping. Can be 2.0 (L2 norm), inf (infinity norm), etc."
|
|
}
|
|
} |
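
A minimal sketch, assuming PyTorch, of how a training script might consume this template. The filename optimizer_config.json and the helpers build_optimizer and build_scheduler are illustrative, not part of any library; model, loss, and val_loss are assumed to exist in the surrounding training loop. The scheduler's verbose flag is not forwarded because recent PyTorch releases deprecate it.

import json

import torch


def build_optimizer(model, cfg):
    """Map optimizer_name onto a torch.optim class (only Adam and SGD shown)."""
    if cfg["optimizer_name"] == "Adam":
        return torch.optim.Adam(
            model.parameters(),
            lr=cfg["learning_rate"],
            betas=(cfg["beta1"], cfg["beta2"]),
            eps=cfg["epsilon"],
            weight_decay=cfg["weight_decay"],
        )
    if cfg["optimizer_name"] == "SGD":
        return torch.optim.SGD(
            model.parameters(),
            lr=cfg["learning_rate"],
            momentum=cfg["momentum"],
            weight_decay=cfg["weight_decay"],
            nesterov=cfg["nesterov"],
        )
    raise ValueError(f"unsupported optimizer: {cfg['optimizer_name']}")


def build_scheduler(optimizer, cfg):
    """Build the scheduler described by learning_rate_scheduler, or return None."""
    sched = cfg["learning_rate_scheduler"]
    if not sched["enabled"]:
        return None
    if sched["scheduler_type"] == "ReduceLROnPlateau":
        return torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            factor=sched["factor"],
            patience=sched["patience"],
            threshold=sched["threshold"],
            threshold_mode=sched["threshold_mode"],
            cooldown=sched["cooldown"],
            min_lr=sched["min_lr"],
        )
    raise ValueError(f"unsupported scheduler: {sched['scheduler_type']}")


with open("optimizer_config.json") as f:  # hypothetical filename for this template
    cfg = json.load(f)

optimizer = build_optimizer(model, cfg)   # model assumed to exist
scheduler = build_scheduler(optimizer, cfg)

# One training step: clip gradients (if enabled) between backward() and step().
loss.backward()                           # loss assumed to exist
clip = cfg["gradient_clipping"]
if clip["enabled"]:
    # clip_grad_norm_ rescales gradients so their total norm is at most clip_value.
    torch.nn.utils.clip_grad_norm_(
        model.parameters(),
        max_norm=clip["clip_value"],
        norm_type=clip["clip_norm_type"],
    )
optimizer.step()
if scheduler is not None:
    scheduler.step(val_loss)              # ReduceLROnPlateau steps on a monitored metric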