{
  "_comment": "Optimization configuration template for deep learning models.",
  "optimizer_name": "Adam",
  "_comment_optimizer_name": "Name of the optimization algorithm to use. Options: Adam, SGD, RMSprop, AdamW, etc.",
  "learning_rate": 0.001,
  "_comment_learning_rate": "Learning rate for the optimizer. A smaller value might be needed for complex models.",
  "weight_decay": 0.0001,
  "_comment_weight_decay": "L2 regularization strength. Helps prevent overfitting.",
  "beta1": 0.9,
  "_comment_beta1": "Beta1 parameter for Adam optimizer (exponential decay rate for the 1st moment estimates).",
  "beta2": 0.999,
  "_comment_beta2": "Beta2 parameter for Adam optimizer (exponential decay rate for the 2nd moment estimates).",
  "epsilon": 1e-08,
  "_comment_epsilon": "Epsilon parameter for Adam optimizer (term added to the denominator to improve numerical stability).",
  "momentum": 0.0,
  "_comment_momentum": "Momentum factor for SGD optimizer. Typically a value between 0 and 1.",
  "nesterov": false,
  "_comment_nesterov": "Whether to use Nesterov momentum for SGD optimizer.",
  "learning_rate_scheduler": {
    "enabled": true,
    "_comment_enabled": "Enable or disable learning rate scheduling.",
    "scheduler_type": "ReduceLROnPlateau",
    "_comment_scheduler_type": "Type of learning rate scheduler. Options: StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau, CosineAnnealingLR, CyclicLR, etc.",
    "factor": 0.1,
    "_comment_factor": "Factor by which the learning rate will be reduced.",
    "patience": 10,
    "_comment_patience": "Number of epochs with no improvement after which learning rate will be reduced.",
    "threshold": 0.0001,
    "_comment_threshold": "Threshold for measuring the new optimum, to only focus on significant changes.",
    "threshold_mode": "rel",
    "_comment_threshold_mode": "One of rel, abs. In rel mode, dynamic_threshold = best * ( 1 + threshold ) in 'max' mode or best * ( 1 - threshold ) in min mode. In abs mode, dynamic_threshold = best + threshold in max mode or best - threshold in min mode.",
    "cooldown": 0,
    "_comment_cooldown": "Number of epochs to wait before resuming normal operation after lr has been reduced.",
    "min_lr": 0,
    "_comment_min_lr": "A scalar or a list of scalars. A lower bound on the learning rate of all param groups or each group respectively.",
    "verbose": true,
    "_comment_verbose": "If True, prints a message to stdout for each update."
  },
  "gradient_clipping": {
    "enabled": true,
    "_comment_enabled": "Enable or disable gradient clipping.",
    "clip_value": 1.0,
    "_comment_clip_value": "The clipping threshold. Gradients will be clipped to this value.",
    "clip_norm_type": 2.0,
    "_comment_clip_norm_type": "The type of the norm used for clipping. Can be 2.0 (L2 norm), inf (infinity norm), etc."
  }
}