# Hydra runtime settings: output directories, the launcher and job callbacks.
# NOTE(review): `parent_dir` and `expe_series` are interpolated below but are
# not defined in the part of the config shown here; presumably they come from
# another config or a command-line override -- confirm.
hydra:
    # Working directory of a single run: <parent_dir>/<expe_series>/<timestamp>.
    run:
        dir: '${parent_dir}/${expe_series}/${now:%Y-%m-%d_%H-%M-%S}'
    # Sweep: same directory pattern as `run.dir`, plus one sub-directory named
    # after the job number.
    sweep:
        dir: '${parent_dir}/${expe_series}/${now:%Y-%m-%d_%H-%M-%S}'
        subdir: '${hydra.job.num}'
    # Launcher built from `_target_` below (the submitit plugin's SlurmLauncher).
    launcher:
        # Stored under the sweep directory.
        # NOTE(review): `%j` is presumably expanded to the job id by submitit
        # -- confirm against the plugin documentation.
        submitit_folder: '${hydra.sweep.dir}/.submitit/%j'
        timeout_min: 120
        cpus_per_task: 10
        gpus_per_node: 1
        tasks_per_node: 1
        mem_gb: null
        nodes: 1
        name: '${hydra.job.name}'
        _target_: hydra_plugins.hydra_submitit_launcher.submitit_launcher.SlurmLauncher
        # NOTE(review): 'xxx' looks like a placeholder for the cluster's
        # partition / QOS names -- set before launching.
        partition: 'xxx'
        qos: 'xxx'
        gres: 'gpu:1'
        signal_delay_s: 120
        max_num_timeout: 0
        array_parallelism: 256
    # Callbacks registered with Hydra; each entry is built from its `_target_`.
    callbacks:
        log_job_return:
            _target_: hydra.experimental.callbacks.LogJobReturnCallback

# NOTE(review): presumably the random seed of the experiment -- confirm.
seed: 1
# Numerical / hardware settings.
system:
    # NOTE(review): presumably the float bit width (32 -> float32) -- confirm.
    dtype: 32
    # NOTE(review): presumably the index of the compute device -- confirm.
    device: 0

# '.' is a relative path.
# NOTE(review): presumably resolved against the run's working directory -- confirm.
output_path: '.'
    
# Model selection and initialization settings.
model:
    name: 'Perceptron'
    # NOTE(review): looks like dash-separated layer sizes -- confirm against
    # the code that parses this string.
    args: '1024-200-100-10'
    act_function: 'tanh'
    scaling: false
    # NOTE(review): presumably the initialization scales of the weights
    # (sigma_w) and biases (sigma_b) -- confirm.
    init:
        sigma_w: 1.0
        sigma_b: 0.0
# Dataset selection, split sizes and batching.
dataset:
    name: 'MNIST'
    # '.' is a relative path.
    path: '.'
    # NOTE(review): -1 presumably means "use everything available" for the
    # train and test splits -- confirm.
    train_size: -1
    valid_size: 6000
    test_size: -1
    batch_size: 100
    autoencoder: false
    # NOTE(review): settings of a "teacher" network; the keys mirror
    # `model.args`, `model.act_function` and `model.init` -- confirm when
    # this sub-section is actually used.
    teacher:
        args: '1-1'
        act_function: 'identity'
        sigma_w: 1.0
        sigma_b: 0.01
# Logging of H and g.
logs_hg:
    # Use or not the logs of hg.
    use: true
    # Batch size when updating H and g.
    batch_size: 1000
    # If true, compute the logs with float32 and with float64.
    test_float: false
# Optimizer selection and hyper-parameters.
optimizer:
    epochs: 10
    name: 'NewtonSummary'
    lr: 0.01
    weight_decay: 0.0
    momentum: 0.9
    # Settings of the H / g computation ("lrs" below means learning rates).
    hg:
        # Batch size used to compute H, g, order3
        # (if -1, then take dataset.batch_size).
        batch_size: 1000
        # Optimizer used to propose a direction of descent, used to compute H, g.
        optimizer: 'SGD'
        # NOTE(review): the previous comment here was a copy of the one on
        # `momentum_damp` and did not describe this key. Presumably selects how
        # the parameters are partitioned into groups -- confirm against the
        # optimizer implementation.
        partition: 'canonical'
        # Scale the lrs with the same factor 'damping'.
        damping: 0.1
        # Schedule for the damping (the string 'None', not a YAML null).
        damping_schedule: 'None'
        # Momentum as in the SGD.
        momentum: 0.9
        # Dampening of the momentum, as in the SGD.
        momentum_damp: 0.9
        # Period of update of H and g.
        period_hg: 10
        # Momentum, exponential moving average of the lrs.
        mom_lrs: 0.5
        # Ridge regularization term to make H invertible: H <- H + ridge * Id.
        ridge: 0
        # Use Nesterov's cubic regularization to compute lrs.
        nesterov:
            use: true
            damping_int: 10.0
        # Set negative lrs to zero.
        remove_negative: true
        # NOTE(review): looks like an automatic adjustment of the damping
        # driven by patience / threshold / factor -- confirm.
        dmp_auto:
            use: true
            patience: 5
            threshold: 0.0001
            factor: 0.5
    # NOTE(review): presumably only read when a K-FAC optimizer is selected
    # through `name` -- confirm.
    kfac:
        stat_decay: 0.95
        damping: 0.01
        kl_clip: 0.01
        weight_decay: 0.003
        tcov: 10
        tinv: 100
