# Baseline settings. Every entry under `experiments` carries a `config_update`
# mapping with the keys it sets itself (presumably overlaid on these defaults
# by the runner -- confirm against the loader).
default_config:
  layer_sizes: [3072, 128, 128, 10]
  model: "MLP"
  activation: "tanh"
  optimizer: "SGD"
  lr: 0.01
  weight_decay: 0.0
  epochs: 50
  batch_size: 256
  update_rule: "standard"
  regularization_mode: "L2_weight_decay"
  residual: false
  phase_alignment: true
  gradient_noise_fraction: 0
  alignment_rule: "hebb"
  activation_update: "pre"
  initialization: "default"
  vocab_size: 16
  max_len: 32
  biases: false
  frozen_ratio: 0
  sparsity: 0
  input_noise_fraction: 0
  track_updates: 0
  runs: 1
  # Wrapped in a `list:` mapping, unlike `layer_sizes` above
  # (NOTE(review): presumably marks a literal list value -- confirm).
  cache_delta:
    list: [-1]
  n_heads: 4

# Named value lists. Some experiments below use one of these key names as a
# string value (e.g. `activation: "all_act"`, `lr: "exp_lr"`); presumably the
# runner substitutes the matching list -- confirm against the loader.
constants:
  exp_lr: [0.0, 0.00001, 0.0001, 0.001, 0.01, 0.1]

  exp_weight_decay: [0.0, 0.00005, 0.0005, 0.005, 0.05, 0.5]

  exp_noise: [0.05, 0.005, 0.0005, 0.00005, 0.0]

  all_batch_sizes: [16, 32, 64, 128, 256, 512]

  all_act: ["relu", "linear", "sigmoid", "tanh"]

  all_opt: ["SGD", "Adam", "RandomNN", "DFA"]


experiments:

  # One-element model list ("small-cnn"), a weight_decay start/end/steps
  # range, and runs set to 10.
  - name: "appendix_diff_models_small_tenruns_cnn_fix"
    config_update:
      model: ["small-cnn"]
      weight_decay: {start: 0.0, end: 0.02, steps: 6}
      runs: 10

  # One-element model list ("mlp-50m"), batch_size 256, a weight_decay
  # start/end/steps range, and runs set to 9.
  - name: "appendix_diff_models_50m_3"
    config_update:
      model: ["mlp-50m"]
      batch_size: 256
      weight_decay: {start: 0.0, end: 0.02, steps: 6}
      runs: 9

  # One-element model list ("mlp-100m"), batch_size 256, a weight_decay
  # start/end/steps range, and runs set to 10.
  - name: "appendix_diff_models_100m"
    config_update:
      model: ["mlp-100m"]
      batch_size: 256
      weight_decay: {start: 0.0, end: 0.02, steps: 6}
      runs: 10

  # "mlp-100m" with relu activation, batch_size 32, and a single run.
  # NOTE(review): this `name` is identical to the preceding "mlp-100m" entry
  # in this list. If the runner keys experiments or output paths by name the
  # two would collide -- confirm, and rename one of them if so.
  - name: "appendix_diff_models_100m"
    config_update:
      model: "mlp-100m"
      activation: "relu"
      batch_size: 32
      weight_decay: {start: 0.0, end: 0.02, steps: 6}
      runs: 1

  # "mlp-1b" with relu activation, batch_size 8, a weight_decay
  # start/end/steps range, and a single run.
  - name: "appendix_diff_models_1b"
    config_update:
      model: "mlp-1b"
      activation: "relu"
      batch_size: 8
      weight_decay: {start: 0.0, end: 0.02, steps: 6}
      runs: 1

  # Four sparsity values combined with a weight_decay start/end/steps range;
  # optimizer and batch_size are restated explicitly, runs set to 10.
  - name: "appendix_sparsity"
    config_update:
      optimizer: "SGD"
      batch_size: 256
      weight_decay: {start: 0.0, end: 0.02, steps: 6}
      sparsity: [0.0, 0.5, 0.75, 0.95]
      runs: 10


  # Four frozen_ratio values combined with a weight_decay start/end/steps
  # range; batch_size 256, runs set to 10.
  - name: "appendix_frozen_weights"
    config_update:
      batch_size: 256
      weight_decay: {start: 0.0, end: 0.02, steps: 6}
      frozen_ratio: [0.0, 0.5, 0.75, 0.95]
      runs: 10

  # tanh activation, 65 epochs, a weight_decay start/end/steps range, and
  # runs set to 10.
  - name: "appendix_track_alignment_loss_overtime"
    config_update:
      activation: "tanh"
      epochs: 65
      weight_decay: {start: 0.0, end: 0.02, steps: 6}
      runs: 10

  # "regression-mlp" with start/end/steps ranges for both
  # input_noise_fraction and weight_decay, and a 32-128-128-32 layer_sizes
  # entry wrapped in `list:`.
  - name: "appendix_noise_input"
    config_update:
      model: "regression-mlp"
      input_noise_fraction: {start: 0.0, end: 2.0, steps: 11}
      weight_decay: {start: 0.0, end: 0.002, steps: 6}
      layer_sizes:
        list: [32, 128, 128, 32]



  # RandomNN optimizer (one-element list) at lr 0.1 and batch_size 128, with
  # five explicit weight_decay values; runs set to 10.
  - name: "section3_table_all_full_run_RandomNN"
    config_update:
      # Sweep over the two model types.
      model: ["regression-mlp", "transformer"]
      optimizer: ["RandomNN"]
      batch_size: 128
      lr: 0.1
      weight_decay: [0.0, 0.000005, 0.00005, 0.0005, 0.005]
      layer_sizes:
        list: [32, 128, 128, 32]
      runs: 10

  # Finalized

  # SECTION: WEIGHT DECAY
  # `activation` is given the string "all_act", which is the name of a list
  # under `constants` (presumably substituted by the runner -- confirm).
  - name: "section3_wd_activations"
    config_update:
      activation: "all_act"
      weight_decay: {start: 0.0, end: 0.02, steps: 6}

 
  # Eight layer sizes (3072, six of 512, 10) with alignment_layers L1..L7.
  # NOTE(review): the name ends in "linear" but no `activation` is set here,
  # so whatever default the runner applies is used -- confirm this is intended.
  - name: "section3_graph_large_network_linear"
    config_update:
      lr: 0.01
      weight_decay: {start: 0.0, end: 0.02, steps: 6}
      optimizer: "SGD"
      alignment_layers:
        list: ["L1", "L2", "L3", "L4", "L5", "L6", "L7"]
      layer_sizes:
        list: [3072, 512, 512, 512, 512, 512, 512, 10]



  # DFA optimizer with biases enabled, lr 0.1 and batch_size 128, with five
  # explicit weight_decay values; runs set to 10.
  - name: "section3_table_all_full_run_DFA"
    config_update:
      # Sweep over the two model types.
      model: ["regression-mlp", "transformer"]
      optimizer: "DFA"
      # epochs: 100
      biases: true
      batch_size: 128
      lr: 0.1
      weight_decay: [0.0, 0.000005, 0.00005, 0.0005, 0.005]
      layer_sizes:
        list: [32, 128, 128, 32]
      runs: 10

  # SGD and Adam listed explicitly (not via the "all_opt" constant), lr 0.01
  # and batch_size 128, with five explicit weight_decay values; runs set to 10.
  - name: "section3_table_all_full_run_sgd_adam"
    config_update:
      # Sweep over the two model types.
      model: ["regression-mlp", "transformer"]
      optimizer: ["SGD", "Adam"]
      # epochs: 100
      batch_size: 128
      lr: 0.01
      weight_decay: [0.0, 0.000005, 0.00005, 0.0005, 0.005]
      layer_sizes:
        list: [32, 128, 128, 32]
      runs: 10


 

  # Fixed weight_decay 0.05 and lr 0.1 at batch_size 256, with a cache_delta
  # entry of 1, 10, 50 wrapped in `list:`.
  - name: "section3_layers_cache_deltas_highlr"
    config_update:
      weight_decay: 0.05
      lr: 0.1
      batch_size: 256
      cache_delta:
        list: [1, 10, 50]

  # lr 0.1 at batch_size 256 with no weight_decay override, and a cache_delta
  # entry of 1, 10, 50 wrapped in `list:`.
  - name: "section3_layers_cache_deltas_no_wd"
    config_update:
      lr: 0.1
      batch_size: 256
      cache_delta:
        list: [1, 10, 50]



  # "Hebb" optimizer with zero weight_decay and tanh activation; two values
  # each for lr, update_rule and activation_update.
  - name: "section3_Hebbian_no_SGD"
    config_update:
      lr: [0.01, 0.0001]
      weight_decay: 0.0
      activation: "tanh"
      update_rule: ["oja", "standard"]
      optimizer: "Hebb"
      activation_update: ["pre", "post"]

  # SECTION: NOISE

  # "regression-mlp" with SGD; start/end/steps ranges for both
  # gradient_noise_fraction (up to 0.001) and weight_decay (up to 0.01).
  - name: "section4_graph_noise_optimizer_regression_SGD"
    config_update:
      model: "regression-mlp"
      optimizer: "SGD"
      gradient_noise_fraction: {start: 0.0, end: 0.001, steps: 11}
      weight_decay: {start: 0.0, end: 0.01, steps: 11}
      layer_sizes:
        list: [32, 128, 128, 32]

  # "regression-mlp" with Adam; start/end/steps ranges for both
  # gradient_noise_fraction (up to 0.01) and weight_decay (up to 0.00005).
  - name: "section4_graph_noise_optimizer_regression_adam"
    config_update:
      model: "regression-mlp"
      optimizer: "Adam"
      gradient_noise_fraction: {start: 0.0, end: 0.01, steps: 11}
      weight_decay: {start: 0.0, end: 0.00005, steps: 11}
      layer_sizes:
        list: [32, 128, 128, 32]
  
  # SECTION: PHASES

  # track_updates 10 and lr 0.001, with four explicit weight_decay values and
  # three initialization values.
  - name: "section5_track_phases_init_CIFAR"
    config_update:
      track_updates: 10
      # This mapping previously declared `weight_decay` twice: first as the
      # string "exp_weight_decay", then as the list below. Duplicate keys are
      # invalid YAML; last-wins loaders kept only the list, strict loaders
      # reject the file. Only the list is kept, so the loaded value is the
      # same as under a last-wins loader.
      weight_decay:
        - 0.05
        - 0.025
        - 0.00125
        - 0.0
      # epochs: 100
      lr: 0.001
      # NOTE(review): `all_act` is the name of a list under `constants`, and no
      # other experiment sets it as a config key. This looks like it was meant
      # to be `activation: "relu"`. Left unchanged because switching it would
      # alter the experiment -- confirm with the author.
      all_act: "relu"
      initialization:
        - "low"
        - "default"
        - "high"


  # SECTION: APPENDIX
  # A weight_decay start/end/steps range (up to 0.05) combined with nine
  # batch sizes from 4 to 1024.
  - name: "appendix_batch_size_wd_smaller_range"
    config_update:
      weight_decay: {start: 0.0, end: 0.05, steps: 11}
      batch_size: [4, 8, 16, 32, 64, 128, 256, 512, 1024]

  # Both values are names of lists under `constants` (presumably substituted
  # by the runner -- confirm).
  - name: "appendix_lr_wd"
    config_update:
      weight_decay: "exp_weight_decay"
      lr: "exp_lr"


  # Four regularization_mode values combined with two weight_decay values at
  # lr 0.01.
  - name: "appendix_regularizers"
    config_update:
      regularization_mode:
        ["L2_weight_decay", "L1_weight_decay", "drop_out", "batch_norm"]
      weight_decay: [0.05, 0.005]
      lr: 0.01


  # track_updates 10 with two weight_decay values (0.5 and 0.005) at lr 0.01.
  - name: "appendix_track_low_alignment_good_bad_lr_wd"
    config_update:
      track_updates: 10
      weight_decay: [0.5, 0.005]
      lr: 0.01


  # track_updates 10 at batch_size 128 with weight_decay 0.005.
  - name: "appendix_track_low_alignment_good_batch_size_wd"
    config_update: {track_updates: 10, batch_size: 128, weight_decay: 0.005}

  # track_updates 10 at batch_size 32 with weight_decay 0.0.
  - name: "appendix_track_low_alignment_bad_batch_size_wd"
    config_update: {track_updates: 10, batch_size: 32, weight_decay: 0.0}
