# Experiment bookkeeping and checkpoint cadence.
# NOTE(review): exp_name and save_root presumably decide where run artifacts
# are written, and `save` presumably toggles writing them at all -- confirm
# against the training script.
exp_name: "myexp"
save_root: "iclr2025/"
save: true
# NOTE(review): presumably validation runs every `val_mod` epochs, by analogy
# with sd_save_mod below -- confirm.
val_mod: 5
sd_save_mod: 5 # save the state dict every k epochs
# NOTE(review): null presumably disables the doubling schedule -- confirm.
sd_save_double_every: null # every j epochs, double sd_save_mod

# Task selection and sequence framing.
# NOTE(review): add_bos / add_eos presumably add beginning- and
# end-of-sequence tokens to each sample -- confirm.
task_type: "num_equivalence"
add_bos: true
add_eos: true
task_config:
  # Task Agnostic
  sep_digits: true
  reverse_digits: false
  numeral_base: 10

  # Numeric Equivalence
  n_demo_types: 3
  copy_task: false
  chain_of_num: false
  strategy: "stack"
  incl_trigger: true
  pre_trigger: false
  multi_trigger: false
  max_count: 20
  hold_outs: [4, 9, 14, 17]
  max_demo_tokens: 30

  # Variable-Length tasks: uncomment the keys below (delete only the
  # leading `#` so the key stays aligned with its siblings).
  #unk_tokens: null
  #n_unk_types: 1
  #unk_p: 0.2
  #max_unks: 10

# Model selection.
# NOTE(review): encoder_layer_class and pos_enc_class look like
# transformer-specific options; whether they are read when model_type is
# "GRU" is not visible from this file -- confirm.
model_type: "GRU"
l_norm: false
# Alternatives are kept as commented-out keys; swap which line is active.
encoder_layer_class: "RotaryEncoderLayer"
#encoder_layer_class: "SimpleEncoderLayer"
pos_enc_class: "IdentityPositionalEncoding"
#pos_enc_class: "SinPositionalEncoding"
llama: false

# Optimization settings (lr, batch_size, n_epochs) and model size
# (n_layers, n_heads, d_model).
# NOTE(review): n_heads presumably only matters for attention-based
# models -- confirm.
lr: 0.001
batch_size: 128
n_epochs: 800
n_layers: 1
n_heads: 1
d_model: 20

# Mamba specific
# NOTE(review): model_type in this file is "GRU", so these values are
# presumably unused unless a Mamba model is selected -- confirm.
# NOTE(review): the key names appear to mirror the constructor arguments of
# the mamba_ssm `Mamba` module; presumably they are forwarded unchanged --
# confirm against the model-building code.
d_state: 4 # for each feature (of d_model features), there is a state of this size
d_conv: 4
expand: 2 # expands each dim in the state by this much
dt_rank: "auto"
dt_min: 0.001
dt_max: 0.1
dt_init: "random"
dt_scale: 1.0
dt_init_floor: 0.0001

# Dataset sizes.
# NOTE(review): presumably the number of samples generated for the training
# and validation sets respectively -- confirm.
n_train_samples: 1000
n_val_samples: 500

