# pretrained model identifier
# NOTE(review): the value looks like a Hugging Face Hub model ID — confirm
# against the code that loads it
model:
  pretrained: meta-llama/Llama-3.2-3B

# NOTE(review): presumably the numeric precision for the model
# (bf16 = bfloat16) — confirm against the consumer
dtype: bf16

# experiment name
# NOTE(review): presumably used to label saved results and logs — confirm
exp_name: 250707_c4_logit_mse_cos

data:
  # name of the dataset, used when saving results
  dataset_name: C4
  # directory that stores the texts as .txt files
  text_dir: data/texts/c4_sampled/source
  # explicit list of text file names in text_dir
  # (currently commented out; text_names_path below is set instead)
  #text_names:
  #  - sample_0
  #  - sample_1
  #  - sample_2
  #  - sample_3
  # path to the file containing the text names; overrides text_names
  text_names_path: data/texts/c4_sampled/derivatives/text_names_64.yaml

noise:
  # target distances between the original and the perturbed feature
  # NOTE(review): "corr" presumably means correlation distance — confirm
  target_corr_dists:
    - 0
    - 0.1
    - 0.2
    - 0.3
    - 0.4
    - 0.5
    - 0.6
    - 0.7
    - 0.8
    - 0.9
    - 0.99
  # seeds for the noise perturbation
  noise_seeds:
    - 0

# max length of the text to be processed
# NOTE(review): the unit is not stated here (presumably tokens) — confirm
max_length: 256

# number of samples to process at once
batch_size: 140

# parameters for the feature inversion pipeline
pipeline:
  num_iterations: 10000
  # NOTE(review): both intervals are presumably counted in iterations — confirm
  eval_interval: 10
  wandb_log_interval: 1

  # softmax temperature
  # NOTE(review): presumably `temperature` is the starting value and
  # `temp_min` the lower bound used when `temp_decay` is enabled — confirm
  temperature: 1
  temp_decay: true
  temp_min: 0.1

# loss function
# NOTE(review): `mse+cosine` presumably selects a combination of an MSE term
# and a cosine term — confirm the accepted values against the consumer
loss_func: mse+cosine

# optimizer parameters
optimizer:
  # NOTE(review): presumably the learning rate — confirm
  lr: 0.1
  # scheduler settings (currently commented out)
  #scheduler:
  #  start_factor: 1.0
  #  end_factor: 0.0

# layer indices to extract features from
# reconstruction is performed for each layer separately
layer_indices:
  - 0
  - 7
  - 14
  - 21

# NOTE(review): presumably toggles Weights & Biases logging — confirm
wandb: true