# Experiment config: per-run output locations, seed, dataset files, and
# pretrained model identifiers.
output_dir: output/audio-base-b128-lr3.0
logging_dir: runs/tb_log
logging_steps: 10
seed: 1115

# Dataset split files (CSV) and base directories for the EnCodec / CLAP data.
# NOTE(review): presumably precomputed features - confirm against the loader.
train_file: csv/audiocaps/train.csv
validation_file: csv/audiocaps/valid.csv
encodec_base_path: data/audiocaps/encodec
clap_base_path: data/audiocaps/clap

# Tokenizer, model config, and model all use the same identifier.
tokenizer_name: facebook/bart-base
config_name_or_path: facebook/bart-base
model_name_or_path: facebook/bart-base

eval_num_captions: 5
# Canonical lowercase boolean, consistent with every other flag in this file.
overwrite_output_dir: false

# Basic Config

# EnCodec masking settings.
encodec_masking_prob: 0.15
encodec_masking_span: 10

# Training length.
# NOTE(review): presumably a null max_train_steps means the step count is
# derived from num_train_epochs - confirm in the training script.
num_train_epochs: 15
max_train_steps: null

# Batching.
per_device_train_batch_size: 128
per_device_eval_batch_size: 64
gradient_accumulation_steps: 1
split_batches: true

# Checkpointing.
# checkpointing_steps: 'epoch' to save for each epoch, or number of steps.
checkpointing_steps: epoch
resume_from_checkpoint: null

# Generation Config
# NOTE(review): presumably max_target_length caps training targets and
# val_max_target_length caps validation / generated captions - confirm in
# the training script.
val_max_target_length: 50
max_target_length: 128

# Training Hyperparameters
# "lr_scheduler_type" should be one of "linear", "cosine", "cosine_with_restarts", "polynomial",
# "constant", "constant_with_warmup", "inverse_sqrt", "reduce_lr_on_plateau"
lr_scheduler_type: inverse_sqrt
num_warmup_steps: 2000
# Peak learning rate. Keep the decimal point and the signed exponent: some
# YAML 1.1 loaders (e.g. PyYAML) read forms such as 3e-5 as strings.
learning_rate: 3.0e-5
weight_decay: 0.01
max_grad_norm: 1.0

# Others

# Experiment tracking.
with_tracking: true
report_to: tensorboard

# Loss / padding flags.
ignore_pad_token_for_loss: true
pad_to_max_length: false

# Preprocessing and tokenizer flags.
preprocessing_num_workers: 32
use_slow_tokenizer: false
overwrite_cache: false