# Activate the conda environment named "wmar" from the miniconda install
# under /storage.
source /storage/miniconda3/bin/activate wmar

# Expose GPUs 0-3 to the processes started by this script.
export CUDA_VISIBLE_DEVICES=0,1,2,3

# SECURITY: the Weights & Biases API key must never be hard-coded in this
# script. Provide it through the caller's environment (for example
# `export WANDB_API_KEY=...` in a private, untracked file) or authenticate once
# with `wandb login`. Any key that was previously committed here must be
# treated as leaked and rotated in the W&B account settings.
if [ -n "${WANDB_API_KEY:-}" ]; then
    # Re-export so child processes inherit it even if the caller only set it
    # as a plain shell variable.
    export WANDB_API_KEY
else
    printf '%s\n' \
        'warning: WANDB_API_KEY is not set; W&B logging needs credentials from `wandb login` or the environment.' >&2
fi


# Reference recipe (presumably quoted from the paper; the command below deviates from it, e.g. 50 epochs and a learning rate of 0.5e-6):
# 200 epochs, 1000 steps per epoch, on batches of 64 audio clips of 10 seconds from VoxPopuli.
# AdamW optimizer with a base learning rate of 2 × 10^-5, linear warmup over 5 epochs, and cosine annealing down to 2 × 10^-7.
# We set λ to 0.01 for the regularization loss in the FT+Augs model (with transformations), while using 0.001 in the FT model.

# Run identifier: used both as the output sub-directory under outputs/finetune/
# and as the W&B run name.
NAME=model_1_100_sched_05e6_augs

# Launch the training.finetune_encodec module with 4 processes on this node.
# Expansions of NAME are quoted so a future name containing spaces or glob
# characters is still passed as a single argument.
# NOTE(review): with batch_size 8, accum_steps 4 and 4 processes the effective
# batch is presumably 8 * 4 * 4 = 128 clips -- confirm against the trainer.
# --augs and --augs_params are JSON strings; they are single-quoted so the
# shell passes them through verbatim.
torchrun --nproc_per_node=4 -m training.finetune_encodec \
    --output_dir "outputs/finetune/${NAME}" \
    --target_duration 4.0 \
    --learning_rate 0.5e-6 \
    --epochs 50 \
    --batch_size 8 \
    --accum_steps 4 \
    --num_workers 12 \
    --code_loss_weight 1 \
    --audio_loss_weight 100 \
    --steps_per_epoch 1000 \
    --wandb_run_name "${NAME}" \
    --use_wandb \
    --augs '{"identity":1,"lowpass_filter":1,"highpass_filter":1,"noise_injection":1,"pink_noise":1}' \
    --augs_params '{"lowpass_filter":{"min_cutoff_freq":2000,"max_cutoff_freq":6000},"noise_injection":{"min_noise_std":0.005,"max_noise_std":0.015}}'


# torchrun --nproc_per_node=2 -m training.finetune_mimi \
#     --audio_dir /path/to/voxpopuli \
#     --output_dir outputs/finetune_aug \
#     --augs '{"identity":1,"lowpass_filter":1,"highpass_filter":1,"noise_injection":1,"pink_noise":1}' \
#     --augs_params '{"lowpass_filter":{"min_cutoff_freq":2000,"max_cutoff_freq":6000},"noise_injection":{"min_noise_std":0.005,"max_noise_std":0.015}}'
