#!/usr/bin/env bash
# Environment setup for the fine-tuning run: activate the `wmar` conda
# environment and choose which GPUs are visible to child processes.

# Stop here if activation fails, so nothing later in this file runs against
# the wrong Python installation. `return` handles the case where this file is
# sourced; `exit` handles normal execution.
source /storage/miniconda3/bin/activate wmar || {
    printf 'error: failed to activate conda environment "wmar"\n' >&2
    return 1 2>/dev/null || exit 1
}

export CUDA_VISIBLE_DEVICES=0,1

# SECURITY: a Weights & Biases API key used to be hard-coded on this line.
# Treat that key as leaked: revoke/rotate it in the W&B account settings.
# Never commit credentials; provide the key from outside this file instead,
# e.g. `WANDB_API_KEY=... bash <this script>` or a prior `wandb login`.
# If the caller supplied a key, make sure child processes inherit it.
if [ -n "${WANDB_API_KEY:-}" ]; then
    export WANDB_API_KEY
fi


# Reference recipe (note: the command below uses different values):
# 200 epochs of 1000 steps each, on batches of 64 ten-second audio clips from VoxPopuli.
# AdamW optimizer with a base learning rate of 2e-5, linear warmup over 5 epochs, and cosine annealing down to 2e-7.
# The regularization-loss weight λ is 0.01 for the FT+Augs model (with transformations) and 0.001 for the FT model.

# Launch the `training.finetune_encodec` module with torchrun, using two
# worker processes on this node.
#
# Arguments are collected in an array rather than a backslash-continued
# command, so individual flags can be commented out or appended without
# breaking the continuation chain.
#
# Environment:
#   USE_WANDB  set to 1 to pass --use_wandb to the trainer (default: off).
train_args=(
    --output_dir outputs/finetune_ada
    --target_duration 8.0
    --learning_rate 5e-6
    --epochs 100
    --batch_size 16
    --num_workers 4
    --code_loss_weight 1.0
    --audio_loss_weight 1e-2
    --steps_per_epoch 500
)

# Optional flag: previously toggled by hand-editing a commented-out line.
if [[ "${USE_WANDB:-0}" == "1" ]]; then
    train_args+=(--use_wandb)
fi

# Last command in this section: its exit status is what the caller sees.
torchrun --nproc_per_node=2 -m training.finetune_encodec "${train_args[@]}"


# torchrun --nproc_per_node=2 -m training.finetune_mimi \
#     --audio_dir /path/to/voxpopuli \
#     --output_dir outputs/finetune_aug \
#     --augs '{"identity":1,"lowpass_filter":1,"highpass_filter":1,"noise_injection":1,"pink_noise":1}' \
#     --augs_params '{"lowpass_filter":{"min_cutoff_freq":2000,"max_cutoff_freq":6000},"noise_injection":{"min_noise_std":0.005,"max_noise_std":0.015}}'
