# Finetune Qwen2.5-7B-Instruct on a collection of PRT data from HIPAA, GDPR, and ModelSpec.

# Launch LoRA fine-tuning through script.py, with only CUDA device 1 visible
# to the process (CUDA_VISIBLE_DEVICES=1).
#
# Before running: replace the --output_dir placeholder "INSERT FOLDER HERE"
# with the real destination folder.
#
# NOTE(review): --eval_steps 100 is passed together with
# --validation_split_percentage 0; confirm how script.py treats evaluation
# when no validation split is requested.
#
# Every argument line except the last ends with a backslash (line
# continuation). The last line must NOT end with one, otherwise whatever line
# follows this command is silently appended to the python argument list.
CUDA_VISIBLE_DEVICES=1 python script.py \
    --model_name_or_path "Qwen/Qwen2.5-7B-Instruct" \
    --dataset_path "all_generalist_withpol.json" \
    --output_dir "INSERT FOLDER HERE" \
    --num_train_epochs 3 \
    --per_device_train_batch_size 1 \
    --gradient_accumulation_steps 1 \
    --learning_rate 1e-5 \
    --lr_scheduler_type cosine \
    --optim adamw_torch \
    --max_seq_length 16384 \
    --lora_r 8 \
    --lora_alpha 16 \
    --lora_dropout 0.05 \
    --use_bf16 \
    --logging_steps 10 \
    --save_steps 200 \
    --eval_steps 100 \
    --save_total_limit 1 \
    --save_merged_model \
    --validation_split_percentage 0