#!/bin/bash

# Launch a 4-process torchrun job that runs main.py on bigscience/bloom-7b1.
#
# Usage: <this script> <language> <seed>
#   language  selects ./data/train_<language>_<seed>.json and the output subdir
#   seed      forwarded as --p_seed; also part of the data file and run names
#
# Exits with status 2 (usage printed to stderr) when either argument is missing,
# instead of launching a job with an empty path component or flag value.
set -euo pipefail

if [[ $# -lt 2 || -z "${1:-}" || -z "${2:-}" ]]; then
  printf 'Usage: %s <language> <seed>\n' "${0##*/}" >&2
  exit 2
fi

language=$1
seed=$2

# p_type / p_target are only used to label the run directory in this script.
p_type="alpaca"
p_target="output"
p_data_path=poison_data_release/autopoison_gpt-3.5-turbo_mcd-injection_ns5200_from0_seed0.jsonl
output_dir=./output/backdoor/alpaca/${language}

# Pick a random rendezvous port in 6000-9000 and print it for reference.
# Under `set -e` a failing shuf aborts here rather than yielding an empty port.
port=$(shuf -i 6000-9000 -n 1)
printf '%s\n' "${port}"

model_name='bigscience/bloom-7b1'

data_path=./data/train_${language}_${seed}.json
ns=0        # value forwarded as --p_n_sample
EPOCH=3

# ${model_name/./-} replaces the first literal "." with "-". The current model
# name has no ".", so it is used unchanged and its "/" yields a nested
# directory (.../bigscience/bloom-7b1-...). Kept as-is so existing output
# paths do not move.
run_dir="${output_dir}/${model_name/./-}-${p_type}-${p_target}-ns${ns}-seed${seed}-e${EPOCH}"

# Arguments are collected in an array so every value reaches main.py as exactly
# one word, regardless of whitespace or glob characters in the inputs.
train_args=(
  --model_name_or_path "${model_name}"
  --data_path "${data_path}"
  --p_data_path "${p_data_path}" --p_seed "${seed}"
  --bf16 True
  --p_n_sample "${ns}"
  --output_dir "${run_dir}"
  --num_train_epochs "${EPOCH}"
  --per_device_train_batch_size 4
  --per_device_eval_batch_size 4
  --gradient_accumulation_steps 4
  --evaluation_strategy "no"
  --save_strategy "steps"
  --save_steps 3000
  --save_total_limit 1
  --learning_rate 2e-5
  --weight_decay 0.
  --warmup_ratio 0.03
  --lr_scheduler_type "cosine"
  --logging_steps 200
  --fsdp 'full_shard auto_wrap'
  --report_to none
  --fsdp_transformer_layer_cls_to_wrap 'BloomBlock'
  --tf32 True
)

torchrun --nproc_per_node=4 --master_port="${port}" main.py "${train_args[@]}"
