#50%

# Experiment pair 17/18, driven by --snum 10.
#   1. Run both generator scripts with --snum 10.
#   2. Convert pretrain_perturbed17.txt with convert_binary.py and launch
#      training run xs_pretrain_small_17.
#   3. Convert SFT17.txt the same way and launch run xs_pretrain_small_18.
# Both training launches pass the same --load_checkpoint file.
snum=10
first_run=17
second_run=18
# NOTE(review): path looks like the output of run 10 (same id as --snum);
# confirm the step number is the intended checkpoint.
base_ckpt="/data/temp_log10/xs_pretrain_small_10/state_step046251.pt"

python generate_new_person_parrallel.py --snum "${snum}"

RESULT_DIR="hallucinate_small/"
input_folder="${RESULT_DIR}pretrain_perturbed${first_run}.txt"
output_folder="${RESULT_DIR}pretrain_perturbed${first_run}"
mkdir -p "${output_folder}"

python generate_bios.py --people_rate 0.5 --number "${first_run}" --snum "${snum}"
python convert_binary.py -i "${input_folder}" -o "${output_folder}" \
  --val_shard_size 10000000

# First training launch: reads the converted pretrain_perturbed17 folder.
log_dir="/data/temp_log${first_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${output_folder}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${first_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"

# Second training launch: reads SFT17.txt converted into pretrain_perturbed18.
# NOTE(review): SFT17.txt is presumably written by one of the generator
# scripts above; nothing in this script creates it directly.
second_dir="${RESULT_DIR}pretrain_perturbed${second_run}"
mkdir -p "${second_dir}"
python convert_binary.py -i "${RESULT_DIR}SFT${first_run}.txt" -o "${second_dir}" \
  --val_shard_size 10000000

log_dir="/data/temp_log${second_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${second_dir}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${second_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"

#120%

# Experiment pair 19/20, driven by --snum 8.
#   1. Run both generator scripts with --snum 8.
#   2. Convert pretrain_perturbed19.txt with convert_binary.py and launch
#      training run xs_pretrain_small_19.
#   3. Convert SFT19.txt the same way and launch run xs_pretrain_small_20.
# Both training launches pass the same --load_checkpoint file.
snum=8
first_run=19
second_run=20
# NOTE(review): path looks like the output of run 8 (same id as --snum);
# confirm the step number is the intended checkpoint.
base_ckpt="/data/temp_log8/xs_pretrain_small_8/state_step111863.pt"

python generate_new_person_parrallel.py --snum "${snum}"

RESULT_DIR="hallucinate_small/"
input_folder="${RESULT_DIR}pretrain_perturbed${first_run}.txt"
output_folder="${RESULT_DIR}pretrain_perturbed${first_run}"
mkdir -p "${output_folder}"

python generate_bios.py --people_rate 0.5 --number "${first_run}" --snum "${snum}"
python convert_binary.py -i "${input_folder}" -o "${output_folder}" \
  --val_shard_size 10000000

# First training launch: reads the converted pretrain_perturbed19 folder.
log_dir="/data/temp_log${first_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${output_folder}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${first_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"

# Second training launch: reads SFT19.txt converted into pretrain_perturbed20.
# NOTE(review): SFT19.txt is presumably written by one of the generator
# scripts above; nothing in this script creates it directly.
second_dir="${RESULT_DIR}pretrain_perturbed${second_run}"
mkdir -p "${second_dir}"
python convert_binary.py -i "${RESULT_DIR}SFT${first_run}.txt" -o "${second_dir}" \
  --val_shard_size 10000000

log_dir="/data/temp_log${second_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${second_dir}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${second_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"

#100%

# Experiment pair 21/22, driven by --snum 9.
#   1. Run both generator scripts with --snum 9.
#   2. Convert pretrain_perturbed21.txt with convert_binary.py and launch
#      training run xs_pretrain_small_21.
#   3. Convert SFT21.txt the same way and launch run xs_pretrain_small_22.
# Both training launches pass the same --load_checkpoint file.
snum=9
first_run=21
second_run=22
# NOTE(review): path looks like the output of run 9 (same id as --snum);
# confirm the step number is the intended checkpoint.
base_ckpt="/data/temp_log9/xs_pretrain_small_9/state_step093119.pt"

python generate_new_person_parrallel.py --snum "${snum}"

RESULT_DIR="hallucinate_small/"
input_folder="${RESULT_DIR}pretrain_perturbed${first_run}.txt"
output_folder="${RESULT_DIR}pretrain_perturbed${first_run}"
mkdir -p "${output_folder}"

python generate_bios.py --people_rate 0.5 --number "${first_run}" --snum "${snum}"
python convert_binary.py -i "${input_folder}" -o "${output_folder}" \
  --val_shard_size 10000000

# First training launch: reads the converted pretrain_perturbed21 folder.
log_dir="/data/temp_log${first_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${output_folder}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${first_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"

# Second training launch: reads SFT21.txt converted into pretrain_perturbed22.
# NOTE(review): SFT21.txt is presumably written by one of the generator
# scripts above; nothing in this script creates it directly.
second_dir="${RESULT_DIR}pretrain_perturbed${second_run}"
mkdir -p "${second_dir}"
python convert_binary.py -i "${RESULT_DIR}SFT${first_run}.txt" -o "${second_dir}" \
  --val_shard_size 10000000

log_dir="/data/temp_log${second_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${second_dir}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${second_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"

#110%

# Experiment pair 23/24, driven by --snum 11.
#   1. Run both generator scripts with --snum 11.
#   2. Convert pretrain_perturbed23.txt with convert_binary.py and launch
#      training run xs_pretrain_small_23.
#   3. Convert SFT23.txt the same way and launch run xs_pretrain_small_24.
# Both training launches pass the same --load_checkpoint file.
snum=11
first_run=23
second_run=24
# NOTE(review): path looks like the output of run 11 (same id as --snum);
# confirm the step number is the intended checkpoint.
base_ckpt="/data/temp_log11/xs_pretrain_small_11/state_step102487.pt"

python generate_new_person_parrallel.py --snum "${snum}"

RESULT_DIR="hallucinate_small/"
input_folder="${RESULT_DIR}pretrain_perturbed${first_run}.txt"
output_folder="${RESULT_DIR}pretrain_perturbed${first_run}"
mkdir -p "${output_folder}"

python generate_bios.py --people_rate 0.5 --number "${first_run}" --snum "${snum}"
python convert_binary.py -i "${input_folder}" -o "${output_folder}" \
  --val_shard_size 10000000

# First training launch: reads the converted pretrain_perturbed23 folder.
log_dir="/data/temp_log${first_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${output_folder}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${first_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"

# Second training launch: reads SFT23.txt converted into pretrain_perturbed24.
# NOTE(review): SFT23.txt is presumably written by one of the generator
# scripts above; nothing in this script creates it directly.
second_dir="${RESULT_DIR}pretrain_perturbed${second_run}"
mkdir -p "${second_dir}"
python convert_binary.py -i "${RESULT_DIR}SFT${first_run}.txt" -o "${second_dir}" \
  --val_shard_size 10000000

log_dir="/data/temp_log${second_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${second_dir}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${second_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"

#80%

# Experiment pair 25/26, driven by --snum 13.
#   1. Run both generator scripts with --snum 13.
#   2. Convert pretrain_perturbed25.txt with convert_binary.py and launch
#      training run xs_pretrain_small_25.
#   3. Convert SFT25.txt the same way and launch run xs_pretrain_small_26.
# Both training launches pass the same --load_checkpoint file, as in every
# other experiment pair in this script.
snum=13
first_run=25
second_run=26
# Single source of truth for the starting checkpoint. The second launch used
# to point at /data/temp_log11/xs_pretrain_small_11/state_step074375.pt, a
# copy-paste mix of run 11's directory and run 13's step number; sharing one
# variable keeps the two launches from drifting apart again.
# NOTE(review): path looks like the output of run 13 (same id as --snum);
# confirm the step number is the intended checkpoint.
base_ckpt="/data/temp_log13/xs_pretrain_small_13/state_step074375.pt"

python generate_new_person_parrallel.py --snum "${snum}"

RESULT_DIR="hallucinate_small/"
input_folder="${RESULT_DIR}pretrain_perturbed${first_run}.txt"
output_folder="${RESULT_DIR}pretrain_perturbed${first_run}"
mkdir -p "${output_folder}"

python generate_bios.py --people_rate 0.5 --number "${first_run}" --snum "${snum}"
python convert_binary.py -i "${input_folder}" -o "${output_folder}" \
  --val_shard_size 10000000

# First training launch: reads the converted pretrain_perturbed25 folder.
log_dir="/data/temp_log${first_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${output_folder}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${first_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"

# Second training launch: reads SFT25.txt converted into pretrain_perturbed26.
# NOTE(review): SFT25.txt is presumably written by one of the generator
# scripts above; nothing in this script creates it directly.
second_dir="${RESULT_DIR}pretrain_perturbed${second_run}"
mkdir -p "${second_dir}"
python convert_binary.py -i "${RESULT_DIR}SFT${first_run}.txt" -o "${second_dir}" \
  --val_shard_size 10000000

log_dir="/data/temp_log${second_run}"
torchrun --standalone --nproc_per_node=8 train_gpt2.py \
  --input_folder "${second_dir}" \
  --save_every 1000 --val_loss_every 1000 \
  --run_name "xs_pretrain_small_${second_run}" \
  --warmup_ratio 0.05 --warmdown_ratio 0.9 \
  --sequence_length 512 --device_batch_size 16 \
  --num_epochs 4 --weight_decay 0.1 \
  --load_checkpoint "${base_ckpt}" \
  --learning_rate 0.0003 --batch_size 128 \
  --bf16 --model_size small \
  --output_dir "${log_dir}"
