#!/usr/bin/env bash
# Shared configuration for the generation / evaluation runs in this script.

# GPUs made visible to every job launched from this script.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# Hugging Face access token. A token already present in the environment is
# kept; 'xxxxx' is only a placeholder fallback and must be replaced (or
# HF_TOKEN exported beforehand) before gated models can be downloaded.
export HF_TOKEN="${HF_TOKEN:-xxxxx}"

# SFT checkpoints from earlier experiments. They are not referenced in the
# visible part of this script and are kept for reference.
sft_path='.model_save/verl-safety-compliancer-sft/qwen3_8b_sft_exp1/global_step_458'
sft_path_2='.model_save/verl-safety-compliancer-sft/qwen3_8b_sft_exp1_5ep/global_step_1145'
sft_path_3='.model_save/verl-safety-compliancer-sft/qwen3_8b_sft_exp1_10ep_eu_ai_act/global_step_1050'
sft_path_4='.model_save/verl-safety-compliancer-sft/qwen3_8b_sft_exp1_10ep_gdpr_eu_ai_act/global_step_1680'

project_name="verl-safety-compliancer"
experiment_name="qwen3_8b_exp1_rl_gdpr_eu_ai_act"

BASE_DIR='train_reasoner'
MODEL_DIR="${BASE_DIR}/.model_save/${project_name}/${experiment_name}"
iteration_file="${MODEL_DIR}/latest_checkpointed_iteration.txt"

# Read the step number from the iteration file. A missing file is reported
# but does not abort the script: the jobs below use explicit checkpoint paths.
step_number=''
if [[ -r "$iteration_file" ]]; then
    step_number=$(<"$iteration_file")
else
    printf 'warning: cannot read %s; ppo_path will have an empty step number\n' \
        "$iteration_file" >&2
fi
BASE_MODEL="Qwen/Qwen3-8B"

# Must be ${project_name}: the previous "${verl-safety-compliancer}" was parsed
# as the default-value expansion ${verl-word}, i.e. the value of $verl or,
# when that is unset, the literal text "safety-compliancer".
ppo_path=".model_save/${project_name}/${experiment_name}/global_step_${step_number}/huggingface"

#######################################
# Run verl generation once for every regular file found (recursively) under
# a dataset directory. Each result is written to <out_dir>/<input base name>.
# Arguments:
#   $1 - directory searched with `find -type f`
#   $2 - output directory prefix for data.output_path
#   $3 - value passed as model.path
#######################################
generate_for_dir() {
    local src_dir=$1 out_dir=$2 model_path=$3
    local file
    local -a inputs=()

    # Collect NUL-delimited paths into an array so names containing spaces or
    # glob characters are kept as single entries (an unquoted $(find ...)
    # expansion would split and glob them).
    while IFS= read -r -d '' file; do
        inputs+=("$file")
    done < <(find "$src_dir" -type f -print0)

    for file in "${inputs[@]}"; do
        python3 -m verl.trainer.main_generation \
            +rollout.repetition_penalty=1.2 \
            rollout.temperature=0.7 \
            rollout.top_p=0.8 \
            data.path="$file" \
            data.output_path="${out_dir}/${file##*/}" \
            data.n_samples=1 \
            model.path="$model_path" \
            trainer.n_gpus_per_node=5
    done
}

# RL checkpoint used for both dataset directories below (step is hard-coded).
rl_model_path="train_reasoner/.model_save/verl-safety-compliancer/qwen3_8b_exp1_rl_gdpr_eu_ai_act/global_step_105/huggingface"

directory="train_reasoner/datasets_safety/new_annotate_gdpr"
generate_for_dir "$directory" ".model_gen_out_put_safety_data/new_annotate_gdpr" "$rl_model_path"

directory="train_reasoner/datasets_safety/asso_with_gdpr_chapter"
generate_for_dir "$directory" ".model_gen_out_put_safety_data/asso_with_gdpr_chapter" "$rl_model_path"




# Local checkpoints evaluated alongside the hub models. Their last path
# component is not a usable folder name, so model_fold_name maps them to
# fixed names. rsafe_path is also read by later commands in this script.
rsafe_path="rsafe/models/verl-generalize-reasoner/trail_202_qwen3_epoch1_response_rsafe_box_orginal/global_step_383/huggingface"
context_reasoner_path='train_on_ci_bench_reasoning/exp_set_3_new_tp/ppo_open_thinker_acc_reward'

directory="train_reasoner/datasets_safety/new_annotate_for_eval_gdpr"

model_names=(
    "Qwen/Qwen3-8B"
    "Qwen/Qwen2.5-7B-Instruct"
    "meta-llama/Meta-Llama-3-8B-Instruct"
    "meta-llama/Llama-Guard-3-8B"
    "yueliu1999/GuardReasoner-8B"
    "$rsafe_path"
    "$context_reasoner_path"
)
# model_names=("meta-llama/Llama-Guard-3-8B" "yueliu1999/GuardReasoner-8B" $rsafe_path $context_reasoner_path "meta-llama/Meta-Llama-3-8B-Instruct" )

#######################################
# Print the output sub-folder name for a model identifier.
# Globals:   rsafe_path, context_reasoner_path (read)
# Arguments: $1 - model id or local checkpoint path
# Outputs:   'rsafe' / 'context_reasoner' for the two local checkpoints,
#            otherwise the text after the last '/' of $1
#######################################
model_fold_name() {
    case "$1" in
        "$rsafe_path") printf '%s\n' 'rsafe' ;;
        "$context_reasoner_path") printf '%s\n' 'context_reasoner' ;;
        *) printf '%s\n' "${1##*/}" ;;
    esac
}

#######################################
# Run verl generation for every model on every regular file under a directory.
# Results go to <out_root>/<model folder>/<input base name>.
# Arguments:
#   $1   - directory searched with `find -type f` (listed once, up front)
#   $2   - output root directory
#   $3.. - model ids / checkpoint paths
# Outputs:   one "<file> <folder>" progress line per job on stdout
#######################################
evaluate_models_on_dir() {
    local src_dir=$1 out_root=$2
    shift 2
    local file m_name fold_name
    local -a eval_files=()

    # NUL-delimited collection keeps paths with whitespace intact.
    while IFS= read -r -d '' file; do
        eval_files+=("$file")
    done < <(find "$src_dir" -type f -print0)

    for m_name in "$@"; do
        fold_name=$(model_fold_name "$m_name")
        # -p: create missing parents and do not fail when re-running the script.
        mkdir -p -- "${out_root}/${fold_name}"
        for file in "${eval_files[@]}"; do
            printf '%s %s\n' "$file" "$fold_name"
            python3 -m verl.trainer.main_generation \
                +rollout.repetition_penalty=1.2 \
                rollout.temperature=0.7 \
                rollout.top_p=0.8 \
                data.path="$file" \
                data.output_path="${out_root}/${fold_name}/${file##*/}" \
                data.n_samples=1 \
                model.path="$m_name" \
                trainer.n_gpus_per_node=5
        done
    done
}

evaluate_models_on_dir "$directory" \
    ".model_gen_out_put_safety_data/new_annotate_eval_result_gdpr" \
    "${model_names[@]}"








############################################################################################################################
############################################################################################################################
############################################################################################################################





#######################################
# Evaluate one model on both held-out test sets (EU AI Act, then GDPR).
# For each dataset <d> it reads datasets/<d>_test.parquet and writes to
# .model_gen_out_put_general/results_on_<d>_<tag>.
# Arguments:
#   $1   - tag appended to the output name
#   $2   - value passed as model.path
#   $3.. - optional extra overrides appended to the command line
#######################################
evaluate_baseline() {
    local tag=$1 model_path=$2
    shift 2
    local dataset
    for dataset in eu_ai_act gdpr; do
        python3 -m verl.trainer.main_generation \
            +rollout.repetition_penalty=1.2 \
            rollout.temperature=0.7 \
            rollout.top_p=0.8 \
            data.path="datasets/${dataset}_test.parquet" \
            data.output_path=".model_gen_out_put_general/results_on_${dataset}_${tag}" \
            data.n_samples=1 \
            model.path="$model_path" \
            "$@"
    done
}

evaluate_baseline qwen3_dummy "Qwen/Qwen3-8B"
evaluate_baseline qwen2.5 "Qwen/Qwen2.5-7B-Instruct"
# NOTE(review): the tag says "llama3.1" but the model id is
# Meta-Llama-3-8B-Instruct; the tag is kept so existing result paths stay valid.
evaluate_baseline llama3.1 "meta-llama/Meta-Llama-3-8B-Instruct"
evaluate_baseline llama_guard3 "meta-llama/Llama-Guard-3-8B"
evaluate_baseline wildguard "allenai/wildguard"
evaluate_baseline guard_reasoner "yueliu1999/GuardReasoner-8B"
# rsafe_path is assigned earlier in this script; this is the only model run
# with an explicit trainer.n_gpus_per_node override.
evaluate_baseline rsafe_new "$rsafe_path" trainer.n_gpus_per_node=5
# NOTE(review): this path differs from the context_reasoner_path value set
# earlier in the script (train_on_ci_bench_reasoning/...); confirm which is intended.
evaluate_baseline context_reasoner_openthinker "context_reasoner/exp_set_3_new_tp/ppo_open_thinker_acc_reward"







# Evaluate the qwen2_5_8b RL checkpoint (global_step_105) on both test sets:
# EU AI Act first, then GDPR. Results go under .model_gen_out_put_reasoner/.
for bench in eu_ai_act gdpr; do
    python3 -m verl.trainer.main_generation \
        +rollout.repetition_penalty=1.2 \
        rollout.temperature=0.7 \
        rollout.top_p=0.8 \
        data.path="datasets/${bench}_test.parquet" \
        data.output_path=".model_gen_out_put_reasoner/results_on_${bench}_qwen2_5_8b_exp1_rl_gdpr_eu_ai_act" \
        data.n_samples=1 \
        model.path=".model_save/verl-safety-compliancer/qwen2_5_8b_exp1_rl_gdpr_eu_ai_act/global_step_105/huggingface"
done

