# NOTE(review): the two `add_ideas ...` lines below are not shell commands, and
# nothing visible in this script defines an `add_ideas` function or alias. Run
# as-is they only produce "add_ideas: command not found" (exit status 127)
# before the evaluation runs start. They look like text pasted in from an
# unrelated tool, so they are disabled here rather than deleted -- remove them
# once confirmed stray.
# add_ideas BUL_bulgarian_patriarchate_restored

# add_ideas ALO_agriculture_5 KHI_nomadism ALO_private_initiative ALO_central_planning

# Run test_everyce_prm.sh with --gpu_ids 0 on the `eurusprm_everyce_beta_11`
# checkpoint (--dpo_path), using `prime-sft-new` as --ref_path, batch size 16,
# the `prm_q06sft_self_rc_valid.json` dataset and the tag `q06sft_self_rc`.
# NOTE(review): the meaning of each flag is defined by test_everyce_prm.sh,
# which is not visible from this file.
lf_root=~/LLaMA-Factory-250514          # tilde expands at assignment time
ckpt_dir="${lf_root}/saves/qwen3-0.6B"  # directory holding both checkpoints
bash ~/verl_250713/scripts/test_everyce_prm.sh \
  --gpu_ids 0 \
  --dpo_path "${ckpt_dir}/eurusprm_everyce_beta_11" \
  --ref_path "${ckpt_dir}/prime-sft-new" \
  --batch_size 16 \
  --kto_dataset "${lf_root}/data/prm_q06sft_self_rc_valid.json" \
  --tag q06sft_self_rc

# Run test_everyce_prm.sh with --gpu_ids 1 on the
# `eurusprm_everyce_beta_11_self` checkpoint (--dpo_path), using
# `prime-sft-new` as --ref_path, batch size 16, the
# `prm_q06sft_self_rc_valid.json` dataset and the tag `q06sft_self_rc`.
# NOTE(review): this reuses the --tag value of the `eurusprm_everyce_beta_11`
# run; if test_everyce_prm.sh names its outputs by tag alone the two runs may
# collide -- confirm against that script.
lf_root=~/LLaMA-Factory-250514          # tilde expands at assignment time
ckpt_dir="${lf_root}/saves/qwen3-0.6B"  # directory holding both checkpoints
bash ~/verl_250713/scripts/test_everyce_prm.sh \
  --gpu_ids 1 \
  --dpo_path "${ckpt_dir}/eurusprm_everyce_beta_11_self" \
  --ref_path "${ckpt_dir}/prime-sft-new" \
  --batch_size 16 \
  --kto_dataset "${lf_root}/data/prm_q06sft_self_rc_valid.json" \
  --tag q06sft_self_rc

# Run test_everyce_prm.sh with --gpu_ids 1 on the
# `eurusprm_everyce_beta_11_self` checkpoint (--dpo_path), using
# `prime-sft-new` as --ref_path and batch size 16, this time against the
# `qwen3sft_train10_responses01234567_scored_valid_rc.json` dataset with the
# tag `q06sft_self_prime_rc`.
# NOTE(review): the meaning of each flag is defined by test_everyce_prm.sh,
# which is not visible from this file.
lf_root=~/LLaMA-Factory-250514          # tilde expands at assignment time
ckpt_dir="${lf_root}/saves/qwen3-0.6B"  # directory holding both checkpoints
bash ~/verl_250713/scripts/test_everyce_prm.sh \
  --gpu_ids 1 \
  --dpo_path "${ckpt_dir}/eurusprm_everyce_beta_11_self" \
  --ref_path "${ckpt_dir}/prime-sft-new" \
  --batch_size 16 \
  --kto_dataset "${lf_root}/data/qwen3sft_train10_responses01234567_scored_valid_rc.json" \
  --tag q06sft_self_prime_rc






# Run test_everyce_prm.sh with --gpu_ids 1 on the `seq_ce_11_self` checkpoint
# (--dpo_path), using `prime-sft-new` as --ref_path, batch size 16, the
# `qwen3sft_train10_responses01234567_scored_valid_rc.json` dataset, the tag
# `q06sft_self_prime_rc`, and `--seq_cal sum`.
# NOTE(review): the meaning of each flag (including --seq_cal) is defined by
# test_everyce_prm.sh, which is not visible from this file.
lf_root=~/LLaMA-Factory-250514          # tilde expands at assignment time
ckpt_dir="${lf_root}/saves/qwen3-0.6B"  # directory holding both checkpoints
bash ~/verl_250713/scripts/test_everyce_prm.sh \
  --gpu_ids 1 \
  --dpo_path "${ckpt_dir}/seq_ce_11_self" \
  --ref_path "${ckpt_dir}/prime-sft-new" \
  --batch_size 16 \
  --kto_dataset "${lf_root}/data/qwen3sft_train10_responses01234567_scored_valid_rc.json" \
  --tag q06sft_self_prime_rc \
  --seq_cal sum

# Run test_everyce_prm.sh with --gpu_ids 2 on the
# `every_position_ce_11_self2` checkpoint (--dpo_path), using `prime-sft-new`
# as --ref_path, batch size 16, the
# `qwen3sft_train10_responses01234567_scored_valid_rc.json` dataset, the tag
# `q06sft_self_prime_rc`, and `--seq_cal mean`.
# NOTE(review): the meaning of each flag (including --seq_cal) is defined by
# test_everyce_prm.sh, which is not visible from this file.
lf_root=~/LLaMA-Factory-250514          # tilde expands at assignment time
ckpt_dir="${lf_root}/saves/qwen3-0.6B"  # directory holding both checkpoints
bash ~/verl_250713/scripts/test_everyce_prm.sh \
  --gpu_ids 2 \
  --dpo_path "${ckpt_dir}/every_position_ce_11_self2" \
  --ref_path "${ckpt_dir}/prime-sft-new" \
  --batch_size 16 \
  --kto_dataset "${lf_root}/data/qwen3sft_train10_responses01234567_scored_valid_rc.json" \
  --tag q06sft_self_prime_rc \
  --seq_cal mean

# Run test_everyce_prm.sh with --gpu_ids 2 on the `q-learning` checkpoint
# (--dpo_path), using `prime-sft-new` as --ref_path, batch size 16, the
# `qwen3sft_train10_responses01234567_scored_valid_rc.json` dataset, the tag
# `q06sft_self_prime_rc`, and `--seq_cal mean`.
# NOTE(review): the meaning of each flag (including --seq_cal) is defined by
# test_everyce_prm.sh, which is not visible from this file.
lf_root=~/LLaMA-Factory-250514          # tilde expands at assignment time
ckpt_dir="${lf_root}/saves/qwen3-0.6B"  # directory holding both checkpoints
bash ~/verl_250713/scripts/test_everyce_prm.sh \
  --gpu_ids 2 \
  --dpo_path "${ckpt_dir}/q-learning" \
  --ref_path "${ckpt_dir}/prime-sft-new" \
  --batch_size 16 \
  --kto_dataset "${lf_root}/data/qwen3sft_train10_responses01234567_scored_valid_rc.json" \
  --tag q06sft_self_prime_rc \
  --seq_cal mean
    