#!/bin/bash
#
# Runs reasoning_gen_data.py once for each benchmark dataset listed below,
# sequentially and in the listed order. The run aborts at the first
# invocation that exits non-zero.
#
# Usage: invoke from any directory; no arguments are read.
set -euo pipefail

# Anchor the working directory to this script's own location rather than the
# caller's cwd, so the relative paths below resolve the same way no matter
# where the script is launched from.
# NOTE(review): assumes reasoning_gen_data.py lives one directory above this
# script (which is what the original bare `cd ..` implied) -- confirm.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.."

# Directory holding the benchmark datasets, relative to the directory entered
# above.
readonly BENCHMARK_ROOT="../../../../data/benchmark"

# Dataset directory names under BENCHMARK_ROOT, processed top to bottom.
readonly -a DATASETS=(
  "datasets--RUC-NLPIR--FlashRAG_datasets@hotpotqa_RAG"
  "datasets--RUC-NLPIR--FlashRAG_datasets@2wikimultihopqa_RAG"

  "datasets--allenai--reward-bench-2"
  "datasets--allenai--qasc"
  "datasets--allenai--reward-bench"
  "datasets--RUC-NLPIR--FlashRAG_datasets@hotpotqa"
  "datasets--RUC-NLPIR--FlashRAG_datasets@2wikimultihopqa"

  "datasets--HuggingFaceH4--ultrafeedback_binarized"
  "datasets--Idavidrein--gpqa"
  "datasets--livecodebench--execution-v2"
  "datasets--math-ai--aime24"
  "datasets--math-ai--amc23"
  "datasets--math-ai--math500"
  "datasets--openai--gsm8k"
  "datasets--THU-KEG--RM-Bench"
  "datasets--ucinlp--drop"
  "datasets--WeixiangYan--CodeScope"
  "datasets--cruxeval-org--cruxeval"
  "datasets--TAUR-Lab--MuSR"
  "datasets--TIGER-Lab--MMLU-Pro"
)

for dataset in "${DATASETS[@]}"; do
  python reasoning_gen_data.py --dataset_path "${BENCHMARK_ROOT}/${dataset}"
done