import jsonlines
from datasets import Dataset


# data_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/math_rl-7b_32b.jsonl"
# target_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/math_rl-7b_32b_arrow"

# data_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/math_rl-1.5b_32b.jsonl"
# target_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/math_rl-1.5b_32b_arrow"

# data_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/math_rl-1.5b_7b.jsonl"
# target_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/math_rl-1.5b_7b_arrow"

# Active configuration: exactly one data_path/target_path pair should be live.
# The level_1 and level_2 pairs used to be assigned here as well, but each was
# immediately overwritten by the next pair, so only level_3 ever took effect.
# They are commented out like the other inactive pairs in this file; to
# convert a different split, uncomment its pair and comment out this one.
# data_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step1/difficulty_splits/level_1.jsonl"
# target_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/level_1_arrow"
# data_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step1/difficulty_splits/level_2.jsonl"
# target_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/level_2_arrow"
data_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step1/difficulty_splits/level_3.jsonl"
target_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/level_3_arrow"
# data_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step1/difficulty_splits/level_4.jsonl"
# target_path="/home/cwy/project/LLM_Inference/Curriculum_Reinforced_learning/data/step2/level_4_arrow"

# Read every JSON Lines record from data_path into memory. The reader is used
# as a context manager so it is closed as soon as the records are collected,
# instead of being left open for the rest of the run.
with jsonlines.open(data_path, mode="r") as reader:
    data = list(reader)

# Convert to a Hugging Face Dataset
dataset = Dataset.from_list(data)
print(dataset.info)
dataset.save_to_disk(target_path)

# Round-trip check: reload what was just written and print the info of the
# reloaded copy (not of the in-memory dataset), so the saved output itself is
# what gets inspected.
from datasets import load_from_disk
loaded_dataset = load_from_disk(target_path)
print(loaded_dataset.info)
