import json
from sklearn.model_selection import train_test_split
# Source dataset and the two files produced by the split (all JSON Lines:
# one JSON document per line).
input_path = "medical_dataset_misaligned.jsonl"
output_path_train = "medical_dataset_misaligned_train.jsonl"
output_path_eval = "medical_dataset_misaligned_eval.jsonl"


def _read_jsonl(path):
    """Return the list of JSON documents stored one-per-line in *path*.

    Blank and whitespace-only lines are skipped so that a trailing newline
    or an accidental empty line does not abort the load with a
    ``json.JSONDecodeError``.
    """
    # Explicit UTF-8: without it the platform's locale encoding is used,
    # which mis-decodes non-ASCII text on non-UTF-8 systems.
    with open(path, "r", encoding="utf-8") as f:
        return [json.loads(line) for line in f if line.strip()]


def _write_jsonl(path, entries):
    """Write each item of *entries* to *path* as one JSON document per line."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(entry) + "\n" for entry in entries)


data = _read_jsonl(input_path)

# test_size=0.1 holds out 10% of the records for evaluation; the fixed
# random_state makes the shuffle (and therefore the split) reproducible.
train_data, test_data = train_test_split(data, test_size=0.1, random_state=42)

_write_jsonl(output_path_train, train_data)
_write_jsonl(output_path_eval, test_data)

