from datasets import load_dataset

# Input and output locations for the "lite filtered" instance file.
# The alternative pair of paths previously used with this script was:
#   swegym/swe_gym_instances_with_image_shuffled.json
#   -> swegym/swe_gym_instances_with_image_shuffled_repo_name.json
SOURCE_PATH = "swegym/swe_gym_lite_filtered.jsonl"
TARGET_PATH = "swegym/swe_gym_lite_filtered_repo_name.json"


def _with_repo_name(_):
    """Return the constant ``repo_name`` field that is merged into each row."""
    return {"repo_name": "testbed"}


# Read the whole file as a single "train" split.
ds = load_dataset("json", data_files=SOURCE_PATH, split="train")

# Attach the same repo_name value to every record.
ds = ds.map(_with_repo_name)

# Write the augmented records back out as JSON (one object per line).
ds.to_json(TARGET_PATH)