import json
import os
import random

from datasets import load_dataset

# Parameters
dataset_name = "SWE-Gym/SWE-Gym"
split = "train"
output_path = "swegym/swe_gym_instances_with_image_shuffled.json"

# Load dataset from Hugging Face
ds = load_dataset(dataset_name, split=split)

# Copy every row into a plain dict and, when the row carries a non-empty
# "instance_id", attach the name of the matching image under "image_name".
# Rows without an instance id are kept unchanged.
instances = []
for instance in ds:
    instance = dict(instance)
    instance_id = instance.get("instance_id")
    if instance_id:
        # Only the first "__" is rewritten to "_s_"; the whole id is then
        # lowercased. Ids without "__" are just lowercased.
        new_id = instance_id.replace("__", "_s_", 1).lower()
        instance["image_name"] = f"xingyaoww/sweb.eval.x86_64.{new_id}"
    instances.append(instance)

# NOTE: no seed is set in the visible code, so the resulting order is not
# reproducible across runs.
random.shuffle(instances)

# Create the output directory up front so the write below does not fail with
# FileNotFoundError after the whole dataset has already been processed.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(instances, f, indent=2)