# Directory listed for input trajectory files; each file is read as JSON Lines
# (one trajectory object per line).
TRAJ_DIR = 'data/temp/traj_jsonl_before_split'
# Directory holding one JSON report per trajectory file; the report path is the
# trajectory file name with its final character dropped (".jsonl" -> ".json").
PATCH_DIR = 'data/patch-evaluation'
# Directory that receives the split output, one "<name>_<key>.jsonl" per key.
OUT_DIR = 'data/trajectory_original'
# Keys looked up in each patch report; each maps to a collection of instance
# ids and produces its own output file.
KEYS = ['unresolved_ids', 'resolved_ids', 'empty_patch_ids']
# Regex for an instance id of the form "<letters/dashes>__<letters/dashes>-<digits>",
# e.g. scikit-learn__scikit-learn-18087. The last match found in a trajectory's
# 'instance_id' replaces that field before the trajectory is written out.
pattern = r"([a-zA-Z\-]+__[a-zA-Z\-]+-\d+)"
import os
import json
import re

def _load_jsonl(path):
    """Return the JSON objects stored one per line in *path*.

    Blank lines (for example a trailing empty line) are skipped instead of
    being handed to the JSON parser.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return [json.loads(line) for line in handle if line.strip()]


def _build_id_matcher(ids):
    """Return a predicate telling whether an instance id contains one of *ids*.

    The match is still a substring match, but an id must not be immediately
    followed by another digit, so that "repo__repo-123" does not select the
    different instance "repo__repo-1234".
    """
    if not ids:
        # An empty alternation would match every string; nothing is selected.
        return lambda instance_id: False
    alternatives = '|'.join(re.escape(id_str) for id_str in ids)
    id_regex = re.compile('(?:' + alternatives + r')(?!\d)')
    return lambda instance_id: id_regex.search(instance_id) is not None


def _canonical_instance_id(instance_id):
    """Return the last `pattern` match in *instance_id*, or it unchanged if none."""
    matches = re.findall(pattern, instance_id)
    return matches[-1] if matches else instance_id


# Split every trajectory file into one output file per key in KEYS, selecting
# trajectories by the instance ids listed in the matching patch report.
os.makedirs(OUT_DIR, exist_ok=True)  # the output directory may not exist yet

for file in os.listdir(TRAJ_DIR):
    stem, ext = os.path.splitext(file)
    if ext != '.jsonl':
        # Ignore stray entries; only "<name>.jsonl" has a "<name>.json" report.
        continue
    patch_path = os.path.join(PATCH_DIR, stem + '.json')
    traj_path = os.path.join(TRAJ_DIR, file)

    # Load both inputs up front so a bad file is skipped before any output
    # for it is written.
    try:
        with open(patch_path, 'r', encoding='utf-8') as patch_file:
            patch_data = json.load(patch_file)
        traj_data = _load_jsonl(traj_path)
    except FileNotFoundError as e:
        print(f"Skipping {file}: missing input file {e.filename}")
        continue
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON for file {file}: {e}")
        continue

    for key in KEYS:
        is_selected = _build_id_matcher(patch_data[key])
        trajs = [tr for tr in traj_data if is_selected(tr['instance_id'])]
        # The dicts are shared with traj_data, so the normalized id is what
        # later keys are matched against as well.
        for tr in trajs:
            tr['instance_id'] = _canonical_instance_id(tr['instance_id'])
        out_path = os.path.join(OUT_DIR, f"{stem}_{key}.jsonl")
        with open(out_path, 'w', encoding='utf-8') as out_file:
            out_file.writelines(json.dumps(tr) + '\n' for tr in trajs)

