import json

# Filter the generations stored in math_approx2.json down to those whose
# "question" also appears in math_500_level_5.json, then write the matching
# rows to math_approx2_level5.json.

# Load math_approx2.json. The encoding is given explicitly because JSON text
# is UTF-8; the platform's locale default (e.g. cp1252 on Windows) would
# mis-decode non-ASCII characters.
with open("math_approx2.json", "r", encoding="utf-8") as f:
    approx_data = json.load(f)

approx_generations = approx_data["generations"]

# Load math_500_level_5.json
with open("math_500_level_5.json", "r", encoding="utf-8") as f:
    level5_data = json.load(f)

# A set gives constant-time membership checks in the filter below.
level5_questions = {item["question"] for item in level5_data}

# Keep, in their original order, the generations whose question is present
# in math_500_level_5.json.
matched_rows = [
    gen for gen in approx_generations if gen["question"] in level5_questions
]

with open("math_approx2_level5.json", "w", encoding="utf-8") as f:
    json.dump(matched_rows, f, indent=2)

print(f"Extracted {len(matched_rows)} rows from math_approx2")
