import json

# Paths of the JSONL file to read and the cleaned JSONL file to write.
input_file = "samples.jsonl"
output_file = "samples_cleaned.jsonl"

# Markers delimiting the fenced code block extracted from each completion.
_FENCE_OPEN = "```python"
_FENCE_CLOSE = "```"


def _extract_python_block(completion):
    """Return the stripped contents of the first ```python fenced block.

    Returns an empty string when the completion has no opening
    ```python marker or no closing ``` after it.
    """
    start = completion.find(_FENCE_OPEN)
    if start == -1:
        return ""

    # Look for the closing fence only after the opening marker, so the
    # opening fence's own backticks can never be taken for the closing one.
    body_start = start + len(_FENCE_OPEN)
    end = completion.find(_FENCE_CLOSE, body_start)
    if end == -1:
        return ""

    return completion[body_start:end].strip()


# Rewrite every record's "completion" field with just the extracted code.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's locale encoding.
with open(input_file, "r", encoding="utf-8") as infile, open(
    output_file, "w", encoding="utf-8"
) as outfile:
    for line in infile:
        # Skip blank lines (e.g. a trailing empty line); json.loads would
        # otherwise raise on them and abort the run part-way through.
        if not line.strip():
            continue

        data = json.loads(line)
        data["completion"] = _extract_python_block(data["completion"])
        outfile.write(json.dumps(data) + "\n")
