import json
import os
import re

import pandas as pd
from mxeval.data import write_jsonl, read_problems

def extract_python_code(text):
    """
    Collect the body of every ```python ... ``` fenced block in a string.

    The ``python`` tag is matched case-insensitively and a block may span
    several lines. Whitespace around each body (including the indentation
    of its first line) is removed.

    Args:
        text (str): Text to scan for fenced Python blocks

    Returns:
        list: Block bodies in order of appearance; empty if none are found
    """
    fence = re.compile(r'```python\s*(.*?)\s*```', re.DOTALL | re.IGNORECASE)
    return [found.group(1).strip() for found in fence.finditer(text)]



# Only the number of entries in `langs` affects the ids built below.
langs = ['Arabic', 'Hebrew']

# One pass over problems 0..79, each id repeated 5 times in a row.
ids_per_pass = [
    f"python/{problem_idx}"
    for problem_idx in range(80)
    for _ in range(5)
]

# The pass is repeated once per language, and that whole sequence twice.
task_id_arr = ids_per_pass * (2 * len(langs))
# print(task_id_arr)

# df = pd.read_json('./cs_gate_train/eval/data/30b-think-nogate_humaneval_res_2025-09-02-09:20:59.jsonl', lines=True)
# df = pd.read_json('./cs_gate_train/eval/data/gpt-oss-20b-norm_humaneval_res_2025-09-01-13:22:35.jsonl', lines=True)
# df = pd.read_json('./cs_gate_train/eval/data/gpt-oss-20b-nogate-local_humaneval_res_2025-09-01-13:25:55.jsonl', lines=True)


# Directory that holds the response files listed below.
_COMPLETION_DIR = './cs_gate_train/eval/data'

# Response files to convert; names are kept verbatim.
_COMPLETION_FILES = (
    '30b-think-nogate_humaneval_res_2025-08-31-03:39:00.jsonl',
    '30b-think-nogate_humaneval_res_2025-09-02-09:20:59.jsonl',
    '30b-think-norm_humaneval_res_2025-09-03-03:03:29.jsonl',
    'gpt-oss-20b-nogate-local_humaneval_res_2025-09-01-13:25:55.jsonl',
    'gpt-oss-20b-nonorm_humaneval_res_2025-09-01-13:35:46.jsonl',
    'gpt-oss-20b-norm_humaneval_res_2025-09-01-13:22:35.jsonl',
    'qwen3-8b-nogate_humaneval_res_2025-09-02-09:53:01.jsonl',
    'qwen3-8b-nonorm_humaneval_res_2025-09-02-10:20:21.jsonl',
    'qwen3-8b-norm_humaneval_res_2025-09-02-10:45:31.jsonl',
)

# Full relative path of every response file, in the order listed above.
completion_paths = [
    f'{_COMPLETION_DIR}/{file_name}' for file_name in _COMPLETION_FILES
]

# Matches a line that actually opens a function definition ("def name(" or
# "async def name("). A plain substring test for 'def' would also fire on
# identifiers such as "default" or "defaultdict" and cut the body short.
_DEF_LINE = re.compile(r'^\s*(?:async\s+)?def\s+\w+\s*\(')

# Create the output directory up front so open() below cannot fail on a
# missing folder after the input has already been parsed.
output_dir = './humaneval_formatted'
os.makedirs(output_dir, exist_ok=True)

# Convert every response file into a JSONL file whose records carry the keys
# task_id / language / completion.
for completion_path in completion_paths:
    df = pd.read_json(completion_path, lines=True)
    samples = []
    # Rows are paired with task ids purely by position. zip() stops at the
    # shorter input, so a row-count mismatch is NOT reported here.
    for task_id, (_, row) in zip(task_id_arr, df.iterrows()):
        code_blocks = extract_python_code(row['query_response'])
        if code_blocks:
            # Only the first fenced block of the response is used.
            code_lines = code_blocks[0].split('\n')
            # Index of the last line that opens a function definition;
            # -1 means "none found", in which case the whole block is kept
            # instead of silently dropping its first line.
            last_def_idx = -1
            for line_idx, line in enumerate(code_lines):
                if _DEF_LINE.match(line):
                    last_def_idx = line_idx
            # The completion is everything after that def line.
            # NOTE(review): a signature that spans several lines leaves its
            # continuation lines in the completion — confirm whether the
            # responses ever contain such signatures.
            completion = '\n'.join(code_lines[last_def_idx + 1:])
        else:
            # No fenced python block in the response: emit a bare comment
            # as a placeholder completion.
            completion = "#"
        samples.append({
            'task_id': task_id,
            'language': 'python',
            'completion': completion
        })

    # The output file reuses the input file name with a "-res" suffix.
    last_name = completion_path.split('/')[-1]
    with open(f'{output_dir}/{last_name}-res', 'w', encoding='utf-8') as f:
        for sample in samples:
            f.write(json.dumps(sample) + '\n')