import json
import sys
import os
import re
from tqdm import tqdm
from argparse import ArgumentParser


def extract_html(response):
    """Return the HTML carried by the last message of a conversation.

    Only the final entry of ``response`` is inspected, and only when its
    role is ``"assistant"``.

    Args:
        response: Sequence of message dicts. The last one may carry a
            ``"content"`` value that is either a string or a list of part
            dicts, in which case the first part exposing a ``"text"`` key
            is used.

    Returns:
        The stripped body of the first ```html fenced block in the content;
        the whole content string when no such fence is present; ``None``
        when the conversation is empty, the last message is not from the
        assistant, or it holds no textual content.
    """
    # An empty conversation has no last message to inspect.
    if not response:
        return None
    entry = response[-1]
    if entry.get("role") != "assistant":
        return None

    content = entry.get("content")
    if isinstance(content, list):
        # Multi-part content: take the first part that actually has text so
        # an empty list or a leading non-text part does not raise.
        content = next(
            (
                part["text"]
                for part in content
                if isinstance(part, dict) and "text" in part
            ),
            None,
        )
    if not isinstance(content, str):
        return None

    match = re.search(r"```html(.*?)```", content, re.DOTALL)
    if match:
        return match.group(1).strip()
    # No fenced block: fall back to the raw content so the caller still
    # gets something to write.
    print(f"No match found for {content}")
    return content

def process_item(item, base_dir):
    """Write the web page and the task metadata for one dataset record.

    Files created under ``base_dir``:

    <question_id>/
        web.html
        task<task id>/
            metadata.json

    The sibling ``tasks.txt`` file in ``<question_id>/`` is written by
    ``main``, not here. No screenshots directory is created by this
    function (presumably a later stage fills it in -- TODO confirm).

    Args:
        item: Record dict providing ``question_id``, ``res_inference`` (the
            conversation passed to ``extract_html``) and ``task`` (a dict
            with ``id``, ``task`` and ``expected_result``).
        base_dir: Root directory under which the per-question folder lives.

    Returns:
        Path of the task directory that received ``metadata.json``.
    """
    task_base_dir = os.path.join(base_dir, item["question_id"])
    os.makedirs(task_base_dir, exist_ok=True)

    # save html
    html_code = extract_html(item["res_inference"])
    if html_code is None:
        # extract_html yields None when the last message carries no usable
        # assistant text; writing None would raise TypeError mid-run.
        print(f"No HTML extracted for {item['question_id']}; writing an empty web.html")
        html_code = ""
    # Explicit UTF-8 so non-ASCII page content does not depend on the locale.
    with open(os.path.join(task_base_dir, "web.html"), "w", encoding="utf-8") as f:
        f.write(html_code)

    # save task
    task = item["task"]
    task_dir = os.path.join(task_base_dir, f"task{task['id']}")
    os.makedirs(task_dir, exist_ok=True)
    metadata = {
        "question_id": item["question_id"],
        "task_id": task["id"],
        "task_type": "dynamic",
        "instruction": f"Task: {task['task']}\nExpected Result: {task['expected_result']}\n\nNote: The task is only considered successful if the expected result is achieved; otherwise, it is deemed infeasible.",
        "max_steps": 15,
    }
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 rather than with the platform default encoding.
    with open(os.path.join(task_dir, "metadata.json"), "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=4, ensure_ascii=False)
    return task_dir

def main(args):
    """Materialise every record of a JSONL file on disk and index the result.

    Each record is handed to ``process_item``. Afterwards a ``tasks.txt``
    listing the task directories is written into every question folder,
    and ``web_unit.txt`` (in the current working directory) lists the
    question folders themselves.

    Args:
        args: Namespace with ``data_path`` (JSONL input, one record per
            line) and ``base_dir`` (output root directory).
    """
    # JSON text is UTF-8; blank lines are skipped because json.loads would
    # otherwise raise on them.
    with open(args.data_path, "r", encoding="utf-8") as f:
        data = [json.loads(line) for line in f if line.strip()]

    # Pre-seed with every question id so insertion order follows the input.
    webs = {item["question_id"]: [] for item in data}
    for item in tqdm(data):
        task_dir = process_item(item, args.base_dir)
        webs[item["question_id"]].append(task_dir)

    for question_id, task_dirs in webs.items():
        tasks_path = os.path.join(args.base_dir, question_id, "tasks.txt")
        with open(tasks_path, "w", encoding="utf-8") as f:
            f.writelines(task_dir + "\n" for task_dir in task_dirs)

    with open("web_unit.txt", "w", encoding="utf-8") as f:
        # Only the keys are needed here: one question directory per line.
        for question_id in webs:
            f.write(os.path.join(args.base_dir, question_id) + "\n")

if __name__ == "__main__":
    # Command-line entry point: both options are optional and fall back to
    # the defaults below.
    cli = ArgumentParser()
    cli.add_argument(
        "--data_path",
        type=str,
        default="WebDevJudge_Unit/data/3_generate_interact_description_gemini_prepare.jsonl",
    )
    cli.add_argument(
        "--base_dir",
        type=str,
        default="/data/WebDevJudgeUnit",
    )
    main(cli.parse_args())