import json
import os
from utils.debug_utils import set_env_variables

# Runs at import time, before any task files are touched.
# NOTE(review): presumably loads the "local_vwebarena" environment from the
# given shell script; the helper lives in utils.debug_utils — confirm there.
set_env_variables(bash_script="scripts/utils/set_env_variables.sh", arg1="local_vwebarena")


# Directory the original per-domain task files (test_<domain>.raw.json) are read from.
base_in_path = "config_files/vwa"
# Directory the rewritten task files and the per-domain id lists are written to.
base_out_path = "config_files/vwa_not_vague"
# Domains processed by the main loop, one input/output file pair each.
domains = ["classifieds", "shopping", "reddit"]


# An intent is a candidate when it contains at least one of these substrings...
CONTAIN_WORDS = ["find", "show"]
# ...and none of these (matching is done on the lower-cased intent).
NOT_CONTAIN_WORDS = ["navigate", "take me"]


# Combinations of eval types that requires_navigation() accepts. Each case is
# compared after sorting, so element order within a case does not matter
# (the second and last entries are therefore equivalent).
EVAL_CASES = [
    ["url_match"],
    ["url_match", "string_match"],
    ["page_image_query"],
    ['program_html', 'page_image_query'],
    ['string_match', 'url_match'],
]


# Make sure the output directory exists before anything is written into it.
os.makedirs(base_out_path, exist_ok=True)


def is_vague(intent):
    """Return True when the intent matches the "vague" wording heuristic.

    The check is a case-insensitive *substring* test: the intent must contain
    at least one entry of CONTAIN_WORDS and no entry of NOT_CONTAIN_WORDS.
    """
    lowered = intent.lower()

    # Any excluded phrase disqualifies the intent outright.
    for blocked in NOT_CONTAIN_WORDS:
        if blocked in lowered:
            return False

    # Otherwise a single trigger word is enough.
    for trigger in CONTAIN_WORDS:
        if trigger in lowered:
            return True

    return False


def requires_navigation(eval_types):
    """Return True when the task's eval types equal one of EVAL_CASES.

    Eval type names are lower-cased and stripped first, and both sides are
    sorted before comparison, so ordering is irrelevant but the set of types
    must match a case exactly (no extra or missing entries).
    """
    normalized = sorted(name.lower().strip() for name in eval_types)
    for case in EVAL_CASES:
        if sorted(case) == normalized:
            return True
    return False


def is_exception(task_data, domain):
    """Return True for hand-picked shopping tasks that are always rewritten.

    A task is an exception when the domain is 'shopping' and either its
    'intent_template_id' is 51 or 98, or its 'task_id' is one of
    165, 169, 170, 171, 296. Both ids are passed through int(), so numeric
    strings are accepted. Tasks in any other domain are never exceptions.
    """
    in_shopping = domain == 'shopping'

    template_id = int(task_data['intent_template_id'])
    if in_shopping and template_id in (51, 98):
        return True

    # 'task_id' is only looked up when the template check did not match.
    task_number = int(task_data['task_id'])
    return in_shopping and task_number in (165, 169, 170, 171, 296)


# Sentence template appended to the intent of every selected task. The `{}`
# placeholder is filled via str.format with the keyword from keyword_to_add().
add_instruction = "\nTo finish the task, please make sure to navigate to the page of the corresponding {}."


def keyword_to_add(domain, task_data):
    """Pick the noun ('post' or 'item') that names the target page for a task.

    - 'reddit' tasks always target a 'post'.
    - 'shopping' tasks always target an 'item'.
    - 'classifieds' tasks target a 'post' when the task's 'intent_template'
      contains the (case-sensitive) substring 'post', otherwise an 'item'.

    Returns None for any other domain. `task_data` is only read for
    'classifieds'.
    """
    if domain == 'reddit':
        return 'post'
    if domain == 'shopping':
        return 'item'
    if domain == 'classifieds':
        mentions_post = 'post' in task_data['intent_template']
        return 'post' if mentions_post else 'item'
    return None


# For each domain: load its task file, append an explicit "navigate to the
# page" instruction to every vague/exception task, write the rewritten file
# plus the list of modified task ids, and derive the "<domain>_vague" subset
# of the evaluation task list.
for domain in domains:
    task_ids = []
    inp_path = f"{base_in_path}/test_{domain}.raw.json"
    out_path = f"{base_out_path}/test_{domain}.raw.json"

    with open(inp_path, "r") as f:
        data = json.load(f)

    for item in data:
        eval_types = item['eval']['eval_types']
        intent = item['intent']
        # Parentheses only make the existing precedence explicit: a task is
        # selected when it is vague AND navigation-evaluated, OR when it is a
        # hand-listed exception.
        if (is_vague(intent) and requires_navigation(eval_types)) or is_exception(item, domain):
            # Format into a local. Rebinding the module-level template would
            # consume its `{}` placeholder on the first match, after which
            # every later task (in every domain) would silently reuse the
            # first task's keyword.
            instruction = add_instruction.format(keyword_to_add(domain, item))

            item['intent'] = intent + instruction
            task_ids.append(item['task_id'])

    # One modified task id per line.
    with open(f"{base_out_path}/vague_tasks_{domain}.txt", "w") as f:
        for task_id in task_ids:
            f.write(f"{task_id}\n")

    with open(out_path, "w") as f:
        json.dump(data, f, indent=4)

    print(f"Updated {len(task_ids)} tasks in {domain}")

    # Intersect the evaluation subset with the modified tasks, keeping the
    # subset file's order. Blank lines (e.g. a trailing newline) are skipped
    # so they do not crash int().
    with open(f"evaluation_harness/task_subsets/{domain}.txt", "r") as f:
        eval_set = [int(line.strip()) for line in f if line.strip()]
    modified_ids = set(task_ids)  # O(1) membership instead of scanning the list
    eval_vague = [task_id for task_id in eval_set if task_id in modified_ids]

    with open(f"evaluation_harness/task_subsets/{domain}_vague.txt", "w") as f:
        for task_id in eval_vague:
            f.write(f"{task_id}\n")


# Hand the rewritten raw files (one per domain) to the test-data generator.
# NOTE(review): the import sits here rather than at the top of the file —
# possibly so it happens after set_env_variables() has run; confirm before
# moving it.
from scripts import generate_test_data
generate_test_data.main(inp_paths=[f"{base_out_path}/test_{domain}.raw.json" for domain in domains])
