import glob
import json
import multiprocessing
import os
import random
import re

import htmlmin
import tqdm

from utils import request  # as in root utils.py

# Extra keyword arguments passed along to `request(...)` (API credentials, model name, ...).
request_kwargs = {}  # TODO: fill in yours

# Prompt templates keyed by base file name, loaded once when the module is imported.
PROMPTS = {}
for fn in glob.glob("./prompts/*.md"):
    # Read as UTF-8 explicitly so the templates decode the same way on every platform
    # instead of depending on the locale's default encoding.
    with open(fn, encoding="utf-8") as f:
        PROMPTS[os.path.basename(fn)] = f.read()


# Matches one numbered test condition together with its '* Pass:' / '* Fail:' bullets.
_TEST_CONDITION_RE = re.compile(
    r"""
    ^\d+\.\s+                        # numbered item like '1. '
    (?P<condition>.+?)\n             # condition text
    \s*\*\s+Pass:\s+(?P<pass>.+?)\n  # '* Pass: ...' bullet
    \s*\*\s+Fail:\s+(?P<fail>.+?)    # '* Fail: ...' bullet
    (?=\n\d+\.|\Z)                   # stop at the next numbered item or at end of text
    """,
    re.VERBOSE | re.MULTILINE | re.DOTALL,
)

# Matches any of the placeholders that may appear in a prompt template.
_PLACEHOLDER_RE = re.compile(r"\{\{\{(?:SUMMARY|I-1|I-th|PAST_INSTRUCTIONS)\}\}\}")


def _ordinal(n: int) -> str:
    """Return *n* followed by its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, ...)."""
    if 10 <= n % 100 <= 20:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(n % 10, "th")
    return f"{n}{suffix}"


def _parse_test_conditions(text):
    """Parse a numbered list of conditions with Pass/Fail bullets into a list of dicts."""
    return [
        {
            "condition": m.group("condition").strip(),
            "pass": m.group("pass").strip(),
            "fail": m.group("fail").strip(),
        }
        for m in _TEST_CONDITION_RE.finditer(text)
    ]


def extract_testcase(summary, instruction_type, past_instructions):
    """Ask the model for the next instruction and its test conditions.

    The prompt template is chosen from ``PROMPTS``: ``initial-function.md`` when there are
    no past instructions, otherwise ``<instruction_type>.md``. Placeholders in the template
    are filled in, the prompt is sent through ``request`` and the reply is parsed.

    Args:
        summary: Text substituted for ``{{{SUMMARY}}}`` in the template.
        instruction_type: ``'function'`` or ``'design'``; must be ``'function'`` when
            ``past_instructions`` is empty.
        past_instructions: Instruction strings generated so far, oldest first.

    Returns:
        A dict with keys ``'instructions'`` (str), ``'type'`` (the given
        ``instruction_type``) and ``'test_conditions'`` (a list of 2 to 5 dicts, each with
        non-empty ``'condition'``, ``'pass'`` and ``'fail'`` strings).

    Raises:
        ValueError: If ``instruction_type`` is not allowed, or the reply does not have the
            expected structure.
        KeyError: If the required prompt template is not present in ``PROMPTS``.
    """
    # Validate with real exceptions: `assert` statements are removed under `python -O`.
    if not past_instructions:
        if instruction_type != 'function':
            raise ValueError(
                f"the first instruction must be of type 'function', got {instruction_type!r}"
            )
        template = PROMPTS['initial-function.md']
    else:
        if instruction_type not in ('function', 'design'):
            raise ValueError(
                f"instruction_type must be 'function' or 'design', got {instruction_type!r}"
            )
        template = PROMPTS[f'{instruction_type}.md']

    substitutions = {
        "{{{SUMMARY}}}": summary,
        "{{{I-1}}}": str(len(past_instructions)),
        "{{{I-th}}}": _ordinal(len(past_instructions) + 1),
        "{{{PAST_INSTRUCTIONS}}}": '\n'.join(
            f"{i}. {p.strip()}" for i, p in enumerate(past_instructions, 1)
        ),
    }
    # Substitute in a single pass so that placeholder-like text inside an inserted value
    # (e.g. the summary) is never itself expanded.
    prompt = _PLACEHOLDER_RE.sub(lambda m: substitutions[m.group(0)], template)
    response = request([{"role": "user", "content": prompt}, ], **request_kwargs)

    # The payload is taken from the second-to-last '---'-separated section of the reply.
    sections = response.split("---")
    if len(sections) < 2:
        raise ValueError("model response contains no '---' delimiter")
    payload = sections[-2].strip()

    instructions = payload.split("**Instructions**")[-1].split("**Test conditions**")[0].strip()
    test_conditions = _parse_test_conditions(payload.split("**Test conditions**")[-1].strip())

    if not instructions:
        raise ValueError("model response contains no instructions")
    if not 2 <= len(test_conditions) <= 5:
        raise ValueError(
            f"expected between 2 and 5 test conditions, got {len(test_conditions)}"
        )
    for t in test_conditions:
        if t['condition'] == '' or t['pass'] == '' or t['fail'] == '':
            raise ValueError(f"incomplete test condition: {t!r}")
    return {'instructions': instructions, 'type': instruction_type, 'test_conditions': test_conditions}


def main_single_file(data):
    """Generate and save the instruction/test-case sequence for one summary entry.

    Ten instructions are generated in order: the first is always of type ``'function'``,
    followed by a shuffled mix of four ``'function'`` and five ``'design'`` instructions.
    Each call to ``extract_testcase`` receives the instructions generated so far.

    Args:
        data: A ``(key, summary)`` pair. ``key`` names the output file (``.html`` is
            replaced by ``.cases.json``); ``summary`` must provide ``'purpose'`` and
            ``'features'`` (an iterable of strings).

    Side effects:
        Creates ``./final-data`` if needed and writes ``./final-data/<key>.cases.json``.
        Does nothing when that file already exists, so interrupted runs can be resumed.
    """
    key, summary = data

    os.makedirs('./final-data', exist_ok=True)
    cases_fname = os.path.join('./final-data', key.replace('.html', '.cases.json'))
    if os.path.exists(cases_fname):
        # Already generated by an earlier run.
        return

    summary_text = "{} Example feature (while not comprehensive): {}".format(
        summary['purpose'], ' '.join(summary['features'])
    )

    instruction_types = ['function'] * 4 + ['design'] * 5
    random.shuffle(instruction_types)
    instruction_types = ['function'] + instruction_types  # <- first is always function
    assert len(instruction_types) == 10

    cases_all = []
    past_instructions = []
    for t in instruction_types:
        cases = extract_testcase(summary_text, t, past_instructions)
        cases_all.append(cases)
        past_instructions.append(cases['instructions'])

    # The existence of `cases_fname` marks the entry as done, so write to a temporary
    # file first and move it into place: an interrupted write must not leave a truncated
    # file behind that later runs would skip.
    tmp_fname = cases_fname + '.tmp'
    with open(tmp_fname, "w", encoding="utf-8") as f:
        json.dump(cases_all, f, indent=2)
    os.replace(tmp_fname, cases_fname)


def main_single_file_func(fname):
    """Run ``main_single_file`` on one work item, reporting failures instead of raising.

    Args:
        fname: The work item, passed to ``main_single_file`` unchanged. Its first element
            (``fname[0]``) is used to identify the item in the failure message.

    Any ``Exception`` raised while processing is caught and printed, so one failing item
    does not propagate an error to the caller.
    """
    try:
        main_single_file(fname)
    except Exception as exc:
        message = f"{fname[0]!s} failed - {exc!r}"
        print(message)


def main(summary_path='final-data/summary.json', num_workers=32):
    """Generate test cases for every entry of the summary file using a process pool.

    Args:
        summary_path: JSON file holding the work items. It may be either a list of
            ``(key, summary)`` pairs or an object mapping each key to its summary.
        num_workers: Number of worker processes in the pool.

    Each work item is handed to ``main_single_file_func``; progress is shown with tqdm.
    """
    with open(summary_path, encoding='utf-8') as f:
        summary = json.load(f)

    # `main_single_file` unpacks every work item as `(key, summary)`. Iterating a dict
    # directly would yield the keys only, so expand a mapping into pairs first.
    work_items = list(summary.items()) if isinstance(summary, dict) else summary

    with multiprocessing.Pool(num_workers) as p:
        for _ in tqdm.tqdm(p.imap(main_single_file_func, work_items), total=len(work_items)):
            pass


# Script entry point. `main()` starts a multiprocessing pool, so it is only invoked when
# this file is executed directly, never as a side effect of importing the module.
if __name__ == "__main__":
    main()
