import os, sys
import fitz
import re
import json
from json import JSONDecodeError
from datetime import datetime
from typing import Optional, List, Callable, Any, Tuple, Dict, Union
from abc import abstractmethod, ABC
import random
import numpy as np
import pandas as pd
import copy
import nltk
from nltk.corpus import stopwords
import pickle
from tqdm.autonotebook import tqdm
import itertools
from dotenv import load_dotenv

# Add the parent directory to the import search path. The entry is relative,
# so it resolves against the current working directory, not this file's
# location. Presumably required by the project-local imports below — TODO confirm.
sys.path.append("../")

# Load environment variables from the `.env` file one directory up.
load_dotenv(dotenv_path="../.env")
# Runs on every import/run of this module.
# NOTE(review): `stopwords` is not referenced in the visible code — confirm this is still needed.
nltk.download('stopwords')

from dataset_utils.reader import ADIQDataset
from dataset_utils.outputs import to_basic_prompt
import models_utils.llm.watsonx as watsonx
from dataset_utils.question import Question

from utils import file_handle



def question_to_taxonomy_extraction(
        q: "Question",
        asset_descriptions: Dict[str, str]
        ) -> str:
    """Build the LLM prompt that asks for taxonomy terms found in a question.

    The prompt embeds the asset description, the condition descriptions, the
    first temporal condition and the answer options of ``q``, followed by a
    JSON example of the expected output.

    Args:
        q: Question to render. Reads ``asset_type``, ``rule_name``,
            ``condition_description``, ``temporal_condition``,
            ``option_ids`` and ``options``.
        asset_descriptions: Maps an asset type to its description. An asset
            type missing from the mapping is rendered as ``"NONE"``.

    Returns:
        The formatted prompt text.
    """
    template = """

Please Read the question below with the context provided. I'm currently making a taxonomy therefore 
make a list of words and their explanations that require further explanation by analysing all the information given. The output must be in a strict 
json fomat, if you don't know the explanation add None 

## Asset Description:
{asset_name}: {asset_desc}

## Conditions:
{conditions}

## How long the conditions were met:
{temporal_condition}

Given the above detected conditions, what should the operator look for?
{options}

Your output must strictly follow this format:\n{example}

ANSWER:
"""
    # Truthiness (rather than len()) also covers a temporal condition of None.
    has_temporal = bool(q.temporal_condition)
    if not has_temporal:
        # Report rules that carry no temporal condition.
        print(q.rule_name)

    example = {
        'VAV': 'Variable Air Volume',
        'cfm': 'Cubic Feet per Minuete',
        'ABS': 'function to give the absolute value',
    }

    return template.format(
        asset_name=q.asset_type,
        asset_desc=asset_descriptions.get(q.asset_type, "NONE"),
        conditions="\n".join(f"- {cond}" for cond in q.condition_description),
        temporal_condition=q.temporal_condition[0] if has_temporal else "None",
        options="\n".join(
            f"{op_id}). {op}" for op_id, op in zip(q.option_ids, q.options)
        ),
        # Formatting the dict directly would emit Python repr (single quotes),
        # which is not valid JSON even though the prompt demands strict JSON.
        example=json.dumps(example),
    )

# NOTE(review): `APICall` imported from `rits` is not referenced in the visible
# code; the client below is built from `watsonx.APICall` — confirm which
# backend is intended.
from models_utils.llm.rits import APICall 
from models_utils.utils.concurrency import concurrent_dict_execution

# Module-level LLM client; `_get_taxonomy_question` calls
# `client.complete_response(...)` on it.
client = watsonx.APICall()

def extract_json_segment(text):
    """Return the body of the first ```json fenced block in ``text``.

    The body is stripped of surrounding whitespace. Returns ``None`` when
    ``text`` contains no complete (opened and closed) ```json fence.
    """
    fenced = re.search(r"```json\s*(.*?)```", text, re.DOTALL)
    if fenced is None:
        return None
    return fenced.group(1).strip()

def _get_taxonomy_question(q:Question) -> dict[str,Any]:
    """Ask the LLM for the taxonomy terms of one question and parse the reply.

    Reads the module-level ``ds`` (for ``asset_descriptions``) and ``client``,
    so both must be defined before this function is called.

    Args:
        q: Question to process. ``asset_type``, ``rule_name`` and ``id`` are
            copied into the ``q_data`` entry of the result.

    Returns:
        A dict with the keys ``response``, ``error``, ``err_message`` and
        ``q_data``. On success ``response`` is the parsed JSON and ``error``
        is ``False``. On failure ``error`` is ``True``, ``err_message`` is
        ``"Null response"`` or ``"JSONDecodeError"``, and ``response`` is the
        unmodified model text (the string ``'None'`` for a null response).
    """
    q_data = {k: getattr(q, k) for k in ('asset_type', 'rule_name', 'id')}
    q_prompt = question_to_taxonomy_extraction(q, ds.asset_descriptions)
    raw = client.complete_response(q_prompt)

    if not raw:
        return {
            'error': True,
            'err_message': "Null response",
            'response': 'None',
            'q_data': q_data,
        }

    # extract_json_segment returns None when the fence is never closed.
    payload = extract_json_segment(raw) if '```json' in raw else raw

    # Parse the text as-is first. Only if that fails, retry with Python's
    # `None` rewritten to JSON `null` (the prompt tells the model to answer
    # "None"). Rewriting unconditionally would corrupt valid JSON whose
    # strings merely contain "None".
    candidates = (payload, payload.replace("None", "null")) if payload else ()
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except JSONDecodeError:
            continue
        return {
            'response': parsed,
            'error': False,
            'err_message': "",
            'q_data': q_data,
        }

    # Keep the untouched model output so the failure can be inspected later.
    return {
        'response': raw,
        'error': True,
        'err_message': "JSONDecodeError",
        'q_data': q_data,
    }


def get_taxonomy_words(
        questions:List[Question],
        num_max_workers:int = 5
        ) -> Dict[str, Any]:
    """Run taxonomy extraction for every question and split results by outcome.

    Args:
        questions: Questions to process; each one is keyed by its ``id``.
        num_max_workers: Forwarded to ``concurrent_dict_execution`` as
            ``num_max_workers``. Defaults to 5.

    Returns:
        ``{"words": {...}, "errors": [...]}``. ``words`` maps a question id to
        its successful result without the ``error`` / ``err_message`` keys.
        ``errors`` lists the failed results, each with the question id added
        under ``id``.
    """
    # One single-element argument list per question id.
    params = {question.id: [question] for question in questions}
    words = {}
    errors = []

    if not params:
        # Nothing to dispatch; skip the executor entirely.
        return {"words": words, 'errors': errors}

    for q_id, outcome in concurrent_dict_execution(
        _get_taxonomy_question,
        params,
        num_max_workers=num_max_workers
    ):
        if outcome['error']:
            errors.append({**outcome, 'id': q_id})
        else:
            # Build a trimmed copy instead of deleting keys from the result.
            words[q_id] = {
                key: value
                for key, value in outcome.items()
                if key not in ('error', 'err_message')
            }

    return {"words": words, 'errors': errors}

# Script body: these statements run whenever the module is executed or imported.
# `ds` must remain a module-level name defined before the extraction starts,
# because `_get_taxonomy_question` reads `ds.asset_descriptions` from module scope.
ds = ADIQDataset("datasets/simpleV3.1")

# Dict with the keys "words" and "errors" (see `get_taxonomy_words`).
taxonomical_words = get_taxonomy_words(ds.questions)

# Hand the combined result to `file_handle.save_json` with a relative output path.
file_handle.save_json(
    taxonomical_words,
    "extracted/taxonomy/word_data2.json"
)