import re
from logging import Logger
from pathlib import Path
from typing import Any, Dict, List, Tuple

from cresearcher.utils.faultLocalisationUtils import count_tokens
from cresearcher.prompts.fileCtxPrompts import GET_FILE_CONTEXT_SYMBOLS_PROMPT
from cresearcher.prompts.localizePrompts import STANDARD_SYSTEM_PROMPT
from cresearcher.utils.types import SymbolDefinition
from cresearcher.utils.modelHandlers import getModelHandler
from cresearcher.utils.chunk import FilesChunker


class FileContextGenerator(FilesChunker):
    """Build a textual "file context" for a suspect function.

    For a given function, the other symbols of the same file are offered to
    an LLM, which names the ones it considers related and gives a reason for
    each. The selected functions are rendered (location, body, reason) into a
    single string.

    NOTE(review): ``self.logger``, ``self.title``, ``self.crash_report`` and
    ``self._prune_files_to_functions`` are not defined here; they are
    presumably provided by ``FilesChunker`` -- confirm against that class.
    """

    # Maximum number of tokens allowed for the previously accumulated context
    # plus the context being built for the current function.
    CONTEXT_TOKEN_LIMIT = 15000

    def __init__(
        self,
        bug_dict: Dict,
        root_dir: str,
        tags_file: str,
        logger: Logger,
        filter_per_file: bool = True,
        token_budget: int = 40000,
    ):
        """Initialise the chunker base class and the LLM handler.

        All arguments are forwarded unchanged to ``FilesChunker.__init__``.
        """
        super().__init__(bug_dict, root_dir, tags_file, logger, filter_per_file, token_budget)

        self.modelHandler = getModelHandler(
            modelName='gpt-4o',
            systemPrompt=STANDARD_SYSTEM_PROMPT,
            temperature=0.2,
            conversational=False,
        )

        # Running totals of the token usage reported by the model handler.
        self.promptTokens = 0
        self.completionTokens = 0

    def get_context_for_function(
        self, functionSymbol: SymbolDefinition, reasoning: str, previousCtx: str
    ) -> str:
        """Return the rendered context for ``functionSymbol``.

        Args:
            functionSymbol: The function whose file neighbours are wanted.
            reasoning: Why this function is suspected; forwarded to the LLM
                prompt built in ``_filter_symbols_with_llm``.
            previousCtx: Context already accumulated by the caller. It is only
                used to check the combined token count against
                ``CONTEXT_TOKEN_LIMIT``; it is not included in the result.

        Returns:
            The context string for this function, terminated by a line of
            ``=`` characters, or ``""`` when no related function could be
            added (none selected, or the first one already exceeds the limit).
        """
        # Stored on the instance because _filter_symbols_with_llm reads them.
        # NOTE(review): presumably _prune_files_to_functions ends up invoking
        # _filter_symbols_with_llm -- confirm in FilesChunker.
        self.function_ground_symbol = functionSymbol
        self.reasoning = reasoning
        functions_and_reasons = self._prune_files_to_functions([functionSymbol.filePath])

        fileCtx = (
            f"Functions Logically Related to Function : `{functionSymbol.name}` "
            f"in File :  `{functionSymbol.filePath}`:\n"
        )

        added = False
        for func, reason in functions_and_reasons:
            # Build the candidate text first so that the token check covers
            # the entry about to be added.
            candidateCtx = (
                f"{fileCtx}"
                f"[File: {func.filePath}  Start: {func.start}  End: {func.end}]\n  "
                f"{func.body}\n\n"
                f"> Reason for {func.name} being related to the crash and "
                f"{functionSymbol.name} is : {reason}\n\n\n"
            )
            if count_tokens(previousCtx + candidateCtx) > self.CONTEXT_TOKEN_LIMIT:
                self.logger.debug(
                    "File context for function %s would exceed %d tokens, "
                    "skipping remaining related functions",
                    functionSymbol.name,
                    self.CONTEXT_TOKEN_LIMIT,
                )
                break
            fileCtx = candidateCtx
            added = True

        if not added:
            return ""
        return fileCtx + "=" * 20 + "\n"

    def _filter_symbols_with_llm(
        self, symbolDefList: List[SymbolDefinition], file_contents_truncated: str
    ) -> List[Tuple[SymbolDefinition, str]]:
        """Ask the LLM which symbols relate to the current ground function.

        Symbols named ``"EOF"`` and symbols sharing the ground function's name
        are removed from the candidates; in the latter case the ground
        function's body is also removed from ``file_contents_truncated``
        (it is passed to the prompt separately as ``function_body``).

        The response is expected to contain ``<function>...</function>`` and
        ``<reasoning>...</reasoning>`` tags, which are paired by position.

        Args:
            symbolDefList: Candidate symbols of the file.
            file_contents_truncated: File text to embed in the prompt.

        Returns:
            ``(symbol, reasoning)`` tuples for every returned name that
            matches a candidate symbol. Empty when there are no candidates.

        Raises:
            Exception: Any error raised while parsing the response is logged
                at critical level and re-raised unchanged.
        """
        non_eof_symbols = [symbol for symbol in symbolDefList if symbol.name != "EOF"]
        if not non_eof_symbols:
            return []

        ground = self.function_ground_symbol
        candidates = [symbol for symbol in non_eof_symbols if symbol.name != ground.name]
        if len(candidates) != len(non_eof_symbols):
            # The ground function is part of this file: drop its body from the
            # file text so it is not shown twice in the prompt.
            file_contents_truncated = file_contents_truncated.replace(ground.body, "")

        if not candidates:
            # Nothing to choose from, so there is no point in querying the LLM.
            return []

        # Name -> symbol lookup; with duplicate names the last symbol wins.
        function_to_symboldef = {symbol.name: symbol for symbol in candidates}

        functions_string = "".join(f"{symbol.name},{symbol.start}\n" for symbol in candidates)
        prompt = GET_FILE_CONTEXT_SYMBOLS_PROMPT.format(
            title=self.title,
            crash_report=self.crash_report,
            function_name=ground.name,
            function_file=ground.filePath,
            function_body=ground.body,
            reasoning=self.reasoning,
            file_contents_truncated=file_contents_truncated,
            functions_string=functions_string,
        )
        separator = "=" * 80
        self.logger.debug("Generated GET_FILE_CONTEXT_SYMBOLS_PROMPT prompt")
        self.logger.debug("PROMPT:\n%s\n%s\n%s", separator, prompt, separator)

        # Get response from LLM
        responses, tokenCounts = self.modelHandler.get_responses(prompt, return_token_count=True)
        self.promptTokens += tokenCounts['prompt_tokens']
        self.completionTokens += tokenCounts['completion_tokens']
        response = responses[0]
        self.logger.debug("Received response from LLM")
        self.logger.debug("RESPONSE:\n%s\n%s\n%s", separator, response, separator)

        try:
            names = [
                name.strip()
                for name in re.findall(r'<function>(.*?)</function>', response, re.DOTALL)
            ]
            reasonings = [
                text.strip()
                for text in re.findall(r'<reasoning>(.*?)</reasoning>', response, re.DOTALL)
            ]
            if len(names) != len(reasonings):
                # zip() below truncates to the shorter list, so surplus tags
                # are dropped; make that visible in the log.
                self.logger.warning(
                    "LLM response has %d <function> tags but %d <reasoning> tags; "
                    "unpaired entries are ignored",
                    len(names),
                    len(reasonings),
                )

            filteredSymbolsAndReasonings = []
            for name, reasoning in zip(names, reasonings):
                if name in function_to_symboldef:
                    filteredSymbolsAndReasonings.append((function_to_symboldef[name], reasoning))
                else:
                    self.logger.warning("Symbol not found in the symbolDefList: %s", name)

            self.logger.info(
                "The original list contains these many elements: %d", len(candidates)
            )
            self.logger.info(
                "filtered list contains these many elements: %d",
                len(filteredSymbolsAndReasonings),
            )
            return filteredSymbolsAndReasonings
        except Exception:
            self.logger.critical(
                "Error in parsing response in _filter_symbols_with_llm in "
                "FileContextGenerator, cant continue, raising exception"
            )
            raise


def getFileContext(
        bugDict: Dict[str, Any], rootDir: Path, tagsFile: Path, buggyFunctionsAndReasonings: List[Tuple[SymbolDefinition, str]], logger: Logger
) -> Tuple[str, int, int]:
    """Build the combined file context for a list of suspect functions.

    Args:
        bugDict: Bug description; ``bugDict['crash_report_data']`` is read
            here to size the token budget, and the whole dict is passed on to
            ``FileContextGenerator``.
        rootDir: Root directory, resolved to an absolute path string.
        tagsFile: Tags file, resolved to an absolute path string.
        buggyFunctionsAndReasonings: ``(function symbol, reasoning)`` pairs.
        logger: Logger used here and handed to the generator.

    Returns:
        ``(file context, prompt tokens, completion tokens)`` where the token
        counts are the totals accumulated by the generator.

    Raises:
        KeyError: If ``bugDict`` has no ``'crash_report_data'`` entry.
    """
    MAX_PROMPT_LENGTH = 50000
    # 5000 tokens are held back for the rest of the prompt and the response.
    data_token_limit = MAX_PROMPT_LENGTH - count_tokens(bugDict['crash_report_data']) - 5000
    if data_token_limit <= 0:
        # The value is still passed on unchanged; this only makes the
        # situation visible in the log.
        logger.warning(
            "Crash report leaves a non-positive token budget (%d) for file context",
            data_token_limit,
        )
    file_context_generator = FileContextGenerator(
        bugDict,
        str(rootDir.resolve()),
        str(tagsFile.resolve()),
        logger,
        token_budget=min(data_token_limit, 30000)
    )

    fileCtx = ""
    for functionSymbol, reasoning in buggyFunctionsAndReasonings:
        logger.info(f"Getting file context for function {functionSymbol.name}")
        # The context gathered so far is passed in so the generator can check
        # the combined size against its token limit.
        fileCtx += file_context_generator.get_context_for_function(
            functionSymbol,
            reasoning,
            fileCtx
        )
    logger.debug("File context is")
    logger.debug("="*80)
    logger.debug(fileCtx)
    logger.debug("="*80)
    return fileCtx, file_context_generator.promptTokens, file_context_generator.completionTokens
