import os
import json
import jsonlines
import argparse
from typing import List, Dict
from collections import defaultdict

def process_id(id_str: str) -> str:
    '''
        Truncates an id to its first two '-'-separated fields.
        Anything after the second field is dropped; an id with fewer
        than two fields is returned unchanged.
    '''
    return '-'.join(id_str.split('-')[:2])

def request_index_mapper(
    checklist_request: List[Dict[str, str]]
) -> Dict[str, Dict[str, str]]:
    '''
        Builds a lookup table keyed by the full request id.

        Each request must carry an 'id' made of at least two
        '-'-separated fields and a 'checklist_key'. The resulting entry
        holds:
            - 'sample_id': the first two fields of the id joined by '-'
            - 'checklist_id': the last field of the id
            - 'checklist_key': copied from the request as-is
        If the same id appears more than once, the last request wins.
    '''
    index = {}
    for entry in checklist_request:

        # split the id into its '-'-separated fields
        full_id = entry['id']
        fields = full_id.split('-')
        prefix, sample_number = fields[0], fields[1]
        checklist_number = fields[-1]
        key = entry['checklist_key']

        # register the entry under the full request id
        index[full_id] = {
            'sample_id': '-'.join((prefix, sample_number)),
            'checklist_id': checklist_number,
            'checklist_key': key
        }

    return index

def parse_checklist(
    checklist_output: List[Dict[str, str]],
    request_index: Dict[str, Dict[str, str]]
) -> Dict[str, Dict[str, str]]:
    '''
        Groups the raw checklist outputs by sample id.

        Every output element must have an 'id' that is present in
        request_index and an 'output' string. The result maps each
        sample id to {'checklist': {checklist_key: output_string}}.
        A later output for the same sample id and checklist key
        overwrites the earlier one.
    '''

    # each sample id starts out with an empty checklist
    grouped = defaultdict(lambda: {'checklist': {}})

    for element in checklist_output:

        # read the element, then resolve its id through the index
        element_id = element['id']
        text = element['output']
        index_entry = request_index[element_id]
        sample = index_entry['sample_id']
        key = index_entry['checklist_key']

        # file the output under its sample and checklist key
        grouped[sample]['checklist'][key] = text

    return grouped



def process_checklist_output(
    checklist_output_file: str,
    checklist_request_file: str,
    parsed_output: str
) -> None:
    '''
        Saves the output as a json file where each id is mapped to a checklist

        checklist_output_file: path to the jsonlines file with the outputs
        checklist_request_file: path to the jsonlines file with the requests
        parsed_output: path of the json file to write; its parent
            directory is created when it does not exist yet
    '''

    # reading every record of the checklist output file
    with jsonlines.open(checklist_output_file, 'r') as reader:
        checklist_output = list(reader)

    # reading every record of the checklist request file
    with jsonlines.open(checklist_request_file, 'r') as reader:
        checklist_request = list(reader)

    # creating the request index
    request_index = request_index_mapper(checklist_request)

    # parsing the checklist output
    parsed_output_object = parse_checklist(checklist_output, request_index)

    # A bare filename has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(parsed_output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # saving the parsed output as a json file
    with open(parsed_output, 'w') as f:
        json.dump(parsed_output_object, f, indent=4)
    

if __name__ == '__main__':
    # Command-line entry point: reads the three file paths and runs the parser.
    parser = argparse.ArgumentParser(description='For parsing the checklist output from the model')

    # All three paths are needed by process_checklist_output. Marking them
    # required makes argparse report a missing option with a usage message
    # instead of passing None on to the file-opening calls.
    parser.add_argument('--checklist_output_file', type=str, required=True, help='The output generated by the checklist extractor from the request generated by checklist_extraction_request.py')
    parser.add_argument('--checklist_request_file', type=str, required=True, help='The path to the checklist extraction request file generated by checklist_extraction_request.py')
    parser.add_argument('--parsed_output', type=str, required=True, help='The path where the parsed output will be saved which stores a dictionary for each sample id with the checklist items as keys')
    args = parser.parse_args()

    # processing the checklist output
    process_checklist_output(
        checklist_output_file=args.checklist_output_file,
        checklist_request_file=args.checklist_request_file,
        parsed_output=args.parsed_output
    )