import sys
import os
import json
import time
from random import sample

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from utils.set_random_seed import set_random_seed
from module_02_feature.prompt_template import extract_short_answer_user_prompt, extract_short_answer_user_prompt_2
from utils.model_loader import VLLM_Model
from reader.vllm_reader import vllm_reader_batch

def locate_short_answer(json_list, model, tokenizer, params):
    """Attach a model-extracted short answer to every record in *json_list*.

    For each record a user prompt is built from its ``"question"`` and
    ``"answer"`` fields using ``extract_short_answer_user_prompt_2``; all
    prompts are sent to ``vllm_reader_batch`` in one call, and the returned
    text is stored on the record with its first character upper-cased.

    Args:
        json_list: Sequence of dicts, each with ``"question"`` and
            ``"answer"`` keys. The dicts are modified in place.
        model: Passed through unchanged to ``vllm_reader_batch``.
        tokenizer: Passed through unchanged to ``vllm_reader_batch``.
        params: Passed through unchanged to ``vllm_reader_batch``.

    Returns:
        The same ``json_list`` object. Every record that received an output
        gains the keys ``"short_answer"``, ``"short_answer_score"``,
        ``"short_answer_sentence"`` and ``"short_answer_position"``; the
        last three are initialised to ``0``.

    Raises:
        KeyError: If a record lacks ``"question"`` or ``"answer"``.
    """
    # The seed is re-applied on every call, before any prompt is built.
    set_random_seed(42)

    query_list = [
        extract_short_answer_user_prompt_2.format(
            question=record["question"], answer=record["answer"]
        )
        for record in json_list
    ]
    # One system-prompt entry per query. The literal string is forwarded
    # as-is; how the reader interprets it is not visible from this file.
    system_prompt_list = ["no system prompt"] * len(query_list)

    output_list = vllm_reader_batch(
        model, tokenizer, params, query_list, system_prompt_list, batch_size=100
    )

    for i, short_answer in enumerate(output_list):
        record = json_list[i]
        # Slice instead of indexing: an empty model output stays "" rather
        # than raising IndexError and aborting the whole batch.
        record["short_answer"] = short_answer[:1].upper() + short_answer[1:]
        # Initialised to 0 here; presumably filled in by a later stage --
        # TODO confirm against the downstream consumer.
        record["short_answer_score"] = 0
        record["short_answer_sentence"] = 0
        record["short_answer_position"] = 0

    return json_list

# Script entry point: names the input and output JSONL paths for this stage
# and invokes locate_short_answer.
if __name__ == '__main__':
    jsonl_input = "dataset/natural_question/natural_question_02_all.jsonl"
    jsonl_output = "dataset/natural_question/natural_question_02_locate.jsonl"
    # NOTE(review): locate_short_answer is declared as
    # (json_list, model, tokenizer, params) but is called here with two path
    # strings, so this call raises TypeError as written. It looks like the
    # JSONL loading, model/tokenizer/params construction and the writing of
    # jsonl_output are missing -- confirm the intended wiring before running
    # this file as a script.
    locate_short_answer(jsonl_input, jsonl_output)
