#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os  
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  
import argparse  
import json  
from openai import AzureOpenAI  
from tqdm import tqdm
import pdb
import re
# NOTE: the project-local imports below (model_api_call, code_utils) are expected to be importable from this script's directory or from the parent directory appended to sys.path above.
from concurrent.futures import ThreadPoolExecutor, as_completed  
import sys
from model_api_call import get_chat_response_azure
from code_utils import create_message
# from tabmwp_prompt_combine.prompt.tabmwp_baseline import answer_prompt_freetext_fair, answer_prompt_multichoice_fair

def wrap_up_baseline_tabmwp_answer_prompt(data, answer_prompt_freetext_fair, answer_prompt_multichoice_fair):
    """Fill the TabMWP answer-prompt template for one record.

    The template is chosen by the record's 'ques_type' and its placeholders
    are substituted with values read from the record.

    Args:
        data: Record dict. Reads 'execution' (default ''), 'ques_type',
            'question', 'ans_type' (default '') and 'choices'.
        answer_prompt_freetext_fair: Template used for 'free_text' questions;
            '[[question]]', '[[executed_results]]' and '[[ans_type]]' are
            replaced.
        answer_prompt_multichoice_fair: Template used for 'multi_choice'
            questions; '[[question]]', '[[executed_results]]' and
            '[[multi_choices]]' are replaced (choices are inserted via str()).

    Returns:
        The same dict object, with 'answer_prompt' set to the filled template.

    Raises:
        ValueError: If 'ques_type' is neither 'free_text' nor 'multi_choice'.
    """
    exec_result = data.get('execution', '')
    question_type = data.get('ques_type')
    question = data.get('question')

    if question_type == "free_text":
        prompt = answer_prompt_freetext_fair.replace('[[question]]', question)
        prompt = prompt.replace('[[executed_results]]', exec_result)
        prompt = prompt.replace('[[ans_type]]', data.get('ans_type', ''))
    elif question_type == "multi_choice":
        prompt = answer_prompt_multichoice_fair.replace('[[question]]', question)
        prompt = prompt.replace('[[executed_results]]', exec_result)
        prompt = prompt.replace('[[multi_choices]]', str(data.get('choices')))
    else:
        # Fail with a clear message instead of an UnboundLocalError on `prompt`.
        raise ValueError(
            f"Unsupported ques_type {question_type!r}; expected 'free_text' or 'multi_choice'."
        )

    data['answer_prompt'] = prompt
    return data

def wrap_up_baseline_wikitq_answer_prompt(data, answer_prompt_fair):
    """Fill the WikiTQ answer-prompt template for one record.

    Replaces '[[question]]' with the record's 'question' and
    '[[executed_results]]' with its 'execution' value (default ''), in that
    order, then stores the result under 'answer_prompt'.

    Returns:
        The same dict object that was passed in.
    """
    # Placeholders are applied sequentially, in this order.
    substitutions = (
        ('[[question]]', data.get('question')),
        ('[[executed_results]]', data.get('execution', '')),
    )
    filled = answer_prompt_fair
    for placeholder, value in substitutions:
        filled = filled.replace(placeholder, value)

    data['answer_prompt'] = filled
    return data
    
def answer_question(prompt_path, result_path, client, model, prompt_template, temperature=0.0, max_tokens=60, top_p=1, frequency_penalty=0, presence_penalty=0, stop=None, max_retries=10, num_threads=5, dataset_name="wikitq"):
    """
    Load records from a .jsonl file, build an answer prompt for each, query the model in parallel, and write the results to another .jsonl file.

    Every input record is kept as-is and gains a 'result' key holding the value
    returned by get_chat_response_azure, or the string 'error' if that call raised.
    Output records are written in input order.

    Args:
        prompt_path: Path to the input .jsonl file (one JSON object per line; blank lines are skipped).
        result_path: Path of the output .jsonl file (overwritten).
        client, model: Passed through to get_chat_response_azure.
        prompt_template: For dataset_name == "tabmwp", a pair
            (free-text template, multi-choice template); for "wikitq", a single template.
        temperature, max_tokens, top_p, frequency_penalty, presence_penalty, stop, max_retries:
            Passed through to get_chat_response_azure.
        num_threads: max_workers for the thread pool.
        dataset_name: "tabmwp" or "wikitq". For any other value no prompt is built and each
            record's existing 'answer_prompt' value (if any) is used unchanged.
    """
    if dataset_name == "tabmwp":
        answer_prompt_freetext_fair, answer_prompt_multichoice_fair = prompt_template

    with open(prompt_path, 'r', encoding='utf-8') as infile:
        # Tolerate blank lines (e.g. a trailing empty line) instead of failing in json.loads.
        records = [json.loads(line) for line in infile if line.strip()]

    if dataset_name == "tabmwp":
        records = [
            wrap_up_baseline_tabmwp_answer_prompt(record, answer_prompt_freetext_fair, answer_prompt_multichoice_fair)
            for record in records
        ]
    elif dataset_name == "wikitq":
        records = [wrap_up_baseline_wikitq_answer_prompt(record, prompt_template) for record in records]

    def process_line(data, idx):
        """Query the model for one record and attach the outcome under 'result'."""
        prompt_string = data.get('answer_prompt')
        messages = create_message(prompt_string=prompt_string)

        try:
            response = get_chat_response_azure(client=client, model=model, messages=messages, temperature=temperature,
                                               max_tokens=max_tokens, top_p=top_p, frequency_penalty=frequency_penalty,
                                               presence_penalty=presence_penalty, stop=stop, max_retries=max_retries)
        except Exception as e:
            # Best-effort: one failed request must not abort the whole run.
            print(f"Error processing prompt: {str(e)}")
            # Bind `response` here too so the logging below cannot raise UnboundLocalError.
            response = 'error'

        data['result'] = response

        print(f"================ response #{idx}: ================\n")
        print(prompt_string)
        print(response)
        print(f"================ finish response #{idx} ================\n")

        return data

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        # executor.map yields results in submission order, so output order matches input order.
        results = list(tqdm(executor.map(process_line, records, range(len(records))), total=len(records), desc="running"))

    with open(result_path, 'w', encoding='utf-8') as outfile:
        for data in results:
            outfile.write(json.dumps(data, ensure_ascii=False) + '\n')
  
def inference():
    """Command-line entry point: parse arguments, pick the prompt template(s) for the dataset, build the Azure OpenAI client and run answer_question.

    The prompt templates are imported lazily from the project's `prompts`
    package depending on --dataset. --num_threads is forwarded to
    answer_question (default 5, the same value answer_question uses by default).
    """
    parser = argparse.ArgumentParser(description='Call OpenAI API with specified parameters and configurations.')
    parser.add_argument('--deployment_name', type=str, required=True, help='Model name to use for the API call.')
    parser.add_argument('--temperature', type=float, default=0.0, help='Temperature for the response. Default is 0.0.')
    parser.add_argument('--max_tokens', type=int, default=60, help='Maximum number of tokens to generate. Default is 60.')
    parser.add_argument('--top_p', type=float, default=1, help='Top P value. Default is 1.')
    parser.add_argument('--frequency_penalty', type=float, default=0, help='Frequency penalty. Default is 0.')
    parser.add_argument('--presence_penalty', type=float, default=0, help='Presence penalty. Default is 0.')
    parser.add_argument('--stop', nargs='*', help='Stop sequence(s). Multiple values are allowed.')
    parser.add_argument('--api_key', type=str, required=True, help='OpenAI API key.')
    parser.add_argument('--api_base', type=str, default="https://api.openai.com", help='OpenAI API base URL. Default is the standard OpenAI API.')
    parser.add_argument('--api_version', type=str, default="v1", help='OpenAI API version. Default is "v1".')
    # NOTE: --api_type is accepted but not read anywhere in this function.
    parser.add_argument('--api_type', type=str, default="azure", help='OpenAI API Type. Default is "Azure"')
    parser.add_argument('--prompt_path', type=str, required=True, help='Path to the input .jsonl file containing prompts.')
    parser.add_argument('--result_path', type=str, required=True, help='Path where the output .jsonl file with results will be saved.')
    # Default mirrors answer_question's own default so omitting the flag behaves as before.
    parser.add_argument('--num_threads', type=int, required=False, default=5, help='if your API could be run in parallel')
    # Restrict to supported datasets so an unknown name is rejected up front
    # rather than failing later with an unbound `prompt_template`.
    parser.add_argument('--dataset', type=str, required=True, choices=["tabmwp", "wikitq"], help='name of dataset')

    args = parser.parse_args()

    if args.dataset == "tabmwp":
        from prompts.tabmwp_prompt_combine.new.tabmwp_baseline import answer_prompt_freetext_fair as prompt_template_freetext
        from prompts.tabmwp_prompt_combine.new.tabmwp_baseline import answer_prompt_multichoice_fair as prompt_template_multichoice

        prompt_template = (prompt_template_freetext, prompt_template_multichoice)
    else:  # "wikitq" (guaranteed by argparse choices)
        from prompts.wikitq_prompt_combine.prompt.wikitq_all import answer_prompt_fair_gpt_35 as prompt_template

    client = AzureOpenAI(
        api_key=args.api_key,
        api_version=args.api_version,
        base_url=f"{args.api_base}/openai/deployments/{args.deployment_name}")

    answer_question(args.prompt_path, args.result_path, client, model=args.deployment_name, prompt_template=prompt_template, temperature=args.temperature, max_tokens=args.max_tokens,
                    top_p=args.top_p, frequency_penalty=args.frequency_penalty, presence_penalty=args.presence_penalty,
                    stop=args.stop, max_retries=60, num_threads=args.num_threads, dataset_name=args.dataset)
  
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":  
    inference()  
