import openai
import json
import re
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import math
from collections import Counter
import time
import argparse

# Module-level configuration of the `openai` client used by every call below.
# The key, base URL and API type are empty strings here — presumably
# placeholders to be filled in before running; TODO confirm.
openai.api_key = ''
openai.api_base =''
openai.api_type = ''
openai.api_version = "2023-05-15" 
# NOTE(review): `version` is not referenced by any code visible in this file —
# confirm whether it is still needed.
version = "1.3"
# Deployment name passed as `engine=` to each ChatCompletion.create call.
deployment_id = "gpt-4-32k-0613"

def i2p_prompt(caption, dense_caption):
    """Build the prompt that asks the model for a short retrieval paragraph.

    The prompt has three labelled parts: (a) the image caption, (b) the dense
    caption, and (c) a fixed, numbered list of writing rules.

    Args:
        caption: Image caption; inserted with default formatting.
        dense_caption: Dense caption text; inserted with default formatting.

    Returns:
        The complete prompt string.
    """
    # Each rule is emitted on its own line, indented by four spaces.
    rules = (
        "1. Show objects and its position.",
        "2. Use nouns rather than coordinates to show position information of each object.",
        "3. No more than 3 sentences.",
        "4. Describe position of each object.",
        '5. The target is to do image text retrieval with generated caption, so cut down on nonsense and repetitive words. For example, do not tell "as depicted in the caption" or "the dense caption" or "The arrangement of these elements" or "The positioning of these objects".',
        '6. Do not tell words not associated with pictures , for example "The image showcases various objects in unique positions" is not necessary.',
        "7. Sometimes the dense caption may be wrong.",
        "8. The dense caption is in format of caption: position, confidence and position is normalized as [xmin, ymin, xmax, ymax]. Confidence is higher, means the dense caption is more reliable. In general, if the confidence is lower than 0.5, the dense caption is not reliable.",
        "9. Do not appear numbers in the generated caption.",
        '10. Do not appear "the image" too often in the generated caption.',
    )
    line_break = "\n    "
    # The rules section ends with a newline plus the four-space indent.
    rules_section = (
        "\n c. There are some rules:"
        + "".join(line_break + rule for rule in rules)
        + line_break
    )
    layout = "{intro}\n a. Image Caption: {}\n b. Dense Caption: {}{rules}"
    return layout.format(
        caption,
        dense_caption,
        intro="Generate only an concise, coherent and informative paragraph based on the given information(a,b,c):\n",
        rules=rules_section,
    )


def call_llm_summary(input, history):
    """Ask the chat deployment for a summary paragraph of one image.

    Args:
        input: Indexable pair; ``input[0]`` is the image caption and
            ``input[1]`` the dense caption, both handed to ``i2p_prompt``.
        history: Optional sequence of earlier texts. Each entry is appended
            after the user prompt as a ``system`` message; a falsy value
            adds nothing.

    Returns:
        The ``content`` field of the first choice in the API response.
    """
    caption, dense_caption = input[0], input[1]
    prompt = i2p_prompt(caption, dense_caption)

    messages = [{"role": "user", "content": prompt}]
    if history:
        messages += [{"role": "system", "content": entry} for entry in history]

    reply = openai.ChatCompletion.create(
        engine=deployment_id,
        temperature=0.4,  # larger values make the output more random
        messages=messages,
    )
    first_choice = reply['choices'][0]
    return first_choice['message']['content']

def call_llm_summary_without_history(input, history=None, max_retries=4, retry_delay=30):
    """Call ``call_llm_summary`` with retries; return ``None`` if all fail.

    Args:
        input: Forwarded unchanged to ``call_llm_summary``.
        history: Forwarded unchanged to ``call_llm_summary`` (default ``None``).
        max_retries: Maximum number of attempts. With ``0`` no call is made.
        retry_delay: Seconds to sleep between failed attempts (default 30,
            matching the previous hard-coded value).

    Returns:
        The text returned by ``call_llm_summary`` on the first successful
        attempt, or ``None`` once every attempt has failed.
    """
    last_error = None
    for try_time in range(max_retries):
        try:
            return call_llm_summary(input, history)  # success: stop retrying
        except Exception as e:
            # Best-effort wrapper: any failure triggers a retry, but keep the
            # error so the cause can be reported if every attempt fails.
            last_error = e
            if try_time < max_retries - 1:  # don't sleep after the last attempt
                time.sleep(retry_delay)
    print("Max retries reached. Returning None.")
    if last_error is not None:
        # Surface the cause instead of swallowing it silently.
        print(f"Last error: {last_error!r}")
    return None


def call_llm_w_question_only(question):
    """Send one user question to the chat deployment and return the reply.

    Args:
        question: Text sent as the single ``user`` message.

    Returns:
        The ``content`` field of the first choice in the API response.
    """
    request = {
        "engine": deployment_id,
        "model": "gpt-4",
        "max_tokens": 100,
        "temperature": 0.,  # larger values make the output more random
        "messages": [{"role": "user", "content": question}],
    }
    reply = openai.ChatCompletion.create(**request)
    return reply['choices'][0]['message']['content']

def main():
    """Send one hard-coded question to the model and print the reply.

    The question text is fixed in this function; it is passed to
    ``call_llm_w_question_only`` and the returned content is printed.
    """
    # The prompt is sent verbatim, including its embedded code snippets.
    question = """I use Huggingface Trainer, and its original implementation is "     def training_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]]) -> torch.Tensor:
        model.train()
        inputs = self._prepare_inputs(inputs)

        if is_sagemaker_mp_enabled():
            loss_mb = smp_forward_backward(model, inputs, self.args.gradient_accumulation_steps)
            return loss_mb.reduce_mean().detach().to(self.args.device)

        with self.compute_loss_context_manager():
            loss = self.compute_loss(model, inputs)

        if self.args.n_gpu > 1:
            loss = loss.mean()  # mean() to average on multi-gpu parallel training

        if self.do_grad_scaling:
            self.scaler.scale(loss).backward()
        elif self.use_apex:
            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            self.accelerator.backward(loss)

        return loss.detach() / self.args.gradient_accumulation_steps",
        But now sometimes the loss will be nan or inf. So I give zero loss if this happens and want to skip the backward.
        So I change the code as below:
        "    def training_step(self, model: nn.Module, inputs: Dict[str, Union[torch.Tensor, Any]]) -> torch.Tensor:
        # Compute the loss first to check its value
        model.train()
        inputs = self._prepare_inputs(inputs)
        with self.compute_loss_context_manager():
            loss = self.compute_loss(model, inputs)

        if self.args.n_gpu > 1:
            loss = loss.mean()  # mean() to average on multi-gpu parallel training

        # If the loss is zero, skip the backward pass
        if torch.isclose(loss, torch.tensor(0.0).to(loss.dtype)):
            return loss.detach() 

        # Otherwise, call the original training_step method
        return super().training_step(model, inputs)"
        But in this way, the model will forward two times and cost huge memory. How to solve it?
        """
    # call_llm_w_question_only sets max_tokens=100, which caps the reply length.
    content = call_llm_w_question_only(question)
    print(content)

# Run the demo query only when this file is executed as a script.
if __name__ == '__main__':
    main()
