import openai
import json
import os
import pandas as pd
import warnings
from tqdm import tqdm
from pprint import pprint
import pandas as pd
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
    retry_if_exception_type
)

# Suppress all Python warnings for the duration of the script.
warnings.filterwarnings('ignore')

# Take the API key from the environment instead of hard-coding it in the
# source. Falls back to the previous empty value when the variable is unset.
openai.api_key = os.environ.get('OPENAI_API_KEY', '')
# Model id passed as ``model=`` for the question-generation request.
# Left blank here; set it before running.
fine_tuned_model_id = ''

# System prompt sent with every request to the fine-tuned model.
# NOTE(review): presumably matches the prompt used during fine-tuning --
# confirm before rewording it.
system_message = "You are a helpful ocean assistant. You are to extract the question from each of the answer provided."


def create_user_message(row):
    """Return the user prompt for one answer.

    The prompt shows ``row`` after an ``Answer:`` label and ends with an
    open ``Question:`` label for the model to complete.
    """
    template = "Answer: {}\n\nQuestion: "
    return template.format(row)


# OpenAI error types that trigger a retry of the request.
_RETRYABLE_OPENAI_ERRORS = (
    openai.error.APIError,
    openai.error.APIConnectionError,
    openai.error.RateLimitError,
    openai.error.ServiceUnavailableError,
    openai.error.Timeout,
)


@retry(
    stop=stop_after_attempt(5),
    wait=wait_random_exponential(multiplier=1, min=1, max=5),
    retry=retry_if_exception_type(_RETRYABLE_OPENAI_ERRORS),
)
def chat_completion_with_backoff(**kwargs):
    """Call ``openai.ChatCompletion.create`` with retries.

    All keyword arguments are forwarded unchanged. The call is attempted at
    most five times, with a randomized exponential wait between attempts,
    and only the error types in ``_RETRYABLE_OPENAI_ERRORS`` are retried.

    Returns:
        Whatever ``openai.ChatCompletion.create`` returns.
    """
    completion = openai.ChatCompletion.create(**kwargs)
    return completion


# Number of accepted rows between two intermediate saves.
_CHECKPOINT_EVERY = 1000

# Prompt prefixes; the raw input line is appended to each one.
_RELEVANCE_PROMPT = (
    'You are a marine expert, please check if following sentences contains rich ocean related information.'
    'If so, output "related". Otherwise, just output "unrelated". Sentences:'
)
_SUMMARY_PROMPT = 'Summarize the following sentences and output them in an concise way.\nSentence:'


def _ask(model, system_prompt, user_prompt, max_tokens):
    """Send one system+user exchange and return the reply text of the first choice."""
    response = chat_completion_with_backoff(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0,
        max_tokens=max_tokens,
    )
    return response["choices"][0]["message"]["content"]


def _save_results(path):
    """Write the rows collected so far to *path* as CSV and return the frame."""
    frame = pd.DataFrame({
        'en_raw_output': en_raw_output,
        'en_output': en_output,
        'en_input': en_input,
    })
    frame.to_csv(path, index=False)
    return frame


with open('../literature_data.txt', 'r') as f:
    lines = f.readlines()

print(len(lines))
en_raw_output = []  # original input lines that were judged ocean-related
en_input = []       # replies of the fine-tuned model to the summaries
en_output = []      # summaries of the original lines

try:
    for line in tqdm(lines):
        # Blank lines carry no content; skip them instead of spending API
        # calls on them.
        if not line.strip():
            continue

        # Step 1: relevance filter.
        verdict = _ask("gpt-3.5-turbo", "marine expert", _RELEVANCE_PROMPT + line, 200)
        if 'unrelated' in verdict.lower():
            print('unrelated')
            print('==================')
            continue

        # Step 2: condense the line.
        new_line = _ask("gpt-3.5-turbo", "professional writer", _SUMMARY_PROMPT + line, 200)

        # Step 3: send the summary, wrapped by create_user_message, to the
        # fine-tuned model.
        res = _ask(fine_tuned_model_id, system_message, create_user_message(new_line), 50)

        en_raw_output.append(line)
        en_output.append(new_line)
        en_input.append(res)

        # Periodic checkpoint so a long run can be inspected or recovered.
        if len(en_raw_output) % _CHECKPOINT_EVERY == 0:
            _save_results('../literature_tmp.csv')
except BaseException:
    # Covers exhausted retries and Ctrl-C alike: keep what was already
    # collected, then let the original error propagate unchanged.
    if en_raw_output:
        _save_results('../literature_tmp.csv')
    raise

df = _save_results('../literature.csv')
