import os
import sys
import json
import pandas as pd
import numpy as np
import openai
import argparse
from tqdm import tqdm

# openai.api_key = ''

# Few-shot example questions. Each one is paired, by position, with the
# reference answer at the same index in GPT4_SAMPLE below.
question1 = 'Classify the following sentence into the given categories: He worked hard to achieve his goal'
question2 = 'Create a for-loop for the following task. Calculate the sum of the numbers from 1 to 10.'
question3 = 'Categorize the words "whale," "dolphin," "shark," "clam," and "crab" according to their biological classification.'
question4 = 'Explain what a business plan is'

# Reference answers used as style examples in the system prompt
# (index i answers question{i+1}).
GPT4_SAMPLE = ['The sentence "He worked hard to achieve his goal" can be classified as a positive, declarative sentence that reflects the theme of determination or perseverance.\n\nExplanation:\n1. This sentence is declarative because it presents a clear, straightforward statement or fact. It is stating that the subject \'He\' has worked hard in pursuit of a particular goal.\n2. The sentence is positive because it conveys a constructive action i.e., working hard to achieve a goal, which usually has positive connotations.\n3. The sentence reflects determination or perseverance because it clearly shows the subject\'s effort and commitment toward achieving a desired outcome or goal.',
 "Code:\n\nsum = 0\nfor i in range(1, 11):\n    sum += i\nprint(sum)\n\nExplanation:\n\nThe code starts by initializing a variable `sum` to 0. It then loops through the range of numbers from 1 to 10 using a for-loop and adds each number to the `sum` variable. Finally, it prints the value of `sum` which would be the sum of all the numbers from 1 to 10.",
 '1. Whale - Mammals: Whales are part of the Cetacean family, a group of marine animals that also includes dolphins and porpoises. They are mammals, meaning they bear live young, nurse their offspring, and are warm-blooded among other characteristics.\n\n2. Dolphin - Mammals: Like whales, dolphins are also mammals and part of the Cetacean family. They share similar traits with mammals, such as being warm-blooded, having lungs, and nursing their young.\n\n3. Shark - Fish: Sharks belong to a group of fish known as elasmobranchs, which also include rays and skates. Unlike mammals, they are cold-blooded and have gills for breathing underwater.\n\n4. Clam - Mollusks: Clams belong to the Mollusca phylum. They are invertebrates with a hard, protective shell.\n\n5. Crab - Crustaceans: Crabs are crustaceans, a type of arthropod. They have a hard exoskeleton and jointed limbs.\n\nConfidence level: 100 — These are commonly known classifications for these animals based on widely accepted biological and zoological taxonomies.',
 'A business plan is a comprehensive, written document that describes the nature of a business, the sales and marketing strategy, and the financial background, and contains a projected profit and loss statement. It is a crucial tool to guide the establishment, growth, and management of a business. It outlines a business\'s goals and provides detailed plans and budgets showing how the goals can be realized. A business plan is often required by investors and lenders to assess the viability and potential profitability of a business before committing financial resources.']
# Prints the number of reference answers whenever the module is loaded.
print(len(GPT4_SAMPLE))

# System prompt: the task description followed by the four question/answer
# style examples. Every interpolated value is wrapped in single quotes; the
# fourth answer previously lacked its closing quote, which let the example
# run into the final instruction.
SYSTEM_MESSAGE = f"""
You are a helpful assistant whose job is to rewrite and summarize responses to a given question so that they fit the style of GPT-4 responses in the examples below.

Question: '{question1}'
GPT-4 Response: '{GPT4_SAMPLE[0]}'

Question: '{question2}'
GPT-4 Response: '{GPT4_SAMPLE[1]}'

Question: '{question3}'
GPT-4 Response: '{GPT4_SAMPLE[2]}'

Question: '{question4}'
GPT-4 Response: '{GPT4_SAMPLE[3]}'

Your job is to summarize and rewrite responses to new questions in the style of GPT-4 responses above while retaining important information to answer the question."""

# Chat message sent first in every request made by clean_data().
SYSTEM_PROMPT = {
    'role': 'system',
    'content': SYSTEM_MESSAGE
}


def _strip_response_prefix(text, prefix='Response: '):
    """Return ``text`` without one leading ``prefix`` marker.

    Only a marker at the very start (ignoring leading whitespace) is removed,
    so any later occurrence of the same characters inside the answer body,
    for example in kept code or tables, is left untouched. If the marker is
    not at the start, ``text`` is returned unchanged.
    """
    stripped = text.lstrip()
    if stripped.startswith(prefix):
        return stripped[len(prefix):]
    return text


def clean_data(file_path):
    """Rewrite every stored agent response in ``file_path`` and save in place.

    The file is loaded as a JSON object. For each key, only the first entry
    of its value is used; that entry is indexed as a two-message exchange
    whose ``[0]['content']`` is treated as the question and
    ``[1]['content']`` as the agent response.
    # NOTE(review): message roles are not checked here - confirm the input
    # always stores the question first and the answer second.

    Each response is sent to ``gpt-3.5-turbo-0301`` together with
    ``SYSTEM_PROMPT`` and replaced by the model's rewrite. The file is then
    overwritten with ``{key: [[first_message, rewritten_assistant_message]]}``.

    Args:
        file_path: Path of the JSON file to read and overwrite.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_generated_description = {}
    for key, value in tqdm(data.items()):
        model_response = value[0]
        question = model_response[0]['content']
        response = model_response[1]['content']
        user_message = f"""
        Here is a response from an agent to the following question.

        Question: '{question}'
        Agent Response: '{response}'

        Can you rewrite the response so it only contains relevant information for answering the question? Keep all code and tables and formatting. Remove all references to other agents. Give your answer as "Response: "
        """
        prompt = {
            'role': 'user',
            'content': user_message
        }
        completion = openai.ChatCompletion.create(
            model='gpt-3.5-turbo-0301',
            messages=[SYSTEM_PROMPT, prompt],
            n=1
        )
        # The model is asked to start its answer with "Response: "; drop only
        # that leading marker rather than every occurrence in the text.
        sum_response = _strip_response_prefix(
            completion['choices'][0]['message']['content']
        )
        new_assistant_message = {
            'role': 'assistant',
            'content': sum_response
        }
        new_generated_description[key] = [[model_response[0], new_assistant_message]]

    # Serialise to a sibling temp file first, then swap it in, so a failure
    # while writing cannot leave the original file truncated.
    tmp_path = f'{file_path}.tmp'
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(new_generated_description, f)
    os.replace(tmp_path, file_path)

if __name__ == '__main__':
    # Script entry point: the mandatory --file option names the JSON file
    # that clean_data() reads and then overwrites.
    cli = argparse.ArgumentParser()
    cli.add_argument('--file', required=True, action='store')
    options = cli.parse_args()
    clean_data(options.file)