from openai import OpenAI
from dotenv import load_dotenv
import os
from utils.utils import read_jsonl, write_jsonl
import tqdm


# Load variables from a .env file into the process environment at import time.
# override=True lets values from .env replace variables that are already set
# (python-dotenv semantics); rephrase_file() later reads OPENAI_ORG_ID and
# OPENAI_API_KEY via os.getenv.
load_dotenv(override=True)

def rephrase_file(file_name, repeat_num=3, verbose=False):
    """Rephrase every question of a JSONL file with the OpenAI chat API.

    For each row returned by ``read_jsonl(file_name)`` the model is asked for
    ``repeat_num`` rephrasings of ``row['question']``. Every rephrasing
    produces its own output row: a copy of the input row whose ``'question'``
    is replaced by the rephrased text and which carries a ``'rephrased_id'``
    (the position of that rephrasing in the API response). All output rows
    are written with ``write_jsonl`` to ``<stem>_rep_<repeat_num>.jsonl``.

    Credentials are read from the ``OPENAI_ORG_ID`` and ``OPENAI_API_KEY``
    environment variables.

    Args:
        file_name: Path of the input JSONL file; each row must provide a
            ``'question'`` key.
        repeat_num: Number of completions requested per question (sent as
            the API's ``n`` parameter).
        verbose: When true, print the original question and every
            rephrasing as they are processed.
    """
    prompt = """Please rephrase the following problem with creativity and boldness according to these guidelines:
    - Preserve the abstract essence of the problem while maintaining clarity.
    - Provide only the rephrased sentence as the final output, without any additional context or explanation.
    The original sentence is:

    """
    client = OpenAI(
        organization=os.getenv('OPENAI_ORG_ID'),
        api_key=os.getenv('OPENAI_API_KEY'),
    )

    data = read_jsonl(file_name)

    new_data = []

    for row in tqdm.tqdm(data):
        question = row['question']
        if verbose:
            print('original question is :', question)
        response = client.chat.completions.create(
            model='gpt-4o-2024-11-20',
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt + question},
            ],
            n=repeat_num,
        )

        # Iterate over the choices actually returned instead of indexing up
        # to repeat_num, so a short response cannot raise IndexError.
        for i, choice in enumerate(response.choices):
            rephrased_sentence = choice.message.content
            if verbose:
                print(f'rephrased sentence {i} is :', rephrased_sentence)
            # Build a fresh dict per rephrasing. Mutating and re-appending
            # `row` itself would make every appended entry alias the same
            # object, leaving only the last rephrasing in the output.
            new_data.append(
                {**row, 'question': rephrased_sentence, 'rephrased_id': i}
            )

    # Only swap a trailing '.jsonl'; str.replace would also rewrite directory
    # names containing it, and would return the input path unchanged (and so
    # overwrite the input file) when the suffix is absent.
    suffix = '.jsonl'
    stem = file_name[:-len(suffix)] if file_name.endswith(suffix) else file_name
    output_path = f'{stem}_rep_{repeat_num}{suffix}'
    write_jsonl(new_data, output_path)

if __name__ == '__main__':
    # Script entry point: rephrase the multi-label training set using the
    # default repeat count.
    # Alternative input kept for reference:
    #   'ft_data/llama3.1-8b/train/data_multi-label_lvl_345.jsonl'
    rephrase_file(
        'ft_data/llama3.1-8b/train/data_multi-label.jsonl',
        verbose=False,
    )
