import os
import sys
import openai
import csv
import json
import logging
from tqdm import tqdm
from huggingface_hub import login, hf_hub_download
from tenacity import (
    retry,
    retry_if_not_exception_type,
    stop_after_attempt,
    wait_random_exponential,
    before_sleep_log,
)

# Hugging Face access token. It is read from the environment so the secret
# never lives in source control; the token that used to be hard-coded here
# must be treated as leaked and revoked on the Hub.
ACCESS_TOKEN = os.getenv("HF_TOKEN")
# Name of the dataset file to fetch and its local copy.
JSONL_FILE = "train.jsonl"
# Hugging Face dataset repository id (passed to download() as the repo id).
JSONL_URL = "GAIR/lima"
# CSV file that request/response pairs are appended to.
DATA_FILE = "data.csv"

# The OpenAI key is likewise taken from the environment.
openai.api_key = os.getenv("OPENAI_API_KEY")

# DEBUG level so that the retry messages emitted before each back-off sleep
# are visible on stderr.
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
logger = logging.getLogger(__name__)


def download(url, file, access_token):
    """Log in to the Hugging Face Hub and fetch a single dataset file.

    Args:
        url: Repository id of the dataset on the Hub (used as ``repo_id``).
        file: Name of the file inside that repository to download.
        access_token: Token handed to ``huggingface_hub.login``.

    The file is requested with ``local_dir="."``, i.e. relative to the
    current working directory.
    """
    login(access_token)

    fetch_options = {
        "repo_id": url,
        "filename": file,
        "repo_type": "dataset",
        "local_dir": ".",
    }
    hf_hub_download(**fetch_options)


def save(file, request, response):
    """Append one ``(request, response)`` row to the CSV file at *file*.

    Args:
        file: Path of the CSV file; created if it does not exist.
        request: Value written to the first column.
        response: Value written to the second column.
    """
    # newline="" is required by the csv module: without it, platforms that
    # translate "\n" (Windows) turn the writer's "\r\n" terminator into
    # "\r\r\n", producing blank rows and corrupting embedded newlines.
    # An explicit encoding keeps non-ASCII text independent of the locale.
    with open(file, "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow([request, response])


@retry(
    retry=retry_if_not_exception_type(
        (openai.error.InvalidRequestError, openai.error.AuthenticationError)
    ),
    wait=wait_random_exponential(min=8, max=500),
    stop=stop_after_attempt(30),
    before_sleep=before_sleep_log(logger, logging.DEBUG),
    reraise=True,
)
def get_response(request):
    """Send *request* as a user message to the chat model and return its reply.

    The call is retried (up to 30 attempts, randomized exponential back-off)
    on any exception except ``InvalidRequestError`` and
    ``AuthenticationError``, which propagate immediately. Once the attempts
    are exhausted the last exception is re-raised as-is (``reraise=True``).

    Args:
        request: Text placed in the user message.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    system_turn = {
        "role": "system",
        "content": "You are an AI assistant that helps people find information.",
    }
    user_turn = {"role": "user", "content": request}

    completion = openai.ChatCompletion.create(
        model="gpt-4-0613",
        engine=None,
        messages=[system_turn, user_turn],
        max_tokens=7000,
        temperature=0.7,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=None,
        request_timeout=60,
    )

    first_choice = completion["choices"][0]
    return first_choice["message"]["content"]


def main():
    """Query the model for every prompt in the dataset and log pairs to CSV.

    Downloads the dataset file when it is not present locally, then for each
    non-empty JSONL line takes the first entry of its ``"conversations"``
    list, requests a completion, and appends ``(request, response)`` to
    ``DATA_FILE``.
    """
    if not os.path.exists(JSONL_FILE):
        download(JSONL_URL, JSONL_FILE, ACCESS_TOKEN)

    # save() appends, so only emit the header when starting a fresh file;
    # otherwise every re-run would insert a stray "request,response" row
    # in the middle of the data.
    if not os.path.exists(DATA_FILE) or os.path.getsize(DATA_FILE) == 0:
        save(DATA_FILE, "request", "response")

    with open(JSONL_FILE, encoding="utf-8") as f:
        for line in tqdm(f):
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                # instead of crashing in json.loads.
                continue
            request = json.loads(line)["conversations"][0]

            try:
                response = get_response(request)
            except openai.error.InvalidRequestError:
                # The placeholder is kept for compatibility with existing
                # output, but the real reason is logged because not every
                # invalid request is necessarily a token-limit failure.
                logger.warning(
                    "Request rejected by the API; recording placeholder",
                    exc_info=True,
                )
                response = "MAXIMUM TOKEN LIMIT"

            save(DATA_FILE, request, response)


if __name__ == "__main__":
    main()
