import argparse
import asyncio
import configparser
import json
import os
import pickle
import random
from itertools import islice

import openai
import pandas as pd
from prompting_utils import (
    fetch_headline_ratings_async,
    fix_json_errors,
    integrate_errors,
    process_headlines,
    save_checkpoint,
)
from tqdm import tqdm

# Move the working directory three levels up before importing setup_utils
# (presumably to the project root, where setup_utils lives -- confirm).
os.chdir("../../../")
from setup_utils import add_api_env

# Presumably exposes API credentials via the environment -- see setup_utils.
add_api_env()

# NOTE(review): BASE_DIR was referenced below without being defined or imported
# anywhere in this file, which raises NameError as soon as the module loads.
# The working directory established by the chdir above is used as the base
# directory; confirm that this matches the intended project root.
BASE_DIR = os.getcwd()

# Directory that holds the per-year headline JSON files and experiment outputs.
data_dir = os.path.join(BASE_DIR, "datasets/nyt/data")


def main(years, topics, seed):
    """Sample rated headlines per year and topic, then request paraphrases.

    For every (year, topic) pair this loads
    ``<data_dir>/<year>/<topic>_rated_subset_headlines.json``, draws up to 50
    entries at random, and collects them in a dict keyed by headline text.
    That dict is handed to ``process_headlines`` with
    ``prompt_type="paraphrase"``.

    Args:
        years: Iterable of years (ints or strings); each one names a
            sub-directory of ``data_dir``.
        topics: Iterable of topic names used as file-name prefixes.
        seed: Seed for the ``random`` module so the sample is reproducible.

    Returns:
        Whatever ``process_headlines`` returns.
    """
    max_samples_per_file = 50

    random.seed(seed)
    headlines_dict = {}

    # Read in a sample of headlines for each year/topic combination.
    for year in years:
        for topic in topics:
            # Years usually arrive as ints (from ``range``); os.path.join only
            # accepts path-like components, so convert explicitly.
            path = os.path.join(
                data_dir, str(year), f"{topic}_rated_subset_headlines.json"
            )
            with open(path, "r", encoding="utf-8") as f:
                rated = json.load(f)

            sampled = random.sample(rated, min(len(rated), max_samples_per_file))
            for sample in sampled:
                # Identical headline text across files collapses to one entry;
                # the last one read wins.
                headlines_dict[sample["headline"]] = {
                    "year": sample["year"],
                    "topic": topic,
                }

    # Generate altered versions of the headlines with the model named below.
    output_dict = {}
    error_catcher = []
    experiments_dir = os.path.join(data_dir, "headline_experiments")

    # ``process_headlines`` has to be awaited, but ``main`` is a regular
    # function called synchronously from the script entry point, so the
    # coroutine is driven to completion with ``asyncio.run``.
    return asyncio.run(
        process_headlines(
            prompt_type="paraphrase",
            inputs=headlines_dict,
            output_dict=output_dict,
            error_catcher=error_catcher,
            model="gpt-4-1106-preview",
            thread_cap=40,
            batch_size=20,
            checkpoint_interval=5,
            output_path=os.path.join(
                experiments_dir, "paraphrased_headlines_ckpt_2022_2024.json"
            ),
            errors_path=os.path.join(
                experiments_dir, "paraphrased_errors_ckpt_2022_2024.pkl"
            ),
        )
    )


if __name__ == "__main__":
    # Command-line entry point: parse the year range, topics and seed, then
    # run the experiment over every year in [start_year, end_year].
    parser = argparse.ArgumentParser(description="Process some parameters.")
    parser.add_argument(
        "--start_year",
        type=int,
        # Required: the year range below cannot be built from a missing value.
        required=True,
        help="Start year for range of NYT headlines on which we want to run experiments",
    )
    parser.add_argument(
        "--end_year",
        type=int,
        required=True,
        help="End year for range of NYT headlines on which we want to run experiments",
    )

    parser.add_argument(
        "--topics",
        nargs="+",
        type=str,
        # Required: main() iterates over the topics for every year.
        required=True,
        help="List of news desks to pull headlines from",
    )
    parser.add_argument(
        "--seed",
        default=42,
        type=int,
        help="Seed for reproducibility",
    )
    # NOTE(review): --experiment_type is parsed but not passed to main().
    parser.add_argument(
        "--experiment_type",
        type=str,
        help="Type of experiment we're running. Options: paraphrased, untrue, future_hypothetical, fiction '",
    )

    args = parser.parse_args()

    # The end year is inclusive, hence the +1.
    years = range(args.start_year, args.end_year + 1)

    main(years, args.topics, args.seed)
