# slava.py

import logging
import os
import uuid

import pandas as pd
from dotenv import load_dotenv
from pymongo import MongoClient

from utils.constants import (
    MODELS,
    MONGO_HOST,
    MONGO_PASSWORD,
    MONGO_PORT,
    MONGO_USERNAME,
)
from utils.src import add_task, filter_models, replace_curl

# Load variables from a .env file into the process environment.
# NOTE(review): utils.constants is imported above this call; if it reads
# os.environ at import time it will not see .env values — confirm.
load_dotenv()

# Task name; also used below as the name of the MongoDB collection.
TASK = "SLAVA_only4"

# Logging setup: INFO level and above, timestamped records, emitted through a
# single stream handler (stderr by default).
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logging.info("Logging configured successfully.")

# MongoDB connection.
# The credentials are handed to MongoClient as keyword arguments instead of
# being interpolated into the URI: userinfo embedded in a MongoDB URI must be
# percent-escaped, so a password containing '@', ':', '/' or '%' would break
# (or silently misparse) the connection string. Keyword arguments are taken
# verbatim and need no escaping. This also keeps the secret out of `mongo_uri`.
mongo_uri = f"mongodb://{MONGO_HOST}:{MONGO_PORT}/"
client = MongoClient(
    mongo_uri,
    username=MONGO_USERNAME,
    password=MONGO_PASSWORD,
)
db = client["TrustLLM_ru"]
collection = db[TASK]

# Unique job ID generated once per script run; every task queued below is
# tagged with it.
job_id = str(uuid.uuid4())

# Source dataset in JSON Lines format. Defined once so that the load below and
# the final status message always refer to the same file.
DATA_PATH = "path_to_project/TrustLLM_ru/data/slava/open_questions_data__one_answer.jsonl"

df_for_llm = pd.read_json(DATA_PATH, lines=True)

# 1. Replace the answer-format phrase in the 'instruction' column.
#    regex=False makes this a literal substring replacement.
df_for_llm["instruction"] = df_for_llm["instruction"].str.replace(
    "Выберите один вариант правильного ответа и укажите его номер в ответе.",
    "Укажите в результате цифру правильного ответа. Результат должен содержать только одну цифру. Результат:",
    regex=False,
)

# 2. Keep only the rows with exactly 4 non-empty answer options.
#    The per-row count is kept in a standalone Series, so no temporary column
#    has to be added to (and later dropped from) the frame.
option_counts = df_for_llm["inputs"].apply(
    lambda x: sum(1 for v in x["options"].values() if v is not None)
)
df_for_llm = df_for_llm[option_counts == 4].reset_index(drop=True)

# Model filtering: filter_models returns the models that tasks should be
# queued for (per the log message below, an empty result means every model is
# already present in the collection).
models_to_add = filter_models(MODELS, collection)

if not models_to_add:
    logging.info(
        "All models from MODELS are already present in the '%s' collection.", TASK
    )
else:
    for model in models_to_add:
        for _, row in df_for_llm.iterrows():
            inputs = row["inputs"]
            variables = {"task": inputs["task"], "text": inputs["text"]}
            # Only non-empty answers are added, in sorted key order.
            options = inputs["options"]
            for key in sorted(k for k, v in options.items() if v is not None):
                # Converting the key 'option_1' to 'Option_1'
                variables[key.capitalize()] = options[key]
            add_task(
                collection=collection,
                row=row.to_dict(),
                job_id=job_id,
                model=model,
                prompt=row["instruction"],
                variables=variables,
                # Rows without an 'outputs' field get -1 as the target.
                target=row.get("outputs", -1),
            )
    logging.info("All Slava tasks have been added for models: %s", models_to_add)

    # Report success only when tasks were actually queued, and name the file
    # that was really loaded.
    print(
        f"Data from the file '{DATA_PATH}' successfully uploaded to the collection '{TASK}'."
    )
