# ruhatespeech.py

import ast
import io
import logging
import os
import uuid

import pandas as pd
from pymongo import MongoClient

from utils.constants import (
    MODELS,
    MONGO_HOST,
    MONGO_PASSWORD,
    MONGO_PORT,
    MONGO_USERNAME,
)
from utils.src import add_task, filter_models, replace_curl

# Console logging at INFO level, with a timestamp and severity on every record.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
logging.info("Logging configured successfully.")

# The task name is this script's file name with the extension stripped;
# it is also used as the MongoDB collection name below.
filename = os.path.basename(__file__)
task_name, _extension = os.path.splitext(filename)

# Connect to MongoDB with the credentials imported from utils.constants.
mongo_uri = "mongodb://{}:{}@{}:{}/".format(
    MONGO_USERNAME, MONGO_PASSWORD, MONGO_HOST, MONGO_PORT
)
client = MongoClient(mongo_uri)
db = client["TrustLLM_ru"]

# Prompt templates: a single "raw" template consisting of the text placeholder.
prompts_data = dict(raw=["{text}"])

file_path = "path_to_project/TrustLLM_ru/data/ruhatespeech/ruhatespeech.csv"

# Load the dataset and discard its "meta" column.
df_for_llm = pd.read_csv(file_path).drop(columns=["meta"])

collection = db[task_name]

# A fresh UUID per run of this script, passed to every task added below.
job_id = str(uuid.uuid4())

def _parse_inputs(raw_inputs):
    """Convert the raw ``inputs`` cell of a dataset row into prompt variables.

    The text is parsed as JSON first; if that fails it is parsed as a Python
    literal (for example a dict written with single quotes).

    Args:
        raw_inputs: Text of the ``inputs`` column for a single row.

    Returns:
        The parsed value (a dict for well-formed rows).

    Raises:
        ValueError: If the text is neither valid JSON nor a valid literal.
        SyntaxError: If the fallback literal parse hits malformed syntax.
    """
    try:
        # read_json is given a file-like object because passing a literal
        # JSON string is deprecated since pandas 2.1.
        return pd.read_json(io.StringIO(raw_inputs), typ="series").to_dict()
    except ValueError:
        # SECURITY: literal_eval only accepts Python literals, so a malformed
        # or malicious cell cannot execute code the way eval() would.
        return ast.literal_eval(raw_inputs)


# Model filtering.
# NOTE(review): filter_models presumably returns the models from MODELS that
# have no tasks in the collection yet - confirm against utils.src.
models_to_add = filter_models(MODELS, collection)

if not models_to_add:
    logging.info("All models from MODELS are already present in the 'ru hate speech' collection.")
else:
    # Add one task per (model, dataset row) pair.
    for model in models_to_add:
        for _, row in df_for_llm.iterrows():
            add_task(
                collection=collection,
                row=row.to_dict(),
                job_id=job_id,
                model=model,
                prompt=row["instruction"],
                variables=_parse_inputs(row["inputs"]),
                # -1 is the target passed when the row has no "outputs" column.
                target=row.get("outputs", -1),
            )
    logging.info(f"All RuhateSpeech tasks have been added for models: {models_to_add}")

    # Report the upload only when tasks were actually added in this run.
    print(f"Data from the file '{file_path}' has been successfully uploaded to the collection '{task_name}'.")
