# misuse.py

import logging
import os
from urllib.parse import quote_plus

import pandas as pd
from pymongo import MongoClient

from utils.constants import (
    MODELS,
    MONGO_HOST,
    MONGO_PASSWORD,
    MONGO_PORT,
    MONGO_USERNAME,
)
from utils.src import filter_models, load_task_mongo

# Root logger setup: INFO and above, timestamped records, one stream handler
_LOG_FORMAT = "%(asctime)s [%(levelname)s] %(message)s"
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format=_LOG_FORMAT,
    level=logging.INFO,
)
logging.info("Logging configured successfully.")

# Derive the task name from this script's own file name (extension stripped)
filename = os.path.split(__file__)[1]
task_name, _extension = os.path.splitext(filename)

# MongoDB connection
# PyMongo requires the username and password inside a URI to be percent-escaped;
# an unescaped credential containing reserved characters (e.g. "@", ":", "/",
# "+", "%") is either rejected as an invalid URI or decoded to a different value.
# str() keeps the previous f-string behavior for non-string constants.
# NOTE(review): assumes the constants hold raw (not already escaped) values — confirm.
mongo_user = quote_plus(str(MONGO_USERNAME))
mongo_password = quote_plus(str(MONGO_PASSWORD))
mongo_uri = f"mongodb://{mongo_user}:{mongo_password}@{MONGO_HOST}:{MONGO_PORT}/"
client = MongoClient(mongo_uri)
db = client["TrustLLM_ru"]

# Prompt templates: a single "raw" variant made of just the {text} placeholder
prompts_data = {"raw": ["{text}"]}

file_path = "path_to_project/TrustLLM_ru/data/misuse/Misuse_ru_420 v2.xlsx"

# The first spreadsheet column is used as the DataFrame index
df_for_llm = pd.read_excel(file_path, index_col=0)

# Rows with a missing "type" take their value from the "label" column
mask = df_for_llm["type"].isna()
df_for_llm.loc[mask, "type"] = df_for_llm.loc[mask, "label"]

# "label" is no longer needed after the back-fill; "prompt" becomes
# "init_prompt" for easier processing
df_for_llm = df_for_llm.drop(columns="label")
df_for_llm = df_for_llm.rename(columns={"prompt": "init_prompt"})

collection = db["misuse_ru"]

# Models from MODELS that still need their tasks uploaded
# (presumably filter_models drops the ones already stored in the collection —
# verify against utils.src)
models_to_add = filter_models(MODELS, collection)

if not models_to_add:
    logging.info("All models from MODELS are already present in the database.")
else:
    # Uploading tasks only for missing models
    load_task_mongo(
        models=models_to_add,
        collection=collection,
        prompts_data=prompts_data,
        df_for_llm=df_for_llm,
        placeholder="text",
        var_col="init_prompt",
        target="RtA",
    )
    logging.info(f"All Misuse tasks have been added for models: {models_to_add}")
    # Report the upload only when one actually happened, and take the name from
    # the collection object so the message always matches the collection that
    # was written to.
    print(
        f"Data from the file '{file_path}' has been successfully uploaded "
        f"to the collection '{collection.name}'."
    )
