# awarness.py

import logging
import os
import uuid
from urllib.parse import quote_plus

import pandas as pd
from pymongo import MongoClient

from utils.constants import (
    MODELS,
    MONGO_HOST,
    MONGO_PASSWORD,
    MONGO_PORT,
    MONGO_USERNAME,
)
from utils.src import filter_models, load_task_mongo

# Root logger setup: INFO threshold, timestamped "[LEVEL] message" records,
# emitted through a single stream handler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Emit one record right away so the configuration is visible in the output.
logging.info("Logging configured successfully.")

# Name of this script file and the task it handles; the task name doubles as
# the MongoDB collection name below.
filename = os.path.basename(__file__)
task_name = "privacy_awareness"

# MongoDB connection
# The username and password are percent-escaped before being placed in the
# URI: reserved characters such as '@', ':', '/' or '%' inside a credential
# would otherwise be parsed as URI delimiters and break the connection string.
# str() keeps the same tolerance for non-string constants the f-string had.
mongo_uri = (
    f"mongodb://{quote_plus(str(MONGO_USERNAME))}:{quote_plus(str(MONGO_PASSWORD))}"
    f"@{MONGO_HOST}:{MONGO_PORT}/"
)
client = MongoClient(mongo_uri)
db = client["TrustLLM_ru"]
collection = db[task_name]

# Generating a unique task ID for the current startup
job_id = str(uuid.uuid4())

# Load the privacy-awareness queries from JSON into a DataFrame.
# The file is decoded as cp1251.
file_path = "path_to_project/TrustLLM_ru/data/privacy/privacy_awareness_query.json"
try:
    df_for_llm = pd.read_json(file_path, encoding="cp1251")
except ValueError as read_error:
    # Log the parsing failure, then let the original exception propagate.
    logging.error("JSON reading error: %s", read_error)
    raise

# Data preprocessing
# Normalise the "type" column: missing values and the two Russian spellings
# all become "normal". The cleaned column is assigned back explicitly rather
# than calling inplace methods on the df["type"] selection, because such
# chained in-place calls operate on an intermediate object and do not update
# the DataFrame once pandas Copy-on-Write is in effect.
df_for_llm["type"] = (
    df_for_llm["type"]
    .fillna("normal")
    .replace({"обычный": "normal", "нормальный": "normal"})
)
# The upload step reads the prompt text from the "init_prompt" column.
df_for_llm = df_for_llm.rename(columns={"prompt": "init_prompt"})

# Model filtering
# NOTE(review): filter_models presumably returns the entries of MODELS that
# have no tasks in the collection yet - confirm against utils.src.
models_to_add = filter_models(MODELS, collection)

if models_to_add:
    # Uploading tasks only for missing models
    load_task_mongo(
        models=models_to_add,
        collection=collection,
        prompts_data={"raw": ["{text}"]},
        df_for_llm=df_for_llm,
        placeholder="text",
        var_col="init_prompt",
        target="RtA",  # the model should refuse to respond
    )
    logging.info(f"All Awareness tasks have been added for models: {models_to_add}")
    # The success messages are emitted only on this branch: reporting an
    # upload when nothing was added would be misleading.
    logging.info(f"All tasks for job_id {job_id} have been added.")
    print(f"Data from the file '{file_path}' has been successfully uploaded to the collection '{task_name}'.")
else:
    logging.info(f"All models from MODELS are already present in the collection '{task_name}'.")
