import glob
import json
import logging

import numpy as np
import pandas as pd
from flask import Flask, jsonify, request
from sentence_transformers import SentenceTransformer

# Log line layout: timestamp, source file:line, function name, level, message.
log_format = "%(asctime)s - %(filename)s:%(lineno)d - %(funcName)s - %(levelname)s - %(message)s"
# force=True replaces any handlers already attached to the root logger, so this
# configuration wins even if logging was configured before this module ran.
logging.basicConfig(level=logging.INFO, format=log_format, datefmt="%Y-%m-%d %H:%M:%S", force=True)
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

# Identifier of the embedding model handed to SentenceTransformer in init_model().
EMB_MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"
# Shared model instance; stays None until init_model() assigns it.
emb_model: SentenceTransformer = None  # type: ignore


def init_model():
    """Load the embedding model into the module-level ``emb_model``.

    Instantiates ``SentenceTransformer`` for ``EMB_MODEL_NAME`` with a
    bfloat16 torch dtype, runs a single throwaway encode call, and logs
    that loading finished. Calling it again replaces the existing instance.
    """
    global emb_model
    load_options = {"torch_dtype": "bfloat16"}
    emb_model = SentenceTransformer(EMB_MODEL_NAME, model_kwargs=load_options)
    # The result is discarded; presumably this is a warm-up / smoke test so the
    # first real request does not hit a cold model -- TODO confirm intent.
    warmup_batch = ["hi"]
    emb_model.encode(warmup_batch)
    logger.info("Model loaded successfully.")


# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)


@app.route("/encode", methods=["POST"])
def encode():
    """Embed the texts in the request body with the loaded model.

    Expected JSON object body:
        texts: non-empty collection of texts to embed.
        prompt: optional prompt string forwarded to ``emb_model.encode``
            (defaults to ``""``).

    Returns:
        200 and ``{"embeddings": [...]}`` on success.
        400 and ``{"error": ...}`` when the body is not a JSON object or
        ``texts`` is missing or empty.
        500 and ``{"error": ...}`` when encoding itself fails.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # raising an HTTP error, which the generic handler below would otherwise
    # misreport as a 500.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    texts = data.get("texts", [])
    if not texts:
        return jsonify({"error": "No texts provided"}), 400
    prompt = data.get("prompt", "")

    try:
        if len(texts) == 1:
            logger.info("Received request text to encode: '%s'", texts[0])
        else:
            logger.info("Received request to encode %d texts.", len(texts))

        embeddings = emb_model.encode(texts, prompt=prompt)
        return jsonify({"embeddings": embeddings.tolist()})
    except Exception as e:
        # Top-level request boundary: log the traceback and report a server error.
        logger.exception("Error during encoding: %s", e)
        return jsonify({"error": str(e)}), 500


@app.route("/similarity", methods=["POST"])
def similarity():
    """Compute pairwise similarity between two collections of texts.

    Expected JSON object body:
        texts1: non-empty collection of texts.
        texts2: non-empty collection of texts.

    Both collections are embedded with ``emb_model.encode`` (no explicit
    prompt) and the resulting embeddings are compared with
    ``emb_model.similarity``.

    Returns:
        200 and ``{"similarities": [[...], ...]}`` with one row per entry of
        ``texts1`` and one column per entry of ``texts2``.
        400 and ``{"error": ...}`` when the body is not a JSON object or
        either collection is missing or empty.
        500 and ``{"error": ...}`` when embedding or comparison fails.
    """
    # silent=True returns None for a missing or malformed JSON body instead of
    # raising an HTTP error that would be misreported as a 500 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    texts1 = data.get("texts1", [])
    texts2 = data.get("texts2", [])
    if not texts1 or not texts2:
        return jsonify({"error": "Both texts1 and texts2 must be provided"}), 400

    try:
        logger.info(
            "Received request to calculate similarity between %d and %d texts.",
            len(texts1),
            len(texts2),
        )
        # SentenceTransformer.similarity operates on embeddings, not raw
        # strings, so both sides must be encoded first.
        embeddings1 = emb_model.encode(texts1)
        embeddings2 = emb_model.encode(texts2)
        similarities = emb_model.similarity(embeddings1, embeddings2)
        return jsonify({"similarities": similarities.tolist()})
    except Exception as e:
        # Top-level request boundary: log the traceback (the same way /encode
        # does) and report a server error.
        logger.exception("Error during similarity calculation: %s", e)
        return jsonify({"error": str(e)}), 500


@app.route("/health", methods=["GET"])
def health():
    """Answer health probes with a fixed ``{"status": "healthy"}`` JSON body."""
    payload = {"status": "healthy"}
    return jsonify(payload)


def encode_hints_db(db_path: str) -> str:
    """Embed the unique hints of a CSV hints database and store them on disk.

    Reads the CSV at ``db_path``, keeps the first row for every distinct
    ``hint`` value, builds one ``"<semantic_keys>: <hint>"`` line per row and
    embeds those lines with the prompt ``"task hint"``.

    Two files are written next to the CSV:
        ``<db_path>.embs.npy``: a one-element NumPy object array wrapping a
            dict that maps each line to its embedding vector.
        ``<db_path>.embs.metadata.json``: the name of the embedding model.

    Args:
        db_path: Path of the hints CSV; it is read for its ``hint`` and
            ``semantic_keys`` columns.

    Returns:
        Path of the written ``.embs.npy`` file.
    """
    # Load the model on first use.
    if emb_model is None:
        init_model()

    table = pd.read_csv(db_path)
    deduped = table.drop_duplicates(subset=["hint"], keep="first")
    hint_texts = deduped["hint"].tolist()
    key_texts = deduped["semantic_keys"].tolist()

    lines = []
    for hint, key in zip(hint_texts, key_texts):
        lines.append(f"{key}: {hint}")

    vectors = emb_model.encode(lines, prompt="task hint")
    # The dict is wrapped in a single-element list, which makes np.array build
    # an object array holding the mapping line -> embedding.
    line_to_vector = dict(zip(lines, vectors))
    emb_dict = np.array([line_to_vector])

    embs_path = f"{db_path}.embs.npy"
    embs_metadata_path = f"{db_path}.embs.metadata.json"
    np.save(embs_path, emb_dict)

    metadata = {"model": EMB_MODEL_NAME}
    with open(embs_metadata_path, "w") as f:
        json.dump(metadata, f)
    return embs_path


# Script entry point: load the model once, then start the Flask server bound
# to 127.0.0.1:5000 with threaded request handling.
if __name__ == "__main__":
    init_model()
    # Disabled one-off job: build embeddings for a hints CSV instead of (or
    # before) starting the server.
    # db_path = "/Users/<user>/agentlab_results/2025-08-18_18-52-47-applicability_test3_with_hints_gpt-5/hint_db_updated.csv"
    # encode_hints_db(db_path)
    app.run(host="127.0.0.1", port=5000, threaded=True)
    # The string below is a bare expression statement used as inline
    # documentation; it has no runtime effect. Its snippets use the
    # `requests` package, which this module does not import.
    """
    Example usage:

    # Test the encode endpoint

    # Encode texts
    response = requests.post('http://localhost:5000/encode',
                            json={'texts': ['Hello world', 'How are you?']})
    print(response.json())

    # Calculate similarity
    response = requests.post('http://localhost:5000/similarity',
                            json={'texts1': ['Hello world'],
                                  'texts2': ['Hi there']})
    print(response.json())

    # Health check
    response = requests.get('http://localhost:5000/health')
    print(response.json())
    """
