import os
import pandas as pd
from config_base import *

def _best_hits_by_name(sim_df):
    """Return the best-ranked hit for every named similar protein.

    Rows are ranked separately for each value of ``known_partner`` by
    descending ``similarity_score`` (rank 1 is the highest score).

    Args:
        sim_df: DataFrame with the columns ``known_partner``,
            ``similarity_score`` and ``similar_protein_name``.

    Returns:
        Dict mapping each non-missing ``similar_protein_name`` to a tuple
        ``(rank, similarity_score, known_partner)`` for the known partner
        under which that name reaches its lowest rank.
    """
    best = {}
    # unique() keeps first-appearance order, so when two partners give the
    # same rank the partner listed first in the table is retained.
    for partner in sim_df["known_partner"].unique():
        ranked = (
            sim_df[sim_df["known_partner"] == partner]
            .sort_values("similarity_score", ascending=False)
            .reset_index(drop=True)
        )
        # Only the first (best-ranked) row per name matters. Rows whose ID
        # has no alias carry a NaN name and can never match, so drop them.
        first_hits = ranked.dropna(subset=["similar_protein_name"]).drop_duplicates(
            "similar_protein_name"
        )
        # After reset_index the index label is the 0-based position in the
        # sorted table, so rank = label + 1.
        for position, name, score in zip(
            first_hits.index,
            first_hits["similar_protein_name"],
            first_hits["similarity_score"],
        ):
            rank = int(position) + 1
            if name not in best or rank < best[name][0]:
                best[name] = (rank, float(score), partner)
    return best


def evaluate_recommendation():
    """Find the best similarity rank of every rediscovered protein.

    Reads the alias table at ``ALIAS_V11`` (protein ID -> preferred name) and
    ``rediscovery_summary.tsv`` in ``OUTPUT_DIR``. For each summary row with
    at least one rediscovered protein and a non-zero ``p2_in_v11``, loads
    ``<OUTPUT_DIR>/<protein1>/top_similars.tsv``, ranks the similar proteins
    per known partner by descending similarity score, and records for each
    rediscovered protein the known partner under which it ranks best.

    The result is written as a tab-separated file to ``RANK_FILE``. The
    header row is always written, even when no protein qualifies, so the
    file can be read back.

    Returns:
        None.
    """
    output_columns = [
        "protein1",
        "rediscovered_protein",
        "best_rank",
        "best_known_partner",
        "similarity_score",
    ]

    # Load alias mapping: protein ID -> preferred name
    alias_df = pd.read_csv(ALIAS_V11, sep="\t")
    id_to_name = dict(zip(alias_df["protein_external_id"], alias_df["preferred_name"]))

    # Load rediscovery summary
    summary_df = pd.read_csv(os.path.join(OUTPUT_DIR, "rediscovery_summary.tsv"), sep="\t")

    results = []

    for _, row in summary_df.iterrows():
        p1 = row["protein1"]
        if row["# rediscovered"] == 0 or row["p2_in_v11"] == 0:
            continue

        rediscovered = row["rediscovered_proteins"]
        if pd.isna(rediscovered) or rediscovered.strip() == "":
            continue

        # Skip blank tokens produced by stray commas; they cannot match a name.
        rediscovered_list = [p.strip() for p in rediscovered.split(",") if p.strip()]

        sim_file = os.path.join(OUTPUT_DIR, p1, "top_similars.tsv")
        if not os.path.exists(sim_file):
            print(f"Missing file: {sim_file}")
            continue

        df = pd.read_csv(sim_file, sep="\t")
        df["similar_protein_name"] = df["similar_protein"].map(id_to_name)

        # The ranking depends only on the similarity table, so compute it
        # once per protein1 rather than once per rediscovered protein.
        best_hits = _best_hits_by_name(df)

        for rediscovered_protein in rediscovered_list:
            hit = best_hits.get(rediscovered_protein)
            if hit is None:
                continue
            best_rank, best_score, best_partner = hit
            results.append({
                "protein1": p1,
                "rediscovered_protein": rediscovered_protein,
                "best_rank": best_rank,
                "best_known_partner": best_partner,
                "similarity_score": best_score,
            })

    # Save final result; explicit columns keep the header when results is empty.
    out_df = pd.DataFrame(results, columns=output_columns)
    out_df.to_csv(RANK_FILE, sep="\t", index=False)
    print(f"Saved detailed ranks to: {RANK_FILE}")

# Script entry point: run the evaluation only when this file is executed
# directly (not when it is imported), then print a completion message.
if __name__ == "__main__":
    evaluate_recommendation()
    print("Recommendation evaluation completed.")