import pandas as pd
from config import *
from utils import *
import os

def _load_id2name(path, id_column):
    """Build an ID -> preferred-name lookup from a tab-separated alias table.

    Args:
        path: Path to a TSV file whose header row contains ``id_column`` and
            ``preferred_name``.
        id_column: Header of the column that holds the protein identifiers.

    Returns:
        A dict mapping each identifier to its preferred name. If an
        identifier occurs more than once, the last row wins.
    """
    # Only the two columns used below are parsed; any other columns in the
    # table are skipped to save memory.
    table = pd.read_csv(path, sep="\t", usecols=[id_column, "preferred_name"])
    return dict(zip(table[id_column], table["preferred_name"]))


def _load_pairs(path):
    """Read the first two columns of a headerless TSV as protein1/protein2."""
    return pd.read_csv(
        path,
        sep="\t",
        usecols=[0, 1],
        names=["protein1", "protein2"],
        header=None,
    )


def main():
    """Write the protein pairs found in the v12 table but not in v11.

    Reads the two alias tables and the two pair tables, normalizes both pair
    tables with ``normalize_pairs``, keeps the v12 pairs that have no match
    in v11, attaches preferred names and writes the result to
    ``OUTPUT_DIR/novel_pairs.tsv``.

    The input paths, ``OUTPUT_DIR`` and ``normalize_pairs`` are not defined in
    this file; presumably they come from the ``config`` / ``utils`` star
    imports -- verify there.
    """
    # Load aliases for names (the ID column is named differently per version).
    id2name_v11 = _load_id2name(ALIAS_V11, "protein_external_id")
    id2name_v12 = _load_id2name(ALIAS_V12, "#string_protein_id")

    # Load v11 and v12, then normalize to unordered pairs so that (A, B) and
    # (B, A) compare equal in the merge below.
    # NOTE(review): assumes normalize_pairs returns a frame that still has the
    # "protein1"/"protein2" columns -- confirm in utils.
    v11_norm = normalize_pairs(_load_pairs(V11_FILE), "protein1", "protein2")
    v12_norm = normalize_pairs(_load_pairs(V12_IS_FILE), "protein1", "protein2")

    # Difference: a left merge with indicator marks v12 rows that have no
    # counterpart in v11 as "left_only".
    merged = v12_norm.merge(
        v11_norm, on=["protein1", "protein2"], how="left", indicator=True
    )
    novel = merged[merged["_merge"] == "left_only"].drop(columns="_merge")

    # Map to names: prefer the v12 name, fall back to the v11 name; IDs found
    # in neither table stay NaN.
    for id_col in ("protein1", "protein2"):
        ids = novel[id_col]
        novel[f"{id_col}_name"] = ids.map(id2name_v12).fillna(ids.map(id2name_v11))

    # Save. Create the output directory first so a fresh checkout does not
    # fail at the very last step; an empty OUTPUT_DIR means the current
    # directory, which os.makedirs would reject.
    if OUTPUT_DIR:
        os.makedirs(OUTPUT_DIR, exist_ok=True)
    out_path = os.path.join(OUTPUT_DIR, "novel_pairs.tsv")
    novel.to_csv(out_path, sep="\t", index=False)
    print(f"Saved novel interactions: {len(novel)} → {out_path}")

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()