import os

import pandas as pd

from config import *

def prepare_v11_named():
    """Annotate STRING v11 interactions with protein names and save them as TSV.

    Reads the ID-to-name table at ``ALIAS_V11`` and the interaction table at
    ``V11_FILE`` (both tab-separated), adds ``protein1_name`` and
    ``protein2_name`` columns, drops every interaction for which either ID
    has no name, and writes the result to ``{OUTPUT_DIR}/v11_named.tsv``.
    ``ALIAS_V11``, ``V11_FILE`` and ``OUTPUT_DIR`` are module-level settings.

    The output directory is created if it does not exist yet.

    Raises:
        KeyError: If the alias table lacks the ``protein_external_id`` or
            ``preferred_name`` column.
    """
    print("Loading STRING v11 interactions and aliases...")

    # Load mapping ID → name (if an ID occurs more than once, the last row wins)
    aliases = pd.read_csv(ALIAS_V11, sep="\t")
    id_to_name = dict(zip(aliases["protein_external_id"], aliases["preferred_name"]))

    # Load v11 interactions; header=0 discards the file's own header row and
    # `names` supplies the column names used below.
    v11 = pd.read_csv(V11_FILE, sep="\t", names=["protein1", "protein2", "label"], header=0)

    # Map to names; IDs absent from the mapping become NaN
    v11["protein1_name"] = v11["protein1"].map(id_to_name)
    v11["protein2_name"] = v11["protein2"].map(id_to_name)

    # Remove rows with missing names
    v11 = v11.dropna(subset=["protein1_name", "protein2_name"])
    print(f"Mapped {len(v11)} interactions with valid names.")

    # Save output; create the target directory first because to_csv does not
    # create missing parent directories and would raise OSError instead.
    out_path = f"{OUTPUT_DIR}/v11_named.tsv"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    v11.to_csv(out_path, sep="\t", index=False)
    print(f"[prepare_v11_named] Saved: {out_path}")
