import pandas as pd

from SEPAL import SEPAL_DIR, TARGETS


def _clean_links(links: pd.DataFrame) -> pd.DataFrame:
    """Return *links* without null join keys or repeated (Yago, Wikidata) pairs.

    ``DataFrame.merge`` matches null keys against each other, so a link row
    with a missing key would be joined to every target row whose key is also
    missing. Repeated pairs would duplicate target rows. Both are removed
    here, once, before any merge happens.
    """
    key_columns = ["Yago_entity", "Wikidata_entity"]
    # NOTE(review): assumes the link tables carry no other column that must
    # survive de-duplication -- confirm against the parquet schema.
    return links.dropna(subset=key_columns).drop_duplicates(subset=key_columns)


def _add_yago45_column(
    target: pd.DataFrame,
    yago4_to_wd: pd.DataFrame,
    yago45_to_wd: pd.DataFrame,
) -> pd.DataFrame:
    """Return *target* with a 'yago4.5_col_to_embed' column added.

    The Yago 4 entity in 'yago4_col_to_embed' is mapped to its Wikidata
    entity, which is then mapped to the Yago 4.5 entity. Both joins are left
    joins, so rows without a link are kept with a null value. A Wikidata
    entity linked to several Yago 4.5 entities still yields one row per link.
    """
    # Discard the result of a previous run; otherwise the rename below would
    # create a second column with the same name and the parquet write fails.
    target = target.drop(columns="yago4.5_col_to_embed", errors="ignore")
    # Yago 4 entity -> Wikidata entity
    with_wikidata = target.merge(
        yago4_to_wd,
        left_on="yago4_col_to_embed",
        right_on="Yago_entity",
        how="left",
    ).drop(columns="Yago_entity")
    # Wikidata entity -> Yago 4.5 entity
    with_yago45 = with_wikidata.merge(yago45_to_wd, on="Wikidata_entity", how="left")
    return with_yago45.rename(
        columns={"Yago_entity": "yago4.5_col_to_embed"}
    ).drop(columns="Wikidata_entity")


def main() -> None:
    """Add the Yago 4.5 entity column to every target table, in place.

    Each parquet file listed in ``TARGETS`` is read from ``SEPAL_DIR``,
    extended with 'yago4.5_col_to_embed' and written back to the same path.
    Re-running the script recomputes the column instead of failing.
    """
    yago_dir = SEPAL_DIR / "datasets/knowledge_graphs"
    yago4_to_wd = _clean_links(
        pd.read_parquet(yago_dir / "yago4/link_to_wikidata.parquet")
    )
    # The Yago 4.5 links are split over two files; use their union.
    yago45_to_wd = _clean_links(
        pd.concat(
            [
                pd.read_parquet(yago_dir / "yago4.5/link_to_wikidata.parquet"),
                pd.read_parquet(yago_dir / "yago4.5/link_to_wikidata_BW.parquet"),
            ]
        )
    )

    for target_path, target_name in TARGETS.items():
        print(f"Processing {target_name}")
        target = pd.read_parquet(SEPAL_DIR / target_path)
        target = _add_yago45_column(target, yago4_to_wd, yago45_to_wd)
        # Overwrite the original file with the extended table
        target.to_parquet(SEPAL_DIR / target_path)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
