import pandas as pd

from SEPAL import SEPAL_DIR, TARGETS


def _add_freebase_column(target, yago4_to_fb):
    """Return a copy of ``target`` with a 'freebase_col_to_embed' column.

    ``target`` is left-merged with ``yago4_to_fb`` by matching the target's
    'yago4_col_to_embed' column against the link table's 'Yago_entity'
    column. The link table's 'Freebase_entity' column is kept under the name
    'freebase_col_to_embed'; rows of ``target`` without a match get a missing
    value there.

    NOTE(review): if the link table holds several rows for the same
    'Yago_entity', the left merge repeats the matching target rows -- confirm
    the link table is unique on that column.
    """
    # The target files are overwritten in place, so on a second run the column
    # is already present. Merging again would then produce two columns named
    # 'freebase_col_to_embed', which cannot be written back to parquet.
    target = target.drop(columns="freebase_col_to_embed", errors="ignore")
    merged = target.merge(
        yago4_to_fb,
        left_on="yago4_col_to_embed",
        right_on="Yago_entity",
        how="left",
    )
    # 'Yago_entity' duplicates 'yago4_col_to_embed' after the merge.
    return merged.drop(columns="Yago_entity").rename(
        columns={"Freebase_entity": "freebase_col_to_embed"}
    )


def main():
    """Add a 'freebase_col_to_embed' column to every dataset in ``TARGETS``.

    Loads the Yago4-to-Freebase link table once, then, for each path listed
    in ``TARGETS`` (relative to ``SEPAL_DIR``), reads the parquet file,
    attaches the Freebase identifiers and overwrites the file in place.
    Running the script again refreshes the column instead of duplicating it.
    """
    # Load link between Freebase and Yago4
    yago4_to_fb = pd.read_parquet(
        SEPAL_DIR / "datasets/knowledge_graphs/yago4/link_to_freebase.parquet"
    )

    for target_path, target_name in TARGETS.items():
        print(f"Processing {target_name}")
        # Load target
        target = pd.read_parquet(SEPAL_DIR / target_path)
        target = _add_freebase_column(target, yago4_to_fb)
        # Save target (overwrites the file that was just read)
        target.to_parquet(SEPAL_DIR / target_path)


# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
