



def evaluate(cfg, model):
    """Run all evaluation routines, log the resulting metrics, and save them as CSV.

    The metrics returned by ``eval_dci_scores``, ``eval_enco_graph``,
    ``eval_implicit_graph`` and ``eval_test_metrics`` are merged into a single
    mapping (on key clashes, later calls overwrite earlier ones). Each entry is
    logged to MLflow under the name ``eval.<key>`` and the full set is written as
    a one-row table to ``<cfg.general.exp_dir>/metrics/test_metrics.csv``.

    Args:
        cfg: Experiment configuration. This function reads
            ``cfg.eval.eval_partition`` and ``cfg.general.exp_dir``; the whole
            object is also forwarded to the evaluation helpers.
        model: Model to evaluate; passed through unchanged to the helpers.

    Returns:
        The merged metrics mapping (metric name -> value).

    Raises:
        KeyError: If the merged metrics lack a ``"causal_disentanglement"``
            entry (assuming the helpers return plain dicts).
    """

    logger.info("Starting evaluation")

    # Compute metrics
    test_metrics = eval_dci_scores(cfg, model, partition=cfg.eval.eval_partition)
    test_metrics.update(eval_enco_graph(cfg, model, partition=cfg.eval.eval_partition))
    test_metrics.update(eval_implicit_graph(cfg, model, partition=cfg.eval.eval_partition))
    test_metrics.update(eval_test_metrics(cfg, model))

    # Log results
    for key, val in test_metrics.items():
        mlflow.log_metric(f"eval.{key}", val)

    # Print DCI disentanglement score
    logger.info(
        f"Final evaluation: causal disentanglement = {test_metrics['causal_disentanglement']:.2f}"
    )

    # Store results in csv file
    # to_csv does not create missing parent folders, so make sure the target
    # directory exists; otherwise the computed metrics would be lost to an OSError.
    metrics_dir = Path(cfg.general.exp_dir) / "metrics"
    metrics_dir.mkdir(parents=True, exist_ok=True)

    # Pandas does not like scalar values, have to be iterables
    test_metrics_ = {key: [val] for key, val in test_metrics.items()}
    df = pd.DataFrame.from_dict(test_metrics_)
    df.to_csv(metrics_dir / "test_metrics.csv")

    return test_metrics