import argparse
import logging
from pathlib import Path

from kge.preprocessing.manager import PreprocessingManager


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the ID-mapping script.

    Returns:
        Namespace with ``datasets`` (one or more dataset names, required),
        ``data_folder`` and ``output_folder`` (both ``Path`` objects that
        fall back to ``data/src`` and ``data/processed`` respectively).
    """
    cli = argparse.ArgumentParser(description="Run ID mapping for multiple datasets")
    cli.add_argument(
        "--datasets",
        nargs="+",
        type=str,
        required=True,
        help="Names of datasets to process",
    )

    # The two folder options differ only in flag, default and help text.
    folder_options = (
        (
            "--data-folder",
            Path("data", "src"),
            "Base directory containing dataset folders",
        ),
        (
            "--output-folder",
            Path("data", "processed"),
            "Base directory to save the ID-mapped data files",
        ),
    )
    for flag, fallback, description in folder_options:
        cli.add_argument(flag, type=Path, default=fallback, help=description)

    return cli.parse_args()


def main() -> None:
    """Run ID mapping for every dataset named on the command line.

    For each dataset name, the input is taken from ``<data-folder>/<name>``
    and the mappings are written to ``<output-folder>/<name>``, which is
    created if it does not exist. A failure while processing one dataset is
    logged with its traceback and does not stop the remaining datasets; the
    names of the failed datasets are reported in a final summary message.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
    )
    # basicConfig() does nothing when the root logger already has handlers,
    # so the level is also set explicitly.
    logging.getLogger().setLevel(logging.INFO)

    args = parse_args()

    preprocessor = PreprocessingManager()
    failed = []

    for dataset_name in args.datasets:
        try:
            logging.info("\nProcessing dataset: %s", dataset_name)
            dataset_path = args.data_folder / dataset_name
            output_path = args.output_folder / dataset_name

            # Create output directory
            output_path.mkdir(parents=True, exist_ok=True)

            # Run ID mapping
            logging.info("Creating ID mappings in %s", output_path)
            preprocessor.create_mappings(dataset_path, output_path)

            logging.info("Completed processing %s", dataset_name)
        except Exception as e:
            # Best-effort batch: record the failure and move on to the next
            # dataset instead of aborting the whole run.
            logging.exception("Error processing dataset %s: %s", dataset_name, e)
            failed.append(dataset_name)

    if failed:
        # Do not claim success when some datasets could not be processed.
        logging.warning(
            "\nFinished with errors: %d of %d dataset(s) failed: %s",
            len(failed),
            len(args.datasets),
            ", ".join(failed),
        )
    else:
        logging.info("\nAll datasets processed!")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
