import argparse
import ast

from impugen.utils.data import create_dataset_config


def _parse_column_list(text: str) -> list:
    """Convert the ``--categorical_columns`` argument into a list.

    The value is parsed with ``ast.literal_eval`` (literals only, no code
    execution). Every failure is re-raised as ``argparse.ArgumentTypeError``
    so argparse prints a usage error instead of a traceback; on its own,
    argparse would let e.g. ``SyntaxError`` from a truncated literal escape.

    Args:
        text: Raw command-line string, e.g. ``"['cat_col1','cat_col2']"``.

    Returns:
        The parsed columns as a list (a tuple literal is converted to a list).

    Raises:
        argparse.ArgumentTypeError: If ``text`` is not a valid Python literal
            or the literal is not a list/tuple.
    """
    try:
        parsed = ast.literal_eval(text)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as exc:
        raise argparse.ArgumentTypeError(
            f"could not parse {text!r} as a Python list literal: {exc}"
        ) from exc

    # Reject scalars, strings, dicts, ...: a bare string would otherwise be
    # forwarded and iterated character by character downstream.
    if not isinstance(parsed, (list, tuple)):
        raise argparse.ArgumentTypeError(
            f"expected a Python list of column names, "
            f"got {type(parsed).__name__}: {text!r}"
        )
    return list(parsed)


def main(argv: "list[str] | None" = None) -> None:
    """
    Main entry point for generating a YAML-like config file for a dataset.

    This script parses command-line arguments to determine the dataset name, paths
    for the training/test/holdout CSVs, and any column-specific instructions (e.g.,
    which columns to treat as categorical). It then calls `create_dataset_config`
    to build and write a configuration file, which can be used for subsequent
    tasks within the impugen framework.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When ``None`` (the default), argparse reads
            ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse when required arguments are missing or
            an argument value is invalid (exit status 2), or after ``--help``.

    Example usage:
        python script_name.py \\
            --name MyDataset \\
            --train_path /path/to/train.csv \\
            --test_path /path/to/test.csv \\
            --holdout_path /path/to/holdout.csv \\
            --target_column target \\
            --categorical_columns "['cat_col1','cat_col2']" \\
            --output_path /path/to/output.yaml \\
            --auto
    """
    parser = argparse.ArgumentParser(
        description="Generate a YAML config file for a new dataset."
    )
    parser.add_argument(
        "--name",
        required=True,
        help="Name of the new dataset."
    )
    parser.add_argument(
        "--target_column",
        required=False,
        default=None,
        help="Name of the target column. Defaults to the last column of the training CSV."
    )
    parser.add_argument(
        "--categorical_columns",
        required=False,
        default=[],
        # Validating converter: malformed or non-list input becomes a normal
        # argparse usage error rather than a traceback or a silent bad value.
        type=_parse_column_list,
        help="List of columns to treat as categorical (passed as a Python list)."
    )
    parser.add_argument(
        "--train_path",
        required=True,
        help="Path to the training dataset CSV file."
    )
    parser.add_argument(
        "--test_path",
        required=False,
        default=None,
        help="Path to the test dataset CSV file (optional)."
    )
    parser.add_argument(
        "--holdout_path",
        required=False,
        default=None,
        help="Path to the holdout dataset CSV file for DPIMIA (optional)."
    )
    parser.add_argument(
        "--output_path",
        required=True,
        help="Output path for the YAML config file."
    )
    parser.add_argument(
        "--auto",
        action='store_true',
        help="Enable automatic dtype configuration (detect 'numerical' columns with low cardinality as categorical)."
    )

    # argv=None makes argparse fall back to sys.argv[1:], so the script
    # behaves exactly as before when invoked from the command line.
    args = parser.parse_args(argv)

    create_dataset_config(
        save_path=args.output_path,
        name=args.name,
        train_csv_path=args.train_path,
        test_csv_path=args.test_path,
        holdout_csv_path=args.holdout_path,
        target_column=args.target_column,
        categorical_columns=args.categorical_columns,
        auto=args.auto
    )


# Run the command-line interface only when this file is executed as a script;
# importing the module does not trigger argument parsing.
if __name__ == "__main__":
    main()
