# Validates the format of a competition submission.

import pandas as pd
from pathlib import Path
from mlebench.registry import registry, Competition

def read_csv(*args, **kwargs) -> pd.DataFrame:
    """Read a CSV into a DataFrame, defaulting to round-trip float parsing.

    Thin wrapper around ``pandas.read_csv``. All positional and keyword
    arguments are forwarded unchanged, except that
    ``float_precision="round_trip"`` is supplied whenever the caller does not
    pass ``float_precision`` explicitly.

    Returns:
        The parsed DataFrame, or an empty ``DataFrame`` when pandas raises
        ``EmptyDataError`` for the input.
    """
    # Copy so the caller-supplied mapping is never touched; an explicit
    # caller value for float_precision wins over the default.
    options = dict(kwargs)
    options.setdefault("float_precision", "round_trip")

    try:
        return pd.read_csv(*args, **options)
    except pd.errors.EmptyDataError:
        return pd.DataFrame()

def is_dataset_prepared(competition: Competition, grading_only: bool = False) -> bool:
    """Report whether the competition's data directories exist and are non-empty.

    The private directory is always checked. The public directory is checked
    first, but only when ``grading_only`` is false. A directory passes when it
    exists and contains at least one entry; the contents themselves are not
    inspected.

    Args:
        competition: Object exposing ``public_dir`` and ``private_dir`` paths.
        grading_only: When true, skip the public directory checks.

    Returns:
        ``True`` if every checked directory passes. Otherwise ``False``, after
        printing the reason for the first failing check.
    """
    public_dir = competition.public_dir
    private_dir = competition.private_dir

    # Each entry: (directory, message if missing, message if empty), listed in
    # the order the checks must run.
    required = [
        (private_dir, "Private directory does not exist.", "Private directory is empty."),
    ]
    if not grading_only:
        required.insert(
            0,
            (public_dir, "Public directory does not exist.", "Public directory is empty."),
        )

    for directory, missing_msg, empty_msg in required:
        if not directory.is_dir():
            print(missing_msg)
            return False
        if not any(directory.iterdir()):
            print(empty_msg)
            return False

    return True

def validate_submission(submission: Path, competition: Competition) -> tuple[bool, str]:
    """Check a submission by running the competition's grader on it.

    The submission must be an existing file with a ``.csv`` suffix
    (case-insensitive). It is then loaded together with the competition's
    answers file and both are passed to ``competition.grader.grade_fn``. The
    grader's return value is discarded; only whether it raised matters.

    Args:
        submission: Path to the submission file.
        competition: Competition whose grader and answers are used.

    Returns:
        ``(is_valid, message)``. ``is_valid`` is ``False`` when the file is
        missing, is not a CSV, or when reading the CSVs or running the grader
        raises; ``message`` explains the outcome.

    Raises:
        ValueError: If the competition's dataset is not prepared for grading.
    """
    if not submission.is_file():
        return False, f"Submission invalid! Submission file {submission} does not exist."

    if submission.suffix.lower() != ".csv":
        return False, "Submission invalid! Submission file must be a CSV file."

    dataset_ready = is_dataset_prepared(competition, grading_only=True)
    if not dataset_ready:
        raise ValueError(
            f"Dataset for competition `{competition.id}` is not prepared! "
            f"Please run `mlebench prepare -c {competition.id}` to prepare the dataset."
        )

    try:
        grade = competition.grader.grade_fn
        submission_df = read_csv(submission)
        # competition.answers is a Path to the answers file.
        answers_df = read_csv(competition.answers)
        grade(submission_df, answers_df)
    except Exception as err:
        # Any failure while loading or grading is reported as an invalid
        # submission rather than propagated.
        failure = (
            f"Submission invalid! The following error occurred when running the competition grader:\n{err}"
        )
        return False, failure
    else:
        return True, "Submission is valid."

if __name__ == "__main__":
    # Command-line entry point: validate the submission.csv stored for the
    # competition id given as the first argument, and print the outcome.
    import sys

    # Submissions are looked up at <root>/<competition_id>/submission.csv.
    competitions_root = Path("experiment/mlebench/competitions")

    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python extracted_validator.py <competition_id>")
        sys.exit(1)

    competition_id = cli_args[0]
    submission_file = competitions_root / competition_id / "submission.csv"

    try:
        competition = registry.get_competition(competition_id)
        is_valid, message = validate_submission(submission_file, competition)
        print(f"Valid: {is_valid}")
        print(f"Message: {message}")
    except Exception as e:
        # Lookup or validation failures are printed instead of producing a
        # traceback.
        print(f"Error: {e}")
