#!/usr/bin/env python3
"""
Small script to explore the MBPP dataset using the MBPPAdapter and show split information.
"""

from dataset_adapters import MBPPAdapter
from datasets import load_dataset


def explore_original_splits():
    """Print the size and sorted task IDs of every upstream MBPP split.

    Each of the ``train``, ``validation``, ``prompt`` and ``test`` splits is
    requested separately through ``load_dataset`` from the
    ``google-research-datasets/mbpp`` repository (``full`` configuration).
    If anything goes wrong while handling a split, the error is printed and
    the loop carries on with the next split.
    """
    print("=== ORIGINAL MBPP SPLITS ===")

    for split_name in ("train", "validation", "prompt", "test"):
        heading = split_name.upper()
        print(f"\n--- {heading} SPLIT ---")
        try:
            split_data = load_dataset(
                "google-research-datasets/mbpp", "full", split=split_name
            )
            ids = [row["task_id"] for row in split_data]
            print(f"Count: {len(ids)}")
            # Sort only after the count has been reported, so a sorting
            # failure still leaves the count on screen.
            ids.sort()
            print(f"Task IDs: {ids}")
        except Exception as err:
            # Best-effort exploration: report the problem and keep going.
            print(f"Error loading {split_name}: {err}")


def explore_adapter_splits():
    """Print split sizes, task IDs and a sample problem as seen via MBPPAdapter.

    For each of the adapter's ``train``, ``valid`` and ``test`` splits this
    prints the number of examples and the sorted values returned by
    ``extract_problem_name``. When the split is non-empty it also prints the
    first example's task ID and a preview of its problem description, cut to
    at most 100 characters. The trailing ``...`` is added only when the
    description was actually shortened.

    If anything goes wrong while handling a split, the error is printed and
    the loop carries on with the next split.
    """
    preview_limit = 100  # maximum number of description characters to show

    print("\n\n=== MBPP ADAPTER SPLITS ===")

    adapter = MBPPAdapter()

    for split_name in ("train", "valid", "test"):
        print(f"\n--- ADAPTER {split_name.upper()} SPLIT ---")
        try:
            dataset = adapter.load_dataset(split_name)
            task_ids = [adapter.extract_problem_name(example) for example in dataset]
            print(f"Count: {len(task_ids)}")
            print(f"Task IDs: {sorted(task_ids)}")

            if len(dataset) > 0:
                example = dataset[0]
                print("\nFirst example:")
                print(f"  Task ID: {adapter.extract_problem_name(example)}")

                description = adapter.extract_problem_description(example)
                # Mark the preview as truncated only when text was cut off;
                # an unconditional "..." misrepresents short descriptions.
                ellipsis = "..." if len(description) > preview_limit else ""
                print(f"  Problem: {description[:preview_limit]}{ellipsis}")

        except Exception as e:
            # Best-effort exploration: report the problem and keep going.
            print(f"Error loading adapter {split_name}: {e}")


def main():
    """Script entry point: print a banner, then show the adapter's splits.

    NOTE: only ``explore_adapter_splits`` is run; ``explore_original_splits``
    is not called from here.
    """
    banner = "Exploring MBPP dataset splits and task IDs...\n"
    print(banner)
    explore_adapter_splits()


# Run the exploration only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
