import json
import os


def load_info_metadata(info_path: str) -> dict:
    """
    Load an info metadata JSON file and return its parsed content.

    Args:
        info_path: Path to the info metadata JSON file

    Returns:
        dict: The parsed JSON document (annotated as a dict; the value is
            whatever top-level JSON value the file contains)

    Raises:
        OSError: If the file cannot be opened
        json.JSONDecodeError: If the file does not contain valid JSON
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(info_path, "r", encoding="utf-8") as file:
        return json.load(file)


def load_dataset_metadata(
    dataset_metadata_path: str, dataset_metadata_key: str = None
) -> dict:
    """
    Load a dataset metadata JSON file, optionally returning one top-level entry.

    Args:
        dataset_metadata_path: Path to the dataset metadata JSON file
        dataset_metadata_key: If given, only the value stored under this
            top-level key is returned; if None, the whole document is returned

    Returns:
        dict: The parsed document, or the value found under
            dataset_metadata_key (which is returned as-is, whatever its type)

    Raises:
        OSError: If the file cannot be opened
        json.JSONDecodeError: If the file does not contain valid JSON
        KeyError: If dataset_metadata_key is given but absent from the document
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(dataset_metadata_path, "r", encoding="utf-8") as file:
        dataset_metadata = json.load(file)

    if dataset_metadata_key is None:
        return dataset_metadata
    return dataset_metadata[dataset_metadata_key]


def get_dataset_description(dataset_metadata_path: str, qid: int) -> str:
    """
    Build a plain-text description of a question and the datasets it refers to.

    Args:
        dataset_metadata_path: Path to the dataset metadata JSON file
        qid: Index of the question to show, looked up inside the first entry
            of the metadata's "queries" list

    Returns:
        str: Newline-joined text containing the question, followed by each
            dataset's name, description and the name/description of every
            column listed under its "columns" -> "raw" entry
    """
    meta = load_dataset_metadata(dataset_metadata_path)

    # Only the first group of queries is consulted; qid picks the entry in it.
    question = meta["queries"][0][qid]["question"]

    lines = [
        "##### QUESTION #####",
        f"\nQuestion: {question}\n",
        "##### DATASET DESCRIPTION #####",
        "\n### DATASETS: ###\n",
    ]

    for entry in meta["datasets"]:
        lines += [
            f"Dataset Name: {entry['name']}",
            f"Dataset Description: {entry['description']}",
            "\n### COLUMNS: ###",
        ]
        for column in entry["columns"]["raw"]:
            lines += [f"\n{column['name']}:", f"  {column['description']}"]

    return "\n".join(lines)


def get_datasets_fpaths(dataset_metadata: str) -> list:
    """
    List the file paths of the datasets named in a dataset metadata JSON file.

    Each entry's "name" is joined onto the directory that contains the
    metadata file, so the returned paths sit alongside that file.

    Args:
        dataset_metadata: Path to the dataset metadata JSON file

    Returns:
        list: One path per entry under the "datasets" key, in file order;
            empty if the key is absent

    Raises:
        OSError: If the file cannot be opened
        json.JSONDecodeError: If the file does not contain valid JSON
        KeyError: If a dataset entry has no "name" key
    """
    # Decode explicitly as UTF-8 so dataset names with non-ASCII characters
    # do not depend on the platform's default locale encoding.
    with open(dataset_metadata, "r", encoding="utf-8") as file:
        obj = json.load(file)

    metadata_parent_path = os.path.dirname(dataset_metadata)

    return [
        os.path.join(metadata_parent_path, d["name"])
        for d in obj.get("datasets", [])
    ]


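# NOTE(review): disabled legacy helper kept for reference. As written it reads
# `args`, which is not defined in the visible part of this module, and calls
# get_dataset_description without its `qid` argument; fix both before re-enabling.
# def get_load_dataset_experiment(dataset_paths):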
#     # Set up the initial experiment to load the dataset
#     load_dataset_objective = "Load the dataset and generate summary statistics. "
#     load_dataset_steps = f"1. Load the dataset(s) at {[os.path.basename(dp) for dp in dataset_paths]}.\n2. Generate summary statistics for the dataset(s)."
#     load_dataset_deliverables = (
#         "1. Dataset(s) loaded.\n2. Summary statistics generated."
#     )
#     if args.run_eda:
#         load_dataset_steps += "\n3. Perform some exploratory data analysis (EDA) on the dataset(s) to get a better understanding of the data."
#         load_dataset_deliverables += "\n3. Exploratory data analysis (EDA) performed."
#     if args.dataset_metadata_type == "blade":
#         load_dataset_objective += f"Here is the dataset metadata:\n\n{get_blade_description(args.dataset_metadata)}"
#     else:  # DiscoveryBench-style
#         load_dataset_objective += f"Here is the dataset metadata:\n\n{get_dataset_description(args.dataset_metadata)}"
#     load_dataset_experiment = {
#         "hypothesis": None,
#         "experiment_plan": {
#             "objective": load_dataset_objective,
#             "steps": load_dataset_steps,
#             "deliverables": load_dataset_deliverables,
#         },
#     }
#     return load_dataset_experiment
