import os
import csv
import json


def read_csv(fp):
    """
    Read a CSV file into a list of row dicts, converting label to int

    Args:
        fp: path of the CSV file. Its first row is used as the header.

    Returns:
        A list with one dict per data row, keyed by column name. Values are
        strings, except for the "label" column (when the file has one),
        which is converted to int.

    Raises:
        ValueError: if a "label" value is not a valid integer literal.
    """
    # newline="" leaves line-ending handling to the csv module, as its
    # documentation requires. Without it, universal-newline translation
    # rewrites CRLF sequences inside quoted fields before csv sees them.
    with open(fp, newline="") as f:
        reader = csv.DictReader(f)
        data = []
        for item in reader:
            if "label" in item:
                item["label"] = int(item["label"])
            data.append(item)
    return data


def load_de(dataset_name):
    """
    Load differential expression dataset

    Reads data/{dataset_name}-de.csv with read_csv and groups its rows by
    the value of their "split" column.

    Args:
        dataset_name: the part of the file name that precedes "-de.csv".

    Returns:
        A dict with the keys "train" and "test", each holding the list of
        rows belonging to that split. The "split" entry is removed from
        every row.

    Raises:
        Exception: if the CSV file does not exist. The message lists the
            dataset names available in the data directory.
        KeyError: if a row has no "split" column, or its value is neither
            "train" nor "test".
    """
    suffix = "-de.csv"
    fp = f"data/{dataset_name}{suffix}"
    if not os.path.exists(fp):
        # Match on the full suffix and strip exactly that suffix, so that
        # unrelated files (e.g. "x-code.csv") are not listed and dataset
        # names containing "-" are reported intact. Sorted because the
        # order returned by os.listdir is arbitrary.
        options = sorted(name[: -len(suffix)] for name in os.listdir("data")
                         if name.endswith(suffix))
        raise Exception(f"Invalid dataset: {dataset_name}. Options: {options}")
    # organize by split
    data = {"train": [], "test": []}
    for item in read_csv(fp):
        # pop() both reads the split and removes it from the row
        data[item.pop("split")].append(item)
    return data


def load_dir(dataset_name):
    """
    Load direction of change dataset

    Reads data/{dataset_name}-dir.csv with read_csv and groups its rows by
    the value of their "split" column.

    Args:
        dataset_name: the part of the file name that precedes "-dir.csv".

    Returns:
        A dict with the keys "train" and "test", each holding the list of
        rows belonging to that split. The "split" entry is removed from
        every row.

    Raises:
        Exception: if the CSV file does not exist. The message lists the
            dataset names available in the data directory.
        KeyError: if a row has no "split" column, or its value is neither
            "train" nor "test".
    """
    suffix = "-dir.csv"
    fp = f"data/{dataset_name}{suffix}"
    if not os.path.exists(fp):
        # Match on the full suffix and strip exactly that suffix, so that
        # unrelated files (e.g. "x-subdir.csv") are not listed and dataset
        # names containing "-" are reported intact. Sorted because the
        # order returned by os.listdir is arbitrary.
        options = sorted(name[: -len(suffix)] for name in os.listdir("data")
                         if name.endswith(suffix))
        raise Exception(f"Invalid dataset: {dataset_name}. Options: {options}")
    # organize by split
    data = {"train": [], "test": []}
    for item in read_csv(fp):
        # pop() both reads the split and removes it from the row
        data[item.pop("split")].append(item)
    return data


def load_gse(dataset_name, skip_empty=True):
    """
    Load a gse dataset from data/k562_{dataset_name}-gse.json

    Args:
        dataset_name: selects the file to load. The error message names
            "pert" and "gene" as the options.
        skip_empty: when true, entries whose "label" is null are dropped.

    Returns:
        The parsed JSON document. With skip_empty it is a list holding only
        the entries whose "label" is not None; that path expects the
        document to be a list of objects that each have a "label" key.

    Raises:
        Exception: if the JSON file does not exist.
    """
    fp = f"data/k562_{dataset_name}-gse.json"
    if not os.path.exists(fp):
        options = ["pert", "gene"]
        raise Exception(f"Invalid dataset: {dataset_name}. Options: {options}")
    # JSON text is UTF-8 by specification; decode it explicitly instead of
    # relying on the locale's default encoding.
    with open(fp, encoding="utf-8") as f:
        data = json.load(f)
    if skip_empty:
        data = [x for x in data if x["label"] is not None]
    return data


