import joblib
import numpy as np
import pandas as pd

import cfg
from src.experiments.active_learning import conduct


def run():
    """Build the multi-label active-learning task list and run the experiment.

    Loads the embedding array from ``cfg.path_embedding`` and the metadata
    table from ``cfg.path_metadata``, selects the label columns via
    ``cfg.label_prefix`` and orders them by ascending column sum. For every
    contiguous window of label columns that passes the window-size guard
    below, one task tuple
    ``(random_seed, sampling_method, experiment, embedding, y_true, fname_dict)``
    is created per combination of ``range(cfg.random_seeds)`` and
    ``cfg.sampling_methods``. All tasks are then passed to ``conduct``
    together with a one-column frame holding the ``subset`` column renamed to
    ``cfg.get_iteration_col(0)``.
    """
    # load embeddings and metadata
    embedding = np.load(cfg.path_embedding)
    df = pd.read_csv(cfg.path_metadata)

    # one-column frame with the subset column, renamed for iteration 0
    df_subset = df[['subset']].rename(columns={'subset': cfg.get_iteration_col(0)})

    # label columns only, ordered by ascending column sum
    df_label = df.filter(like=cfg.label_prefix)
    df_label = df_label[df_label.sum().sort_values(ascending=True).index]

    # The windows below slide over df_label, so the number of labels is taken
    # from df_label itself rather than from a separately hard-coded prefix;
    # this keeps the loop bounds and the sliced frame in agreement.
    n_labels = len(df_label.columns)

    # multi-label experiment
    experiment = 'multi_label'

    # generate list of tasks
    tasks = []
    for nr_classes in range(1, n_labels + 1):
        #### TOGGLE (original marker: "MULTILABEL 1 CLASS PRIORITY") ####
        # While this guard is active, every window narrower than the full
        # label set is skipped, so only the window spanning all label columns
        # produces tasks. Comment the guard out to also generate the smaller
        # windows.
        if nr_classes < n_labels:
            continue
        ################
        for start_index in range(n_labels - nr_classes + 1):
            # contiguous window of nr_classes label columns
            df_current = df_label.iloc[:, start_index:start_index + nr_classes]

            # get y_true data
            y_true = df_current.to_numpy()

            # identify the window by a hash of its column names
            classes = df_current.columns.to_list()
            fname_dict = {'label_hash': str(joblib.hash(classes)),
                          'nr_classes': nr_classes,
                          'classes': classes}

            # one task per (seed, sampling method); seeds vary slowest
            tasks.extend(
                (random_seed, sampling_method, experiment, embedding, y_true, fname_dict)
                for random_seed in range(cfg.random_seeds)
                for sampling_method in cfg.sampling_methods
            )

    # conduct active learning experiment
    conduct(tasks, df_subset, experiment=experiment)

