import os
import pandas as pd

# Root of the extracted Kitsune dataset (relative path, so it is resolved
# against the current working directory).
data_dir = "../../data/kitsune+network+attack+dataset/"

# Every entry found directly under the dataset root, files and sub-folders
# alike; the listing happens as soon as this module is executed.
folders = os.listdir(data_dir)

# Row offset that separates the two outputs: the script below reads only the
# first `normal_split_kitsune` rows of each capture for the normal training
# file and skips exactly that many rows when building the test file.
normal_split_kitsune = 1000000

# True  -> build the normal-only training file (train_normal_all.csv)
# False -> build the labelled test file (test_all.csv)
save_normal = True
save_path = os.path.join(
    data_dir,
    "train_normal_all.csv" if save_normal else "test_all.csv",
)


if __name__ == "__main__":
    # Build one combined CSV at `save_path` by appending rows from every
    # attack folder under `data_dir`:
    #   * save_normal=True  -> first `normal_split_kitsune` rows of each
    #     capture, all labelled 0;
    #   * save_normal=False -> the remaining rows, labelled with the
    #     folder's running index `i` wherever the labels file marks a row.
    exists_ok = False
    if os.path.exists(save_path) and not exists_ok:
        # Rows are appended below, so running against an existing file would
        # silently duplicate data.
        raise FileExistsError("save path exists, you may not want to overwrite / append to it")

    # os.listdir() returns entries in arbitrary order. The running index `i`
    # is used both as the attack label and to decide which folder to skip,
    # so iterate in sorted order to make both deterministic across machines.
    attack_folders = sorted(folders)
    print(attack_folders)

    i = 0  # running index over attack folders (skipped folder included)
    used = 0  # folders whose rows were actually written to save_path
    for att in attack_folders:
        if "." in att:
            # Entries containing a dot are not treated as attack folders
            # (this also skips previously written *.csv outputs).
            continue
        i += 1
        print("folder", i)
        if i == 4:
            # skip Mirai because it is a different network
            # NOTE(review): relies on Mirai being the 4th folder in sorted
            # order - confirm against the actual folder names.
            continue

        att_dir = os.path.join(data_dir, att)
        # e.g. "some_attack" -> "Some Attack"; the dataset/labels folders are
        # looked up under this title-cased name.
        att_title = " ".join(att.split("_")).title()
        dataset_folder = os.path.join(att_dir, att_title + "_dataset.csv")

        # Pick the data file deterministically and ignore hidden entries
        # (such as OS metadata files) that may sit next to it.
        candidates = sorted(
            name for name in os.listdir(dataset_folder) if not name.startswith(".")
        )
        if not candidates:
            raise FileNotFoundError("no dataset file found in " + dataset_folder)
        x_folder = os.path.join(dataset_folder, candidates[0])

        if save_normal:
            # Only the leading rows are read; all of them are labelled 0.
            df = pd.read_csv(x_folder, header=None, nrows=normal_split_kitsune)
            df["attack"] = 0
        else:
            y_folder = os.path.join(
                att_dir, att_title + "_labels.csv", att_title + "_labels.csv"
            )
            df = pd.read_csv(x_folder, header=None, skiprows=normal_split_kitsune)
            # NOTE(review): the header is inferred here, so the first line
            # left after skipping is consumed as the header row. This lines
            # up with the feature rows only if the labels file starts with a
            # header line - confirm for every attack that is used.
            labels = pd.read_csv(y_folder, skiprows=normal_split_kitsune).iloc[:, -1]
            # Scale the per-row label by the folder index so that each attack
            # gets its own label value; assignment aligns on the row index.
            df["attack"] = labels * i

        # append data to current save path
        df.to_csv(save_path, index=False, mode="a", header=False)
        used += 1

    print("Done, Num of Folders Used:", used)

