import argparse
import glob
import os
import soundfile as sf

import numpy as np
from tqdm import tqdm

from g_lfcc import linear_fbank, extract_lfcc
from utils import utils

# Dataset splits processed by this script; each name is used both as a
# sub-directory of the dataset root and as the prefix of the manifest files
# ("<split>_wav.txt" / "<split>_feats.txt").
MODES = ["train", "dev", "eval"]

# Maps each source sub-directory name to the integer class label written
# into the manifests. "real" is label 0; the remaining keys look like
# vocoder names (presumably the generators of the spoofed audio).
LABEL_DICT = {
    "real": 0,
    "parallel_wavegan": 1,
    "hifigan": 2,
    "mb_melgan": 3,
    "style_melgan": 4,
}


def annotate(in_dir, manifest_save_dir, mode, cpmanif=False, label=0):
    """Append one "<wav path><TAB><label>" line per audio file to a manifest.

    The list of audio files for ``in_dir`` is taken from the first source
    that exists, in this order:

    1. ``<in_dir>/manifest.txt`` -- one path per line;
    2. ``<in_dir>/wav.scp`` -- ``<utterance id> <path>`` per line;
    3. every ``*.wav`` file directly inside ``in_dir`` (non-recursive).

    Backslashes in paths are converted to forward slashes before writing.

    Args:
        in_dir: Directory holding the manifest / scp file or the wav files.
        manifest_save_dir: Directory that receives ``<mode>_wav.txt``; it is
            created when missing.
        mode: Split name used as the prefix of the output file name.
        cpmanif: Unused; kept so existing callers keep working.
        label: Class label written next to every path.

    Note:
        The output file is opened in append mode so that several calls (one
        per class directory) accumulate into the same manifest. Re-running
        the script therefore appends duplicate entries unless the old
        manifest is removed first.
    """
    os.makedirs(manifest_save_dir, exist_ok=True)
    manif_path = os.path.join(in_dir, "manifest.txt")
    scp_path = os.path.join(in_dir, "wav.scp")

    if os.path.exists(manif_path):
        print(f"Found a manifest file @ {in_dir}!")
        with open(manif_path, "r") as src:
            # Blank lines would otherwise become entries with an empty path.
            wav_list = [line.strip() for line in src if line.strip()]
    elif os.path.exists(scp_path):
        print(f"Found a wav.scp file @ {in_dir}; that works too!")
        wav_list = []
        with open(scp_path, "r") as src:
            for line in src:
                line = line.strip()
                if not line:
                    continue
                # Split only once, on any whitespace, so that paths that
                # contain spaces are kept intact.
                fields = line.split(None, 1)
                if len(fields) < 2:
                    print(f"Skipping malformed wav.scp line: {line}")
                    continue
                wav_list.append(fields[1])
    else:
        print(f"No manifest found @ {in_dir}; traversing the directory...")
        in_dir = os.path.abspath(in_dir).replace("\\", "/")
        # Escape glob metacharacters in the directory name and sort the
        # matches so the manifest order is reproducible across runs.
        wav_list = sorted(glob.glob(f"{glob.escape(in_dir)}/*.wav"))

    out_path = os.path.join(manifest_save_dir, f"{mode}_wav.txt")
    with open(out_path, "a") as fid:
        for entry in wav_list:
            fp = entry.replace("\\", "/")
            fid.write(f"{fp}\t{label}\n")


def FE_pipeline(feature_list, feats_save_to, manifest_save_to, mode, overwrite=False):
    """Extract features for every listed wav file and write a feature manifest.

    For each row, features are computed with ``utils.feature_extraction`` and
    stored as ``<feats_save_to>/<mode>/<model_name>/<stem>-<model_name>.npy``.
    A line "<npy path><TAB><label>" is written to
    ``<manifest_save_to>/<mode>_feats.txt`` for every file whose features
    exist afterwards (freshly extracted or already on disk).

    Args:
        feature_list: Sized sequence of "<wav path><TAB><label>" strings with
            forward-slash separated paths.
        feats_save_to: Root directory for the saved ``.npy`` files.
        manifest_save_to: Directory receiving ``<mode>_feats.txt``; created
            when missing.
        mode: Split name; used as sub-directory and manifest file prefix.
        overwrite: When true, re-extract features even if the ``.npy`` file
            already exists.

    Note:
        The feature manifest is opened in write mode, so an existing one for
        the same ``mode`` is replaced. Files whose extraction raises
        ``IOError`` are reported and left out of the manifest.
    """
    create_root = os.path.join(feats_save_to, mode)
    os.makedirs(create_root, exist_ok=True)
    # The manifest directory is not guaranteed to exist; an empty string
    # means the current directory, which needs no creation.
    if manifest_save_to:
        os.makedirs(manifest_save_to, exist_ok=True)

    manifest_path = os.path.join(manifest_save_to, f"{mode}_feats.txt")
    with open(manifest_path, 'w') as fid:
        for row in tqdm(feature_list, total=len(feature_list)):
            if not row.strip():
                # Blank manifest lines carry no path/label pair.
                continue
            fields = row.split('\t')
            filepath = fields[0]
            model_id = fields[1]
            path_parts = filepath.split('/')
            # NOTE(review): the name is taken from the third-to-last path
            # component, i.e. this expects <model>/<something>/<file> --
            # confirm this matches the layout the wav manifest was built from.
            model_name = path_parts[-3]
            # NOTE(review): any component containing the substring "si" is
            # treated as bona fide audio; presumably a naming convention of
            # the real-speech directories -- confirm.
            if "si" in model_name:
                model_name = "real"
            # splitext drops the real extension instead of assuming it is
            # exactly four characters long.
            stem = os.path.splitext(path_parts[-1])[0]
            create_folders = os.path.join(create_root, model_name)
            os.makedirs(create_folders, exist_ok=True)
            dest_filepath = os.path.join(create_folders, f"{stem}-{model_name}.npy")
            if overwrite or not os.path.exists(dest_filepath):
                try:
                    extract_feats = utils.feature_extraction(filepath)
                except IOError:
                    print('!!!error' + filepath)
                    continue
                np.save(dest_filepath, extract_feats)
            fid.write(f"{dest_filepath}\t{model_id}\n")


def main():
    """Command-line entry point for dataset annotation and feature extraction.

    ``--mode annotate`` walks ``<rootdir>/<split>/<class>`` for every split in
    ``MODES`` and every class in ``LABEL_DICT`` and appends the found wav
    files to ``<outdir>/<split>_wav.txt``.

    ``--mode extract`` reads ``<rootdir>/<split>_wav.txt`` for every split,
    extracts features into ``dump/exp`` and writes
    ``<outdir>/<split>_feats.txt``. Note that in this mode ``--rootdir`` must
    point at the directory that holds the wav manifests.
    """
    parser = argparse.ArgumentParser(
        description="Preprocessing"
    )
    parser.add_argument(
        "--mode",
        default="extract",
        type=str,
        # Reject unknown modes up front instead of silently doing nothing.
        choices=("annotate", "extract"),
        help="execution mode (=['annotate', 'extract'])",
    )
    parser.add_argument(
        "--rootdir",
        default="",
        type=str,
        help="root directory of the dataset or the directory containing the manifest"
    )
    parser.add_argument(
        "--outdir",
        default="meta/exp3",
        type=str,
        help="output directory"
    )
    args = parser.parse_args()

    if args.mode == "annotate":
        for mode in MODES:
            for subdir, label in LABEL_DICT.items():
                print(f"Annotating {mode} files for {subdir}...")
                annotate(os.path.join(args.rootdir, mode, subdir), args.outdir, mode, label=label)
    elif args.mode == "extract":
        for mode in MODES:
            manif_path = os.path.join(args.rootdir, f"{mode}_wav.txt")
            with open(manif_path, "r") as manif:
                # Drop blank lines; they carry no path/label pair.
                wav_list = [line.strip() for line in manif if line.strip()]
            FE_pipeline(wav_list, "dump/exp", args.outdir, mode)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
