import os
import pandas as pd
from scipy.io import arff
from os.path import join
import numpy as np
import time
import re


def get_type(type):
    """Translate an attribute type name into the name written to ARFF headers.

    Only ``"numeric"`` is recognised, and it maps to ``"real"``. Any other
    value yields ``None``.
    """
    return "real" if type == "numeric" else None


def write_arff_file(arff_file_name, data, meta, output_path, labels):
    """Write ``data`` as an ARFF file named ``arff_file_name`` in ``output_path``.

    Args:
        arff_file_name: Name of the file to create inside ``output_path``.
        data: Object exposing ``iterrows()`` (e.g. a pandas ``DataFrame``);
            every row becomes one comma-separated line of the ``@data``
            section.
        meta: Object exposing ``name``, ``names()`` and ``types()``, used to
            build the ``@relation`` and ``@attribute`` header lines.
        output_path: Directory in which the file is written.
        labels: Iterable of class labels convertible to ``int``; they are
            sorted numerically and listed for the ``class`` attribute.
    """
    # Query the metadata once instead of on every loop iteration.
    attr_names = meta.names()
    attr_types = meta.types()

    # The context manager guarantees the file is flushed and closed, even if
    # writing fails half-way through.
    with open(join(output_path, arff_file_name), 'w+') as new_file:
        new_file.write("@relation {0}\n".format(meta.name))
        for attr, attr_type in zip(attr_names, attr_types):
            if attr != "class":
                new_file.write("@attribute {0} {1}\n".format(attr, get_type(attr_type)))
            else:
                # The class attribute is declared as a set of its labels.
                sorted_labels = sorted(map(int, labels))
                new_file.write("@attribute {0} {{".format(attr))
                new_file.write("{0}".format(",".join(map(str, sorted_labels))))
                new_file.write("}\n")
        new_file.write("@data\n")
        new_file.writelines(
            ",".join(map(str, row)) + "\n" for _, row in data.iterrows()
        )


def read_params(file_path):
    """Load the CSV at ``file_path`` with pandas.

    Returns the parsed ``DataFrame``, or an empty list when ``file_path``
    is not an existing file.
    """
    if not os.path.isfile(file_path):
        return []
    return pd.read_csv(file_path)


def read_lines(file_path):
    """Read a text file into a NumPy array holding one string per line.

    Line terminators are stripped. When ``file_path`` is not an existing
    file, an empty list is returned instead.
    """
    if not os.path.isfile(file_path):
        return []

    # Use a context manager so the handle is closed as soon as we are done.
    with open(file_path, 'r') as handle:
        return np.array(handle.read().splitlines())


def read_results(results_file):
    """Parse a tab-separated ``.results`` file.

    The first line holds the number of found clusters and the dimension of
    the data. The lines after the first ``found_clusters + 1`` lines are
    read as a table, of which only the first two columns are kept (per the
    original notes: data id and winner id).

    Args:
        results_file: Path of the file to read.

    Returns:
        A tuple ``(data_n_winner, found_clusters, dim)``. ``data_n_winner``
        is a list of two-element rows, or an empty list when the file has no
        lines beyond the first ``found_clusters + 1``.
    """
    # Read the raw lines inside a context manager so the handle is closed.
    with open(results_file, 'r') as handle:
        lines = handle.readlines()

    # first line of results contains the number of found
    # clusters and the dimension of the data
    first_line = lines[0].split("\t")
    found_clusters = int(first_line[0])
    dim = int(first_line[1])

    data_n_winner = []

    # Only parse the trailing table when there is something after the
    # header line and the per-cluster lines.
    if found_clusters + 1 < len(lines):
        table = pd.read_csv(results_file, sep="\t", skiprows=found_clusters + 1, header=None)
        data_n_winner = table.iloc[:, :2].values.tolist()

    return data_n_winner, found_clusters, dim


def read_header(files, folder, header_rows=5, save_parameters=True):
    """Collect header tables and dataset/fold/metric names from result CSVs.

    For every entry of ``files`` whose name contains ``".csv"``, the first
    ``header_rows`` rows are read, transposed, labelled with their first row,
    stripped of rows containing NaN and converted to ``float64``. The first
    CSV that yields column names is additionally used to derive the dataset
    names, the fold suffixes (``_x<digit>_k<digit>``) and the metric names
    (the text after the first ``"."`` of each column name).

    Args:
        files: Iterable of file names located in ``folder``.
        folder: Directory containing ``files``.
        header_rows: Number of leading rows of each CSV that form the header
            table; the column-name row is read after skipping
            ``header_rows + 1`` lines.
        save_parameters: When true and a known parameter column is found,
            ``save_params_file`` is called for the parsed table.

    Returns:
        A tuple ``(datasets, n_folds, headers, metrics)``: the unique dataset
        names, the number of distinct fold suffixes (at least 1), the list of
        header tables, and the metric names in order of first appearance.
        When ``files`` contains no CSV, the name collections are empty.
    """
    datasets = []
    # Bound up-front so the return statement works even without any CSV.
    datasets_fold = []
    folds = []
    headers = []
    metrics = []

    for file in files:
        if ".csv" not in file:
            continue

        file_path = join(folder, file)

        header = pd.read_csv(file_path, nrows=header_rows, header=None)
        header = header.transpose()
        header = header.rename(columns=header.iloc[0])
        header = header.drop([0])
        header = header.dropna(axis=0, how='any')
        header = header.astype(np.float64)

        headers.append(header)

        # Column names are only extracted once, from the first CSV that
        # provides a non-empty set of them.
        if len(datasets) > 0:
            continue

        results = pd.read_csv(file_path, skiprows=header_rows + 1, header=None)

        datasets = results.iloc[0]

        # The first known parameter column marks where result columns end.
        first_param = next(
            (name for name in ("som_in", "lr_cnn", "n_max", "at") if name in datasets.values),
            None,
        )

        if first_param is None:
            datasets = datasets[1:]
        else:
            datasets = datasets[1: datasets[datasets == first_param].index[0]]
            if save_parameters:
                save_params_file(results, first_param, folder)

        datasets_fold = []
        folds = []
        metrics = []
        for dataset in datasets:
            search_folds = re.search(r"(_x\d_k\d)", dataset)
            if search_folds is not None:
                folds.append(os.path.splitext(dataset[search_folds.start(0):])[0])
                datasets_fold.append(os.path.splitext(dataset[: search_folds.start(0)])[0])
            else:
                datasets_fold.append(os.path.splitext(dataset)[0])

            search_metrics = dataset.split(".")
            if len(search_metrics) > 1:
                metrics.append(search_metrics[1])

    # De-duplicate the metrics while keeping their order of first appearance.
    metrics = np.array(metrics)
    _, metrics_idx = np.unique(metrics, return_index=True)
    metrics = metrics[np.sort(metrics_idx)]
    return np.unique(datasets_fold), max(1, int(len(np.unique(folds)))), headers, metrics


def read_params_and_results(file_name, rows=5):
    """Split a results CSV into its parameter columns and result columns.

    The file is read after skipping ``rows + 1`` lines; the first remaining
    row holds the column names. The first of ``som_in``, ``lr_cnn``,
    ``n_max`` or ``at`` found in that row marks where the parameter columns
    begin.

    Returns:
        ``(params, results)``. ``params`` holds the columns from the marker
        onwards converted to ``float64``; ``results`` holds the columns
        between the first column and the marker. Both are labelled with the
        name row, which is then dropped. ``(None, None)`` is returned when
        no marker column is present.
    """
    table = pd.read_csv(file_name, skiprows=rows + 1, header=None)
    names_row = table.iloc[0]

    # Locate the first known parameter name, in priority order.
    split_at = None
    for candidate in ("som_in", "lr_cnn", "n_max", "at"):
        if candidate in names_row.values:
            split_at = names_row[names_row == candidate].index[0]
            break

    if split_at is None:
        return None, None

    # Parameter side: everything from the marker column onwards.
    params = table.drop(columns=table.columns[:split_at])
    params = params.rename(columns=params.iloc[0]).drop([0]).astype(np.float64)

    # Result side: everything before the marker, minus the leading column.
    results = table.drop(columns=table.columns[split_at:])
    results = results.drop(columns=results.columns[0])
    results = results.rename(columns=results.iloc[0]).drop([0])

    return params, results


def save_params_file(results, starting_param_name, filename):
    """Write the parameter columns of ``results`` to a CSV inside ``filename``.

    The first row of ``results`` supplies the column names. The remaining
    rows are converted to ``float64`` and only the columns from
    ``starting_param_name`` onwards are kept. The output starts with a
    ``min`` and a ``max`` row computed per column, followed by the
    parameter rows.

    ``filename`` is used as the destination directory. The file is named
    ``parameters-<last path component>.csv``, where the last component is
    taken after removing one trailing ``"/"``.
    """
    table = results.rename(columns=results.iloc[0]).drop([0]).astype(np.float64)
    first_col = table.columns.get_loc(starting_param_name)
    parameters = table.iloc[:, first_col:]

    # Per-column bounds are placed above the actual parameter rows.
    bounds = pd.DataFrame(
        [list(parameters.min(0)), list(parameters.max(0))],
        columns=parameters.columns,
        index=['min', 'max'],
    )
    full_data = pd.concat([bounds, parameters])

    folder = filename[:-1] if filename.endswith("/") else filename
    target = join(folder, "parameters-" + folder.split("/")[-1] + ".csv")
    full_data.to_csv(target, sep=',', index=True)


def adjust_start_stop_limits(max, start, stop):
    """Clamp a ``[start, stop)`` range against the upper bound ``max``.

    A ``stop`` that is non-positive or larger than ``max`` is replaced by
    ``max``. If the resulting ``stop`` does not lie after ``start``, the
    full range ``(0, max)`` is returned instead.
    """
    upper = max
    end = stop if 0 < stop <= upper else upper

    if end > start:
        return start, end
    return 0, upper


def parameters_start_stop(parameters, start, stop):
    """Return the rows of ``parameters`` between ``start`` and ``stop``.

    The bounds are first normalised by ``adjust_start_stop_limits`` against
    the number of rows, then applied positionally with ``iloc``.
    """
    first, last = adjust_start_stop_limits(len(parameters), start, stop)
    return parameters.iloc[first:last, :]


def get_data_targets(path, file, target_idx=None):
    """Load the target column of a dataset file.

    Args:
        path: Directory containing ``file``.
        file: File name. Names ending in ``.arff`` are parsed with
            ``scipy.io.arff``; anything else is read as a header-less CSV.
        target_idx: For ARFF files, the field name to read (defaults to
            ``'class'``). For CSV files, the position of the target column
            (defaults to the last column).

    Returns:
        For ARFF files, the selected field of the loaded record array as
        returned by ``loadarff``. For CSV files, the selected column as an
        ``int16`` NumPy array.
    """
    file_path = join(path, file)

    if file.endswith(".arff"):
        # Close the handle deterministically once the data is loaded.
        with open(file_path, 'r') as handle:
            data, _ = arff.loadarff(handle)
        return data['class'] if target_idx is None else data[target_idx]

    data = pd.read_csv(file_path, header=None)
    # ``DataFrame.ix`` no longer exists in pandas; with ``header=None`` the
    # column labels equal their positions, so positional access matches it.
    column = -1 if target_idx is None else target_idx
    return data.iloc[:, column].values.astype('int16')


def create_folders(path):
    """Ensure the parent directory of ``path`` exists.

    Missing intermediate directories are created as well. Nothing happens
    when ``path`` has no directory component or when the parent already
    exists.
    """
    directory = os.path.dirname(path)

    # A bare file name has an empty dirname; ``os.makedirs("")`` would raise.
    if directory and not os.path.exists(directory):
        # ``exist_ok`` tolerates another process creating it in the meantime.
        os.makedirs(directory, exist_ok=True)


class Timer(object):
    """Simple stopwatch built on ``time.time()``.

    ``time`` stores the reference timestamp and ``interval`` the duration
    measured by the most recent ``toc()`` call.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear the stored interval and restart from the current time."""
        self.interval = 0
        self.time = time.time()

    def value(self):
        """Return the seconds elapsed since the reference timestamp."""
        now = time.time()
        return now - self.time

    def tic(self):
        """Move the reference timestamp to the current time."""
        self.time = time.time()

    def toc(self):
        """Store and return the elapsed time, then restart the reference."""
        elapsed = time.time() - self.time
        self.interval = elapsed
        self.time = time.time()
        return elapsed


class AverageMeter(object):
    """Keep the latest value together with a weighted running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the last value, the average, the weighted sum and the count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = float(self.sum) / self.count
