import pandas as pd
import zipfile
import numpy as np
from ucimlrepo import fetch_ucirepo 
from sklearn.preprocessing import MinMaxScaler
import traceback

def get_concrete():
    """Export the scaled Concrete Compressive Strength features to CSV.

    Fetches UCI dataset 165, keeps the feature rows whose index label is 1
    or greater, rescales each column with a default ``MinMaxScaler`` and
    writes the result, comma-separated, to
    ``datasets/concrete_clusters.csv``.
    """
    features = fetch_ucirepo(id=165).data.features

    # NOTE(review): ``.loc[1:]`` is a label-based slice, so the row labelled
    # 0 is left out of the export -- confirm that row is not a real sample.
    kept_rows = features.loc[1:]

    scaled = MinMaxScaler().fit_transform(kept_rows)
    np.savetxt(r'datasets/concrete_clusters.csv', scaled, delimiter=',')
    
def get_banknote():
    """Export the scaled Banknote Authentication features to CSV.

    Fetches UCI dataset 267, rescales each feature column with a default
    ``MinMaxScaler`` and writes the result, comma-separated, to
    ``datasets/banknote_clusters.csv``.
    """
    features = fetch_ucirepo(id=267).data.features

    scaled = MinMaxScaler().fit_transform(features)
    np.savetxt(r'datasets/banknote_clusters.csv', scaled, delimiter=',')

def get_airfoil():
    """Export the scaled Airfoil Self-Noise features to CSV.

    Fetches UCI dataset 291, keeps the feature rows whose index label is 1
    or greater, rescales each column with a default ``MinMaxScaler`` and
    writes the result, comma-separated, to
    ``datasets/airfoil_clusters.csv``.
    """
    features = fetch_ucirepo(id=291).data.features

    # NOTE(review): ``.loc[1:]`` is a label-based slice, so the row labelled
    # 0 is left out of the export -- confirm that row is not a real sample.
    kept_rows = features.loc[1:]

    scaled = MinMaxScaler().fit_transform(kept_rows)
    np.savetxt(r'datasets/airfoil_clusters.csv', scaled, delimiter=',')

def get_trip_advisor():
    """Export the scaled travel-review features to CSV.

    Fetches UCI dataset 484, removes the ``User ID`` column, keeps the rows
    whose index label is 1 or greater, rescales each remaining column with
    a default ``MinMaxScaler`` and writes the result, comma-separated, to
    ``datasets/tripadvisor_clusters.csv``.
    """
    # Getting data from UCI repository
    features = fetch_ucirepo(id=484).data.features

    without_ids = features.drop(columns=['User ID'])

    # NOTE(review): ``.loc[1:]`` is a label-based slice, so the row labelled
    # 0 is left out of the export -- confirm that row is not a real sample.
    kept_rows = without_ids.loc[1:]

    scaled = MinMaxScaler().fit_transform(kept_rows)
    np.savetxt(r'datasets/tripadvisor_clusters.csv', scaled, delimiter=',')

def get_qsarfish():
    """Export the scaled QSAR fish toxicity features to CSV.

    Reads ``qsar_fish_toxicity.csv`` (semicolon-separated) from the locally
    downloaded archive ``datasets/new_files/qsar+fish+toxicity.zip``, drops
    the last column (the target), rescales the remaining columns with a
    default ``MinMaxScaler`` and writes the result, comma-separated, to
    ``datasets/qsarfish_clusters.csv``.

    Raises:
        FileNotFoundError: If the zip archive is not at the expected path.
        KeyError: If the archive has no member with the expected name.
    """
    # File downloaded from https://archive.ics.uci.edu/dataset/504/qsar+fish+toxicity
    # Forward slashes are accepted on Windows and POSIX alike; the previous
    # backslash-separated path only resolved on Windows.
    file_path = 'datasets/new_files/qsar+fish+toxicity.zip'

    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        with zip_ref.open(r'qsar_fish_toxicity.csv') as csv_file:
            # NOTE(review): read_csv treats the first line as column names
            # by default. If this file has no header row, the first sample
            # is silently consumed -- confirm, and pass header=None if so.
            df = pd.read_csv(csv_file, sep=';')

    # Dropping target column
    df = df.drop(df.columns[-1], axis=1)

    # Normalizing data between 0 and 1
    scaler = MinMaxScaler()
    data = scaler.fit_transform(df)

    np.savetxt(r'datasets/qsarfish_clusters.csv', data, delimiter=',')


def get_geographicalmusic():
    """Export the scaled Geographical Origin of Music features to CSV.

    Reads ``default_plus_chromatic_features_1059_tracks.txt``
    (comma-separated) from the locally downloaded archive
    ``datasets/new_files/geographical+original+of+music.zip``, drops the
    last two columns (the targets), rescales the remaining columns with a
    default ``MinMaxScaler`` and writes the result, comma-separated, to
    ``datasets/geographicalmusic_clusters.csv``.

    Raises:
        FileNotFoundError: If the zip archive is not at the expected path.
        KeyError: If the archive has no member with the expected name.
    """
    # File downloaded from https://archive.ics.uci.edu/dataset/315/geographical+original+of+music
    # Forward slashes are accepted on Windows and POSIX alike; the previous
    # backslash-separated path only resolved on Windows.
    file_path = 'datasets/new_files/geographical+original+of+music.zip'

    member = r'Geographical Original of Music/default_plus_chromatic_features_1059_tracks.txt'
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        with zip_ref.open(member) as csv_file:
            # NOTE(review): read_csv treats the first line as column names
            # by default. If this file has no header row, the first sample
            # is silently consumed -- confirm, and pass header=None if so.
            df = pd.read_csv(csv_file, sep=',')

    # Dropping target columns
    df = df.drop([df.columns[-1], df.columns[-2]], axis=1)

    # Normalizing data between 0 and 1
    scaler = MinMaxScaler()
    data = scaler.fit_transform(df)

    np.savetxt(r'datasets/geographicalmusic_clusters.csv', data, delimiter=',')


def main():
    """Run every dataset export in turn, reporting success or failure.

    Each loader is attempted independently: a failure in one is printed and
    does not stop the remaining loaders from running.
    """
    # (loader, label used in the success message), in execution order.
    loaders = (
        (get_geographicalmusic, 'Geographical Music'),
        (get_qsarfish, 'Qsarfish'),
        (get_banknote, 'Banknote'),
        (get_concrete, 'Concrete'),
        (get_airfoil, 'Airfoil'),
        (get_trip_advisor, 'Trip Advisor'),
    )

    for loader, label in loaders:
        try:
            loader()
        except Exception as e:
            # Bind the exception in every handler: the name from an earlier
            # ``except ... as e`` is deleted when that handler exits, so it
            # cannot be reused by a later bare ``except``.
            print(f'Error in {loader.__name__}: ', e)
        else:
            print(f'{label} data loaded')


# Only run the exports when this file is executed directly, not on import.
if __name__ == '__main__':
    main()