
import numpy as np

import os

from other_algo import run_experiment
from FJR import run_FJR
from appr_FJR import run_appr_FJR
import os
import pandas
import requests
import sklearn.cluster
from scipy.cluster.hierarchy import fcluster
from sklearn.cluster import KMeans
from sklearn.preprocessing import scale
from sklearn_extra.cluster import KMedoids
from statistics import mean
import sys
from scipy.spatial.distance import pdist, squareform
from FGC import run_FGC
from core import run_core
from measures import within_cluster
from measures import kmeans_objective
from measures import kmedoids_objective
import math
import random

import sys

# Takes the number of clusters k as its single command-line argument.
# Run the program as follows: python3 experiments.py <k>




# Dataset to evaluate. The branches further down recognise exactly
# 'Iris', 'Diabetes' and 'Adults'.
data_set = 'Iris'
# Metric name passed to scipy's pdist and to run_experiment.
dist_function = 'euclidean'

print(data_set)

# The number of clusters k is the single required command-line argument.
# Fail with a readable message instead of a bare IndexError/ValueError.
if len(sys.argv) < 2:
    sys.exit(f"usage: python3 {sys.argv[0]} <k>  (k = number of clusters)")
try:
    x = int(sys.argv[1])
except ValueError:
    sys.exit(f"k must be an integer, got {sys.argv[1]!r}")
if x < 1:
    sys.exit(f"k must be at least 1, got {x}")

print(f"Argument 1: {x}")




if data_set == 'Iris':

    def _score_clustering(n, k, distance_matrix, distance_vector, clustering):
        """Evaluate one clustering and return the 7 values reported per row.

        Layout of the returned vector:
        [k, run_appr_FJR, run_FJR, run_core,
         within_cluster, kmeans_objective, kmedoids_objective].

        run_FJR and run_core receive the value that run_appr_FJR returned
        for this same clustering as their last argument.
        """
        scores = np.zeros(7)
        scores[0] = k
        scores[1] = run_appr_FJR(n, k, distance_matrix, clustering)
        scores[2] = run_FJR(n, k, distance_matrix, clustering, scores[1])
        scores[3] = run_core(n, k, distance_matrix, clustering, scores[1])
        scores[4] = within_cluster(distance_vector, clustering)
        scores[5] = kmeans_objective(distance_vector, clustering)
        scores[6] = kmedoids_objective(distance_vector, clustering)
        return scores

    file_path = 'Iris.txt'
    with open(file_path, 'r') as file:
        next(file)  # skip the header line
        data = np.array([
            # Drop the first and the last comma-separated field of each
            # line and parse everything in between as floats.
            [float(value) for value in line.strip().split(',')[1:-1]]
            for line in file
            if line.strip()  # a blank line would otherwise add an empty row
        ])
    n = data.shape[0]

    # Condensed pairwise distances and their square-matrix form.
    distance_vector = pdist(data, metric=dist_function)
    distance_matrix = squareform(distance_vector)

    r = 0  # the single FGC row (gl_it == 1)
    for k in range(x, x + 1):
        print(k)
        results = []
        gl_it = 1   # FGC is evaluated once on the full dataset
        it = 20     # number of run_experiment repetitions

        GC_results = np.zeros((gl_it, 7))
        GC = run_FGC(n, k, distance_matrix)
        GC_results[r] = _score_clustering(
            n, k, distance_matrix, distance_vector, GC)

        # One row of scores per repetition for each baseline clustering.
        temp_means = np.zeros((it, 7))
        temp_medoids = np.zeros((it, 7))
        for l in range(it):
            k_means_clustering, k_medoids_clustering = run_experiment(
                data, k, distance_matrix, dist_function)
            temp_means[l] = _score_clustering(
                n, k, distance_matrix, distance_vector, k_means_clustering)
            temp_medoids[l] = _score_clustering(
                n, k, distance_matrix, distance_vector, k_medoids_clustering)

        # For each method append two rows: the column means, then
        # 1.96 * std / sqrt(number of rows) for every column.
        for per_run, runs in ((GC_results, gl_it),
                              (temp_means, it),
                              (temp_medoids, it)):
            results.append(per_run.mean(axis=0))
            results.append(1.96 * per_run.std(axis=0) / math.sqrt(runs))

        np.savetxt('k='+str(k)+'-'+data_set+'max.csv', results, delimiter=',', fmt='%f')

if data_set in ('Diabetes', 'Adults'):

    def _score_clustering(n, k, distance_matrix, distance_vector, clustering):
        """Evaluate one clustering and return the 7 values reported per row.

        Layout of the returned vector:
        [k, run_appr_FJR, run_FJR, run_core,
         within_cluster, kmeans_objective, kmedoids_objective].

        run_FJR and run_core receive the value that run_appr_FJR returned
        for this same clustering as their last argument -- never a running
        total accumulated over several clusterings.
        """
        scores = np.zeros(7)
        scores[0] = k
        scores[1] = run_appr_FJR(n, k, distance_matrix, clustering)
        scores[2] = run_FJR(n, k, distance_matrix, clustering, scores[1])
        scores[3] = run_core(n, k, distance_matrix, clustering, scores[1])
        scores[4] = within_cluster(distance_vector, clustering)
        scores[5] = kmeans_objective(distance_vector, clustering)
        scores[6] = kmedoids_objective(distance_vector, clustering)
        return scores

    if data_set == 'Diabetes':
        file_path = 'diabetes.txt'
        with open(file_path, 'r') as file:
            next(file)  # skip the header line
            data_all = np.array([
                [float(value) for value in line.strip().split(',')]
                for line in file
                if line.strip()  # float('') on a blank line would raise
            ])

    if data_set == 'Adults':
        df = pandas.read_csv('adult.data', sep=',', header=None)
        # Column 2 supplies the sampling weights; it does not change
        # between repetitions, so read it once.
        # NOTE(review): column 2 is `fnlwgt` if adult.data is the
        # unmodified UCI file -- confirm.
        weights = df.iloc[:, 2].values

    # To sweep several k in one run, widen this range (e.g. range(5, 9)).
    for k in range(x, x + 1):
        results = []
        print(k)
        n = 100      # points drawn (with replacement) per repetition
        gl_it = 40   # number of independently resampled repetitions
        it = 20      # run_experiment calls averaged within a repetition
        GC_results = np.zeros((gl_it, 7))
        means_results = np.zeros((gl_it, 7))
        medoids_results = np.zeros((gl_it, 7))

        for r in range(gl_it):
            print(r)

            if data_set == 'Adults':
                # Weighted draw of n row positions, sized from the frame
                # itself rather than a hard-coded row count.
                generated_numbers = random.choices(
                    range(len(df)), weights=weights, k=n)
                sample = df.iloc[generated_numbers, [0, 4, 9, 10, 11, 12]]
                # Feature columns, assuming the UCI adult.data layout:
                #   0 age, 1 education-num, 2 sex (1 = male, 0 otherwise),
                #   3 capital-gain, 4 capital-loss, 5 hours-per-week
                data = np.zeros((n, 6))
                data[:, [0, 1, 3, 4, 5]] = (
                    sample.iloc[:, [0, 1, 3, 4, 5]].to_numpy(dtype=float))
                # Strip whitespace and lowercase before comparing.
                is_male = sample.iloc[:, 2].str.strip().str.lower() == 'male'
                data[:, 2] = is_male.to_numpy()
                data = scale(data)

            if data_set == 'Diabetes':
                # Uniform draw with replacement over all loaded rows.
                n_rows = data_all.shape[0]
                random_numbers = [random.randint(0, n_rows - 1)
                                  for _ in range(n)]
                data = scale(data_all[random_numbers, :])

            # Condensed pairwise distances and their square-matrix form.
            distance_vector = pdist(data, metric=dist_function)
            distance_matrix = squareform(distance_vector)

            GC = run_FGC(n, k, distance_matrix)
            GC_results[r] = _score_clustering(
                n, k, distance_matrix, distance_vector, GC)

            # Sum the scores of `it` baseline runs on this sample, then
            # store their average as this repetition's row.
            temp_means = np.zeros(7)
            temp_medoids = np.zeros(7)
            for _ in range(it):
                k_means_clustering, k_medoids_clustering = run_experiment(
                    data, k, distance_matrix, dist_function)
                temp_means += _score_clustering(
                    n, k, distance_matrix, distance_vector,
                    k_means_clustering)
                temp_medoids += _score_clustering(
                    n, k, distance_matrix, distance_vector,
                    k_medoids_clustering)
            means_results[r] = temp_means / it
            medoids_results[r] = temp_medoids / it

        # For each method append two rows: the column means, then
        # 1.96 * std / sqrt(gl_it) for every column.
        for per_run in (GC_results, means_results, medoids_results):
            results.append(per_run.mean(axis=0))
            results.append(1.96 * per_run.std(axis=0) / math.sqrt(gl_it))

        np.savetxt('k='+str(k)+'-'+data_set+'.csv', results, delimiter=',', fmt='%f')




