import numpy as np
from scipy.optimize import minimize
import pandas as pd
import torch
import torchvision
# from torchvision import datasets, transforms
from sklearn.metrics.pairwise import cosine_similarity
import csv
import argparse
from sklearn.datasets import fetch_openml

# Command-line hyperparameters for an IncGLOC run, one entry per option:
# (flag, converter for values typed on the command line, default).
# The defaults are handed to argparse exactly as written; argparse only runs
# the converter on string values, so the int defaults 10 and 1 stay ints
# unless the option is given explicitly.
_CLI_OPTIONS = (
    ('--l1', float, 0.01),  # forwarded to objective() as `l1`
    ('--l2', float, 10),    # forwarded to objective() as `l2`
    ('--eps', float, 1),    # scales the per-sample eps; also part of the output file name
    ('--k', int, 5),        # number of nearest neighbours requested per sample
)

parser = argparse.ArgumentParser(description='IncGLOC')
for _flag, _converter, _default in _CLI_OPTIONS:
    parser.add_argument(_flag, type=_converter, default=_default)

# Parsed once at import time; the rest of the script reads from `args`.
args = parser.parse_args()

def compute_neighbors(features, k):
    """Return pairwise cosine similarities and each sample's top-k neighbours.

    Args:
        features: Array whose first axis enumerates samples. Any trailing
            axes are flattened into a single feature vector per sample.
        k: Number of neighbour indices to return per sample.

    Returns:
        A tuple ``(cosine_similarities, k_nearest_indices)`` where
        ``cosine_similarities`` is the square sample-by-sample similarity
        matrix and ``k_nearest_indices[i]`` lists the column indices of the
        ``k`` largest entries of row ``i``, most similar first.

    Note:
        The diagonal is not excluded, so a sample is typically returned as
        its own nearest neighbour.
    """
    # Read only the leading dimension: unpacking `features.shape` into two
    # names would reject inputs with more than two axes, which the reshape
    # below is there to flatten.
    num_samples = features.shape[0]
    reshaped_features = features.reshape(num_samples, -1)

    cosine_similarities = cosine_similarity(reshaped_features)

    # argsort is ascending; walking the last k columns backwards yields the
    # k highest similarities in descending order.
    k_nearest_indices = np.argsort(cosine_similarities, axis=1)[:, :-k-1:-1]
    return cosine_similarities, k_nearest_indices

def map_to_original_indices(neighbors_indices, original_indices):
    """Translate positional neighbour indices into original dataset indices.

    Args:
        neighbors_indices: Integer array of positions into ``original_indices``.
        original_indices: Array holding the original index of each position.

    Returns:
        An array shaped like ``neighbors_indices`` in which every position
        has been replaced by ``original_indices[position]``.
    """
    # Fancy indexing performs the whole lookup in one step.
    return original_indices[neighbors_indices]

def calculate_neighbor_difference(neighbors1, neighbors2):
    """Count, per sample, the neighbours present in the first set but not the second.

    Args:
        neighbors1: 2-D array; row ``i`` lists the neighbours of sample ``i``.
        neighbors2: 2-D array with at least as many rows as ``neighbors1``.
            Rows beyond ``len(neighbors1)`` are ignored.

    Returns:
        1-D numpy array with one entry per row of ``neighbors1``: the number
        of distinct values in ``neighbors1[i]`` that do not occur in
        ``neighbors2[i]``.
    """
    lost_counts = []
    for row in range(neighbors1.shape[0]):
        before = set(neighbors1[row])
        after = set(neighbors2[row])
        # Set difference ignores ordering and duplicates within a row.
        lost_counts.append(len(before.difference(after)))

    return np.array(lost_counts)



def objective(values_added,values_array, S,  eps_per_sample, eps_mean,  neighbors_indices_new, l1, l2):
    """Evaluate the objective minimised over the updated data values.

    The objective is the sum of three terms:

    * smoothness: ``sum_i sum_{j in neighbors_indices_new[i]}
      S[i, j] * (values_added[i] - values_added[j]) ** 2``
    * fidelity: ``l1 * sum_i (eps_per_sample[i] / eps_mean) *
      (values_array[i] - values_added[i]) ** 2`` over the entries that
      already had a value (the first ``len(values_array)`` ones)
    * ridge: ``l2 * ||values_added|| ** 2``

    Args:
        values_added: 1-D optimisation variable, one value per sample.
        values_array: 1-D array of previous values; may be shorter than
            ``values_added``.
        S: 2-D weight matrix indexed as ``S[i, j]``.
        eps_per_sample: Per-sample weights for the fidelity term, at least
            ``len(values_array)`` long.
        eps_mean: Normaliser applied to ``eps_per_sample``.
        neighbors_indices_new: Sequence whose ``i``-th item lists the
            neighbour positions of sample ``i``.
        l1: Weight of the fidelity term.
        l2: Weight of the ridge term.

    Returns:
        The scalar objective value.
    """
    values_added = np.asarray(values_added, dtype=float)
    values_array = np.asarray(values_array, dtype=float)
    num_old = len(values_array)

    # Smoothness: one vectorised pass per sample instead of a Python-level
    # loop over every (sample, neighbour) pair. The optimiser evaluates this
    # function many times, so the inner loop is worth removing.
    smoothness = 0.0
    for i in range(len(values_added)):
        neighbours = np.asarray(neighbors_indices_new[i], dtype=int)
        gaps = values_added[i] - values_added[neighbours]
        smoothness += np.sum(S[i, neighbours] * gaps ** 2)

    # Fidelity: keep previously valued samples close to their old value.
    weights = np.asarray(eps_per_sample, dtype=float)[:num_old] / eps_mean
    fidelity = l1 * np.sum(weights * (values_array - values_added[:num_old]) ** 2)

    # Ridge: penalise the optimisation variable. Penalising the fixed
    # `values_array` would only add a constant, leaving `l2` without any
    # influence on the minimiser.
    ridge = l2 * np.sum(values_added ** 2)

    return smoothness + fidelity + ridge




# ---------------------------------------------------------------------------
# Script body: add one not-yet-valued sample to an already valued subset and
# re-optimise all data values.
# ---------------------------------------------------------------------------

# --- Previously computed data values ---------------------------------------
# The CSV must provide an 'indices' column (row positions into the OpenML
# dataset loaded below) and a 'data_values' column (the value of each row).
# NOTE(review): the path is an empty placeholder in the source and has to be
# filled in before the script can run.
original_data_values = pd.read_csv("")

original_index = original_data_values['indices'].values
original_values = original_data_values['data_values'].values

index_array = np.array(original_index)
values_array = np.array(original_values)

# --- Full dataset ----------------------------------------------------------
data = fetch_openml(data_id=43974, as_frame=True)

all_features = data.data.to_numpy()
all_labels = data.target.to_numpy()

# Index features and labels separately instead of going through one
# column-stacked array: stacking coerces both to a single common dtype.
features_tensor = all_features[index_array]
labels = list(all_labels[index_array])
labels_tensor = labels

# --- Pick one sample that has no value yet ---------------------------------
# Draw uniformly from the rows that are not already valued. Enumerating the
# candidates up front (rather than redrawing until a free row is hit) also
# covers the case where no free row exists.
candidate_indices = np.setdiff1d(np.arange(len(all_features)), index_array)
if candidate_indices.size == 0:
    raise ValueError("Every dataset row already has a data value; nothing to add.")
random_add_index = candidate_indices[np.random.randint(0, len(candidate_indices))]

feature = all_features[random_add_index]
label = all_labels[random_add_index]

k = args.k

# --- Neighbourhoods before the addition ------------------------------------
_, neighbors_indices = compute_neighbors(features_tensor, k)

mapped_indices = map_to_original_indices(neighbors_indices, index_array)
print(mapped_indices)

# --- Neighbourhoods after the addition -------------------------------------
# The new sample is appended last, so it occupies the final row/column of
# every array derived from the extended set.
features_added_tensor = np.vstack([features_tensor, feature])

new_labels = labels + [label]
labels_added_tensor = new_labels

index_added = np.append(index_array, random_add_index)

cosine_similarity_new, neighbors_indices_new = compute_neighbors(features_added_tensor, k)

mapped_indices_new = map_to_original_indices(neighbors_indices_new, index_added)
print(mapped_indices_new)

# --- Per-sample eps --------------------------------------------------------
# For every previously valued sample: how many of its old neighbours are no
# longer among its neighbours once the new sample is present.
difference_per_sample = calculate_neighbor_difference(mapped_indices, mapped_indices_new)

print(difference_per_sample)

# NOTE(review): the divisor 10 is hard-coded while the neighbour count comes
# from --k (default 5); presumably it was meant to track k -- confirm.
eps_per_sample = (len(index_added)/len(index_array)) * (1+difference_per_sample/10) * args.eps

print(eps_per_sample)

eps_mean = np.mean(eps_per_sample)

# --- Signed similarity -----------------------------------------------------
# +cosine similarity for pairs with equal labels, -cosine similarity otherwise.
new_labels_array = np.array(new_labels)
equal_labels = (new_labels_array[:, np.newaxis] == new_labels_array[np.newaxis, :]).astype(int)

Sim = cosine_similarity_new * (2 * equal_labels - 1)

# --- Initial value for the new sample --------------------------------------
# Similarity-weighted average of the old values of the new sample's own
# neighbours (last row of the post-addition neighbour table). The new sample
# itself is skipped: it has no old value, and its position would be out of
# range for `values_array`.
new_position = len(values_array)
new_sample_neighbors = neighbors_indices_new[-1]
new_sample_neighbors = new_sample_neighbors[new_sample_neighbors != new_position]

neighbor_weights = Sim[-1, new_sample_neighbors]
weight_total = np.sum(neighbor_weights)
if weight_total != 0:
    new_beta = np.sum(neighbor_weights * values_array[new_sample_neighbors]) / weight_total
else:
    # Signed weights can cancel out (or there may be no usable neighbour);
    # fall back to the mean old value rather than dividing by zero.
    new_beta = np.mean(values_array)
values_added = np.append(values_array, new_beta)

# --- Optimisation ----------------------------------------------------------
# `values_added` is the starting point; the remaining arguments are passed
# through to objective() unchanged.
result = minimize(objective, values_added, args=(values_array, Sim, eps_per_sample, eps_mean,  neighbors_indices_new, args.l1, args.l2))

if not result.success:
    # Surface optimiser failures instead of silently writing the result.
    print("Warning: optimizer did not report success:", result.message)

optimized_beta = result.x
print("Optimized beta values:", optimized_beta)
print(optimized_beta.shape)

# --- Output: one optimised value per row -----------------------------------
with open("minibool-incgloc"+str(args.eps)+".csv", 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)

    for item in optimized_beta:
        csvwriter.writerow([item])

print("Optimized objective value:", result.fun)




