import os
from sentence_transformers import SentenceTransformer
import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import numpy as np
from scipy.spatial.distance import cdist
import json
import pandas as pd
from collections import defaultdict
from tqdm import tqdm
import torch.nn as nn
import torch
import random
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import numpy as np
from scipy.spatial.distance import cdist

import torch
# Module-wide compute device: the first CUDA GPU ("cuda:0") when
# torch.cuda.is_available() reports one, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def compute_center_tensor(tensor_list):
    """Return the element-wise mean of a collection of equally-shaped vectors.

    Args:
        tensor_list: Non-empty sequence of NumPy arrays or torch tensors that
            all share one shape. A 2-D array is also accepted and is treated
            as a sequence of its rows.

    Returns:
        A ``torch.float32`` tensor with the shape of a single element, holding
        the arithmetic mean over all elements.

    Raises:
        ValueError: If ``tensor_list`` is empty.
    """
    if len(tensor_list) == 0:
        raise ValueError("compute_center_tensor() requires at least one tensor")

    # Convert every element explicitly instead of relying on implicit
    # torch/NumPy interop inside an in-place add; this also lets callers pass
    # torch tensors, not only NumPy arrays.
    stacked = torch.stack(
        [torch.as_tensor(t, dtype=torch.float32) for t in tensor_list]
    )
    return stacked.sum(dim=0) / len(tensor_list)

def kmeans_clustering_with_elbow(tensor_list, save_path=""):
    """Cluster embeddings with K-Means, choosing K interactively from an elbow plot.

    The function fits K-Means for K = 1..19 (capped at the number of samples),
    saves the SSE-vs-K elbow plot to ``save_path``, then reads the chosen K
    from standard input and fits the final model with it.

    Side effects:
        * Writes the elbow plot via ``plt.savefig(save_path)``.
        * Blocks on ``input()`` until the user enters the cluster count.
        * Saves the mean of the final cluster centers (float32, on CPU) to
          ``'CC.pt'`` in the current working directory.

    Args:
        tensor_list: Sequence of embedding vectors; stacked row-wise with
            ``np.vstack`` into the sample matrix.
        save_path: Destination passed to ``plt.savefig`` for the elbow plot.

    Returns:
        Tuple ``(labels, cluster_centers, kmeans)`` where ``labels`` is
        ``kmeans.labels_``, ``cluster_centers`` is ``kmeans.cluster_centers_``
        and ``kmeans`` is the fitted final estimator.

    Raises:
        ValueError: If the entered value is not an integer in
            ``[1, number of samples]``.
    """
    X = np.vstack(tensor_list)
    n_samples = X.shape[0]

    # K-Means cannot fit more clusters than there are samples, so cap the
    # sweep for small datasets instead of failing part-way through.
    k_range = range(1, min(19, n_samples) + 1)
    sse = []
    for k in k_range:
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(X)
        sse.append(kmeans.inertia_)

    plt.figure(figsize=(8, 6))
    plt.plot(k_range, sse, marker='o')
    plt.title('Elbow Method for Optimal K')
    plt.xlabel('Number of Clusters')
    plt.ylabel('Sum of Squared Errors (SSE)')
    plt.grid(True)

    # NOTE(review): the default save_path is an empty string; confirm where
    # matplotlib writes the figure in that case, or pass an explicit path.
    plt.savefig(save_path)
    plt.close()

    # input() returns a string, but KMeans requires an integer n_clusters.
    raw_k = input("Enter the optimal number of clusters (see the elbow plot): ")
    try:
        optimal_k = int(raw_k.strip())
    except ValueError as err:
        raise ValueError(
            f"Number of clusters must be an integer, got {raw_k!r}"
        ) from err
    if not 1 <= optimal_k <= n_samples:
        raise ValueError(
            f"Number of clusters must be between 1 and {n_samples}, got {optimal_k}"
        )

    kmeans = KMeans(n_clusters=optimal_k, random_state=42)
    kmeans.fit(X)
    cluster_centers = kmeans.cluster_centers_

    # Persist the mean of all cluster centers as a float32 CPU tensor. The
    # value is only saved, so no round trip through the compute device is
    # needed.
    CC = compute_center_tensor(cluster_centers)
    torch.save(CC.to(dtype=torch.float32).cpu(), 'CC.pt')

    return kmeans.labels_, cluster_centers, kmeans

def predict_cluster(new_tensor, cluster_centers):
    """Return the index of the cluster center nearest to ``new_tensor``.

    Args:
        new_tensor: A single embedding, given either as a 1-D vector of
            length ``d`` or as a 2-D array of shape ``(1, d)``.
        cluster_centers: 2-D array of shape ``(k, d)`` with one center per row.

    Returns:
        The integer index (as returned by ``np.argmin``) of the closest center
        under Euclidean distance. If several rows are passed in
        ``new_tensor``, the result is the argmin over the flattened distance
        matrix, not a per-row label.
    """
    # cdist only accepts 2-D inputs; promote a bare 1-D embedding to one row.
    points = np.atleast_2d(new_tensor)
    distances = cdist(points, cluster_centers, metric='euclidean')

    return np.argmin(distances)

def add_labels_to_comments(comments, kmeans_labels):
    """Build new records that pair each comment with its cluster label.

    Args:
        comments: Sequence of dicts, each providing the keys ``'label'``,
            ``'text'`` and ``'concepts'``.
        kmeans_labels: Indexable collection holding one cluster id per
            comment, at the same position.

    Returns:
        A new list of dicts with the keys ``'label'``, ``'text'``,
        ``'concepts'`` and ``'Cluster'``. Other keys of the input dicts are
        not copied, and the inputs are not modified.
    """
    return [
        {
            "label": comment['label'],
            "text": comment['text'],
            "concepts": comment['concepts'],
            "Cluster": kmeans_labels[position],
        }
        for position, comment in enumerate(comments)
    ]

def Get_Tensor_List(reviews_list,Tensor_list,model):
    """Encode the text of every review and append the embeddings in place.

    Args:
        reviews_list: Sized iterable of dicts, each with a ``'text'`` entry.
        Tensor_list: List that receives one embedding per review, in order.
        model: Object exposing ``encode(text, normalize_embeddings=True)``;
            the caller in this file passes a ``SentenceTransformer``.

    Returns:
        None. ``Tensor_list`` is extended in place.
    """
    progress = tqdm(reviews_list, total=len(reviews_list))
    for review in progress:
        embedding = model.encode(review['text'], normalize_embeddings=True)
        Tensor_list.append(embedding)

def Get_Review_list(reviews_list,Datasets_Path):
    """Load a JSON Lines file and append every record to ``reviews_list``.

    Args:
        reviews_list: List that receives one decoded JSON value per non-blank
            line of the file, in file order.
        Datasets_Path: Path of the UTF-8 encoded JSON Lines file.

    Returns:
        None. ``reviews_list`` is extended in place.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(Datasets_Path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            # Blank or whitespace-only lines (e.g. trailing newlines) carry no
            # record; json.loads would raise on them, so skip them.
            if not line:
                continue
            reviews_list.append(json.loads(line))

def Rebalancing_and_Stretching(model_name,Datasets_Path):
    """Run the pipeline: load reviews, embed them, then search for redundancy.

    Args:
        model_name: Identifier passed to ``SentenceTransformer`` to build the
            embedding model.
        Datasets_Path: Path of the JSON Lines dataset read by
            ``Get_Review_list``.

    Returns:
        None. Results are produced as side effects of
        ``searching_for_conceptual_redundancy``.
    """
    # Read the dataset first, so a bad path fails before the model is loaded.
    reviews = []
    Get_Review_list(reviews, Datasets_Path)

    encoder = SentenceTransformer(model_name)

    # One embedding per review, in dataset order.
    embeddings = []
    Get_Tensor_List(reviews, embeddings, encoder)

    searching_for_conceptual_redundancy(reviews, embeddings)

def Update_Raw_Datasets(original_data, record_data, index_column='index', ifchange_column='IfChange'):
    """Attach a flag column from ``record_data`` to the original dataset.

    The records are sorted by ``index_column`` and their ``ifchange_column``
    values are copied positionally onto the original rows, so the sorted
    records must line up one-to-one with the rows of ``original_data``.
    The resulting frame is also written to ``'Running.json'``.

    Args:
        original_data: Data accepted by ``pd.DataFrame`` (the raw dataset).
        record_data: Data accepted by ``pd.DataFrame`` that contains both
            ``index_column`` and ``ifchange_column``.
        index_column: Name of the column used to order the records.
        ifchange_column: Name of the flag column to copy over.

    Returns:
        The DataFrame built from ``original_data`` with ``ifchange_column``
        set.
    """
    original_df = pd.DataFrame(original_data)

    # Order the records by their row index so they align with original_df.
    ordered_records = (
        pd.DataFrame(record_data)
        .sort_values(by=[index_column])
        .reset_index(drop=True)
    )

    if ifchange_column not in original_df.columns:
        original_df[ifchange_column] = None

    # Positional copy: .values drops the pandas index, so no label alignment.
    original_df[ifchange_column] = ordered_records[ifchange_column].values

    save_dataframe_to_json(original_df, r'Running.json')
    return original_df

def searching_for_conceptual_redundancy(review, tensors):
    """Flag, per cluster, the samples that exceed the rarest label's count.

    The embeddings are clustered with ``kmeans_clustering_with_elbow``. Inside
    each cluster, the smallest per-label sample count among the labels present
    is used as a quota. Walking the cluster's samples in dataset order, the
    first ``quota`` samples of every label get ``IfChangeForKM = 0`` and every
    further sample of that label gets ``IfChangeForKM = 1``. The flags are
    then written back through ``Update_Raw_Datasets``.

    Args:
        review: Sequence of dicts, each with a hashable ``'label'`` entry.
        tensors: Embeddings aligned position-by-position with ``review``.

    Returns:
        None. Output is produced by ``Update_Raw_Datasets``.
    """
    kmeans_labels, cluster_centers, _ = kmeans_clustering_with_elbow(tensors)

    # Group (dataset row position, class label) pairs by assigned cluster.
    members = {cluster_id: [] for cluster_id in range(len(cluster_centers))}
    for row_index, (row, cluster_id) in enumerate(zip(review, kmeans_labels)):
        members[int(cluster_id)].append((row_index, row['label']))

    balanced_list = []
    excess_list = []

    for cluster_id, rows in members.items():
        if not rows:
            # An empty cluster has nothing to balance (and no minimum count).
            continue

        # Quota = size of the rarest label that occurs in this cluster.
        totals = defaultdict(int)
        for _, lbl in rows:
            totals[lbl] += 1
        quota = min(totals.values())

        # Counters are keyed by the actual label values, so labels need not
        # be the integers 0..n-1.
        kept = defaultdict(int)
        for row_index, lbl in tqdm(rows, total=len(rows), desc=f"for label {cluster_id}"):
            if kept[lbl] == quota:
                excess_list.append({'label': lbl, 'index': row_index, 'IfChangeForKM': 1})
            else:
                kept[lbl] += 1
                balanced_list.append({'label': lbl, 'index': row_index, 'IfChangeForKM': 0})

    # Kept samples first, then the excess ones; Update_Raw_Datasets re-sorts
    # the records by 'index' before writing the flags back.
    balanced_list.extend(excess_list)
    Update_Raw_Datasets(review, balanced_list, 'index', 'IfChangeForKM')

def adjust_tensor_by_proportion(center_tensor, target_tensor, proportion):
    """Scale the offset of ``target_tensor`` from ``center_tensor``.

    Args:
        center_tensor: Reference point the offset is measured from.
        target_tensor: Point to move along the line through the center.
        proportion: Factor applied to the offset; ``0`` yields the center,
            ``1`` yields the target, larger values move further away.

    Returns:
        ``center_tensor + proportion * (target_tensor - center_tensor)``.
    """
    return center_tensor + proportion * (target_tensor - center_tensor)

def save_dataframe_to_json(df, filename):
    """Write ``df`` to ``filename`` as JSON Lines, one record per row.

    Args:
        df: The pandas DataFrame to serialise.
        filename: Destination path handed to ``DataFrame.to_json``.
    """
    df.to_json(path_or_buf=filename, orient='records', lines=True)

def push_away(tensor, target_tensor, weight):
    """Move ``tensor`` further from ``target_tensor`` along their difference.

    Args:
        tensor: The point to move.
        target_tensor: The point to move away from.
        weight: Factor applied to ``tensor - target_tensor``; ``0`` leaves
            ``tensor`` unchanged.

    Returns:
        ``tensor + weight * (tensor - target_tensor)``.
    """
    return tensor + weight * (tensor - target_tensor)
