import pandas as pd
import math
from collections import defaultdict

# Specify the file paths for the dataset files
users_path = "../datasets/ml-1m/users.dat"
ratings_path = "../datasets/ml-1m/ratings.dat"
movies_path = "../datasets/ml-1m/movies.dat"


# Define column names for each dataset
users_cols = ['user_id', 'gender', 'age', 'occupation', 'zip_code']
ratings_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
movies_cols = ['movie_id', 'title', 'genres']

# Load data into Pandas DataFrames.
# The files have no header row and use the two-character separator '::',
# which is why the slower pure-Python parser engine is requested.
users_df = pd.read_csv(users_path, sep='::', header=None, names=users_cols, encoding='latin-1', engine='python')
ratings_df = pd.read_csv(ratings_path, sep='::', header=None, names=ratings_cols, encoding='latin-1', engine='python')
movies_df = pd.read_csv(movies_path, sep='::', header=None, names=movies_cols, encoding='latin-1', engine='python')

# Optionally, convert DataFrames to NumPy arrays/matrices
# (taken before ratings_df is rebound below; not referenced again in this file)
users_array = users_df.values
ratings_array = ratings_df.values
movies_array = movies_df.values

# Join ratings with movies. No `on=` is given, so pandas joins on the column
# names both frames share (here only 'movie_id'). The original movie_id is
# then dropped in favour of the title.
ratings_df = pd.merge(ratings_df, movies_df)[['user_id', 'title', 'rating', 'timestamp']]
ratings_df["user_id"] = ratings_df["user_id"].astype(str)
# Dense 1-based integer ids, numbered in order of first appearance
user_lookup = {v: i+1 for i, v in enumerate(ratings_df['user_id'].unique())}
movie_lookup = {v: i+1 for i, v in enumerate(ratings_df['title'].unique())}
# NOTE: 'movie_id' is re-created here from the title lookup; it is NOT the
# movie_id read from ratings.dat.
ratings_df['movie_id'] = ratings_df['title'].map(movie_lookup)
ratings_df['user_int'] = ratings_df['user_id'].map(user_lookup)
# Number of ratings per user / per (remapped) movie
ratings_per_user = ratings_df.groupby('user_id').rating.count()
ratings_per_item = ratings_df.groupby('movie_id').rating.count()
sorted_ratings_per_item = ratings_per_item.sort_values(ascending=False)
# One [user_int, movie_id, rating] list per rating row
user_item_rating_tuples = ratings_df[['user_int', 'movie_id', 'rating']].values.tolist()

from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Wrap the complete ratings table (all users, all ratings) as a Surprise
# dataset, using a Reader with its default settings.
reader = Reader()
data = Dataset.load_from_df(ratings_df[['user_int', 'movie_id', 'rating']], reader)

# Retrieve the trainset
trainset = data.build_full_trainset()

from surprise import dump

file_path = 'surprise_model_full.dump'

# Load the saved model. dump.load returns a pair; only the second element
# (the algorithm object) is kept.
# NOTE(review): the dump is presumably a pickle - only load files from a
# trusted source. It is also assumed (not checked here) that this model was
# trained on the same data as `trainset` - confirm.
_, model = dump.load(file_path)
def get_last_n_ratings_by_user(
    df, n, min_ratings_per_user=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Return the n chronologically last ratings of every sufficiently active user.

    :param df: a DataFrame containing user item ratings
    :param n: the number of most recent rows to keep per user
    :param min_ratings_per_user: users with fewer rows than this are dropped
        entirely (users with exactly this many rows are kept)
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: a new DataFrame with the selected rows (original index labels
        preserved), sorted by the user column
    """

    def has_enough_ratings(user_rows):
        return len(user_rows) >= min_ratings_per_user

    active_users = df.groupby(user_colname).filter(has_enough_ratings)
    chronological = active_users.sort_values(timestamp_colname)
    latest_per_user = chronological.groupby(user_colname).tail(n)
    return latest_per_user.sort_values(user_colname)
def mark_last_n_ratings_as_validation_set(
    df, n, min_ratings=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Flag each user's chronologically last n ratings as the validation set.

    The DataFrame is modified in place: an 'is_valid' column is (re)created,
    set to True for the selected rows and False everywhere else.
    :param df: a DataFrame containing user item ratings
    :param n: the number of ratings per user to include in the validation set
    :param min_ratings: only users with at least this many ratings contribute
        validation rows
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the same df with the additional 'is_valid' column added
    """
    # Reset first so that a previous split on the same frame is discarded.
    df["is_valid"] = False
    validation_index = get_last_n_ratings_by_user(
        df,
        n,
        min_ratings,
        user_colname=user_colname,
        timestamp_colname=timestamp_colname,
    ).index
    df.loc[validation_index, "is_valid"] = True
    return df
# Hold out each user's single most recent rating: the call adds/overwrites the
# 'is_valid' column on ratings_df in place.
mark_last_n_ratings_as_validation_set(ratings_df, 1)
train_df = ratings_df[ratings_df.is_valid==False]
valid_df = ratings_df[ratings_df.is_valid==True]
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Surprise dataset built from the training rows only (last rating per user removed)
reader = Reader()
data = Dataset.load_from_df(train_df[['user_int', 'movie_id', 'rating']], reader)
# Retrieve the trainset
trainset1 = data.build_full_trainset()

from surprise import dump

file_path = 'surprise_model_last_item.dump'

# Load the saved model (second element of the pair returned by dump.load).
# NOTE(review): presumably trained on the same leave-last-1 split as
# `trainset1` - confirm; nothing here verifies it.
_, model_last_item = dump.load(file_path)
def get_last_n_ratings_by_user(
    df, n, min_ratings_per_user=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Select, per user, the n rows with the latest timestamps.

    Users having fewer than ``min_ratings_per_user`` rows are removed before
    the selection. The returned frame keeps the original index labels and is
    sorted by the user column.
    :param df: a DataFrame containing user item ratings
    :param n: the number of most recent rows to keep per user
    :param min_ratings_per_user: minimum row count a user needs to be kept
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: a new DataFrame holding the selected rows
    """
    kept = df.groupby(user_colname).filter(
        lambda rows_of_user: len(rows_of_user) >= min_ratings_per_user
    )
    kept = kept.sort_values(timestamp_colname)
    tail_rows = kept.groupby(user_colname).tail(n)
    tail_rows = tail_rows.sort_values(user_colname)
    return tail_rows
def mark_last_n_ratings_as_validation_set(
    df, n, min_ratings=1, user_colname="user_id", timestamp_colname="timestamp"
):
    """
    Mark the chronologically last n ratings of each user as the validation set.

    This is done in place by (re)creating the boolean 'is_valid' column on df.
    :param df: a DataFrame containing user item ratings
    :param n: the number of ratings per user to include in the validation set
    :param min_ratings: only users with at least this many ratings get
        validation rows
    :param user_colname: the name of the column containing user ids
    :param timestamp_colname: the name of the column containing the timestamps
    :return: the same df with the additional 'is_valid' column added
    """
    # Start from an all-False column so earlier markings do not leak through.
    df["is_valid"] = False
    held_out = get_last_n_ratings_by_user(
        df,
        n,
        min_ratings,
        user_colname=user_colname,
        timestamp_colname=timestamp_colname,
    )
    df.loc[held_out.index, "is_valid"] = True

    return df
# Hold out each user's five most recent ratings. This overwrites the
# 'is_valid' column written by the earlier leave-last-1 split.
mark_last_n_ratings_as_validation_set(ratings_df, 5)
train_df_5 = ratings_df[ratings_df.is_valid==False]
valid_df_5 = ratings_df[ratings_df.is_valid==True]
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise import SVD
from surprise import accuracy

# Surprise dataset built from the training rows only (last five ratings per user removed)
reader = Reader()
data = Dataset.load_from_df(train_df_5[['user_int', 'movie_id', 'rating']], reader)

# Retrieve the trainset
trainset5 = data.build_full_trainset()

from surprise import dump

file_path = 'surprise_model_last_five.dump'

# Load the saved model (second element of the pair returned by dump.load).
# NOTE(review): presumably trained on the same leave-last-5 split as
# `trainset5` - confirm; nothing here verifies it.
_, model_last_five = dump.load(file_path)
# Per-user rating lists of the three trainsets (full, leave-last-1,
# leave-last-5). Each value is iterated below as (item, rating) pairs.
user_item_matrix = trainset.ur
user_item_matrix_1 = trainset1.ur
user_item_matrix_5 = trainset5.ur
# Convert to a dictionary of dictionaries
user_item_rating_dict = defaultdict(dict)

# Populate user_item_rating_dict: user -> {item: rating} (full trainset)
for user, items_ratings in user_item_matrix.items():
    items_dict = {item: rating for item, rating in items_ratings}
    user_item_rating_dict[user] = items_dict
import torch
user_item_rating_tensor = {}

# Iterate over each user_id and their item-rating dictionary
for user_id, item_rating_dict in user_item_rating_dict.items():
    # Convert the item-rating dictionary to a list of tuples
    item_rating_list = list(item_rating_dict.items())
    
    # One float tensor of shape (1, 2) per (item_id, rating) pair
    item_rating_tensors = [torch.tensor([[item_id, rating]], dtype=torch.float) for item_id, rating in item_rating_list]
    
    # Stack along a new leading dimension -> shape (num_ratings, 1, 2).
    # torch.stack raises on an empty list, so every user needs >= 1 rating.
    item_rating_tensor = torch.stack(item_rating_tensors)
    
    # Store the item-rating tensor in the converted dictionary with the user_id as the key
    user_item_rating_tensor[user_id] = item_rating_tensor

# Convert to a dictionary of dictionaries
user_item_rating_dict1 = defaultdict(dict)

# Populate user_item_rating_dict1: user -> {item: rating} (leave-last-1 trainset)
for user, items_ratings in user_item_matrix_1.items():
    items_dict = {item: rating for item, rating in items_ratings}
    user_item_rating_dict1[user] = items_dict

import torch
user_item_rating_tensor1 = {}

# Iterate over each user_id and their item-rating dictionary
for user_id, item_rating_dict in user_item_rating_dict1.items():
    # Convert the item-rating dictionary to a list of tuples
    item_rating_list = list(item_rating_dict.items())
    
    # One float tensor of shape (1, 2) per (item_id, rating) pair
    item_rating_tensors = [torch.tensor([[item_id, rating]], dtype=torch.float) for item_id, rating in item_rating_list]
    
    # Stack along a new leading dimension -> shape (num_ratings, 1, 2)
    item_rating_tensor = torch.stack(item_rating_tensors)
    
    # Store the item-rating tensor in the converted dictionary with the user_id as the key
    user_item_rating_tensor1[user_id] = item_rating_tensor

# Convert to a dictionary of dictionaries
user_item_rating_dict5 = defaultdict(dict)

# Populate user_item_rating_dict5: user -> {item: rating} (leave-last-5 trainset)
for user, items_ratings in user_item_matrix_5.items():
    items_dict = {item: rating for item, rating in items_ratings}
    user_item_rating_dict5[user] = items_dict
    
import torch
user_item_rating_tensor5 = {}

# Iterate over each user_id and their item-rating dictionary
for user_id, item_rating_dict in user_item_rating_dict5.items():
    # Convert the item-rating dictionary to a list of tuples
    item_rating_list = list(item_rating_dict.items())
    
    # One float tensor of shape (1, 2) per (item_id, rating) pair
    item_rating_tensors = [torch.tensor([[item_id, rating]], dtype=torch.float) for item_id, rating in item_rating_list]
    
    # Stack along a new leading dimension -> shape (num_ratings, 1, 2)
    item_rating_tensor = torch.stack(item_rating_tensors)
    
    # Store the item-rating tensor in the converted dictionary with the user_id as the key
    user_item_rating_tensor5[user_id] = item_rating_tensor
    
import numpy as np
import torch
from tqdm import tqdm

def update_user_tensor_single(user_vector, items, ratings):
    """
    Solve the least-squares normal equations for a user factor vector.

    The design matrix is built from the rows of ``model_last_item.qi``
    selected by ``items``; the result is ``inv(Q^T Q) @ Q^T @ ratings``.
    :param user_vector: not used by the computation; kept for call-site compatibility
    :param items: indices into ``model_last_item.qi``
    :param ratings: tensor right-multiplied onto the projection; presumably
        float64 with one entry per item - confirm with callers
    :return: the solved factor vector as a tensor
    """
    item_factors = torch.tensor(
        [model_last_item.qi[item] for item in items], dtype=torch.float64
    )
    item_factors_t = item_factors.t()
    gram = item_factors_t @ item_factors
    projection = torch.inverse(gram) @ item_factors_t
    return projection @ ratings
def update_user_tensor(user_vector, items, ratings):
    """
    Solve the least-squares normal equations for a user factor vector.

    The design matrix is built from the rows of ``model.qi`` selected by
    ``items``; the result is ``inv(Q^T Q) @ Q^T @ ratings``.
    :param user_vector: not used by the computation; kept for call-site compatibility
    :param items: indices into ``model.qi``
    :param ratings: tensor right-multiplied onto the projection; presumably
        float64 with one entry per item - confirm with callers
    :return: the solved factor vector as a tensor
    """
    item_factors = torch.tensor(
        [model.qi[item] for item in items], dtype=torch.float64
    )
    item_factors_t = item_factors.t()
    gram = item_factors_t @ item_factors
    projection = torch.inverse(gram) @ item_factors_t
    return projection @ ratings
def update_user_tensor_five(user_vector, items, ratings):
    """
    Solve the least-squares normal equations for a user factor vector.

    The design matrix is built from the rows of ``model_last_five.qi``
    selected by ``items``; the result is ``inv(Q^T Q) @ Q^T @ ratings``.
    :param user_vector: not used by the computation; kept for call-site compatibility
    :param items: indices into ``model_last_five.qi``
    :param ratings: tensor right-multiplied onto the projection; presumably
        float64 with one entry per item - confirm with callers
    :return: the solved factor vector as a tensor
    """
    item_factors = torch.tensor(
        [model_last_five.qi[item] for item in items], dtype=torch.float64
    )
    item_factors_t = item_factors.t()
    gram = item_factors_t @ item_factors
    projection = torch.inverse(gram) @ item_factors_t
    return projection @ ratings

def get_all_recommendation_scores_stochastic(user_vector, sample_size, type_, beta=0.8):
    """
    Sample items without replacement with probability proportional to
    exp(beta * predicted_score).

    The predicted score of an item is the dot product of ``user_vector`` with
    that item's factor row in the selected model.
    :param user_vector: 1-D tensor; its dtype must match the model's item
        factors (presumably float64 - confirm with callers)
    :param sample_size: number of distinct items to draw
    :param type_: which model to score with: 'keepall' (``model``),
        'single' (``model_last_item``) or 'five' (``model_last_five``)
    :param beta: scale applied to the scores before the softmax
    :return: ``(sampled_items, sampled_scores)`` - a list of item indices
        (ints) and a list with the matching 0-dim score tensors
    :raises ValueError: if ``type_`` is not one of the three known values
    """
    if type_ == 'keepall':
        scoring_model = model
    elif type_ == 'single':
        scoring_model = model_last_item
    elif type_ == 'five':
        scoring_model = model_last_five
    else:
        # Previously an unknown type fell through to an unbound local variable.
        raise ValueError(
            f"unknown type_ {type_!r}; expected 'keepall', 'single' or 'five'"
        )

    # A single matrix-vector product scores every item at once.
    item_factors = torch.tensor(scoring_model.qi)
    predicted_ratings = item_factors @ user_vector

    # Probabilities proportional to exp(beta * predicted_rating). The scores
    # are detached and cast to float32 for sampling only.
    sampling_scores = predicted_ratings.detach().to(torch.float)
    probabilities = torch.softmax(beta * sampling_scores, dim=0)

    # Sample distinct item indices from that distribution
    sampled_indices = torch.multinomial(probabilities, sample_size, replacement=False)

    # Item ids are the row indices themselves; look up their scores directly.
    sampled_items = sampled_indices.tolist()
    sampled_scores = [predicted_ratings[idx] for idx in sampled_items]

    return sampled_items, sampled_scores

#Finalized
#What I compare with is the first value
# Print tensors with 7 decimal places from here on
torch.set_printoptions(precision=7)
print("Starting now...")

def get_all_preferences(user_vector_1, item_vector_copy_h):
    """
    Score every item for one user: 0.8 times the dot product of each item row
    with the user vector.

    The user vector is detached and cloned first, so no gradient flows back
    into it; gradients can still flow through the item matrix.
    :param user_vector_1: 1-D tensor of user factors
    :param item_vector_copy_h: 2-D tensor with one item factor row per item
    :return: 1-D tensor with one scaled score per item row
    """
    user_column = user_vector_1.detach().clone().unsqueeze(1)
    scaled_scores = 0.8 * torch.matmul(item_vector_copy_h, user_column)
    return scaled_scores.squeeze(1)

def update_item_tensor_one(item_vector, users, ratings):
    """
    Solve the least-squares normal equations for an item factor vector.

    The design matrix is built from the rows of ``model_last_item.pu``
    selected by ``users``; the result is ``inv(P^T P) @ P^T @ ratings``.
    :param item_vector: not used by the computation; kept for call-site compatibility
    :param users: indices into ``model_last_item.pu``
    :param ratings: tensor right-multiplied onto the projection; presumably
        float64 with one entry per user - confirm with callers
    :return: the solved factor vector as a tensor
    """
    user_factors = torch.tensor(
        [model_last_item.pu[user] for user in users], dtype=torch.float64
    )
    user_factors_t = user_factors.t()
    gram = user_factors_t @ user_factors
    projection = torch.inverse(gram) @ user_factors_t
    return projection @ ratings

def update_item_tensor_five(item_vector, users, ratings):
    """
    Solve the least-squares normal equations for an item factor vector.

    The design matrix is built from the rows of ``model_last_five.pu``
    selected by ``users``; the result is ``inv(P^T P) @ P^T @ ratings``.
    :param item_vector: not used by the computation; kept for call-site compatibility
    :param users: indices into ``model_last_five.pu``
    :param ratings: tensor right-multiplied onto the projection; presumably
        float64 with one entry per user - confirm with callers
    :return: the solved factor vector as a tensor
    """
    user_factors = torch.tensor(
        [model_last_five.pu[user] for user in users], dtype=torch.float64
    )
    user_factors_t = user_factors.t()
    gram = user_factors_t @ user_factors
    projection = torch.inverse(gram) @ user_factors_t
    return projection @ ratings

def update_item_tensor(item_vector, users, ratings):
    """
    Solve the least-squares normal equations for an item factor vector.

    The design matrix is built from the rows of ``model.pu`` selected by
    ``users``; the result is ``inv(P^T P) @ P^T @ ratings``.
    :param item_vector: not used by the computation; kept for call-site compatibility
    :param users: indices into ``model.pu``
    :param ratings: tensor right-multiplied onto the projection; presumably
        float64 with one entry per user - confirm with callers
    :return: the solved factor vector as a tensor
    """
    user_factors = torch.tensor(
        [model.pu[user] for user in users], dtype=torch.float64
    )
    user_factors_t = user_factors.t()
    gram = user_factors_t @ user_factors
    projection = torch.inverse(gram) @ user_factors_t
    return projection @ ratings

# Mean-squared-error loss module; not referenced anywhere else in the visible file.
mse = torch.nn.MSELoss()

def hellinger_distance(p, q):
    """
    Hellinger distance between two tensors of non-negative entries:
    ``||sqrt(p) - sqrt(q)||_2 / sqrt(2)``.

    :param p: tensor, e.g. a probability vector
    :param q: tensor of the same shape as p
    :return: 0-dim tensor holding the distance
    """
    root_gap = torch.sqrt(p) - torch.sqrt(q)
    euclidean_norm = torch.sqrt(torch.sum(root_gap ** 2))
    return euclidean_norm / torch.sqrt(torch.tensor(2.0))

# Per-item rating lists of the three trainsets (full, leave-last-1,
# leave-last-5). Each value is iterated below as (user, rating) pairs.
item_user_matrix = trainset.ir
item_user_matrix1 = trainset1.ir
item_user_matrix5 = trainset5.ir

# Convert to a dictionary of dictionaries
item_user_rating_dict = defaultdict(dict)

# Populate item_user_rating_dict: item -> {user: rating} (full trainset)
for item, users_ratings in item_user_matrix.items():
    users_dict = {user: rating for user, rating in users_ratings}
    item_user_rating_dict[item] = users_dict
    
import torch
item_user_rating_tensor = {}

# Iterate over each item_id and its user-rating dictionary
for item_id, user_rating_dict in item_user_rating_dict.items():
    # Convert the user-rating dictionary to a list of tuples
    user_rating_list = list(user_rating_dict.items())
    
    # One float tensor of shape (1, 2) per (user_id, rating) pair,
    # stacked into shape (num_ratings, 1, 2)
    user_rating_tensors = [torch.tensor([[user_id, rating]], dtype=torch.float) for user_id, rating in user_rating_list]
    user_rating_tensor = torch.stack(user_rating_tensors)

    # Store the user-rating tensor in the converted dictionary with the item_id as the key
    item_user_rating_tensor[item_id] = user_rating_tensor
    
# Convert to a dictionary of dictionaries
item_user_rating_dict1 = defaultdict(dict)

# Populate item_user_rating_dict1: item -> {user: rating} (leave-last-1 trainset)
for item, users_ratings in item_user_matrix1.items():
    users_dict = {user: rating for user, rating in users_ratings}
    item_user_rating_dict1[item] = users_dict
    
import torch
item_user_rating_tensor1 = {}

# Iterate over each item_id and its user-rating dictionary
for item_id, user_rating_dict in item_user_rating_dict1.items():
    # Convert the user-rating dictionary to a list of tuples
    user_rating_list = list(user_rating_dict.items())
    
    # One float tensor of shape (1, 2) per (user_id, rating) pair,
    # stacked into shape (num_ratings, 1, 2)
    user_rating_tensors = [torch.tensor([[user_id, rating]], dtype=torch.float) for user_id, rating in user_rating_list]
    user_rating_tensor = torch.stack(user_rating_tensors)
    
    # Store the user-rating tensor in the converted dictionary with the item_id as the key
    item_user_rating_tensor1[item_id] = user_rating_tensor
    
# Convert to a dictionary of dictionaries
item_user_rating_dict5 = defaultdict(dict)

# Populate item_user_rating_dict5: item -> {user: rating} (leave-last-5 trainset)
for item, users_ratings in item_user_matrix5.items():
    users_dict = {user: rating for user, rating in users_ratings}
    item_user_rating_dict5[item] = users_dict
    
import torch
item_user_rating_tensor5 = {}

# Iterate over each item_id and its user-rating dictionary
for item_id, user_rating_dict in item_user_rating_dict5.items():
    # Convert the user-rating dictionary to a list of tuples
    user_rating_list = list(user_rating_dict.items())
    
    # One float tensor of shape (1, 2) per (user_id, rating) pair,
    # stacked into shape (num_ratings, 1, 2)
    user_rating_tensors = [torch.tensor([[user_id, rating]], dtype=torch.float) for user_id, rating in user_rating_list]
    user_rating_tensor = torch.stack(user_rating_tensors)
    
    # Store the user-rating tensor in the converted dictionary with the item_id as the key
    item_user_rating_tensor5[item_id] = user_rating_tensor

import torch.nn.functional as F

def past_k_stability(adversary, curr_user, k):
    """
    Measure how far an adversary can shift one user's recommendation
    distribution by changing the adversary's own last k ratings.

    The adversary's last k ratings (taken from ``trainset.ur``) are optimised
    with Adam for 20 epochs. In every epoch the ratings are clamped to [1, 5],
    the factor vector of each affected item is re-solved from its existing
    raters in ``item_user_matrix5`` plus the adversary, and the Hellinger
    distance between the softmax of the resulting preference scores and the
    softmax of the unmodified scores is maximised (its negative is minimised).

    Uses the module-level ``model_last_five``. The model is only read (its
    factor arrays are copied into new tensors), so the dump does not need to
    be reloaded from disk on every call.
    NOTE(review): item ids come from ``trainset`` while raters come from
    ``item_user_matrix5`` (``trainset5``); this assumes both trainsets use the
    same inner ids - confirm.

    :param adversary: user id (key of ``trainset.ur``) whose ratings are perturbed
    :param curr_user: index into ``model_last_five.pu`` of the observed user
    :param k: number of most recent adversary ratings to perturb (> 0); fewer
        are used when the adversary has fewer ratings
    :return: the negative Hellinger distance computed in the last epoch
        (a float; may be NaN)
    :raises ValueError: if k is not positive or the adversary has no ratings
    """
    if k <= 0:
        # A slice [-0:] would silently select the whole rating history.
        raise ValueError("k must be a positive integer")

    recent_ratings = trainset.ur[adversary][-k:]
    if not recent_ratings:
        raise ValueError(f"adversary {adversary!r} has no ratings to perturb")
    chosen_items = [item for item, _ in recent_ratings]
    chosen_ratings = [rating for _, rating in recent_ratings]
    user_action = torch.tensor(chosen_ratings, requires_grad=True, dtype=torch.float64)
    optimizer = torch.optim.Adam([user_action], lr=0.8)

    user_vector = torch.tensor(model_last_five.pu[curr_user])

    # Reference distribution computed from the unmodified item factors
    initial_rec_list = get_all_preferences(user_vector, torch.tensor(model_last_five.qi))  # change as per k
    initial_rec_list_softmax = F.softmax(initial_rec_list, dim=0)

    # The existing raters and their ratings for each chosen item do not change
    # between epochs, so they are collected once.
    per_item_context = []
    for curr_item in chosen_items:
        existing = item_user_matrix5[curr_item]  # change as per k
        raters = [rater for rater, _ in existing]
        raters.append(adversary)
        known_ratings = torch.tensor([rating for _, rating in existing], dtype=torch.float64)
        per_item_context.append((curr_item, raters, known_ratings))

    n_epochs = 20
    distance_metric = None

    for epoch in tqdm(range(0, n_epochs)):
        user_action_clamped = user_action.clamp(1, 5)
        # Fresh copy each epoch: rows are overwritten in place below
        item_vector_copy_tensor = torch.tensor(model_last_five.qi)  # change as per k

        for position, (curr_item, raters, known_ratings) in enumerate(per_item_context):
            # Append the adversary's (clamped) rating as the last observation
            rating_list = torch.cat((known_ratings, user_action_clamped[position].unsqueeze(0)), dim=0)
            item_vector_copy_tensor[curr_item] = update_item_tensor_five(
                item_vector_copy_tensor[curr_item], raters, rating_list
            )

        final_rec_list = get_all_preferences(user_vector, item_vector_copy_tensor)
        final_rec_list_softmax = F.softmax(final_rec_list, dim=0)
        # Negated so that minimising with Adam maximises the distance
        distance_metric = -hellinger_distance(final_rec_list_softmax, initial_rec_list_softmax)

        distance_metric.backward()
        optimizer.step()
        optimizer.zero_grad()

    return distance_metric.item()

# Number of user / item factor rows in the leave-last-5 model
n_users = len(model_last_five.pu)
n_items = len(model_last_five.qi)

import random

def generate_random_lists(n_items, n_users, num_samples=30):
    """
    Draw random item and user indices (with replacement).

    :param n_items: item indices are drawn uniformly from 0 .. n_items - 1
    :param n_users: user indices are drawn uniformly from 0 .. n_users - 1
    :param num_samples: how many indices to draw for each list
    :return: ``(item_list, user_list)``; all item indices are drawn before
        any user index
    """

    def draw_indices(upper_bound):
        return [random.randint(0, upper_bound - 1) for _ in range(num_samples)]

    return draw_indices(n_items), draw_indices(n_users)

#ok time to create influential and non-influential users
ratings_count = {}
for user_id, ratings1 in trainset.ur.items():
    # Count the number of ratings for the user and store it in ratings_count
    ratings_count[user_id] = len(ratings1)

# Sort the ratings_count dictionary by the number of ratings in descending order
sorted_ratings_count = sorted(ratings_count.items(), key=lambda x: x[1], reverse=True)

# Get the top 30 influential users based on the number of ratings
top_influential_users = [user_id for user_id, _ in sorted_ratings_count[:30]]

# "Mid" users: ranks 300..329 in the same ordering
mid_users = [i for i, _ in sorted_ratings_count[300:330]]
print(top_influential_users)
print(mid_users)
# top_influential_items = [i for i in sorted_ratings_per_item[:30].keys()]

# mid_items = [i for i in sorted_ratings_per_item[200:230].keys()]

#items, users = generate_random_lists(n_items, n_users)
# Fixed lists of 30 adversary ids and 30 observed-user ids used by the
# experiments below (hard-coded rather than drawn with generate_random_lists)
advs = [88, 657, 811, 998, 24, 180, 411, 839, 466, 704, 70, 84, 842, 967, 254, 567, 506, 302, 658, 490, 844, 54, 323, 625, 119, 566, 923, 432, 29, 934]
users = [595, 843, 451, 674, 762, 732, 636, 788, 989, 16, 39, 525, 33, 151, 19, 777, 255, 405, 277, 410, 590, 776, 846, 207, 439, 103, 706, 36, 650, 878]
# Number of most recent adversary ratings perturbed in past_k_stability (its k)
time_till = 5

def save_list_to_file(lst, filename):
    """
    Write every element of ``lst`` to ``filename``, one ``str(item)`` per line.

    Missing parent directories of ``filename`` are created first, so results
    are not lost just because the output folder does not exist yet. An
    existing file is overwritten.
    :param lst: iterable of values to write
    :param filename: path of the text file to create
    """
    import os  # local import: this block cannot rely on a file-level `import os`

    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(filename, 'w') as file:
        file.writelines(str(item) + '\n' for item in lst)

# Experiment 1: top influential users act as adversaries against the fixed
# `users` list. NaN results are discarded.
all_stabilities1 = []
for p_user in tqdm(users):
    for p_item in top_influential_users:
        if p_user == p_item:
            continue
        dist_val = past_k_stability(p_item, p_user, time_till)
        print(dist_val)
        if not math.isnan(dist_val):
            all_stabilities1.append(dist_val)

# Experiment 2: mid-ranked users act as adversaries against the fixed `users` list.
all_stabilities2 = []
for p_user in tqdm(users):
    for p_item in mid_users:
        if p_user == p_item:
            continue
        dist_val = past_k_stability(p_item, p_user, time_till)
        if not math.isnan(dist_val):
            all_stabilities2.append(dist_val)

# Experiment 3: the fixed `advs` list attacks the top influential users.
all_stabilities3 = []
for p_user in tqdm(top_influential_users):
    for p_item in advs:
        if p_user == p_item:
            continue
        dist_val = past_k_stability(p_item, p_user, time_till)
        if not math.isnan(dist_val):
            all_stabilities3.append(dist_val)

# Experiment 4: the fixed `advs` list attacks the mid-ranked users.
all_stabilities4 = []
for p_user in tqdm(mid_users):
    for p_item in advs:
        if p_user == p_item:
            continue
        dist_val = past_k_stability(p_item, p_user, time_till)
        if not math.isnan(dist_val):
            all_stabilities4.append(dist_val)
        
import statistics

import math

def calculate_mean_std(lst):
    """
    Print the mean and standard deviation of ``lst``, computed two ways.

    The ``statistics`` module values use the sample standard deviation
    (n - 1 denominator); the manual values use the population standard
    deviation (n denominator), so the two deviations differ by design.

    An empty list prints a single notice instead of raising. With a single
    value the sample standard deviation is undefined and is reported as nan.
    :param lst: sized collection of numbers
    :return: None
    """
    count = len(lst)
    if count == 0:
        print("No data points: mean and standard deviation are undefined.")
        return

    # Using the statistics module
    mean = statistics.mean(lst)
    # statistics.stdev requires at least two data points
    std_dev = statistics.stdev(lst) if count > 1 else float("nan")

    # Calculating manually
    manual_mean = sum(lst) / count
    manual_std_dev = math.sqrt(sum((x - manual_mean) ** 2 for x in lst) / count)

    print(f"Mean (using statistics module): {mean}")
    print(f"Standard Deviation (using statistics module): {std_dev}")
    print(f"Mean (calculated manually): {manual_mean}")
    print(f"Standard Deviation (calculated manually): {manual_std_dev}")

# Print summary statistics for each experiment, then persist its raw values.
for _stabilities, _output_path in (
    (all_stabilities1, 'aggregate/past_stability_top_adv.txt'),
    (all_stabilities2, 'aggregate/past_stability_mid_adv.txt'),
    (all_stabilities3, 'aggregate/past_stability_top_usr.txt'),
    (all_stabilities4, 'aggregate/past_stability_mid_usr.txt'),
):
    calculate_mean_std(_stabilities)
    save_list_to_file(_stabilities, _output_path)
