import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid,Coauthor
from torch_geometric.loader import NeighborSampler
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_geometric.utils import to_networkx
import networkx as nx
import random
from torch.nn import Linear
from sklearn.metrics import f1_score
import time
from lib_utils import utils
from lib_gnn_model.gcn.gcn_conv_batch import GCNConvBatch
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
from torch_scatter import scatter_add
from sklearn.metrics import roc_auc_score

# Banner line that visually separates this run's console output.
print("-----------------------------------------------------")

print(torch.__version__)
# Run on the second CUDA device ("cuda:1") when CUDA is available, else on CPU.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Experiment settings shared by the rest of the script.
r = 100                # number of training epochs for each GCN model
un_ratio = 0.2         # fraction of the training nodes selected for unlearning
print(un_ratio)
batch_size = 512       # mini-batch size handed to the NeighborSampler train loaders
target_model = 'GCN'   # selects the GCN/SGC code paths (edge weights are passed to the model)
# NOTE(review): `lr` and `weight_decay` are not referenced anywhere else in the
# visible file; the Adam optimizers spell out the same numbers literally.
lr = 0.05
weight_decay = 0.0001
# Load the dataset (the Cora alternative is kept for reference).
# dataset = Planetoid(root='data/Cora', name="Cora")  # root: storage path, name: dataset name
dataset = Coauthor(root='data/CS', name="CS")
print(dataset)
print("未使用零值域分解")
# 查看数据的基本情况
# print("网络数据包含的类数量:", dataset.num_classes)
# print("网络数据边的特征数量:", dataset.num_edge_features)
# print("网络数据边的数量:", dataset[0].edge_index.shape[1] / 2)  # 除以2是OOC的组织形式
# print("网络数据节点的特征数量:", dataset.num_node_features)
# print("网络数据节点的数量:", dataset[0].x.shape[0])
# print("网络节点标签的数量:", len(dataset[0].y))



def _set_random_seed(seed=2022):
    
    np.random.seed(seed)
    random.seed(seed)

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    print("set pytorch seed")

# _set_random_seed(20221012)
# _set_random_seed(20250301)
# _set_random_seed(20250304)
# _set_random_seed(20250306)
# _set_random_seed(20250309)
# _set_random_seed(20250312)
import datetime

# Derive a per-run seed from the wall-clock time: format the current time,
# strip the "_", ":" and "-" separators so only digits remain, and reduce the
# resulting integer modulo 2**32 before seeding all generators with it.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
seed = int(timestamp.replace("_", "").replace(":", "").replace("-", "")) % (2**32)
print("seed:", seed)
_set_random_seed(seed)

# visualization
def visualize(out, color, filename):
    """Embed ``out`` in 2-D with t-SNE, scatter-plot it and save the figure.

    Args:
        out: Tensor of per-node outputs; it is detached and moved to the CPU
            before being handed to t-SNE.
        color: Tensor used as the per-point color values (moved to the CPU).
        filename: Path the figure is written to.
    """
    z = TSNE(n_components=2).fit_transform(out.detach().cpu().numpy())
    fig = plt.figure(figsize=(10, 10))
    plt.grid(True, linestyle='--', color='gray', linewidth=0.5)
    plt.scatter(z[:, 0], z[:, 1], s=18, c=color.cpu(), alpha=0.9, cmap="Set2")
    # Save before showing: with an interactive backend the figure can be torn
    # down once show() returns, so saving afterwards may write an empty canvas.
    plt.savefig(filename, bbox_inches='tight')
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


# train from all
# batch version
class GCNNet(torch.nn.Module):
    """Two-layer GCN built from ``GCNConvBatch`` layers.

    The input and output widths are read from the module-level ``dataset``
    (``num_features`` and ``num_classes``). ``forward`` consumes the per-layer
    adjacency list produced by a neighbor sampler; ``inference`` evaluates the
    network layer by layer over a subgraph loader.
    """

    def __init__(self, hidden_channels):
        super().__init__()
        self.num_layers = 2
        self.convs = torch.nn.ModuleList()
        layer_dims = (
            (dataset.num_features, hidden_channels),
            (hidden_channels, dataset.num_classes),
        )
        for src_dim, dst_dim in layer_dims:
            self.convs.append(
                GCNConvBatch(src_dim, dst_dim, cached=False, add_self_loops=True, bias=False)
            )

    def forward(self, x, adjs, edge_weight):
        """Return log-softmax class scores for the target nodes of a batch.

        Args:
            x: Features of the sampled nodes for this batch.
            adjs: Iterable of ``(edge_index, e_id, size)`` triples, one per layer.
            edge_weight: Per-edge weights, indexed with each layer's ``e_id``.
        """
        final_idx = self.num_layers - 1
        for layer_idx, (edge_index, e_id, size) in enumerate(adjs):
            # The target nodes of a layer are the leading rows of its input.
            targets = x[:size[1]]
            x = self.convs[layer_idx]((x, targets), edge_index, edge_weight=edge_weight[e_id])
            if layer_idx == final_idx:
                continue
            x = F.dropout(F.relu(x), p=0.5, training=self.training)
        return F.log_softmax(x, dim=1)

    def reset_parameters(self):
        """Re-initialise the first ``num_layers`` convolution layers."""
        for conv in list(self.convs)[:self.num_layers]:
            conv.reset_parameters()

    def inference(self, x_all, subgraph_loader, edge_weight, device):
        """Evaluate the network one layer at a time over ``subgraph_loader``.

        Returns:
            ``(x_last, x_all)``: the tensor that was fed into layer index 1
            (the ReLU-activated output of the first layer) and the raw output
            of the final layer. Both per-layer results are gathered on the CPU.
        """
        x_last = []
        for layer_idx, conv in enumerate(list(self.convs)[:self.num_layers]):
            if layer_idx == 1:
                x_last = x_all  # remember the input of the second layer
            is_final = layer_idx == self.num_layers - 1
            xs = []
            for _, n_id, adj in subgraph_loader:
                edge_index, e_id, size = adj.to(device)
                x = x_all[n_id].to(device)  # previous layer's result = this layer's input
                x = conv((x, x[:size[1]]), edge_index, edge_weight=edge_weight[e_id])
                if not is_final:
                    x = F.relu(x)
                xs.append(x.cpu())
            # Concatenate along the node axis to stitch the batches together.
            x_all = torch.cat(xs, dim=0)
        return x_last, x_all

# ---- "Train from all": model, data split, loaders and edge weights ----
model = GCNNet(hidden_channels=16).to(device)
# NOTE(review): `criterion` is not used by the training loops in this file,
# which call F.nll_loss directly.
criterion = torch.nn.CrossEntropyLoss()                                   # Define loss criterion.
opt = torch.optim.Adam(model.parameters(), lr=0.05, weight_decay=0.0001)  # Define optimizer.
data = dataset[0].to(device)
# 90/10 node split with a fixed random_state; the masks are boolean tensors
# built from NumPy arrays after `data` was moved to `device`.
train_indices, test_indices = train_test_split(np.arange((data.num_nodes)), test_size=0.1, random_state=100)
data.train_mask = torch.from_numpy(np.isin(np.arange(data.num_nodes), train_indices))
data.test_mask = torch.from_numpy(np.isin(np.arange(data.num_nodes), test_indices))

# Recompute the training indices from the mask and filter the edges with them.
# presumably filter_edge_index keeps only edges between training nodes -- verify in lib_utils.
train_indices = np.nonzero(data.train_mask.cpu().numpy())[0]
edge_index = utils.filter_edge_index(data.edge_index, train_indices, reindex=False)


# print("节点数量 (N):", dataset[0].x.shape[0])
# print("边数量 (E):", dataset[0].edge_index.shape[1] // 2)
# print("输入特征维度 (F_in):", dataset.num_node_features)
# print("类别数量 (C):", dataset.num_classes)
# print("隐藏层维度 (H1, H2):", 16)
# print("训练节点数量:", data.train_mask.sum().item())
# print("测试节点数量:", data.test_mask.sum().item())

# Fall back to a dummy pair of edges when filtering left no edges at all.
if edge_index.shape[1] == 0:
    edge_index = torch.tensor([[1, 2], [2, 1]])

# Two-hop neighbor sampling (5 neighbors per hop) over the training nodes.
train_loader = NeighborSampler(
    edge_index, node_idx=data.train_mask,
    sizes=[5, 5], num_nodes=data.num_nodes,
    batch_size=batch_size, shuffle=True,
    num_workers=0)

# GCN normalisation coefficients for the full edge set (no self loops added
# here); the model indexes them with the sampler's edge ids.
edge_weight = None
if target_model in ['GCN','SGC']:
    _, edge_weight = gcn_norm(
        data.edge_index,
        edge_weight=None,
        num_nodes=data.x.shape[0],
        add_self_loops=False)
# Keep a second name for the full-graph weights: `edge_weight` is rebound later.
edge_weight_1 = edge_weight

# Inference loader: all nodes (node_idx=None), all neighbors (sizes=[-1]), one hop.
test_loader = NeighborSampler(
            data.edge_index, node_idx=None,
            sizes=[-1], num_nodes=data.num_nodes,
            batch_size=64, shuffle=False,
            num_workers=0)
# Keep a second name for the full-graph loader: `test_loader` is rebound later.
test_loader_1 = test_loader

def evaluate_model(model, _data, filename, test_loader):
    """Run layer-wise inference, plot the outputs and report accuracy.

    Args:
        model: Network exposing ``inference``; switched to eval mode here.
        _data: Graph data object providing ``x``, ``y``, ``train_mask`` and
            ``test_mask``.
        filename: Path the t-SNE plot is saved to.
        test_loader: Subgraph loader passed to ``model.inference``.

    Returns:
        ``[train_accuracy, test_accuracy]`` as Python floats. The list is also
        printed.
    """
    model.eval()
    if target_model in ['GCN', 'SGC']:
        # Uses the module-level `edge_weight` as it is bound at call time.
        _, out = model.inference(_data.x, test_loader, edge_weight, device)
    else:
        _, out = model.inference(_data.x, test_loader, device)
    visualize(out, color=_data.y, filename=filename)

    y_true = _data.y.cpu().unsqueeze(-1)
    y_pred = out.argmax(dim=-1, keepdim=True)
    results = []
    for mask in [_data.train_mask, _data.test_mask]:  # train accuracy first, then test
        # Predictions and labels must live on the same device as the mask.
        y_pred = y_pred.to(mask.device)
        y_true = y_true.to(mask.device)
        correct = int(y_pred[mask].eq(y_true[mask]).sum())
        results.append(correct / int(mask.sum()))

    print("results")
    print(results)
    return results
    
# ---- Train the reference model on the full training set ----
model.reset_parameters()
for epoch in range(r):
    model.train()
    # The per-batch size is unpacked under its own name. Reusing `batch_size`
    # as the loop target would overwrite the module-level setting with the
    # size of the last (typically smaller) mini-batch, and the loader created
    # later for the retrained model reads that global.
    for cur_batch_size, n_id, adjs in train_loader:
        adjs = [adj.to(device) for adj in adjs]
        opt.zero_grad()

        if target_model in ['GCN', 'SGC']:
            out = model(data.x[n_id], adjs, edge_weight)
        else:
            out = model(data.x[n_id], adjs)

        # The first `cur_batch_size` entries of n_id are the batch's target nodes.
        loss = F.nll_loss(out, data.y[n_id[:cur_batch_size]])
        loss.backward()
        opt.step()

# Full-graph inference with the trained reference model; keep both the hidden
# representation (input of the second layer) and the final outputs.
model.eval()
temp_out, out = model.inference(data.x, test_loader_1, edge_weight, device)
KL_xy_all = out
temp_KL_xy_all = temp_out
filename = 'gcn_train_from_all_batch.pdf'
train_from_all_results = evaluate_model(model, data, filename, test_loader_1)

# NOTE(review): this score uses average="micro" while the other F1 scores in
# this file use average="weighted" -- confirm the difference is intended.
test_f1_train_from_all = f1_score(
        data.y[data.test_mask].cpu().numpy(), 
        out[data.test_mask].argmax(axis=1).cpu().numpy(), 
        average="micro"
)
# train from all
    
# train from scratch
class GCNNet_scratch(GCNNet):
    """Network retrained from scratch on the reduced training set.

    The architecture, ``forward``, ``reset_parameters`` and ``inference`` are
    exactly those of ``GCNNet``; the separate class name only distinguishes
    the retrained model from the reference model, so everything is inherited
    instead of being duplicated.
    """

# ---- "Train from scratch": retrain on the training set minus the removed nodes ----
model_scratch = GCNNet_scratch(hidden_channels=16).to(device)
# NOTE(review): `criterion` is not used below; the loop calls F.nll_loss.
criterion = torch.nn.CrossEntropyLoss()                                   # Define loss criterion.
opt = torch.optim.Adam(model_scratch.parameters(), lr=0.05, weight_decay=0.0001)  # Define optimizer.
# data_scratch = dataset[0].to(device)
# data_scratch.train_mask = torch.from_numpy(np.isin(np.arange(data_scratch.num_nodes), train_indices))                # new training nodes
# data_scratch.test_mask  = torch.from_numpy(np.isin(np.arange(data_scratch.num_nodes), test_indices))                 # new test nodes
# NOTE(review): `data_scratch` is the return value of `data.to(device)`, not an
# explicit copy. If that call returns the same object, the train_mask and
# edge_index edits below also change `data` -- confirm this is intended.
data_scratch = data.to(device)

train_mask = data_scratch.train_mask                    # current training mask
train_nodes = torch.nonzero(train_mask).squeeze(1)      # indices of the nodes whose mask entry is True
num_nodes_to_remove = int(len(train_nodes) * un_ratio)  # number of training nodes to remove (un_ratio of them)
# Python's RNG is reseeded with a fixed value here, so the sampled set of
# removed nodes does not depend on the time-based seed set earlier.
random.seed(1)
nodes_to_remove = random.sample(list(train_nodes), num_nodes_to_remove)                     # randomly pick the nodes to remove
nodes_to_remove = torch.tensor(nodes_to_remove, dtype=torch.long).to(device)

train_mask[nodes_to_remove] = False                                                         # in-place: drop the removed nodes from the mask
data_scratch.train_mask = train_mask                                                        # write the mask back to the data object
# Nodes that are either still in training or in the test split.
combined_mask = torch.logical_or(data_scratch.train_mask, data_scratch.test_mask).to(device)
train_indices = np.nonzero(data_scratch.train_mask.cpu().numpy())[0]                        # remaining training indices

# Drop every edge that touches a removed node. Each comparison broadcasts the
# edge endpoints against all removed nodes (an [num_edges, num_removed] matrix).
edge_index_0 = data_scratch.edge_index
left_mask  = torch.any(torch.eq(edge_index_0[0].unsqueeze(1), nodes_to_remove), dim=1)
right_mask = torch.any(torch.eq(edge_index_0[1].unsqueeze(1), nodes_to_remove), dim=1)
mask = ~(left_mask | right_mask)
data_scratch.edge_index = edge_index_0[:, mask]                       # keep only edges not involving removed nodes

# presumably restricts the remaining edges to the remaining training nodes -- verify in lib_utils.
edge_index = utils.filter_edge_index(data_scratch.edge_index, train_indices, reindex=False) # edges used for training

num_true_nodes = torch.sum(data_scratch.train_mask).item()
print("训练数据集的节点数量",num_true_nodes)

print("节点数量 (N):", dataset[0].x.shape[0])
print("边数量 (E):", dataset[0].edge_index.shape[1] // 2)
print("输入特征维度 (F_in):", dataset.num_node_features)
print("类别数量 (C):", dataset.num_classes)
print("隐藏层维度 (H1, H2):", 16)
print("训练节点数量:", data.train_mask.sum().item())
print("测试节点数量:", data.test_mask.sum().item())

# Fall back to a dummy pair of edges when filtering left no edges at all.
if edge_index.shape[1] == 0:
    edge_index = torch.tensor([[1, 2], [2, 1]])

# Two-hop neighbor sampling (5 neighbors per hop) over the remaining training nodes.
train_loader_scratch = NeighborSampler(
    edge_index, node_idx=data_scratch.train_mask,
    sizes=[5, 5], num_nodes=data_scratch.num_nodes,
    batch_size=batch_size, shuffle=True,
    num_workers=0)

# GCN normalisation coefficients for the reduced edge set; this rebinds the
# module-level `edge_weight` that later code reads.
edge_weight = None
if target_model in ['GCN','SGC']:
    _, edge_weight = gcn_norm(
        data_scratch.edge_index,
        edge_weight=None,
        num_nodes=data.x.shape[0],
        add_self_loops=False)

# Inference loader over the reduced edge set; rebinds `test_loader`.
test_loader = NeighborSampler(
            data_scratch.edge_index, node_idx=None,
            sizes=[-1], num_nodes=data.num_nodes,
            batch_size=64, shuffle=False,
            num_workers=0)
test_loader_2 = test_loader
    
model_scratch.reset_parameters()
for epoch in range(r):
    model_scratch.train()
    for batch_size, n_id, adjs in train_loader_scratch:
        adjs = [adj.to(device) for adj in adjs]
        opt.zero_grad()
        if target_model in ['GCN', 'SGC']:
            out = model_scratch(data_scratch.x[n_id], adjs, edge_weight)
        else:
            out = model_scratch(data_scratch.x[n_id], adjs)
        # The first `batch_size` entries of n_id are the batch's target nodes.
        loss = F.nll_loss(out, data_scratch.y[n_id[:batch_size]])
        loss.backward()
        opt.step()

# Inference with the retrained model over the reduced graph; keep the hidden
# representation and the final outputs for the later comparisons.
model_scratch.eval()
temp_out, out = model_scratch.inference(data.x, test_loader, edge_weight, device)
KL_xy_scratch = out  
temp_KL_xy_scratch = temp_out
filename = 'train_from_scratch_batch.pdf'
train_from_scratch_results = evaluate_model(model_scratch, data_scratch, filename, test_loader)

out = out.to(data.test_mask.device)  # make sure `out` and test_mask are on the same device

test_f1_train_from_scratch = f1_score(
        data.y[data.test_mask].cpu().numpy(), 
        out[data.test_mask].argmax(axis=1).cpu().numpy(), 
        average="weighted"
)
# train from scratch

# unlearning
# Per-node degree: every column of edge_index contributes one count to each
# of its two endpoints. Counting the flattened index tensor with bincount
# gives the same totals as visiting the columns one by one, without a Python
# loop over tensor elements. The result is an int64 tensor of length N.
edge_index = dataset[0].edge_index
degree = torch.bincount(edge_index.reshape(-1), minlength=dataset[0].x.shape[0])

# High-degree node selection. NOTE: despite the "top_10" names, ratio = 0.8
# selects the (1 - ratio) = 20% of nodes with the largest degree.
ratio = 0.8
# NOTE(review): the quantile threshold is not referenced again in the visible file.
top_10_percent_threshold = torch.quantile(degree.float(), ratio)
top_10_percent_indices = torch.argsort(degree, descending=True)[:int(dataset[0].x.shape[0] * (1-ratio))]
top_10_mask = torch.zeros(dataset[0].x.shape[0], dtype=torch.bool).to(device)
top_10_mask[top_10_percent_indices] = 1
# High-degree nodes that are still marked in `train_mask`.
intersection_mask = top_10_mask & train_mask

class MLP(torch.nn.Module):
    """Two-layer perceptron: Linear -> Sigmoid -> Linear.

    Note that the activation attribute is called ``relu`` but holds a
    ``torch.nn.Sigmoid`` module.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = torch.nn.Linear(input_dim, hidden_dim)
        self.relu = torch.nn.Sigmoid()
        self.fc2 = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map ``x`` through fc1, the sigmoid activation and fc2."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Extract the second layer's linear map from the reference model.
model.eval()                                  # evaluation mode
conv2_linear_layer = model.convs[1]           # second convolution layer of the reference model
H = conv2_linear_layer.lin.weight.to(device)  # shape: [out_features, in_features]
H_pinv = torch.pinverse(H).to(device)         # shape: [in_features, out_features]
bias = conv2_linear_layer.lin.bias            # bias of the linear map (may be absent)

print("bias:",bias)
print(type(bias))
# Identity comparison: `== None` on a tensor goes through tensor comparison
# instead of testing whether the bias is missing.
if bias is None:
    bias = torch.zeros(dataset.num_classes).to(device)

# edge_n = degree[nodes_to_remove].sum()/num_nodes_to_remove
# Average degree of the removed nodes, computed on `device`.
degree = degree.to(device)
nodes_to_remove = nodes_to_remove.to(device)
edge_n = degree[nodes_to_remove].sum() / num_nodes_to_remove

print("=====edge_n=====")
print(edge_n)
# Scaling factor applied to the aggregated correction in GCN_Unlearn.forward.
w_fix = 1 + 1/edge_n
print("=====w_fix=====")
print(w_fix.cpu().numpy())

# hadamard_edge_n = degree[nodes_to_remove]/num_nodes_to_remove
# hadamard_w_fix = 1 + 1/hadamard_edge_n
from torch_geometric.nn.conv.gcn_conv import GCNConv
class GCN_Unlearn(torch.nn.Module):
    """Edge-wise correction module applied to a trained GCN's outputs.

    Two small MLPs produce a per-edge term in output space (``influ_ji``) and
    a per-edge term in hidden space (``influ_ij``). The module also keeps
    detached copies of the module-level ``H``, ``H_pinv`` and ``bias``
    (weight of the reference model's second layer, its pseudo-inverse and its
    bias); these are plain tensors, not trainable parameters.
    """

    def __init__(self, hidden_channels):  # hidden_channels is the width of the first layer
        super().__init__()
        # Fixed seed so the two MLPs always start from the same initialisation.
        torch.manual_seed(1234567)
        self.hidden_channels = hidden_channels

        # Output space -> hidden space.
        self.influ_ij = MLP(dataset.num_classes, int((dataset.num_classes * hidden_channels) ** 0.5), hidden_channels)
        # Concatenated hidden features of an edge's endpoints -> output space.
        self.influ_ji = MLP(2 * hidden_channels, int((2*hidden_channels*dataset.num_classes) ** 0.5), dataset.num_classes)
        self.H = H.detach().to(device)
        self.H_pinv = H_pinv.detach().to(device)
        self.bias = bias.detach().to(device)

    def forward(self, temp_x, x, edge_index, unlearn):
        """Correct ``x`` and return a per-node hidden-space aggregate.

        Args:
            temp_x: Per-node hidden features, indexed by the edge endpoints.
            x: Per-node outputs to be corrected.
            edge_index: ``[2, num_edges]`` index tensor, unpacked as (row, col).
            unlearn: 1-D tensor with the ids of the nodes being unlearned.

        Returns:
            ``(x_corrected, hidden_aggregate)``: ``x`` minus ``w_fix`` times the
            per-row sum of the output-space terms of the edges whose ``col``
            endpoint is an unlearned node, and the per-row sum of the
            hidden-space terms over all edges.
        """
        row, col = edge_index
        # Endpoint features of every edge, concatenated: [num_edges, 2 * in_channels].
        x_cat_ij = torch.cat([temp_x[row], temp_x[col]], dim=-1)

        influ_ji = self.influ_ji(x_cat_ij)  # per-edge term in output space
        influ_ij = self.influ_ij(influ_ji)  # per-edge term in hidden space

        # Hidden-space term: pull the output-space term back through the
        # pseudo-inverse and add the part of influ_ij that H maps to zero.
        I = torch.eye(self.H_pinv.size(0), device=device)
        null_space_proj = I - self.H_pinv @ self.H
        influ_ij_RND = (influ_ji - self.bias) @ self.H_pinv.T + influ_ij @ null_space_proj

        # Only edges whose `col` endpoint is an unlearned node contribute to
        # the correction; isin avoids a [num_edges, num_unlearn] comparison.
        mask = torch.isin(col, unlearn)
        minus_influ = torch.zeros_like(influ_ji)
        minus_influ[mask] = influ_ji[mask]

        # Sum the per-edge terms into their `row` node.
        aggregated_matrix = scatter_add(
            src=minus_influ,       # [num_edges, out_channels]
            index=row,             # destination node of every edge
            dim=0,                 # aggregate along the node axis
            dim_size=x.size(0)     # one output row per node
        )
        aggregated_matrix_1 = scatter_add(
            src=influ_ij_RND,      # [num_edges, hidden_channels]
            index=row,
            dim=0,
            dim_size=x.size(0)
        )
        x = x - w_fix * aggregated_matrix
        return x, aggregated_matrix_1
    
# KL divergence on log-probabilities: a scalar version for reporting and an
# element-wise version so the loss can be weighted per node.
KLloss = torch.nn.KLDivLoss(reduction="mean", log_target=True)
KLloss_p = torch.nn.KLDivLoss(reduction='none', log_target=True)

model_Unlearn = GCN_Unlearn(hidden_channels=16).to(device)
opt = torch.optim.Adam(model_Unlearn.parameters(), lr=0.01, weight_decay = 5e-4)  # Define optimizer.

x, edge_index, y = data.x, data.edge_index, data.y
unlearning_node = nodes_to_remove
    
# "Middle" output: the reference model evaluated with test_loader_2 and the
# `edge_weight` bound at this point (both derived from the reduced edge set).
model.eval()
temp_out, out = model.inference(data.x, test_loader_2, edge_weight, device)
middle_out = out

middle_out = middle_out.to(device)

test_f1_test_middle = f1_score(
    data.y[data.test_mask].cpu().numpy(), 
    middle_out[data.test_mask].argmax(axis=1).cpu().numpy(), 
    average="weighted"
)

# original_neighbors = out[intersection_mask].to(device)
# Reference outputs on the high-degree training nodes (targets for the neighbor KL term).
original_neighbors = out[intersection_mask.to(out.device)].to(device)

# NOTE(review): the first assignment is immediately overwritten, so
# `original_unlearned` ends up holding the hidden features of ALL nodes.
original_unlearned = temp_out[unlearning_node.to(temp_out.device)].to(device)
original_unlearned = temp_out.to(device)
    
results_unlearn = []
out = None
# Full edge set of the untouched dataset, used by the unlearning module.
data_all_edge = dataset[0].edge_index.to(device)



# ---- Optimise the unlearning module for 10 steps and time it ----
start_time = time.time()
for i in range(10):
    model_Unlearn.train()
    opt.zero_grad()
    # Inputs: hidden features of the reference model (full graph) and the
    # "middle" outputs; the module returns corrected outputs and a
    # hidden-space aggregate.
    output, hidden_output = model_Unlearn.forward(temp_KL_xy_all.to(device), middle_out.to(device), data_all_edge, unlearning_node)
#     output, hidden_output = model_Unlearn.forward(temp_KL_xy_all.to(device), KL_xy_all.to(device), data_all_edge, unlearning_node)
    output_neighbors = output[intersection_mask].to(device)
    output_unlearned = hidden_output.to(device)
    
    # Step 3: Compute KL divergence losses
    # 3.1: element-wise KL on the high-degree training nodes, weighted by node degree.
    weighted_kl_loss_neighbors = KLloss_p(F.log_softmax(original_neighbors.detach(), dim=-1), F.log_softmax(output_neighbors, dim=-1)) * degree[intersection_mask].view(-1, 1).to(device)
    kl_loss_neighbors = weighted_kl_loss_neighbors.mean()  

    # 3.2: KL divergence for unlearned nodes
    # NOTE(review): as written this compares the hidden features of all nodes
    # (`original_unlearned` covers every node), weighted by node degree.
    weighted_kl_loss_unlearned = KLloss_p(F.log_softmax(original_unlearned.detach(), dim=-1), F.log_softmax(output_unlearned, dim=-1)) * degree.view(-1, 1).to(device)
    kl_loss_unlearned = weighted_kl_loss_unlearned.mean()
 
    # Step 4: Compute classification loss
    # Negated cross-entropy on the nodes that are in neither the reduced train
    # mask nor the test mask; minimising it increases the cross-entropy there.
    # F.cross_entropy applies log_softmax itself, so the explicit one is redundant.
    classification_loss = -F.cross_entropy(F.log_softmax(output[~combined_mask], dim=-1), y[~combined_mask])

    # Step 5: Combine losses
    kamma = 0.4   # weight of the combined KL terms
    tamma = 0.6   # weight of the classification term
    alpha = 0.8   # share of the neighbor KL term
    beta = 0.2    # share of the hidden-feature KL term
    loss = tamma * classification_loss + kamma * (alpha * kl_loss_neighbors + beta * kl_loss_unlearned) 
    # NOTE(review): retain_graph=True is presumably needed because the inputs
    # come from an inference pass that was not run under no_grad -- confirm.
    loss.backward(retain_graph=True)
    opt.step()

end_time = time.time()
print(f"Elapsed time: {end_time - start_time} seconds")
# ---- Evaluate the unlearning module and print the comparison report ----
model_Unlearn.eval()
_data = data

output, _  = model_Unlearn(temp_KL_xy_all.to(device), middle_out.to(device), data_all_edge, unlearning_node)
y_true = _data.y.cpu().unsqueeze(-1).to(device)
y_pred = output.argmax(dim=-1, keepdim=True).to(device)
results = []
for mask in [_data.train_mask, _data.test_mask]:  # train accuracy first, then test
    results += [int(y_pred[mask].eq(y_true[mask]).sum()) / int(mask.sum())]
print("==============results==============")
print(results)
results_unlearn = results

# Same accuracy computation for the "middle" outputs.
y_pred = middle_out.argmax(dim=-1, keepdim=True).to(device)
results_middle = []
for mask in [_data.train_mask, _data.test_mask]:  # train accuracy first, then test
    results_middle += [int(y_pred[mask].eq(y_true[mask]).sum()) / int(mask.sum())]

test_f1_test_unlearn = f1_score(
    data.y[data.test_mask].cpu().numpy(), 
    output[data.test_mask].argmax(axis=1).cpu().numpy(), 
    average="weighted"
)

# Step 6: Evaluation
mask = combined_mask.to(KL_xy_scratch.device)  # keep the mask on the same device as the scratch outputs

# Legend for the printed pairs, as used by the calls below:
#   a = KL_xy_all (reference model), b = KL_xy_scratch (retrained model),
#   c = output (unlearning module),  d = middle_out (reference model, reduced graph).
print("combined_mask: ")
# `i` is the last index of the optimisation loop above.
print("num: ", i)
print("KL divergence of a and b:{}".format(KLloss(F.log_softmax(KL_xy_scratch[mask].detach(), dim=-1), F.log_softmax(KL_xy_all[mask].detach(), dim=-1))))
print("KL divergence of b and c:{}".format(KLloss(F.log_softmax(KL_xy_scratch[mask].detach().to(device), dim=-1), F.log_softmax(output[mask].detach().to(device), dim=-1))))
print("KL divergence of a and c:{}".format(KLloss(F.log_softmax(KL_xy_all[mask].detach().to(device), dim=-1), F.log_softmax(output[mask].detach().to(device), dim=-1))))
print("KL divergence of b and d:{}".format(KLloss(F.log_softmax(KL_xy_scratch[mask].detach().to(device), dim=-1), F.log_softmax(middle_out[mask].detach().to(device), dim=-1))))
print("KL divergence of d and c:{}".format(KLloss(F.log_softmax(middle_out[mask].detach().to(device), dim=-1), F.log_softmax(output[mask].detach().to(device), dim=-1))))

# Same comparison without restricting to the combined mask.
print("all_mask: ")
print("KL divergence of a and c:{}".format(KLloss(F.log_softmax(KL_xy_all.detach().to(device), dim=-1), F.log_softmax(output.detach().to(device), dim=-1))))
out = output
    
filename = 'gcn_unlearning_20250319.pdf'
visualize(out, color=data.y, filename=filename)
# Accuracy lists are [train, test].
print("**************train from all**************")
print(train_from_all_results)
print("**************train from scratch**************")
print(train_from_scratch_results)
print("**************middle-out**************")
print(results_middle)
print("**************unlearn**************")
print(results_unlearn)

# Test-split F1 scores of the four variants.
print("(((((((((((((f1-score)))))))))))))))")
print("**************train from all**************")
print(test_f1_train_from_all)
print("**************train from scratch**************")
print(test_f1_train_from_scratch)
print("**************middle-out**************")
print(test_f1_test_middle)
print("**************unlearn**************")
print(test_f1_test_unlearn)

# MIA测试相关代码
def get_posteriors(model, data, test_loader, edge_weight, device='cpu'):
    """Return the softmax posteriors of ``model`` for every node.

    A ``GCN_Unlearn`` instance is called directly on the module-level
    ``temp_KL_xy_all`` / ``KL_xy_all`` tensors with ``data.edge_index`` and the
    module-level ``nodes_to_remove``; any other model is evaluated through its
    ``inference`` method. Gradients are disabled during the computation.

    Returns:
        NumPy array of per-node class probabilities.
    """
    model.eval()
    with torch.no_grad():
        if not isinstance(model, GCN_Unlearn):
            _, logits = model.inference(data.x, test_loader, edge_weight, device)
        else:
            logits, _ = model(temp_KL_xy_all.to(device), KL_xy_all.to(device), data.edge_index, nodes_to_remove)
        posteriors = torch.softmax(logits, dim=1)
    return posteriors.cpu().numpy()

def mia_attack(original_probs, unlearn_model, data, train_mask, test_mask, test_loader, edge_weight, device='cpu'):
    """Score a distance-based membership-inference attack.

    The attack feature of a node is the L2 distance between its posterior
    under the original model and under ``unlearn_model``. Removed nodes (the
    module-level ``nodes_to_remove``) are the positives, test nodes the
    negatives; both groups are subsampled to the same size.

    Args:
        original_probs: Per-node posteriors of the original model, as a NumPy
            array or a ``torch.Tensor`` on any device.
        unlearn_model: Model whose posteriors are compared against
            ``original_probs``.
        data: Graph data object forwarded to ``get_posteriors``.
        train_mask: Unused; kept for interface compatibility.
        test_mask: Boolean mask selecting the negative samples.
        test_loader: Subgraph loader forwarded to ``get_posteriors``.
        edge_weight: Edge weights forwarded to ``get_posteriors``.
        device: Device forwarded to ``get_posteriors``.

    Returns:
        The value of ``roc_auc_score`` with ``max_fpr=0.1`` (a partial AUC
        restricted to false-positive rates up to 0.1).
    """
    # Normalise the reference posteriors to NumPy so the subtraction below
    # never mixes a tensor (possibly on another device) with an ndarray.
    if isinstance(original_probs, torch.Tensor):
        original_probs = original_probs.detach().cpu().numpy()
    else:
        original_probs = np.asarray(original_probs)

    # Posteriors of the model under attack.
    unlearn_probs = get_posteriors(unlearn_model, data, test_loader, edge_weight, device)
    # Attack feature: per-node L2 distance between the two posteriors.
    distances = np.linalg.norm(original_probs - unlearn_probs, axis=1)

    # Positives: removed nodes. Negatives: test nodes.
    unlearn_indices = nodes_to_remove.cpu().numpy()
    test_indices = torch.where(test_mask.cpu())[0].numpy()

    # Balance the two groups by sampling the same number from each
    # (positives are drawn first, then negatives).
    min_samples = min(len(unlearn_indices), len(test_indices))
    unlearn_indices = np.random.choice(unlearn_indices, min_samples, replace=False)
    test_indices = np.random.choice(test_indices, min_samples, replace=False)

    print(f"Number of positive samples (unlearned nodes): {len(unlearn_indices)}")
    print(f"Number of negative samples (test nodes): {len(test_indices)}")

    # Scores and labels: positives first, negatives second.
    features = np.concatenate([
        distances[unlearn_indices],
        distances[test_indices],
    ])
    labels = np.concatenate([
        np.ones(len(unlearn_indices)),
        np.zeros(len(test_indices)),
    ])

    auc = roc_auc_score(labels, features, max_fpr=0.1)
    return auc

# Posteriors of the three output sets. Only `model_probs` (kept on the CPU) is
# passed to the attacks below.
# NOTE(review): `unlearn_probs` and `scratch_probs` are not referenced again in
# the visible file.
unlearn_probs= torch.softmax(out.detach(), dim=1).to(device)
model_probs  = torch.softmax(KL_xy_all.detach(), dim=1).to('cpu')
scratch_probs= torch.softmax(KL_xy_scratch.detach(), dim=1).to(device)

# Run the MIA attack: original model vs. unlearning module.
print("\n============== MIA Attack Results Model_vs_Unlearn ==============")
mia_auc = mia_attack(
    original_probs=model_probs,
    unlearn_model=model_Unlearn,
    data=data,
    train_mask=data.train_mask,
    test_mask=data.test_mask,
    test_loader=test_loader_1,
    edge_weight=edge_weight,
    device=device
)
print(f"MIA Attack AUC: {mia_auc:.4f}")

# Original model vs. retrained model. Here the retrained model is evaluated
# with the full-graph loader and weights (test_loader_1 / edge_weight_1).
print("\n============== MIA Attack Results Model_vs_Scratch ==============")
mia_auc = mia_attack(
    original_probs=model_probs,
    unlearn_model=model_scratch,
    data=data,
    train_mask=data.train_mask,
    test_mask=data.test_mask,
    test_loader=test_loader_1,
    edge_weight=edge_weight_1,
    device=device
)
print(f"MIA Attack AUC: {mia_auc:.4f}")

# KL divergence between the outputs of the different models.
print("\n============== KL Divergence Results ==============")
# Restricted to `mask` (the combined train/test mask defined earlier).
print("KL divergence between original and unlearned model:")
print(KLloss(F.log_softmax(KL_xy_all[mask].detach().to(device), dim=-1), 
            F.log_softmax(out[mask].detach().to(device), dim=-1)).item())

# Computed over all nodes (no mask applied).
print("\nKL divergence between scratch and unlearned model:")
print(KLloss(F.log_softmax(KL_xy_scratch.detach().to(device), dim=-1), 
            F.log_softmax(out.detach().to(device), dim=-1)).item())

#     输出正样本和负样本的 L2 距离值
#     print("\nL2 Distances for Positive Samples (Unlearned Nodes):")
#     print(distances[unlearn_indices])
#     print("\nL2 Distances for Negative Samples (Test Nodes):")
#     print(distances[test_indices])

def calculate_macc(
    num_nodes, num_edges, feature_dim, hidden_dim, num_classes,
    include_activation=True, include_dropout=True, include_softmax=True
):
    """Estimate the multiply-accumulate count (MACC) of the two-layer GCN.

    Args:
        num_nodes: Number of nodes.
        num_edges: Number of edges.
        feature_dim: Input feature dimension.
        hidden_dim: Hidden layer dimension.
        num_classes: Number of output classes.
        include_activation: Count the activation term when True.
        include_dropout: Count the dropout term when True.
        include_softmax: Count the softmax term when True.

    Returns:
        ``(total_macc, details)`` where ``details`` maps each component
        ("conv1", "conv2", "activation", "dropout", "softmax") to its count;
        a disabled component is reported as 0.
    """
    details = {
        # Each GCN layer: edges x in_dim x out_dim, doubled for multiply + add.
        "conv1": num_edges * feature_dim * hidden_dim * 2,
        "conv2": num_edges * hidden_dim * num_classes * 2,
        # One operation per node and per hidden/output unit.
        "activation": num_nodes * (hidden_dim + num_classes) if include_activation else 0,
        # Approximation: one operation per node and hidden unit.
        "dropout": num_nodes * hidden_dim if include_dropout else 0,
        # exp + sum + division per node and class.
        "softmax": num_nodes * num_classes * 3 if include_softmax else 0,
    }
    total_macc = sum(details.values())
    return total_macc, details


def calculate_macc_unlearn(
    num_nodes, num_edges, feature_dim, hidden_dim, num_classes,
    include_activation=True, include_dropout=True, include_softmax=True
):
    """Estimate the multiply-accumulate count (MACC) of the unlearning model.

    Args:
        num_nodes: Number of nodes.
        num_edges: Number of edges.
        feature_dim: Input feature dimension.
        hidden_dim: Hidden layer dimension.
        num_classes: Number of output classes.
        include_activation: Count the activation term when True.
        include_dropout: Count the dropout term when True.
        include_softmax: Count the softmax term when True.

    Returns:
        ``(total_macc, details)`` where ``details`` maps each component
        ("gcn", "mlp", "matrix", "scatter", "activation", "dropout",
        "softmax") to its count; a disabled component is reported as 0.
    """
    # Hidden widths of the two MLPs (geometric mean of input and output width,
    # truncated to an integer).
    ij_hidden = int((num_classes * hidden_dim) ** 0.5)
    ji_hidden = int((2 * hidden_dim * num_classes) ** 0.5)

    details = {
        # Both GCN layers: edges x in_dim x out_dim, doubled for multiply + add.
        "gcn": (num_edges * feature_dim * hidden_dim * 2
                + num_edges * hidden_dim * num_classes * 2),
        # influ_ij (FC1 + FC2) followed by influ_ji (FC1 + FC2).
        "mlp": (num_classes * ij_hidden
                + ij_hidden * hidden_dim
                + 2 * hidden_dim * ji_hidden
                + ji_hidden * num_classes),
        # H_pinv @ H.
        "matrix": hidden_dim * num_classes * num_classes,
        # Approximation for the two scatter_add calls.
        "scatter": num_edges * (hidden_dim + num_classes),
        "activation": num_nodes * (hidden_dim + num_classes) if include_activation else 0,
        "dropout": num_nodes * hidden_dim if include_dropout else 0,
        # exp + sum + division per node and class.
        "softmax": num_nodes * num_classes * 3 if include_softmax else 0,
    }
    total_macc = sum(details.values())
    return total_macc, details

def calculate_macc_train_from_scratch(num_nodes, num_edges, feature_dim, hidden_dim, num_classes):
    """Return the estimated total MACC of the two-layer GCN, all terms included."""
    components = (
        num_edges * feature_dim * hidden_dim * 2,   # first GCN layer (transform + aggregation)
        num_edges * hidden_dim * num_classes * 2,   # second GCN layer
        num_nodes * (hidden_dim + num_classes),     # activation
        num_nodes * hidden_dim,                     # dropout (approximation)
        num_nodes * num_classes * 3,                # softmax: exp + sum + division
    )
    return sum(components)

# Parameters for the MACC estimates, read from the dataset.
num_nodes = dataset[0].x.shape[0]
# edge_index stores both directions of an edge, hence the division by two.
num_edges = dataset[0].edge_index.shape[1] // 2
feature_dim = dataset.num_node_features
hidden_dim = 16
num_classes = dataset.num_classes

# MACC estimate for training from scratch.
macc_scratch, details_scratch = calculate_macc(
    num_nodes, num_edges, feature_dim, hidden_dim, num_classes
)
print(f"从头训练的总 MACCs: {macc_scratch}")
print("详细信息:", details_scratch)

# MACC estimate for the unlearning model.
macc_unlearn, details_unlearn = calculate_macc_unlearn(
    num_nodes, num_edges, feature_dim, hidden_dim, num_classes
)
print(f"Unlearn 模型的总 MACCs: {macc_unlearn}")
print("详细信息:", details_unlearn)