import torch
import torch.nn as nn
from torch.nn.init import xavier_normal_, constant_
from torch.nn import functional as F
import functools
import dgl.function as fn
import dgl
from torch.nn.utils import weight_norm


def get_norm_layer(norm_type='none'):
    """
    Look up a normalization-layer factory by name.

    :param norm_type: str, one of: batch | instance | layer | none
    :return:
        a ``functools.partial`` wrapping the matching ``torch.nn`` class;
        call it with the remaining constructor arguments to build the layer.
    :raises NotImplementedError: if ``norm_type`` is not one of the names above.

    'batch' pre-binds affine=True and track_running_stats=True.
    'instance' pre-binds affine=False and track_running_stats=False.
    'layer' and 'none' bind no arguments (nn.LayerNorm / nn.Identity).
    """
    factories = (
        ('batch', functools.partial(nn.BatchNorm1d, affine=True, track_running_stats=True)),
        ('instance', functools.partial(nn.InstanceNorm1d, affine=False, track_running_stats=False)),
        ('layer', functools.partial(nn.LayerNorm)),
        ('none', functools.partial(nn.Identity)),
    )
    for name, factory in factories:
        if norm_type == name:
            return factory
    raise NotImplementedError('normalization layer [%s] is not found' % norm_type)

class Chomp2d(nn.Module):
    """Trim ``chomp_size`` trailing entries from the last two axes of a 4-D tensor.

    Args:
        chomp_size: number of trailing rows and columns to drop. ``0`` leaves
            the input unchanged.
    """

    def __init__(self, chomp_size):
        super(Chomp2d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """Return ``x`` without its last ``chomp_size`` entries on axes 2 and 3."""
        if self.chomp_size == 0:
            # ``x[..., :-0]`` would be an empty slice, so return the input as is.
            return x.contiguous()
        return x[:, :, :-self.chomp_size, :-self.chomp_size].contiguous()

class TemporalBlock(nn.Module):
    """Residual block made of two weight-normalised, dilated 2-D convolutions.

    Each convolution is followed by a ``Chomp2d(padding)``, a ReLU and a
    dropout layer. The block returns ``relu(net(x) + res)`` where ``res`` is
    ``x`` itself, or a 1x1 convolution of ``x`` when ``n_inputs != n_outputs``.

    Args:
        n_inputs: number of input channels.
        n_outputs: number of output channels.
        kernel_size: kernel size passed to both ``nn.Conv2d`` layers.
        stride: stride passed to both convolutions.
        dilation: dilation passed to both convolutions.
        padding: padding passed to both convolutions and trimmed again by the chomps.
        dropout: dropout probability used after each convolution.
    """

    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        super(TemporalBlock, self).__init__()
        self.conv1 = weight_norm(nn.Conv2d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp1 = Chomp2d(padding)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = weight_norm(nn.Conv2d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp2 = Chomp2d(padding)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)

        # A 1x1 convolution aligns channel counts for the residual sum when needed.
        self.downsample = nn.Conv2d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.ReLU()
        self.init_weights()

    @staticmethod
    def _init_weight_normed(conv):
        """Give a (possibly weight-normalised) conv an effective weight drawn from N(0, 0.01)."""
        direction = getattr(conv, 'weight_v', None)
        if direction is None:
            # Plain convolution: the weight is a real parameter.
            conv.weight.data.normal_(0, 0.01)
            return
        direction.data.normal_(0, 0.01)
        # weight = g * v / ||v|| (norm over every axis except 0); setting
        # g = ||v|| makes the effective weight equal to the freshly drawn v.
        per_channel_norm = direction.data.flatten(1).norm(dim=1)
        conv.weight_g.data.copy_(per_channel_norm.view_as(conv.weight_g))

    def init_weights(self):
        """Initialise the convolution weights from N(0, 0.01).

        ``weight_norm`` recomputes ``conv.weight`` from ``weight_g`` and
        ``weight_v`` before every forward pass, so writing into
        ``conv.weight.data`` would be discarded; the underlying parameters are
        initialised instead.
        """
        self._init_weight_normed(self.conv1)
        self._init_weight_normed(self.conv2)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """Apply both conv stages and add the residual connection."""
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)

class TemporalConvNet(nn.Module):
    """Stack of ``TemporalBlock`` layers whose dilation doubles at every level.

    Args:
        num_inputs: channel count of the input tensor.
        num_channels: sequence of output channel counts, one per level.
        kernel_size: kernel size forwarded to every block.
        dropout: dropout probability forwarded to every block.
    """

    def __init__(self, num_inputs, num_channels, kernel_size=2, dropout=0.2):
        super().__init__()
        blocks = []
        for level in range(len(num_channels)):
            # Level i uses dilation 2**i and padding (kernel_size - 1) * 2**i.
            dilation = 2 ** level
            fan_in = num_channels[level - 1] if level > 0 else num_inputs
            fan_out = num_channels[level]
            blocks.append(
                TemporalBlock(fan_in, fan_out, kernel_size, stride=1, dilation=dilation,
                              padding=(kernel_size - 1) * dilation, dropout=dropout)
            )

        self.network = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through every block in order and return the result."""
        return self.network(x)

class TCN(nn.Module):
    """Thin wrapper around ``TemporalConvNet``.

    Args:
        input_size: number of input channels.
        output_size: accepted for interface compatibility; not used by this class.
        num_channels: output channel count per level; defaults to ``[8, 8, 8, 8]``.
        kernel_size: kernel size forwarded to the network.
        dropout: dropout probability forwarded to the network.
    """

    def __init__(self, input_size=8, output_size=8, num_channels=None, kernel_size=2, dropout=0.3):
        super(TCN, self).__init__()
        if num_channels is None:
            # Built per instance so no list object is shared between calls.
            num_channels = [8, 8, 8, 8]
        self.tcn = TemporalConvNet(input_size, num_channels, kernel_size=kernel_size, dropout=dropout)

    def forward(self, inputs):
        """Apply the temporal conv net to ``inputs``.

        The blocks in this file are built from ``nn.Conv2d``, so ``inputs`` is
        expected to be 4-D, i.e. (N, C, H, W), with C equal to ``input_size``.
        """
        return self.tcn(inputs)

class GCNN_Net(nn.Module):
    """Bipartite message-passing network that writes a score onto every 'v' node.

    The methods read node types ``'c'`` and ``'v'``, edge types ``'v2c'`` and
    ``'c2v'``, and a feature called ``'h'`` on nodes and edges of the graph
    they are given.

    Args:
        h_dim: hidden width of every layer.
        c_dim: input feature width of 'c' nodes.
        v_dim: input feature width of 'v' nodes.
        e_dim: input feature width of edges.
        T: number of message-passing rounds.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def mlp(in_dim):
            # (Linear -> ReLU -> Dropout) x 2, ending at width h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, no activation on the output.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        # Construction order fixes seeded initialisation; keep it stable.
        # Input embeddings
        self.cons_embedding = mlp(c_dim)
        self.var_embedding = mlp(v_dim)
        # Graph convolution: one set of layers per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.f_c = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is built but never applied in this class (joint_conv
        # uses f_c for both directions) — confirm whether that is intended.
        self.f_v = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        # Output head: one scalar per 'v' node
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters.

        Uses ``Tensor.numel`` so the count works wherever the parameters live.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Run one message-passing step over a single edge-type view ``g``.

        Args:
            g: edge-type view of the graph (``graph['v2c']`` or ``graph['c2v']``).
            dir: ``'c2v'`` updates 'v' nodes; any other value updates 'c' nodes.
            t: index of the current round; selects the per-round layers.
        """
        # Both node types are re-projected on every call, whatever the direction.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Per edge: source 'h' + destination 'h', stored as edge feature 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Project the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # (Original author's open question: why are the edge weights all negative?)
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the per-edge 'joint' messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """Alias of :meth:`joint_conv`, kept for callers that use this name."""
        return self.joint_conv(g, dir, t)

    def _score_variables(self, g):
        """Embed, run ``T`` rounds of v2c/c2v convolution, and store scores in ``'s'``.

        Returns the score tensor that was written to ``g.nodes['v'].data['s']``.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information has been gathered into the 'v' node embeddings.
        scores = self.f_s(g.nodes['v'].data['h'])
        g.nodes['v'].data['s'] = scores
        return scores

    def forward(self, g):
        """Score the 'v' nodes in place and return the (mutated) graph ``g``."""
        self._score_variables(g)
        return g

    def eval_forward(self, g):
        """Same computation as :meth:`forward`, but return the 'v' score tensor.

        This does not switch the module to eval mode or disable gradients.
        """
        return self._score_variables(g)

class PD_Net(nn.Module):
    """Bipartite message-passing network that returns one score per 'v' node.

    The methods read node types ``'v'`` and ``'c'``, edge types ``'v2c'`` and
    ``'c2v'``, and a feature called ``'h'`` on nodes and edges.

    Args:
        h_dim: hidden width of every layer.
        v_dim: input feature width of 'v' nodes.
        c_dim: input feature width of 'c' nodes.
        T: number of message-passing rounds.
    """

    def __init__(self, h_dim=64, v_dim=17, c_dim=5, T=2):
        super().__init__()
        self.T = T
        # Input embeddings
        self.v_embed = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU())
        self.c_embed = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU())
        # Per-round projections applied to the sender before messages are built
        self.v2c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU()) for _ in range(T)])
        self.c2v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU()) for _ in range(T)])
        # Per-round update networks fed with [own state, aggregated messages]
        self.fc = nn.ModuleList([nn.Sequential(nn.Linear(2*h_dim, h_dim), nn.ReLU(), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.fv = nn.ModuleList([nn.Sequential(nn.Linear(2*h_dim, h_dim), nn.ReLU(), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        # Output head
        self.fs = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Linear(h_dim, 1))

    def score(self, g):
        """Apply the output head to the current 'v' node embeddings."""
        return self.fs(g.nodes['v'].data['h'])

    def forward(self, g):
        """Run ``T`` rounds of v->c then c->v message passing and return 'v' scores.

        Node features on ``g`` are overwritten in place.
        """
        # Message: sender's 'h_mm' times the edge feature 'h'; reduced by summation.
        message_func = fn.u_mul_e('h_mm', 'h', 'm')
        reduce_func = fn.sum('m', 'aggr')
        # Encoding
        g.nodes['v'].data['h'] = self.v_embed(g.nodes['v'].data['h'])
        g.nodes['c'].data['h'] = self.c_embed(g.nodes['c'].data['h'])
        # Graph convolution
        for t in range(self.T):
            # v -> c: update 'c' nodes from their projected 'v' neighbours.
            g.nodes['v'].data['h_mm'] = self.v2c[t](g.nodes['v'].data['h'])
            g['v2c'].update_all(message_func, reduce_func)
            feat = torch.cat((g.nodes['c'].data['h'], g.nodes['c'].data['aggr']), dim=1)
            g.nodes['c'].data['h'] = self.fc[t](feat)

            # c -> v: update 'v' nodes from the freshly updated 'c' nodes.
            g.nodes['c'].data['h_mm'] = self.c2v[t](g.nodes['c'].data['h'])
            g['c2v'].update_all(message_func, reduce_func)
            feat = torch.cat((g.nodes['v'].data['h'], g.nodes['v'].data['aggr']), dim=1)
            g.nodes['v'].data['h'] = self.fv[t](feat)

        return self.score(g)

    def count_parameters(self):
        """Return the total number of scalar parameters.

        Uses ``Tensor.numel`` so the count works wherever the parameters live.
        """
        return sum(param.numel() for param in self.parameters())

class GCNN_Net_new_1(nn.Module):
    """Bipartite message-passing network that writes a score onto every 'v' node.

    The methods read node types ``'c'`` and ``'v'``, edge types ``'v2c'`` and
    ``'c2v'``, and a feature called ``'h'`` on nodes and edges of the graph
    they are given.

    Args:
        h_dim: hidden width of every layer.
        c_dim: input feature width of 'c' nodes.
        v_dim: input feature width of 'v' nodes (defaults to 35 in this variant).
        e_dim: input feature width of edges.
        T: number of message-passing rounds.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=35, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def mlp(in_dim):
            # (Linear -> ReLU -> Dropout) x 2, ending at width h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, no activation on the output.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        # Construction order fixes seeded initialisation; keep it stable.
        # Input embeddings
        self.cons_embedding = mlp(c_dim)
        self.var_embedding = mlp(v_dim)
        # Graph convolution: one set of layers per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.f_c = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is built but never applied in this class (joint_conv
        # uses f_c for both directions) — confirm whether that is intended.
        self.f_v = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        # Output head: one scalar per 'v' node
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters.

        Uses ``Tensor.numel`` so the count works wherever the parameters live.
        """
        return sum(param.numel() for param in self.parameters())

    def _score_variables(self, g):
        """Embed, run ``T`` rounds of v2c/c2v convolution, and store scores in ``'s'``.

        Returns the score tensor that was written to ``g.nodes['v'].data['s']``.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information has been gathered into the 'v' node embeddings.
        scores = self.f_s(g.nodes['v'].data['h'])
        g.nodes['v'].data['s'] = scores
        return scores

    def forward(self, g):
        """Score the 'v' nodes in place and return the (mutated) graph ``g``."""
        self._score_variables(g)
        return g

    def eval_forward(self, g):
        """Same computation as :meth:`forward`, but return the 'v' score tensor.

        This does not switch the module to eval mode or disable gradients.
        """
        return self._score_variables(g)

    def joint_conv(self, g, dir, t):
        """Run one message-passing step over a single edge-type view ``g``.

        Args:
            g: edge-type view of the graph (``graph['v2c']`` or ``graph['c2v']``).
            dir: ``'c2v'`` updates 'v' nodes; any other value updates 'c' nodes.
            t: index of the current round; selects the per-round layers.
        """
        # Both node types are re-projected on every call, whatever the direction.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Per edge: source 'h' + destination 'h', stored as edge feature 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))
        # Project the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)
        # Sum the per-edge 'joint' messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))
        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

class GCNN_Net_new_2(nn.Module):
    """Bipartite message-passing network that writes a score onto every 'v' node.

    The methods read node types ``'c'`` and ``'v'``, edge types ``'v2c'`` and
    ``'c2v'``, and a feature called ``'h'`` on nodes and edges of the graph
    they are given.

    Args:
        h_dim: hidden width of every layer.
        c_dim: input feature width of 'c' nodes.
        v_dim: input feature width of 'v' nodes.
        e_dim: input feature width of edges.
        T: number of message-passing rounds.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def mlp(in_dim):
            # (Linear -> ReLU -> Dropout) x 2, ending at width h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, no activation on the output.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        # Construction order fixes seeded initialisation; keep it stable.
        # Input embeddings
        self.cons_embedding = mlp(c_dim)
        self.var_embedding = mlp(v_dim)
        # Graph convolution: one set of layers per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.f_c = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is built but never applied in this class (joint_conv
        # uses f_c for both directions) — confirm whether that is intended.
        self.f_v = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        # Output head: one scalar per 'v' node
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters.

        Uses ``Tensor.numel`` so the count works wherever the parameters live.
        """
        return sum(param.numel() for param in self.parameters())

    def _score_variables(self, g):
        """Embed, run ``T`` rounds of v2c/c2v convolution, and store scores in ``'s'``.

        Returns the score tensor that was written to ``g.nodes['v'].data['s']``.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information has been gathered into the 'v' node embeddings.
        scores = self.f_s(g.nodes['v'].data['h'])
        g.nodes['v'].data['s'] = scores
        return scores

    def forward(self, g):
        """Score the 'v' nodes in place and return the (mutated) graph ``g``."""
        self._score_variables(g)
        return g

    def eval_forward(self, g):
        """Same computation as :meth:`forward`, but return the 'v' score tensor.

        This does not switch the module to eval mode or disable gradients.
        """
        return self._score_variables(g)

    def joint_conv(self, g, dir, t):
        """Run one message-passing step over a single edge-type view ``g``.

        Args:
            g: edge-type view of the graph (``graph['v2c']`` or ``graph['c2v']``).
            dir: ``'c2v'`` updates 'v' nodes; any other value updates 'c' nodes.
            t: index of the current round; selects the per-round layers.
        """
        # Both node types are re-projected on every call, whatever the direction.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Per edge: source 'h' + destination 'h', stored as edge feature 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Project the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # (Original author's open question: why are the edge weights all negative?)
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the per-edge 'joint' messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))


class GCNN_Net_new_3(nn.Module):
    """Message-passing network over 'c', 'v' and 't' nodes that scores 'v' nodes.

    The methods read node types ``'c'``, ``'v'`` and ``'t'``, edge types
    ``'v2c'``, ``'c2v'`` and ``'t2v'``, and a feature called ``'h'`` on nodes
    and edges of the graph they are given.

    Args:
        h_dim: hidden width of every layer.
        c_dim: input feature width of 'c' nodes.
        v_dim: input feature width of 'v' nodes.
        t_dim: input feature width of 't' nodes.
        e_dim: input feature width of edges.
        T: number of message-passing rounds.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def mlp(in_dim):
            # (Linear -> ReLU -> Dropout) x 2, ending at width h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, no activation on the output.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        # Construction order fixes seeded initialisation; keep it stable.
        # Input embeddings
        self.cons_embedding = mlp(c_dim)
        self.var_embedding = mlp(v_dim)
        self.tree_embedding = mlp(t_dim)

        # Graph convolution: one set of layers per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # NOTE(review): feature_module_trees is built but never applied;
        # joint_tree projects 't' nodes with feature_module_conss instead.
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([edge_mlp() for _ in range(T)])

        self.f_c = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is built but never applied in this class (joint_conv
        # uses f_c for both directions) — confirm whether that is intended.
        self.f_v = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        self.f_t = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])

        # Output head: one scalar per 'v' node
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters.

        Uses ``Tensor.numel`` so the count works wherever the parameters live.
        """
        return sum(param.numel() for param in self.parameters())

    def forward(self, g):
        """Score the 'v' nodes in place and return the (mutated) graph ``g``.

        Each round runs the t->v step first, then v->c, then c->v. The scores
        are stored in ``g.nodes['v'].data['s']``.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # All information has been gathered into the 'v' node embeddings.
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

    def joint_tree(self, g, dir, t):
        """Run one t->v message-passing step over the ``'t2v'`` edge view ``g``.

        Args:
            g: edge-type view of the graph (``graph['t2v']``).
            dir: unused here; kept so the signature matches :meth:`joint_conv`.
            t: index of the current round; selects the per-round layers.
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): 't' nodes go through feature_module_conss, not
        # feature_module_trees — confirm this is intended.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Per edge: source 'h' * destination 'h', stored as edge feature 'u*v'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Original author's notes: this step could probably be improved. The
        # t-v edge values are all 1, which leaves room for a better edge
        # feature, and the tree node is barely exploited. The raw edge feature
        # 'h' is not used in this step.
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the per-edge 'joint' messages into 'h_neigh' on the 'v' nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """Run one message-passing step over a single edge-type view ``g``.

        Args:
            g: edge-type view of the graph (``graph['v2c']`` or ``graph['c2v']``).
            dir: ``'c2v'`` updates 'v' nodes; any other value updates 'c' nodes.
            t: index of the current round; selects the per-round layers.
        """
        # Both node types are re-projected on every call, whatever the direction.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Per edge: source 'h' + destination 'h', stored as edge feature 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Project the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # (Original author's open question: why are the edge weights all negative?)
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the per-edge 'joint' messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))



class GCNN_Net_new_3_0(nn.Module):
    """Message-passing network over 'c', 'v' and 't' nodes that scores 'v' nodes.

    The methods read node types ``'c'``, ``'v'`` and ``'t'``, edge types
    ``'v2c'``, ``'c2v'`` and ``'t2v'``, and a feature called ``'h'`` on nodes
    and edges of the graph they are given.

    Args:
        h_dim: hidden width of every layer.
        c_dim: input feature width of 'c' nodes.
        v_dim: input feature width of 'v' nodes.
        t_dim: input feature width of 't' nodes.
        e_dim: input feature width of edges.
        T: number of message-passing rounds.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def mlp(in_dim):
            # (Linear -> ReLU -> Dropout) x 2, ending at width h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, no activation on the output.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        # Construction order fixes seeded initialisation; keep it stable.
        # Input embeddings
        self.cons_embedding = mlp(c_dim)
        self.var_embedding = mlp(v_dim)
        self.tree_embedding = mlp(t_dim)

        # Graph convolution: one set of layers per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # NOTE(review): feature_module_trees is built but never applied;
        # joint_tree projects 't' nodes with feature_module_conss instead.
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([edge_mlp() for _ in range(T)])

        self.f_c = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is built but never applied in this class (joint_conv
        # uses f_c for both directions) — confirm whether that is intended.
        self.f_v = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])
        self.f_t = nn.ModuleList([mlp(h_dim + h_dim) for _ in range(T)])

        # Output head: one scalar per 'v' node
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters.

        Uses ``Tensor.numel`` so the count works wherever the parameters live.
        """
        return sum(param.numel() for param in self.parameters())

    def forward(self, g):
        """Score the 'v' nodes in place and return the (mutated) graph ``g``.

        Each round runs the t->v step first, then v->c, then c->v. The scores
        are stored in ``g.nodes['v'].data['s']``.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # All information has been gathered into the 'v' node embeddings.
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

    def joint_tree(self, g, dir, t):
        """Run one t->v message-passing step over the ``'t2v'`` edge view ``g``.

        Args:
            g: edge-type view of the graph (``graph['t2v']``).
            dir: unused here; kept so the signature matches :meth:`joint_conv`.
            t: index of the current round; selects the per-round layers.
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): 't' nodes go through feature_module_conss, not
        # feature_module_trees — confirm this is intended.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Per edge: source 'h' * destination 'h', stored as edge feature 'u*v'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Original author's notes: this step could probably be improved. The
        # t-v edge values are all 1, which leaves room for a better edge
        # feature, and the tree node is barely exploited. The raw edge feature
        # 'h' is not used in this step.
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the per-edge 'joint' messages into 'h_neigh' on the 'v' nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """Run one message-passing step over a single edge-type view ``g``.

        Args:
            g: edge-type view of the graph (``graph['v2c']`` or ``graph['c2v']``).
            dir: ``'c2v'`` updates 'v' nodes; any other value updates 'c' nodes.
            t: index of the current round; selects the per-round layers.
        """
        # Both node types are re-projected on every call, whatever the direction.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Per edge: source 'h' + destination 'h', stored as edge feature 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Project the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # (Original author's open question: why are the edge weights all negative?)
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the per-edge 'joint' messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

class GCNN_Net_new_3_1(nn.Module):
    """
    Message-passing network over a constraint/variable heterograph.

    ``forward`` embeds the 'h' features of the 'c' and 'v' nodes, runs ``T``
    rounds of ``joint_conv`` over the 'v2c' and 'c2v' edge types and stores one
    scalar per variable node in ``g.nodes['v'].data['s']``.

    NOTE(review): ``tree_embedding``, ``cons_tree_fea`` and
    ``feature_module_trees`` are never used, and ``joint_tree`` is never called
    by ``forward`` in this class. They are left in place so existing state
    dicts keep loading.
    """

    def __init__(self, h_dim=64, c_dim=66, v_dim=17, t_dim=61, e_dim=1, T=1):
        """
        :param h_dim: int, hidden width used by every layer
        :param c_dim: int, input feature size of the constraint ('c') nodes
        :param v_dim: int, input feature size of the variable ('v') nodes
        :param t_dim: int, input feature size expected by ``tree_embedding``
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds
        """
        super().__init__()
        self.T = T
        # Constraint-node embedding.
        self.cons_embedding = nn.Sequential(
            nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Variable-node embedding.
        self.var_embedding = nn.Sequential(
            nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Tree-node embedding (marked "new" by the original author).
        self.tree_embedding = nn.Sequential(
            nn.Linear(t_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        self.cons_tree_fea = nn.Sequential(
            nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Graph convolution feature layers, one entry per round.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        # Per-edge MLPs applied to the joint edge representation.
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])

        # Node-update MLPs taking [h, h_neigh] concatenated (2 * h_dim inputs).
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is registered but joint_conv always uses f_c.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        self.f_t = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])

        # Output head: one scalar per variable node.
        self.f_s = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return:
            int, total number of scalar entries across all parameters
        """
        # numel() avoids the NumPy round trip, so it also works for parameters
        # that Tensor.numpy() rejects (e.g. CUDA-resident or bfloat16 ones).
        return sum(param.numel() for param in self.parameters())

    def forward(self, g):
        """
        :param g: heterograph with 'c' and 'v' nodes carrying input features in
            data['h'] and with edge types 'v2c' and 'c2v'
        :return:
            g, the same graph; data['h'] of both node types is overwritten and
            the per-variable output is stored in g.nodes['v'].data['s']
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']

        # Convolution rounds: variables -> constraints, then constraints -> variables.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # All information has been gathered into g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

    def joint_tree(self, g, dir, t):
        """
        Pass messages along the edges of ``g`` into the variable nodes.

        Not called by ``forward`` in this class.

        :param g: graph exposing 't' and 'v' node data
        :param dir: str, unused
        :param t: int, index of the round whose layers are used
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): the 't' nodes go through feature_module_conss rather than
        # feature_module_trees - confirm this is intended.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Per edge: source 'h' * destination 'h', stored as the edge feature 'u*v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   t.h [1, 64], v.h [1000, 64], edata['h'] [1000, 1], edata['u*v'] [1000, 64]
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Original author's notes: this step could probably be improved. The edges
        # between the search-tree node and the variable nodes all carry the value 1,
        # which leaves room for optimization; simply combining the endpoint features
        # may not be very meaningful; the search-tree node is barely exploited here.
        # Unlike joint_conv, the raw edge feature g.edata['h'] is not mixed in.
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step on a single edge-type view.

        :param g: graph view passed by ``forward`` (g['v2c'] or g['c2v'])
        :param dir: str, 'c2v' updates the variable ('v') nodes; any other value
            updates the constraint ('c') nodes
        :param t: int, index of the round whose layers are used
        """
        # Both node types are linearly transformed on every call, regardless of dir.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Per edge: source 'h' + destination 'h', stored as the edge feature 'u+v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   c.h [520, 64], v.h [1000, 64], edata['h'] [34861, 1], edata['u+v'] [34861, 64]
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Lift the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Open question left by the original author: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        # NOTE(review): f_c is applied for both directions; f_v is never used.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

class GCNN_Net_new_3_2(nn.Module):
    """
    Message-passing network over a constraint/variable heterograph.

    ``forward`` embeds the 'h' features of the 'c' and 'v' nodes, runs ``T``
    rounds of ``joint_conv`` over the 'v2c' and 'c2v' edge types and stores one
    scalar per variable node in ``g.nodes['v'].data['s']``.

    NOTE(review): ``tree_embedding``, ``cons_tree_fea`` and
    ``feature_module_trees`` are never used, and ``joint_tree`` is never called
    by ``forward`` in this class. They are left in place so existing state
    dicts keep loading.
    """

    def __init__(self, h_dim=64, c_dim=5 + 53, v_dim=25, t_dim=61, e_dim=1, T=1):
        """
        :param h_dim: int, hidden width used by every layer
        :param c_dim: int, input feature size of the constraint ('c') nodes
        :param v_dim: int, input feature size of the variable ('v') nodes
        :param t_dim: int, input feature size expected by ``tree_embedding``
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds
        """
        super().__init__()
        self.T = T
        # Constraint-node embedding.
        self.cons_embedding = nn.Sequential(
            nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Variable-node embedding.
        self.var_embedding = nn.Sequential(
            nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Tree-node embedding (marked "new" by the original author).
        self.tree_embedding = nn.Sequential(
            nn.Linear(t_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        self.cons_tree_fea = nn.Sequential(
            nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Graph convolution feature layers, one entry per round.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        # Per-edge MLPs applied to the joint edge representation.
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])

        # Node-update MLPs taking [h, h_neigh] concatenated (2 * h_dim inputs).
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is registered but joint_conv always uses f_c.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        self.f_t = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])

        # Output head: one scalar per variable node.
        self.f_s = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return:
            int, total number of scalar entries across all parameters
        """
        # numel() avoids the NumPy round trip, so it also works for parameters
        # that Tensor.numpy() rejects (e.g. CUDA-resident or bfloat16 ones).
        return sum(param.numel() for param in self.parameters())

    def forward(self, g):
        """
        :param g: heterograph with 'c' and 'v' nodes carrying input features in
            data['h'] and with edge types 'v2c' and 'c2v'
        :return:
            g, the same graph; data['h'] of both node types is overwritten and
            the per-variable output is stored in g.nodes['v'].data['s']
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']

        # Convolution rounds: variables -> constraints, then constraints -> variables.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # All information has been gathered into g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

    def joint_tree(self, g, dir, t):
        """
        Pass messages along the edges of ``g`` into the variable nodes.

        Not called by ``forward`` in this class.

        :param g: graph exposing 't' and 'v' node data
        :param dir: str, unused
        :param t: int, index of the round whose layers are used
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): the 't' nodes go through feature_module_conss rather than
        # feature_module_trees - confirm this is intended.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Per edge: source 'h' * destination 'h', stored as the edge feature 'u*v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   t.h [1, 64], v.h [1000, 64], edata['h'] [1000, 1], edata['u*v'] [1000, 64]
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Original author's notes: this step could probably be improved. The edges
        # between the search-tree node and the variable nodes all carry the value 1,
        # which leaves room for optimization; simply combining the endpoint features
        # may not be very meaningful; the search-tree node is barely exploited here.
        # Unlike joint_conv, the raw edge feature g.edata['h'] is not mixed in.
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step on a single edge-type view.

        :param g: graph view passed by ``forward`` (g['v2c'] or g['c2v'])
        :param dir: str, 'c2v' updates the variable ('v') nodes; any other value
            updates the constraint ('c') nodes
        :param t: int, index of the round whose layers are used
        """
        # Both node types are linearly transformed on every call, regardless of dir.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Per edge: source 'h' + destination 'h', stored as the edge feature 'u+v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   c.h [520, 64], v.h [1000, 64], edata['h'] [34861, 1], edata['u+v'] [34861, 64]
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Lift the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Open question left by the original author: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        # NOTE(review): f_c is applied for both directions; f_v is never used.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

class GCNN_Net_new_3_root(nn.Module):
    """
    Message-passing network over a constraint/variable heterograph with three
    output heads.

    ``forward`` embeds the 'h' features of the 'c' and 'v' nodes, runs ``T``
    rounds of ``joint_conv`` over the 'v2c' and 'c2v' edge types and stores
    three scalars per variable node under 's', 's_0' and 's_1'.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """
        :param h_dim: int, hidden width used by every layer
        :param c_dim: int, input feature size of the constraint ('c') nodes
        :param v_dim: int, input feature size of the variable ('v') nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds
        """
        super().__init__()
        self.T = T
        # Constraint-node embedding.
        self.cons_embedding = nn.Sequential(
            nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Variable-node embedding.
        self.var_embedding = nn.Sequential(
            nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Graph convolution feature layers, one entry per round.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # Per-edge MLP applied to the joint edge representation.
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        # Node-update MLPs taking [h, h_neigh] concatenated (2 * h_dim inputs).
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is registered but joint_conv always uses f_c.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # Output heads: each maps a variable embedding to one scalar.
        self.f_s = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        # Additional heads (marked "new" by the original author).
        self.f_s_0 = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        self.f_s_1 = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return:
            int, total number of scalar entries across all parameters
        """
        # numel() avoids the NumPy round trip, so it also works for parameters
        # that Tensor.numpy() rejects (e.g. CUDA-resident or bfloat16 ones).
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step on a single edge-type view.

        :param g: graph view passed by ``forward`` (g['v2c'] or g['c2v'])
        :param dir: str, 'c2v' updates the variable ('v') nodes; any other value
            updates the constraint ('c') nodes
        :param t: int, index of the round whose layers are used
        """
        # Both node types are linearly transformed on every call, regardless of dir.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Per edge: source 'h' + destination 'h', stored as the edge feature 'u+v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   c.h [520, 64], v.h [1000, 64], edata['h'] [34861, 1], edata['u+v'] [34861, 64]
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Lift the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Open question left by the original author: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # feature_final keeps the hidden width (h_dim -> h_dim).
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both directions; f_v is never used.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def forward(self, g):
        """
        :param g: heterograph with 'c' and 'v' nodes carrying input features in
            data['h'] and with edge types 'v2c' and 'c2v'
        :return:
            g, the same graph; data['h'] of both node types is overwritten and
            the outputs are stored in g.nodes['v'].data under 's', 's_0', 's_1'
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds: variables -> constraints, then constraints -> variables.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information has been gathered into g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)

        return g

class GCNN_Net_new_4(nn.Module):
    """
    Message-passing network over a constraint/variable heterograph.

    ``forward`` embeds the 'h' features of the 'c' and 'v' nodes, runs ``T``
    rounds of ``joint_conv`` over the 'v2c' and 'c2v' edge types and stores one
    scalar per variable node in ``g.nodes['v'].data['s']``.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """
        :param h_dim: int, hidden width used by every layer
        :param c_dim: int, input feature size of the constraint ('c') nodes
        :param v_dim: int, input feature size of the variable ('v') nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds
        """
        super().__init__()
        self.T = T
        # Constraint-node embedding.
        self.cons_embedding = nn.Sequential(
            nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Variable-node embedding.
        self.var_embedding = nn.Sequential(
            nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Graph convolution feature layers, one entry per round.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # Per-edge MLP applied to the joint edge representation.
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        # Node-update MLPs taking [h, h_neigh] concatenated (2 * h_dim inputs).
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is registered but joint_conv always uses f_c.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # Output head: one scalar per variable node.
        self.f_s = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return:
            int, total number of scalar entries across all parameters
        """
        # numel() avoids the NumPy round trip, so it also works for parameters
        # that Tensor.numpy() rejects (e.g. CUDA-resident or bfloat16 ones).
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step on a single edge-type view.

        :param g: graph view passed by ``forward`` (g['v2c'] or g['c2v'])
        :param dir: str, 'c2v' updates the variable ('v') nodes; any other value
            updates the constraint ('c') nodes
        :param t: int, index of the round whose layers are used
        """
        # Both node types are linearly transformed on every call, regardless of dir.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Per edge: source 'h' + destination 'h', stored as the edge feature 'u+v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   c.h [520, 64], v.h [1000, 64], edata['h'] [34861, 1], edata['u+v'] [34861, 64]
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Lift the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Open question left by the original author: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # feature_final keeps the hidden width (h_dim -> h_dim).
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both directions; f_v is never used.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def forward(self, g):
        """
        :param g: heterograph with 'c' and 'v' nodes carrying input features in
            data['h'] and with edge types 'v2c' and 'c2v'
        :return:
            g, the same graph; data['h'] of both node types is overwritten and
            the per-variable output is stored in g.nodes['v'].data['s']
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds: variables -> constraints, then constraints -> variables.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information has been gathered into g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

class GCNN_Net_new_5(nn.Module):
    """
    Message-passing network over a heterograph with constraint ('c'),
    variable ('v') and tree ('t') nodes.

    ``forward`` embeds the 'h' features of all three node types, then for each
    of the ``T`` rounds runs ``joint_tree`` on the 't2v' edges followed by
    ``joint_conv`` on the 'v2c' and 'c2v' edges, and finally stores one scalar
    per variable node in ``g.nodes['v'].data['s']``.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        """
        :param h_dim: int, hidden width used by every layer
        :param c_dim: int, input feature size of the constraint ('c') nodes
        :param v_dim: int, input feature size of the variable ('v') nodes
        :param t_dim: int, input feature size of the tree ('t') nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds
        """
        super().__init__()
        self.T = T
        # Constraint-node embedding.
        self.cons_embedding = nn.Sequential(
            nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Variable-node embedding.
        self.var_embedding = nn.Sequential(
            nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Tree-node embedding.
        self.tree_embedding = nn.Sequential(
            nn.Linear(t_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Graph convolution feature layers, one entry per round.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # NOTE(review): feature_module_trees is registered but never used;
        # joint_tree applies feature_module_conss to the 't' nodes instead.
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        # Per-edge MLPs applied to the joint edge representation.
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])

        # Node-update MLPs taking [h, h_neigh] concatenated (2 * h_dim inputs).
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is registered but joint_conv always uses f_c.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        self.f_t = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])

        # Output head: one scalar per variable node.
        self.f_s = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return:
            int, total number of scalar entries across all parameters
        """
        # numel() avoids the NumPy round trip, so it also works for parameters
        # that Tensor.numpy() rejects (e.g. CUDA-resident or bfloat16 ones).
        return sum(param.numel() for param in self.parameters())

    def forward(self, g):
        """
        :param g: heterograph with 'c', 'v' and 't' nodes carrying input
            features in data['h'] and with edge types 'v2c', 'c2v' and 't2v'
        :return:
            g, the same graph; data['h'] of all three node types is overwritten
            and the per-variable output is stored in g.nodes['v'].data['s']
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        # Convolution rounds: tree -> variables, variables -> constraints,
        # then constraints -> variables.
        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # All information has been gathered into g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

    def eval_forward(self, g):
        """
        Same computation as ``forward``, returning only the output tensor.

        :param g: heterograph, see ``forward``
        :return:
            tensor stored in g.nodes['v'].data['s'] after the forward pass
        """
        # Delegate instead of duplicating the forward pass line by line.
        return self.forward(g).nodes['v'].data['s']

    def joint_tree(self, g, dir, t):
        """
        Pass messages from the tree ('t') nodes into the variable nodes.

        :param g: graph view passed by ``forward`` (g['t2v'])
        :param dir: str, unused
        :param t: int, index of the round whose layers are used
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): the 't' nodes go through feature_module_conss rather than
        # feature_module_trees - confirm this is intended.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Per edge: source 'h' * destination 'h', stored as the edge feature 'u*v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   t.h [1, 64], v.h [1000, 64], edata['h'] [1000, 1], edata['u*v'] [1000, 64]
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Original author's notes: this step could probably be improved. The edges
        # between the search-tree node and the variable nodes all carry the value 1,
        # which leaves room for optimization; simply combining the endpoint features
        # may not be very meaningful; the search-tree node is barely exploited here.
        # Unlike joint_conv, the raw edge feature g.edata['h'] is not mixed in.
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step on a single edge-type view.

        :param g: graph view passed by ``forward`` (g['v2c'] or g['c2v'])
        :param dir: str, 'c2v' updates the variable ('v') nodes; any other value
            updates the constraint ('c') nodes
        :param t: int, index of the round whose layers are used
        """
        # Both node types are linearly transformed on every call, regardless of dir.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Per edge: source 'h' + destination 'h', stored as the edge feature 'u+v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   c.h [520, 64], v.h [1000, 64], edata['h'] [34861, 1], edata['u+v'] [34861, 64]
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Lift the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Open question left by the original author: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        # NOTE(review): f_c is applied for both directions; f_v is never used.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

class GCNN_Net_new_6(nn.Module):
    """
    Message-passing network over a heterograph with constraint ('c'),
    variable ('v') and tree ('t') nodes.

    ``forward`` embeds the 'h' features of all three node types, then for each
    of the ``T`` rounds runs ``joint_tree`` on the 't2v' edges followed by
    ``joint_conv`` on the 'v2c' and 'c2v' edges, and finally stores one scalar
    per variable node in ``g.nodes['v'].data['s']``.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        """
        :param h_dim: int, hidden width used by every layer
        :param c_dim: int, input feature size of the constraint ('c') nodes
        :param v_dim: int, input feature size of the variable ('v') nodes
        :param t_dim: int, input feature size of the tree ('t') nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds
        """
        super().__init__()
        self.T = T
        # Constraint-node embedding.
        self.cons_embedding = nn.Sequential(
            nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Variable-node embedding.
        self.var_embedding = nn.Sequential(
            nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Tree-node embedding.
        self.tree_embedding = nn.Sequential(
            nn.Linear(t_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Graph convolution feature layers, one entry per round.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # NOTE(review): feature_module_trees is registered but never used;
        # joint_tree applies feature_module_conss to the 't' nodes instead.
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        # Per-edge MLPs applied to the joint edge representation.
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])

        # Node-update MLPs taking [h, h_neigh] concatenated (2 * h_dim inputs).
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is registered but joint_conv always uses f_c.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        self.f_t = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])

        # Output head: one scalar per variable node.
        self.f_s = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return:
            int, total number of scalar entries across all parameters
        """
        # numel() avoids the NumPy round trip, so it also works for parameters
        # that Tensor.numpy() rejects (e.g. CUDA-resident or bfloat16 ones).
        return sum(param.numel() for param in self.parameters())

    def forward(self, g):
        """
        :param g: heterograph with 'c', 'v' and 't' nodes carrying input
            features in data['h'] and with edge types 'v2c', 'c2v' and 't2v'
        :return:
            g, the same graph; data['h'] of all three node types is overwritten
            and the per-variable output is stored in g.nodes['v'].data['s']
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        # Convolution rounds: tree -> variables, variables -> constraints,
        # then constraints -> variables.
        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # All information has been gathered into g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

    def eval_forward(self, g):
        """
        Same computation as ``forward``, returning only the output tensor.

        :param g: heterograph, see ``forward``
        :return:
            tensor stored in g.nodes['v'].data['s'] after the forward pass
        """
        # Delegate instead of duplicating the forward pass line by line.
        return self.forward(g).nodes['v'].data['s']

    def joint_tree(self, g, dir, t):
        """
        Pass messages from the tree ('t') nodes into the variable nodes.

        :param g: graph view passed by ``forward`` (g['t2v'])
        :param dir: str, unused
        :param t: int, index of the round whose layers are used
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): the 't' nodes go through feature_module_conss rather than
        # feature_module_trees - confirm this is intended.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Per edge: source 'h' * destination 'h', stored as the edge feature 'u*v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   t.h [1, 64], v.h [1000, 64], edata['h'] [1000, 1], edata['u*v'] [1000, 64]
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Original author's notes: this step could probably be improved. The edges
        # between the search-tree node and the variable nodes all carry the value 1,
        # which leaves room for optimization; simply combining the endpoint features
        # may not be very meaningful; the search-tree node is barely exploited here.
        # Unlike joint_conv, the raw edge feature g.edata['h'] is not mixed in.
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step on a single edge-type view.

        :param g: graph view passed by ``forward`` (g['v2c'] or g['c2v'])
        :param dir: str, 'c2v' updates the variable ('v') nodes; any other value
            updates the constraint ('c') nodes
        :param t: int, index of the round whose layers are used
        """
        # Both node types are linearly transformed on every call, regardless of dir.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Per edge: source 'h' + destination 'h', stored as the edge feature 'u+v'.
        # Example shapes from the original notes (one debugging run, not guaranteed):
        #   c.h [520, 64], v.h [1000, 64], edata['h'] [34861, 1], edata['u+v'] [34861, 64]
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Lift the raw edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Open question left by the original author: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming 'joint' messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        # NOTE(review): f_c is applied for both directions; f_v is never used.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

class GCNN_Net_new_7(nn.Module):
    """Message-passing network over constraint ('c'), variable ('v') and
    search-tree ('t') nodes of a heterogeneous DGL graph.

    Raw node features are embedded to ``h_dim``; then ``T`` rounds are run,
    each made of a tree->variable step (``joint_tree``) followed by
    variable->constraint and constraint->variable steps (``joint_conv``).
    A final MLP maps every variable embedding to one scalar, stored in
    ``g.nodes['v'].data['s']``.

    :param h_dim: hidden width used by every layer
    :param c_dim: input feature width of the 'c' nodes
    :param v_dim: input feature width of the 'v' nodes
    :param t_dim: input feature width of the 't' nodes
    :param e_dim: input feature width of the edges
    :param T: number of message-passing rounds
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def two_stage_mlp(in_dim):
            # (Linear -> ReLU -> Dropout) twice, mapping in_dim -> h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, h_dim -> h_dim.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        # Submodules are created in a fixed order so that parameter order and
        # seeded initialisation stay stable.

        # Input embeddings for constraints, variables and tree nodes.
        self.cons_embedding = two_stage_mlp(c_dim)
        self.var_embedding = two_stage_mlp(v_dim)
        self.tree_embedding = two_stage_mlp(t_dim)

        # Graph convolution: per-round feature layers.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # NOTE(review): feature_module_trees is never used by the methods of
        # this class; joint_tree transforms the 't' nodes with
        # feature_module_conss instead.
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([edge_mlp() for _ in range(T)])

        # Node update heads, fed with the concatenation [h, h_neigh].
        self.f_c = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is never used by the methods of this class;
        # joint_conv applies f_c to both 'c' and 'v' destinations.
        self.f_v = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])
        self.f_t = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])

        # Output layers: one scalar per variable node.
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters.

        :return: int
        """
        # numel() avoids a NumPy round-trip, which fails for parameters that
        # do not live on the CPU.
        return sum(param.numel() for param in self.parameters())

    def _propagate(self, g):
        """Embed all node types, run ``T`` rounds and write the per-variable
        score to ``g.nodes['v'].data['s']``.  ``g`` is modified in place."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        # Convolution rounds.
        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # Output: everything has been aggregated into g.nodes['v'].data['h'].
        g.nodes['v'].data['s'] = self.f_s(g.nodes['v'].data['h'])

    def forward(self, g):
        """Run the network on ``g`` and return the same graph object, with
        the scores stored in ``g.nodes['v'].data['s']``."""
        self._propagate(g)
        return g

    def eval_forward(self, g):
        """Run the network on ``g`` and return ``g.nodes['v'].data['s']``."""
        self._propagate(g)
        return g.nodes['v'].data['s']

    def joint_tree(self, g, dir, t):
        """Round ``t`` of tree->variable message passing; updates the 'v'
        nodes' ``'h'`` in place.

        :param g: graph exposing node types ``'t'`` and ``'v'`` (called with
            ``g['t2v']`` by this class)
        :param dir: unused; kept for signature parity with ``joint_conv``
        :param t: index of the per-round layers to use
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): the 't' nodes go through feature_module_conss, not
        # feature_module_trees -- confirm which was intended.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Edge feature 'u*v' = source-node 'h' * destination-node 'h'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Shapes noted by the original author for one sample graph:
        #   't' nodes h [1, 64], 'v' nodes h [1000, 64],
        #   edge h [1000, 1], edge 'u*v' [1000, 64].
        #
        # Author's notes: this can probably be improved further.  The edge
        # values between the search-tree node and the variable nodes are all
        # 1, which leaves a lot of room for improvement; simply combining the
        # endpoint features may not be very meaningful either, and the
        # search-tree node is barely exploited.
        #
        # The edge feature is deliberately left out here
        # (feature_module_edges is not applied to g.edata['h']).
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the per-edge messages at each destination node.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """Round ``t`` of constraint/variable message passing; updates the
        destination nodes' ``'h'`` in place.

        :param g: graph exposing node types ``'c'`` and ``'v'`` and an edge
            feature ``'h'`` (called with ``g['v2c']`` / ``g['c2v']``)
        :param dir: ``'c2v'`` updates the 'v' nodes; any other value updates
            the 'c' nodes
        :param t: index of the per-round layers to use
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Edge feature 'u+v' = source-node 'h' + destination-node 'h'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Shapes noted by the original author for one sample graph:
        #   'c' nodes h [520, 64], 'v' nodes h [1000, 64],
        #   edge h [34861, 1], edge 'u+v' [34861, 64].
        # Open question left by the author: why are all edge weights negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the per-edge messages at each destination node.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        # NOTE(review): f_c is applied even when the destination is 'v'.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

class GCNN_Net_new_8(nn.Module):
    """Constraint/variable message-passing network followed by a TCN.

    Node features of a batched heterogeneous DGL graph are embedded to
    ``h_dim`` and refined by ``T`` rounds of variable->constraint and
    constraint->variable steps.  The per-graph variable embeddings are then
    grouped into windows of 8 consecutive graphs, passed through ``self.tcn``
    and mapped to one scalar per variable by ``self.f_s``.

    ``TCN`` is defined elsewhere in this file; its output shape is not
    visible from this class.

    :param h_dim: hidden width used by every layer
    :param c_dim: input feature width of the 'c' nodes
    :param v_dim: input feature width of the 'v' nodes
    :param e_dim: input feature width of the edges
    :param T: number of message-passing rounds
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def two_stage_mlp(in_dim):
            # (Linear -> ReLU -> Dropout) twice, mapping in_dim -> h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Submodules are created in a fixed order so that parameter order and
        # seeded initialisation stay stable.

        # Input embeddings for constraints and variables.
        self.cons_embedding = two_stage_mlp(c_dim)
        self.var_embedding = two_stage_mlp(v_dim)

        # Graph convolution: per-round feature layers.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)])

        # Node update heads, fed with the concatenation [h, h_neigh].
        self.f_c = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is never used by the methods of this class;
        # joint_conv applies f_c to both 'c' and 'v' destinations.
        self.f_v = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])

        # TCN over windows of 8 graphs (input_size matches the window length
        # used in forward / eval_forward_2).
        self.tcn = TCN(input_size=8,output_size=1,num_channels=[8,8,8,1])
        # Output layers: one scalar per variable embedding.
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters.

        :return: int
        """
        # numel() avoids a NumPy round-trip, which fails for parameters that
        # do not live on the CPU.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Round ``t`` of constraint/variable message passing; updates the
        destination nodes' ``'h'`` in place.

        :param g: graph exposing node types ``'c'`` and ``'v'`` and an edge
            feature ``'h'`` (called with ``g['v2c']`` / ``g['c2v']``)
        :param dir: ``'c2v'`` updates the 'v' nodes; any other value updates
            the 'c' nodes
        :param t: index of the per-round layers to use
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Edge feature 'u+v' = source-node 'h' + destination-node 'h'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Shapes noted by the original author for one sample graph:
        #   'c' nodes h [520, 64], 'v' nodes h [1000, 64],
        #   edge h [34861, 1], edge 'u+v' [34861, 64].
        # Open question left by the author: why are all edge weights negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the per-edge messages at each destination node.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied even when the destination is 'v'.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _variable_embeddings(self, g):
        """Embed the nodes, run ``T`` rounds of message passing and return
        the 'v' embeddings of each graph in the batch ``g`` as a list."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # Everything has been aggregated into g.nodes['v'].data['h'].
        return [one_graph.nodes['v'].data['h'] for one_graph in dgl.unbatch(g)]

    def forward(self, g):
        """Score the variables of every window of 8 graphs in the batch.

        :param g: batched heterogeneous DGL graph
        :return: tensor reshaped to ``(-1, 1000, 1)``
        """
        var_embeds = self._variable_embeddings(g)

        # Consecutive windows of 8 graphs; stacking needs every window to be
        # full and every graph to have the same number of 'v' nodes.
        window = 8
        windows = [var_embeds[i:i + window] for i in range(0, len(var_embeds), window)]
        graphs_tensor = torch.stack([torch.stack(one_window) for one_window in windows])

        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)
        # NOTE(review): hard-coded 1000 -- presumably the number of 'v' nodes
        # per graph; confirm before using other instance sizes.
        return out_data.reshape(-1, 1000, 1)

    def eval_forward_1(self, g):
        """First evaluation stage: return the list of per-graph 'v'
        embeddings after message passing."""
        return self._variable_embeddings(g)

    def eval_forward_2(self, graphs_list):
        """Second evaluation stage: run the TCN and output head on stacked
        per-graph embeddings.

        :param graphs_list: list of equally shaped tensors (as produced by
            ``eval_forward_1``)
        :return: tensor reshaped to ``(-1, 1)``
        """
        graphs_tensor = torch.stack(graphs_list)
        # Single window of 8 graphs; the trailing 64 matches the default h_dim.
        graphs_tensor = graphs_tensor.reshape(1,8,-1,64)

        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)

        return out_data.reshape(-1, 1)
    
class GCNN_Net_new_10(nn.Module):
    """Constraint/variable message-passing network followed by a TCN.

    Node features of a batched heterogeneous DGL graph are embedded to
    ``h_dim`` and refined by ``T`` rounds of variable->constraint and
    constraint->variable steps.  The per-graph variable embeddings are then
    grouped into windows of 8 consecutive graphs, passed through ``self.tcn``
    and mapped to one scalar per variable by ``self.f_s``.  ``forward``
    requires the batch size to be a multiple of 8.

    ``TCN`` is defined elsewhere in this file; its output shape is not
    visible from this class.

    :param h_dim: hidden width used by every layer
    :param c_dim: input feature width of the 'c' nodes
    :param v_dim: input feature width of the 'v' nodes
    :param e_dim: input feature width of the edges
    :param T: number of message-passing rounds
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def two_stage_mlp(in_dim):
            # (Linear -> ReLU -> Dropout) twice, mapping in_dim -> h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Submodules are created in a fixed order so that parameter order and
        # seeded initialisation stay stable.

        # Input embeddings for constraints and variables.
        self.cons_embedding = two_stage_mlp(c_dim)
        self.var_embedding = two_stage_mlp(v_dim)

        # Graph convolution: per-round feature layers.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)])

        # Node update heads, fed with the concatenation [h, h_neigh].
        self.f_c = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is never used by the methods of this class;
        # joint_conv applies f_c to both 'c' and 'v' destinations.
        self.f_v = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])

        # TCN over windows of 8 graphs (input_size matches the window length
        # used in forward / eval_forward_2).
        self.tcn = TCN(input_size=8,output_size=1,num_channels=[8,8,8,1])
        # Output layers: one scalar per variable embedding.
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters.

        :return: int
        """
        # numel() avoids a NumPy round-trip, which fails for parameters that
        # do not live on the CPU.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Round ``t`` of constraint/variable message passing; updates the
        destination nodes' ``'h'`` in place.

        :param g: graph exposing node types ``'c'`` and ``'v'`` and an edge
            feature ``'h'`` (called with ``g['v2c']`` / ``g['c2v']``)
        :param dir: ``'c2v'`` updates the 'v' nodes; any other value updates
            the 'c' nodes
        :param t: index of the per-round layers to use
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Edge feature 'u+v' = source-node 'h' + destination-node 'h'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Shapes noted by the original author for one sample graph:
        #   'c' nodes h [520, 64], 'v' nodes h [1000, 64],
        #   edge h [34861, 1], edge 'u+v' [34861, 64].
        # Open question left by the author: why are all edge weights negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the per-edge messages at each destination node.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied even when the destination is 'v'.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _variable_embeddings(self, g):
        """Embed the nodes, run ``T`` rounds of message passing and return
        the 'v' embeddings of each graph in the batch ``g`` as a list."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # Everything has been aggregated into g.nodes['v'].data['h'].
        return [one_graph.nodes['v'].data['h'] for one_graph in dgl.unbatch(g)]

    def forward(self, g):
        """Score the variables of every window of 8 graphs in the batch.

        :param g: batched heterogeneous DGL graph whose batch size is a
            multiple of 8
        :return: tensor reshaped to ``(-1, 1000, 1)``
        """
        var_embeds = self._variable_embeddings(g)

        window = 8
        assert len(var_embeds) % window == 0, \
            "batch size must be a multiple of 8, got %d" % len(var_embeds)

        # Consecutive windows of 8 graphs; stacking needs every graph to have
        # the same number of 'v' nodes.
        windows = [var_embeds[i:i + window] for i in range(0, len(var_embeds), window)]
        graphs_tensor = torch.stack([torch.stack(one_window) for one_window in windows])

        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)
        # NOTE(review): hard-coded 1000 -- presumably the number of 'v' nodes
        # per graph; confirm before using other instance sizes.
        return out_data.reshape(-1, 1000, 1)

    def eval_forward_1(self, g):
        """First evaluation stage: return the list of per-graph 'v'
        embeddings after message passing."""
        return self._variable_embeddings(g)

    def eval_forward_2(self, graphs_list):
        """Second evaluation stage: run the TCN and output head on stacked
        per-graph embeddings.

        :param graphs_list: list of equally shaped tensors (as produced by
            ``eval_forward_1``)
        :return: tensor reshaped to ``(-1, 1)``
        """
        graphs_tensor = torch.stack(graphs_list)
        # Single window of 8 graphs; the trailing 64 matches the default h_dim.
        graphs_tensor = graphs_tensor.reshape(1,8,-1,64)
        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)

        return out_data.reshape(-1, 1)

class GCNN_Net_new_12(nn.Module):
    """Constraint/variable message-passing network followed by a TCN.

    Node features of a batched heterogeneous DGL graph are embedded to
    ``h_dim`` and refined by ``T`` rounds of variable->constraint and
    constraint->variable steps.  The per-graph variable embeddings are then
    grouped into windows of 8 consecutive graphs, passed through ``self.tcn``
    and mapped to one scalar per variable by ``self.f_s``.  ``forward``
    requires the batch size to be a multiple of 8.

    ``TCN`` is defined elsewhere in this file; its output shape is not
    visible from this class.

    :param h_dim: hidden width used by every layer
    :param c_dim: input feature width of the 'c' nodes
    :param v_dim: input feature width of the 'v' nodes
    :param e_dim: input feature width of the edges
    :param T: number of message-passing rounds
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def two_stage_mlp(in_dim):
            # (Linear -> ReLU -> Dropout) twice, mapping in_dim -> h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Submodules are created in a fixed order so that parameter order and
        # seeded initialisation stay stable.

        # Input embeddings for constraints and variables.
        self.cons_embedding = two_stage_mlp(c_dim)
        self.var_embedding = two_stage_mlp(v_dim)

        # Graph convolution: per-round feature layers.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)])

        # Node update heads, fed with the concatenation [h, h_neigh].
        self.f_c = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is never used by the methods of this class;
        # joint_conv applies f_c to both 'c' and 'v' destinations.
        self.f_v = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])

        # TCN over windows of 8 graphs (input_size matches the window length
        # used in forward / eval_forward_2).
        self.tcn = TCN(input_size=8,output_size=1,num_channels=[8,8,8,1])
        # Output layers: one scalar per variable embedding.
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters.

        :return: int
        """
        # numel() avoids a NumPy round-trip, which fails for parameters that
        # do not live on the CPU.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Round ``t`` of constraint/variable message passing; updates the
        destination nodes' ``'h'`` in place.

        :param g: graph exposing node types ``'c'`` and ``'v'`` and an edge
            feature ``'h'`` (called with ``g['v2c']`` / ``g['c2v']``)
        :param dir: ``'c2v'`` updates the 'v' nodes; any other value updates
            the 'c' nodes
        :param t: index of the per-round layers to use
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Edge feature 'u+v' = source-node 'h' + destination-node 'h'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Shapes noted by the original author for one sample graph:
        #   'c' nodes h [520, 64], 'v' nodes h [1000, 64],
        #   edge h [34861, 1], edge 'u+v' [34861, 64].
        # Open question left by the author: why are all edge weights negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the per-edge messages at each destination node.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied even when the destination is 'v'.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _variable_embeddings(self, g):
        """Embed the nodes, run ``T`` rounds of message passing and return
        the 'v' embeddings of each graph in the batch ``g`` as a list."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # Everything has been aggregated into g.nodes['v'].data['h'].
        return [one_graph.nodes['v'].data['h'] for one_graph in dgl.unbatch(g)]

    def forward(self, g):
        """Score the variables of every window of 8 graphs in the batch.

        :param g: batched heterogeneous DGL graph whose batch size is a
            multiple of 8
        :return: tensor reshaped to ``(-1, 1000, 1)``
        """
        var_embeds = self._variable_embeddings(g)

        window = 8
        assert len(var_embeds) % window == 0, \
            "batch size must be a multiple of 8, got %d" % len(var_embeds)

        # Consecutive windows of 8 graphs; stacking needs every graph to have
        # the same number of 'v' nodes.
        windows = [var_embeds[i:i + window] for i in range(0, len(var_embeds), window)]
        graphs_tensor = torch.stack([torch.stack(one_window) for one_window in windows])

        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)
        # NOTE(review): hard-coded 1000 -- presumably the number of 'v' nodes
        # per graph; confirm before using other instance sizes.
        return out_data.reshape(-1, 1000, 1)

    def eval_forward_1(self, g):
        """First evaluation stage: return the list of per-graph 'v'
        embeddings after message passing."""
        return self._variable_embeddings(g)

    def eval_forward_2(self, graphs_list):
        """Second evaluation stage: run the TCN and output head on stacked
        per-graph embeddings.

        :param graphs_list: list of equally shaped tensors (as produced by
            ``eval_forward_1``)
        :return: tensor reshaped to ``(-1, 1)``
        """
        graphs_tensor = torch.stack(graphs_list)
        # Single window of 8 graphs; the trailing 64 matches the default h_dim.
        graphs_tensor = graphs_tensor.reshape(1,8,-1,64)
        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)

        return out_data.reshape(-1, 1)

class GCNN_Net_new_13(nn.Module):
    """Constraint/variable/search-tree message-passing network followed by
    a TCN.

    Node features of a batched heterogeneous DGL graph are embedded to
    ``h_dim``.  ``forward`` then runs ``T`` rounds, each made of a
    tree->variable step (``joint_tree``) followed by variable->constraint
    and constraint->variable steps (``joint_conv``).  The per-graph variable
    embeddings are grouped into windows of 8 consecutive graphs, passed
    through ``self.tcn`` and mapped to one scalar per variable by
    ``self.f_s``.

    ``TCN`` is defined elsewhere in this file; its output shape is not
    visible from this class.

    :param h_dim: hidden width used by every layer
    :param c_dim: input feature width of the 'c' nodes
    :param v_dim: input feature width of the 'v' nodes
    :param t_dim: input feature width of the 't' nodes
    :param e_dim: input feature width of the edges
    :param T: number of message-passing rounds
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def two_stage_mlp(in_dim):
            # (Linear -> ReLU -> Dropout) twice, mapping in_dim -> h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, h_dim -> h_dim.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        # Submodules are created in a fixed order so that parameter order and
        # seeded initialisation stay stable.

        # Input embeddings for constraints, variables and tree nodes.
        self.cons_embedding = two_stage_mlp(c_dim)
        self.var_embedding = two_stage_mlp(v_dim)
        self.tree_embedding = two_stage_mlp(t_dim)

        # Graph convolution: per-round feature layers.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # NOTE(review): feature_module_trees is never used by the methods of
        # this class; joint_tree transforms the 't' nodes with
        # feature_module_conss instead.
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([edge_mlp() for _ in range(T)])

        # Node update heads, fed with the concatenation [h, h_neigh].
        self.f_c = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])
        # NOTE(review): f_v is never used by the methods of this class;
        # joint_conv applies f_c to both 'c' and 'v' destinations.
        self.f_v = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])
        self.f_t = nn.ModuleList([two_stage_mlp(h_dim + h_dim) for _ in range(T)])

        # TCN over windows of 8 graphs (input_size matches the window length
        # used in forward / eval_forward_2).
        self.tcn = TCN(input_size=8,output_size=1,num_channels=[8,8,8,1])
        # Output layers: one scalar per variable embedding.
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters.

        :return: int
        """
        # numel() avoids a NumPy round-trip, which fails for parameters that
        # do not live on the CPU.
        return sum(param.numel() for param in self.parameters())

    def joint_tree(self, g, dir, t):
        """Round ``t`` of tree->variable message passing; updates the 'v'
        nodes' ``'h'`` in place.

        :param g: graph exposing node types ``'t'`` and ``'v'`` (called with
            ``g['t2v']`` by this class)
        :param dir: unused; kept for signature parity with ``joint_conv``
        :param t: index of the per-round layers to use
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): the 't' nodes go through feature_module_conss, not
        # feature_module_trees -- confirm which was intended.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Edge feature 'u*v' = source-node 'h' * destination-node 'h'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Shapes noted by the original author for one sample graph:
        #   't' nodes h [1, 64], 'v' nodes h [1000, 64],
        #   edge h [1000, 1], edge 'u*v' [1000, 64].
        #
        # Author's notes: this can probably be improved further.  The edge
        # values between the search-tree node and the variable nodes are all
        # 1, which leaves a lot of room for improvement; simply combining the
        # endpoint features may not be very meaningful either, and the
        # search-tree node is barely exploited.
        #
        # The edge feature is deliberately left out here
        # (feature_module_edges is not applied to g.edata['h']).
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the per-edge messages at each destination node.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """Round ``t`` of constraint/variable message passing; updates the
        destination nodes' ``'h'`` in place.

        :param g: graph exposing node types ``'c'`` and ``'v'`` and an edge
            feature ``'h'`` (called with ``g['v2c']`` / ``g['c2v']``)
        :param dir: ``'c2v'`` updates the 'v' nodes; any other value updates
            the 'c' nodes
        :param t: index of the per-round layers to use
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Edge feature 'u+v' = source-node 'h' + destination-node 'h'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Shapes noted by the original author for one sample graph:
        #   'c' nodes h [520, 64], 'v' nodes h [1000, 64],
        #   edge h [34861, 1], edge 'u+v' [34861, 64].
        # Open question left by the author: why are all edge weights negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the per-edge messages at each destination node.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        # NOTE(review): f_c is applied even when the destination is 'v'.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _variable_embeddings(self, g, use_tree):
        """Embed the nodes, run ``T`` rounds of message passing and return
        the 'v' embeddings of each graph in the batch ``g`` as a list.

        When ``use_tree`` is true the 't' nodes are embedded as well and each
        round starts with a tree->variable step.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        if use_tree:
            g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v'] if use_tree else None

        # Convolution rounds.
        for t in range(self.T):
            if use_tree:
                self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # Everything has been aggregated into g.nodes['v'].data['h'].
        return [one_graph.nodes['v'].data['h'] for one_graph in dgl.unbatch(g)]

    def forward(self, g):
        """Score the variables of every window of 8 graphs in the batch.

        :param g: batched heterogeneous DGL graph with 'c', 'v' and 't' nodes
            whose batch size is a multiple of 8
        :return: tensor reshaped to ``(-1, 1000, 1)``
        """
        var_embeds = self._variable_embeddings(g, use_tree=True)

        window = 8
        assert len(var_embeds) % window == 0, \
            "batch size must be a multiple of 8, got %d" % len(var_embeds)

        # Consecutive windows of 8 graphs; stacking needs every graph to have
        # the same number of 'v' nodes.
        windows = [var_embeds[i:i + window] for i in range(0, len(var_embeds), window)]
        graphs_tensor = torch.stack([torch.stack(one_window) for one_window in windows])

        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)
        # NOTE(review): hard-coded 1000 -- presumably the number of 'v' nodes
        # per graph; confirm before using other instance sizes.
        return out_data.reshape(-1, 1000, 1)

    def eval_forward_1(self, g, use_tree=False):
        """First evaluation stage: return the list of per-graph 'v'
        embeddings after message passing.

        :param g: batched heterogeneous DGL graph
        :param use_tree: when true, also embed the 't' nodes and run the
            tree->variable step each round, exactly as ``forward`` does.
            The default (false) keeps the historical behaviour, which skips
            the tree branch.
            NOTE(review): with the default, evaluation does not match the
            computation used in ``forward``; callers whose graphs carry 't'
            nodes most likely want ``use_tree=True``.
        :return: list of per-graph 'v' embedding tensors
        """
        return self._variable_embeddings(g, use_tree=use_tree)

    def eval_forward_2(self, graphs_list):
        """Second evaluation stage: run the TCN and output head on stacked
        per-graph embeddings.

        :param graphs_list: list of equally shaped tensors (as produced by
            ``eval_forward_1``)
        :return: tensor reshaped to ``(-1, 1)``
        """
        graphs_tensor = torch.stack(graphs_list)
        # Single window of 8 graphs; the trailing 64 matches the default h_dim.
        graphs_tensor = graphs_tensor.reshape(1,8,-1,64)
        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)

        return out_data.reshape(-1, 1)

class GCNN_Net_new_14(nn.Module):
    """GNN over constraint ('c'), variable ('v') and tree ('t') nodes with a TCN output head.

    Each of the ``T`` rounds first passes messages along the 't2v' relation and
    then along 'v2c' and 'c2v'.  ``forward`` groups the per-graph variable
    features in chunks of 8, feeds them through ``self.tcn`` and scores the
    result with ``self.f_s``.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        """Build the embedding, message-passing, TCN and output layers.

        :param h_dim: hidden width shared by all layers.
        :param c_dim: input feature width of 'c' (constraint) nodes.
        :param v_dim: input feature width of 'v' (variable) nodes.
        :param t_dim: input feature width of 't' (tree) nodes.
        :param e_dim: input feature width of edges.
        :param T: number of message-passing rounds; one layer set is created per round.
        """
        super().__init__()
        self.T = T
        # constraint-node embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variable-node embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # tree-node embedding
        self.tree_embedding = nn.Sequential(nn.Linear(t_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Graph convolution: per-round feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])

        # NOTE(review): created (and kept so existing checkpoints still load) but
        # never called by any method of this class.
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])

        # edge-message transform used by joint_tree
        self.feature_final_trees = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])

        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is never called in this class (joint_conv always uses f_c).
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # node update used by joint_tree
        self.f_t = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])

        # TCN
        self.tcn = TCN(input_size=8, output_size=1, num_channels=[8, 8, 8, 1])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters."""
        # numel() works for every device and dtype; the former .numpy() round
        # trip raised for CUDA and bfloat16 parameters.
        return sum(param.numel() for param in self.parameters())

    def joint_tree(self, g, dir, t):
        """Pass messages from tree ('t') nodes to variable ('v') nodes for round ``t``.

        :param g: the 't2v' relation view; the 'h' feature of its 'v' and 't'
            nodes is overwritten.
        :param dir: relation name supplied by the caller; not used here.
        :param t: index of the message-passing round (selects the layer set).
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): tree nodes go through feature_module_conss although
        # feature_module_trees exists and is unused — confirm which was intended.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Multiply the source and destination 'h' features and store the
        # product on each edge as 'u*v'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Example shapes noted by the original author:
        #   g.nodes['t'].data['h'] : [1, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [1000, 1]
        #   g.edata['u*v']         : [1000, 64]

        # Author's notes: this step can probably be improved.  All tree-variable
        # edge values are 1, which leaves a lot of room for refinement, and a
        # plain combination of the endpoint features may not carry much meaning,
        # so the search-tree nodes are barely exploited here.

        # Earlier variant: joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """Run one half-convolution between constraint and variable nodes.

        :param g: a 'v2c' or 'c2v' relation view; the 'h' feature of both node
            types is overwritten.
        :param dir: 'c2v' updates the 'v' nodes, any other value updates 'c'.
        :param t: index of the message-passing round (selects the layer set).
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Add the source and destination 'h' features and store the sum on each
        # edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the author: why are all edge weights negative?

        # Edge features are lifted from e_dim to h_dim and added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        # NOTE(review): f_c is applied for both destination types; f_v is never
        # used — confirm whether that is intended.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def forward(self, g):
        """Run message passing on a batch and score it with the TCN head.

        :param g: batched DGL graph with 'c', 'v' and 't' nodes and the 'v2c',
            'c2v' and 't2v' relations.  The batch size must be a multiple of 8
            and all graphs need equally shaped variable features (they are
            stacked).  Node features 'h' are overwritten.
        :return: ``self.f_s(self.tcn(x))`` reshaped to (-1, 1000, 1), where
            ``x`` stacks the variable features as (groups, 8, ...).
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        # Convolution rounds: tree -> variable, variable -> constraint,
        # constraint -> variable.
        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)

            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # Output: all information is now aggregated in g.nodes['v'].data['h'].
        graphs_list = dgl.unbatch(g)

        assert len(graphs_list) % 8 == 0

        graphs_list = [one_graph.nodes['v'].data['h'] for one_graph in graphs_list]

        # Split the batch into consecutive groups of 8 graphs.
        n = 8
        graphs_list_list = [graphs_list[i:i+n] for i in range(0, len(graphs_list), n)]

        graphs_tensor = torch.stack([torch.stack(one_list) for one_list in graphs_list_list])

        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)
        # NOTE(review): 1000 is hard-coded; it matches the variable count in the
        # author's shape notes — confirm before using other problem sizes.
        out_data = out_data.reshape(-1, 1000, 1)

        return out_data

    def eval_forward_1(self, g):
        """Run only the constraint/variable message passing and return per-graph features.

        Unlike ``forward`` this path neither embeds the 't' nodes nor calls
        ``joint_tree``.

        :param g: batched DGL graph with 'c' and 'v' nodes and the 'v2c' /
            'c2v' relations; node features 'h' are overwritten.
        :return: list with the 'h' tensor of the 'v' nodes of every graph.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        graphs_list = dgl.unbatch(g)
        graphs_list = [one_graph.nodes['v'].data['h'] for one_graph in graphs_list]
        return graphs_list

    def eval_forward_2(self, graphs_list):
        """Score stacked variable features with the TCN and the output head.

        :param graphs_list: sequence of equally shaped tensors whose stacked
            data can be reshaped to (1, 8, -1, 64).
        :return: ``self.f_s(self.tcn(x))`` flattened to a single column.
        """
        graphs_tensor = torch.stack(graphs_list)
        graphs_tensor = graphs_tensor.reshape(1, 8, -1, 64)
        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)
        out_data = out_data.reshape(-1, 1)

        return out_data

class GCNN_Net_new_15(nn.Module):
    """Bipartite GNN over constraint ('c') and variable ('v') nodes with a TCN output head.

    ``forward`` runs ``T`` rounds of v2c / c2v message passing, groups the
    per-graph variable features in chunks of 8, feeds them through
    ``self.tcn`` and scores the result with ``self.f_s``.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """Build the embedding, message-passing, TCN and output layers.

        :param h_dim: hidden width shared by all layers.
        :param c_dim: input feature width of 'c' (constraint) nodes.
        :param v_dim: input feature width of 'v' (variable) nodes.
        :param e_dim: input feature width of edges.
        :param T: number of message-passing rounds; one layer set is created per round.
        """
        super().__init__()
        self.T = T
        # constraint-node embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variable-node embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph convolution: per-round feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is never called in this class (joint_conv always uses f_c).
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # TCN
        self.tcn = TCN(input_size=8, output_size=1, num_channels=[8, 8, 8, 1])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters."""
        # numel() works for every device and dtype; the former .numpy() round
        # trip raised for CUDA and bfloat16 parameters.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Run one half-convolution between constraint and variable nodes.

        :param g: a 'v2c' or 'c2v' relation view; the 'h' feature of both node
            types is overwritten.
        :param dir: 'c2v' updates the 'v' nodes, any other value updates 'c'.
        :param t: index of the message-passing round (selects the layer set).
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Add the source and destination 'h' features and store the sum on each
        # edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the author: why are all edge weights negative?

        # Edge features are lifted from e_dim to h_dim and added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # feature_final maps h_dim -> h_dim.
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both destination types; f_v is never
        # used — confirm whether that is intended.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def forward(self, g):
        """Run message passing on a batch and score it with the TCN head.

        :param g: batched DGL graph with 'c' and 'v' nodes and the 'v2c' /
            'c2v' relations.  The batch size must be a multiple of 8 and all
            graphs need equally shaped variable features (they are stacked).
            Node features 'h' are overwritten.
        :return: ``self.f_s(self.tcn(x))`` reshaped to (-1, 1000, 1), where
            ``x`` stacks the variable features as (groups, 8, ...).
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # Output: all information is now aggregated in g.nodes['v'].data['h'].
        graphs_list = dgl.unbatch(g)

        assert len(graphs_list) % 8 == 0

        graphs_list = [one_graph.nodes['v'].data['h'] for one_graph in graphs_list]

        # Split the batch into consecutive groups of 8 graphs.
        n = 8
        graphs_list_list = [graphs_list[i:i+n] for i in range(0, len(graphs_list), n)]

        graphs_tensor = torch.stack([torch.stack(one_list) for one_list in graphs_list_list])

        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)
        # NOTE(review): 1000 is hard-coded; it matches the variable count in the
        # author's shape notes — confirm before using other problem sizes.
        out_data = out_data.reshape(-1, 1000, 1)

        return out_data

    def eval_forward_1(self, g):
        """Run the message passing and return the per-graph variable features.

        :param g: batched DGL graph with 'c' and 'v' nodes and the 'v2c' /
            'c2v' relations; node features 'h' are overwritten.
        :return: list with the 'h' tensor of the 'v' nodes of every graph.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        graphs_list = dgl.unbatch(g)
        graphs_list = [one_graph.nodes['v'].data['h'] for one_graph in graphs_list]
        return graphs_list

    def eval_forward_2(self, graphs_list):
        """Score stacked variable features with the TCN and the output head.

        :param graphs_list: sequence of equally shaped tensors whose stacked
            data can be reshaped to (1, 8, -1, 64).
        :return: ``self.f_s(self.tcn(x))`` flattened to a single column.
        """
        graphs_tensor = torch.stack(graphs_list)
        graphs_tensor = graphs_tensor.reshape(1, 8, -1, 64)
        tcn_out = self.tcn(graphs_tensor)
        out_data = self.f_s(tcn_out)
        out_data = out_data.reshape(-1, 1)

        return out_data

class GCNN_Net_new_16(nn.Module):
    """Bipartite GNN over constraint ('c') and variable ('v') nodes with a per-variable score.

    ``forward`` runs ``T`` rounds of v2c / c2v message passing and stores the
    output of ``self.f_s`` on the variable nodes as feature 's'.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """Build the embedding, message-passing and output layers.

        :param h_dim: hidden width shared by all layers.
        :param c_dim: input feature width of 'c' (constraint) nodes.
        :param v_dim: input feature width of 'v' (variable) nodes.
        :param e_dim: input feature width of edges.
        :param T: number of message-passing rounds; one layer set is created per round.
        """
        super().__init__()
        self.T = T
        # constraint-node embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variable-node embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph convolution: per-round feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is never called in this class (joint_conv always uses f_c).
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters."""
        # numel() works for every device and dtype; the former .numpy() round
        # trip raised for CUDA and bfloat16 parameters.
        return sum(param.numel() for param in self.parameters())

    def forward(self, g):
        """Run message passing and attach a score to every variable node.

        :param g: DGL graph with 'c' and 'v' nodes and the 'v2c' / 'c2v'
            relations; node features 'h' are overwritten.
        :return: the same graph ``g`` with ``g.nodes['v'].data['s']`` set to
            ``self.f_s`` applied to the final variable features.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # Output: all information is now aggregated in g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

    def joint_conv(self, g, dir, t):
        """Run one half-convolution between constraint and variable nodes.

        :param g: a 'v2c' or 'c2v' relation view; the 'h' feature of both node
            types is overwritten.
        :param dir: 'c2v' updates the 'v' nodes, any other value updates 'c'.
        :param t: index of the message-passing round (selects the layer set).
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Add the source and destination 'h' features and store the sum on each
        # edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the author: why are all edge weights negative?

        # Edge features are lifted from e_dim to h_dim and added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # feature_final maps h_dim -> h_dim.
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both destination types; f_v is never
        # used — confirm whether that is intended.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

class GCNN_Net_new_17(nn.Module):
    """Bipartite GNN over constraint ('c') and variable ('v') nodes with a per-variable score.

    ``forward`` runs ``T`` rounds of v2c / c2v message passing and stores the
    output of ``self.f_s`` on the variable nodes as feature 's'.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """Build the embedding, message-passing and output layers.

        :param h_dim: hidden width shared by all layers.
        :param c_dim: input feature width of 'c' (constraint) nodes.
        :param v_dim: input feature width of 'v' (variable) nodes.
        :param e_dim: input feature width of edges.
        :param T: number of message-passing rounds; one layer set is created per round.
        """
        super().__init__()
        self.T = T
        # constraint-node embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variable-node embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph convolution: per-round feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is never called in this class (joint_conv always uses f_c).
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters."""
        # numel() works for every device and dtype; the former .numpy() round
        # trip raised for CUDA and bfloat16 parameters.
        return sum(param.numel() for param in self.parameters())

    def forward(self, g):
        """Run message passing and attach a score to every variable node.

        :param g: DGL graph with 'c' and 'v' nodes and the 'v2c' / 'c2v'
            relations; node features 'h' are overwritten.
        :return: the same graph ``g`` with ``g.nodes['v'].data['s']`` set to
            ``self.f_s`` applied to the final variable features.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # Output: all information is now aggregated in g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

    def joint_conv(self, g, dir, t):
        """Run one half-convolution between constraint and variable nodes.

        :param g: a 'v2c' or 'c2v' relation view; the 'h' feature of both node
            types is overwritten.
        :param dir: 'c2v' updates the 'v' nodes, any other value updates 'c'.
        :param t: index of the message-passing round (selects the layer set).
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Add the source and destination 'h' features and store the sum on each
        # edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the author: why are all edge weights negative?

        # Edge features are lifted from e_dim to h_dim and added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # feature_final maps h_dim -> h_dim.
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both destination types; f_v is never
        # used — confirm whether that is intended.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

class GCNN_Net_new_18(nn.Module):
    """Bipartite GNN over constraint ('c') and variable ('v') nodes with a per-variable score.

    ``forward`` runs ``T`` rounds of v2c / c2v message passing and stores the
    output of ``self.f_s`` on the variable nodes as feature 's'.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """Build the embedding, message-passing and output layers.

        :param h_dim: hidden width shared by all layers.
        :param c_dim: input feature width of 'c' (constraint) nodes.
        :param v_dim: input feature width of 'v' (variable) nodes.
        :param e_dim: input feature width of edges.
        :param T: number of message-passing rounds; one layer set is created per round.
        """
        super().__init__()
        self.T = T
        # constraint-node embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variable-node embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph convolution: per-round feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is never called in this class (joint_conv always uses f_c).
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters."""
        # numel() works for every device and dtype; the former .numpy() round
        # trip raised for CUDA and bfloat16 parameters.
        return sum(param.numel() for param in self.parameters())

    def forward(self, g):
        """Run message passing and attach a score to every variable node.

        :param g: DGL graph with 'c' and 'v' nodes and the 'v2c' / 'c2v'
            relations; node features 'h' are overwritten.
        :return: the same graph ``g`` with ``g.nodes['v'].data['s']`` set to
            ``self.f_s`` applied to the final variable features.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # Output: all information is now aggregated in g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

    def joint_conv(self, g, dir, t):
        """Run one half-convolution between constraint and variable nodes.

        :param g: a 'v2c' or 'c2v' relation view; the 'h' feature of both node
            types is overwritten.
        :param dir: 'c2v' updates the 'v' nodes, any other value updates 'c'.
        :param t: index of the message-passing round (selects the layer set).
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Add the source and destination 'h' features and store the sum on each
        # edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the author: why are all edge weights negative?

        # Edge features are lifted from e_dim to h_dim and added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # feature_final maps h_dim -> h_dim.
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both destination types; f_v is never
        # used — confirm whether that is intended.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))
        
# class ContrastiveLoss(nn.Module):
#     def __init__(self, alpha=0.5, beta=1.0, temperature=0.1):
#         super().__init__()
#         # 可学习参数，初始值为建议值
#         self.alpha = nn.Parameter(torch.tensor(alpha))
#         self.beta = nn.Parameter(torch.tensor(beta))
#         self.temperature = temperature

#     def forward(self, embeds, labels):
#         embeds = F.normalize(embeds, p=2, dim=1)
#         sim_matrix = torch.div(torch.matmul(embeds, embeds.T), self.temperature)

#         logits_mask = ~torch.eye(sim_matrix.size(0), dtype=torch.bool, device=embeds.device)

#         labels = labels.contiguous().view(-1, 1)
#         label_diff = torch.abs(labels - labels.T)

#         # 构造权重矩阵
#         weight_matrix = torch.zeros_like(sim_matrix)
#         weight_matrix[label_diff == 0] = 1.0                             # 同组，权重为1
#         weight_matrix[label_diff == 1] = self.alpha                     # 相邻组，权重为α
#         weight_matrix[label_diff >= 2] = self.beta                      # 非相邻组，权重为β

#         weight_matrix = weight_matrix * logits_mask.float()            # 去除 self-similarity

#         exp_sim = torch.exp(sim_matrix) * logits_mask.float()
#         log_prob = sim_matrix - torch.log(exp_sim.sum(1, keepdim=True) + 1e-8)

#         mean_log_prob_pos = (weight_matrix * log_prob).sum(1) / (weight_matrix.sum(1) + 1e-8)

#         return -mean_log_prob_pos.mean()
        
class GCNN_Net_new_20_cons(nn.Module):
    """Bipartite GNN with two auxiliary score heads and a weighted contrastive loss.

    After ``T`` rounds of v2c / c2v message passing, the heads ``f_s_0`` and
    ``f_s_1`` score each variable; the main head ``f_s`` consumes the variable
    embedding concatenated with both auxiliary scores (``h_dim + 2`` inputs).
    ``compute_loss`` adds ``alpha`` times a supervised contrastive loss on the
    mean variable embedding of each graph to a given prediction loss.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1,alpha = 0.5):
        """Build the embedding, message-passing and output layers.

        :param h_dim: hidden width shared by all layers.
        :param c_dim: input feature width of 'c' (constraint) nodes.
        :param v_dim: input feature width of 'v' (variable) nodes.
        :param e_dim: input feature width of edges.
        :param T: number of message-passing rounds; one layer set is created per round.
        :param alpha: weight of the contrastive term in ``compute_loss``.
        """
        super().__init__()
        self.T = T
        self.alpha = alpha
        # constraint-node embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variable-node embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph convolution: per-round feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is never called in this class (joint_conv always uses f_c).
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # main output head: variable embedding plus the two auxiliary scores
        self.f_s = nn.Sequential(nn.Linear(h_dim+2, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        # auxiliary output heads
        self.f_s_0 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        self.f_s_1 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

        # self.contrastive_loss = ContrastiveLoss(alpha=0.5, beta=1.5, temperature=0.1)
        # Learnable logits behind the label-distance weights of the contrastive loss.
        self.theta = torch.nn.Parameter(torch.zeros(5))

    def count_parameters(self):
        """Return the total number of scalar entries over all parameters."""
        # numel() works for every device and dtype; the former .numpy() round
        # trip raised for CUDA and bfloat16 parameters.
        return sum(param.numel() for param in self.parameters())

    def get_graph_embedding(self, g):
        """Return the mean 'h' feature of the 'v' nodes of each graph in ``g``."""
        return dgl.mean_nodes(g, 'h', ntype='v')

    def joint_conv(self, g, dir, t):
        """Run one half-convolution between constraint and variable nodes.

        :param g: a 'v2c' or 'c2v' relation view; the 'h' feature of both node
            types is overwritten.
        :param dir: 'c2v' updates the 'v' nodes, any other value updates 'c'.
        :param t: index of the message-passing round (selects the layer set).
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Add the source and destination 'h' features and store the sum on each
        # edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the author: why are all edge weights negative?

        # Edge features are lifted from e_dim to h_dim and added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # feature_final maps h_dim -> h_dim.
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both destination types; f_v is never
        # used — confirm whether that is intended.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def forward_lp(self, g):
        """Run message passing and attach the scores 's_0', 's_1' and 's' to the variables.

        :param g: DGL graph with 'c' and 'v' nodes and the 'v2c' / 'c2v'
            relations; node features 'h' are overwritten.
        :return: the same graph ``g`` with ``g.nodes['v'].data`` holding
            's_0', 's_1' and 's'.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # Output: all information is now aggregated in g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)

        # f_s expects h_dim + 2 features, so it must see the embedding together
        # with both auxiliary scores (as in forward_sb); feeding the bare
        # h_dim-wide embedding raised a shape error.
        head_input = torch.cat((var_embeds, g.nodes['v'].data['s_0'], g.nodes['v'].data['s_1']), dim=1)
        g.nodes['v'].data['s'] = self.f_s(head_input)

        return g

    def forward_sb(self, g):
        """Like ``forward_lp`` but additionally return one embedding per graph.

        :param g: DGL graph with 'c' and 'v' nodes and the 'v2c' / 'c2v'
            relations; node features 'h' are overwritten.
        :return: tuple ``(g, graph_embeds)`` where ``graph_embeds`` is the mean
            of the final variable features of each graph.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # Output: all information is now aggregated in g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        graph_embeds = dgl.mean_nodes(g, 'h', ntype='v')
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)

        # The main head sees the embedding together with both auxiliary scores.
        var_embeds = torch.cat((var_embeds, g.nodes['v'].data['s_0'], g.nodes['v'].data['s_1']), dim=1)

        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g, graph_embeds

    def supervised_contrastive_loss(self, embeds, labels, temperature=0.1):
        """Contrastive loss whose negatives are weighted by label distance.

        Same-label pairs (excluding self-pairs) form the numerator; the
        denominator sums ``exp(w * sim)`` over all other samples, where ``w``
        depends on ``|label_i - label_j|`` through the learnable ``theta``.

        :param embeds: 2-D tensor with one embedding per row.
        :param labels: one label per row of ``embeds``.
        :param temperature: divisor applied to the cosine similarities.
        :return: scalar loss tensor.
        """
        embeds = F.normalize(embeds, p=2, dim=1)
        sim_matrix = torch.div(torch.matmul(embeds, embeds.T), temperature)
        # True everywhere except on the diagonal (self-pairs).
        logits_mask = ~torch.eye(sim_matrix.size(0), dtype=torch.bool, device=embeds.device)

        labels = labels.contiguous().view(-1, 1)
        label_diff = torch.abs(labels - labels.T)

        # Constrain the weights to keep training stable.
        beta = F.softplus(self.theta)               # beta_j >= 0
        alpha_vals = torch.cumsum(beta, dim=0)      # alpha_k = sum of beta_j, non-decreasing
        alpha_vals = torch.cat([torch.ones(1, device=embeds.device), alpha_vals])
        alpha_vals = torch.sigmoid(alpha_vals)      # every weight lies in (0, 1)

        weight_matrix = torch.zeros_like(sim_matrix)
        for k in range(5):                          # label distances 0..4
            weight_matrix[label_diff == k] = alpha_vals[min(k, len(alpha_vals)-1)]
        # Distances of 5 and more share the last weight.
        weight_matrix[label_diff >= 5] = alpha_vals[-1]

        weight_matrix = weight_matrix * logits_mask.float()

        exp_weighted_sim = torch.exp(weight_matrix * sim_matrix) * logits_mask.float()
        pos_mask = (label_diff == 0) * logits_mask
        pos_exp_sim = torch.exp(sim_matrix) * pos_mask.float()

        # The small constants guard against log(0) and division by zero.
        denom = exp_weighted_sim.sum(1, keepdim=True) + 1e-8
        numerator = pos_exp_sim.sum(1, keepdim=True) + 1e-8
        log_prob = torch.log(numerator / denom)

        loss = -log_prob.mean()
        return loss

    # Previous unweighted variant, kept for reference:
    # def supervised_contrastive_loss(self, embeds, labels, temperature=0.1):
    #     embeds = F.normalize(embeds, p=2, dim=1)
    #     sim_matrix = torch.div(torch.matmul(embeds, embeds.T), temperature)
    #     logits_mask = ~torch.eye(sim_matrix.size(0), dtype=torch.bool, device=embeds.device)

    #     labels = labels.contiguous().view(-1, 1)
    #     mask = torch.eq(labels, labels.T).float().to(embeds.device)

    #     # mask-out self-comparisons
    #     mask = mask * logits_mask.float()

    #     exp_sim = torch.exp(sim_matrix) * logits_mask.float()
    #     log_prob = sim_matrix - torch.log(exp_sim.sum(1, keepdim=True) + 1e-8)

    #     mean_log_prob_pos = (mask * log_prob).sum(1) / (mask.sum(1) + 1e-8)
    #     return -mean_log_prob_pos.mean()

    def compute_loss(self, batch_graphs, labels, pred_loss):
        """Return ``pred_loss`` plus ``alpha`` times the contrastive loss.

        :param batch_graphs: graph passed to ``forward_sb``, which applies the
            input embeddings to its 'h' features again.
        :param labels: one label per graph embedding.
        :param pred_loss: prediction loss to which the contrastive term is added.
        :return: the combined loss.
        """
        _, graph_embeds = self.forward_sb(batch_graphs)
        contrastive = self.supervised_contrastive_loss(graph_embeds, labels)
        # contrastive = self.contrastive_loss(graph_embeds, labels)
        return pred_loss + self.alpha * contrastive


class GCNN_Net_new_20(nn.Module):
    """Constraint/variable graph network with three per-variable output heads.

    The graph ``g`` passed to the forward methods is indexed as
    ``g.nodes['c']``, ``g.nodes['v']``, ``g['v2c']`` and ``g['c2v']``
    (presumably a DGL heterograph with those node/edge types -- confirm
    against the caller). Node and edge inputs are read from the ``'h'``
    feature.

    Heads written to ``g.nodes['v'].data``:
      * ``'s_0'`` and ``'s_1'``: computed from the ``h_dim`` variable embedding.
      * ``'s'``: computed from the embedding concatenated with ``s_0`` and
        ``s_1`` (``h_dim + 2`` inputs, matching ``self.f_s``).
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """Build the network.

        :param h_dim: hidden width used by every layer
        :param c_dim: input feature size of constraint nodes
        :param v_dim: input feature size of variable nodes
        :param e_dim: input feature size of edges
        :param T: number of convolution rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is constructed but never used in this class
        # (joint_conv always applies f_c). It is kept so that parameter
        # ordering and saved state_dicts stay compatible.
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # output layers; f_s consumes the embedding plus the two scalar heads
        self.f_s = nn.Sequential(nn.Linear(h_dim+2, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        # new
        self.f_s_0 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        self.f_s_1 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters of the module.

        Uses ``Tensor.numel()`` so the count works wherever the parameters
        live; the previous ``.numpy()`` round-trip only works for CPU tensors.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Run one message-passing step on a single-edge-type graph view.

        :param g: graph view exposing node types ``'c'`` and ``'v'`` and one
            edge type with an ``'h'`` edge feature
        :param dir: ``'c2v'`` updates variable nodes; any other value updates
            constraint nodes
        :param t: index of the convolution round (selects the layer set)

        Both node types are re-projected on every call, so within one round
        each projection is applied once per direction.
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the source and destination node 'h' features and store the
        # result on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the original author: why are the edge weights
        # all negative?

        # Edge feature projected e_dim -> h_dim, added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # h_dim -> h_dim edge MLP.
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is used for both directions; see f_v in __init__.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _encode(self, g):
        """Embed nodes, run ``T`` convolution rounds, return variable embeddings."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information has been aggregated into g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def _write_heads(self, g, var_embeds):
        """Compute ``s_0``, ``s_1`` and ``s`` and store them on the variable nodes."""
        s_0 = self.f_s_0(var_embeds)
        s_1 = self.f_s_1(var_embeds)
        g.nodes['v'].data['s_0'] = s_0
        g.nodes['v'].data['s_1'] = s_1
        # f_s expects h_dim + 2 inputs: the embedding plus both scalar heads.
        g.nodes['v'].data['s'] = self.f_s(torch.cat((var_embeds, s_0, s_1), dim=1))

    def forward_lp(self, g):
        """Run the network and return ``g`` with ``'s'``, ``'s_0'``, ``'s_1'`` set.

        Previously this passed the ``h_dim``-wide embedding straight into
        ``f_s`` (which takes ``h_dim + 2`` inputs) and failed with a shape
        mismatch; it now builds the same ``f_s`` input as ``forward_sb``.
        """
        self._write_heads(g, self._encode(g))
        return g

    def forward_sb(self, g):
        """Run the network and return ``g`` with ``'s_0'``, ``'s_1'``, ``'s'`` set."""
        self._write_heads(g, self._encode(g))
        return g

class GCNN_Net_new_22(nn.Module):
    """Constraint/variable graph network with two per-variable heads.

    ``forward`` indexes the graph as ``g.nodes['c']``, ``g.nodes['v']``,
    ``g['v2c']`` and ``g['c2v']`` (presumably a DGL heterograph -- confirm
    against the caller) and writes ``'s_0'`` and ``'s_1'`` to
    ``g.nodes['v'].data``.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """Build the network.

        :param h_dim: hidden width used by every layer
        :param c_dim: input feature size of constraint nodes
        :param v_dim: input feature size of variable nodes
        :param e_dim: input feature size of edges
        :param T: number of convolution rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is constructed but never used in this class
        # (joint_conv always applies f_c). It is kept so that parameter
        # ordering and saved state_dicts stay compatible.
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # output layers
        self.f_s_0 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        self.f_s_1 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters of the module.

        Uses ``Tensor.numel()`` so the count works wherever the parameters
        live; the previous ``.numpy()`` round-trip only works for CPU tensors.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Run one message-passing step on a single-edge-type graph view.

        :param g: graph view exposing node types ``'c'`` and ``'v'`` and one
            edge type with an ``'h'`` edge feature
        :param dir: ``'c2v'`` updates variable nodes; any other value updates
            constraint nodes
        :param t: index of the convolution round (selects the layer set)

        Both node types are re-projected on every call, so within one round
        each projection is applied once per direction.
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the source and destination node 'h' features and store the
        # result on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the original author: why are the edge weights
        # all negative?

        # Edge feature projected e_dim -> h_dim, added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # h_dim -> h_dim edge MLP.
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is used for both directions; see f_v in __init__.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def forward(self, g):
        """Run the network and return ``g`` with ``'s_0'`` and ``'s_1'`` set."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # output: all information has been aggregated into
        # g.nodes['v'].data['h']
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)

        return g

class GCNN_Net_new_23(nn.Module):
    """Constraint/variable graph network with a single per-variable head.

    The forward methods index the graph as ``g.nodes['c']``,
    ``g.nodes['v']``, ``g['v2c']`` and ``g['c2v']`` (presumably a DGL
    heterograph -- confirm against the caller) and write ``'s'`` to
    ``g.nodes['v'].data``.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """Build the network.

        :param h_dim: hidden width used by every layer
        :param c_dim: input feature size of constraint nodes
        :param v_dim: input feature size of variable nodes
        :param e_dim: input feature size of edges
        :param T: number of convolution rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is constructed but never used in this class
        # (joint_conv always applies f_c). It is kept so that parameter
        # ordering and saved state_dicts stay compatible.
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters of the module.

        Uses ``Tensor.numel()`` so the count works wherever the parameters
        live; the previous ``.numpy()`` round-trip only works for CPU tensors.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Run one message-passing step on a single-edge-type graph view.

        :param g: graph view exposing node types ``'c'`` and ``'v'`` and one
            edge type with an ``'h'`` edge feature
        :param dir: ``'c2v'`` updates variable nodes; any other value updates
            constraint nodes
        :param t: index of the convolution round (selects the layer set)

        Both node types are re-projected on every call, so within one round
        each projection is applied once per direction.
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the source and destination node 'h' features and store the
        # result on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the original author: why are the edge weights
        # all negative?

        # Edge feature projected e_dim -> h_dim, added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # h_dim -> h_dim edge MLP.
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is used for both directions; see f_v in __init__.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _score(self, g):
        """Embed, convolve ``T`` rounds, and store ``f_s`` output as ``'s'``."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # output: all information has been aggregated into
        # g.nodes['v'].data['h']
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

    def forward(self, g):
        """Run the network and return ``g`` with ``'s'`` set on variable nodes."""
        self._score(g)
        return g

    def eval_forward(self, g):
        """Run the network and return the variable-node ``'s'`` tensor."""
        self._score(g)
        return g.nodes['v'].data['s']

class GCNN_Net_new_25(nn.Module):
    """Graph encoder whose variable embeddings are scored with extra features.

    Variable embeddings (width ``h_dim``) are concatenated with ``kh_feats``
    (width ``self.kh_dim`` = 72) along the last axis and passed through
    ``output_module``, which ends in a single output unit.

    The graph is indexed as ``g.nodes['c']``, ``g.nodes['v']``, ``g['v2c']``
    and ``g['c2v']`` (presumably a DGL heterograph -- confirm against the
    caller).
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """Build the network.

        :param h_dim: hidden width used by the graph layers
        :param c_dim: input feature size of constraint nodes
        :param v_dim: input feature size of variable nodes
        :param e_dim: input feature size of edges
        :param T: number of convolution rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        self.kh_dim = 72      # width of the extra per-variable features
        self.ff_size = 256    # hidden width of output_module
        self.h_dim = h_dim

        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v and f_s are constructed but never used in this
        # class. They are kept so that parameter ordering and saved
        # state_dicts stay compatible.
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

        self.output_module = nn.Sequential(
            nn.Linear(self.h_dim + self.kh_dim, self.ff_size, bias=True),
            nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(self.ff_size, self.ff_size, bias=True),
            nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(self.ff_size, self.ff_size, bias=True),
            nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(self.ff_size, 1, bias=False)
        )

    def count_parameters(self):
        """Return the total number of scalar parameters of the module.

        Uses ``Tensor.numel()`` so the count works wherever the parameters
        live; the previous ``.numpy()`` round-trip only works for CPU tensors.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Run one message-passing step on a single-edge-type graph view.

        :param g: graph view exposing node types ``'c'`` and ``'v'`` and one
            edge type with an ``'h'`` edge feature
        :param dir: ``'c2v'`` updates variable nodes; any other value updates
            constraint nodes
        :param t: index of the convolution round (selects the layer set)

        Both node types are re-projected on every call, so within one round
        each projection is applied once per direction.
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the source and destination node 'h' features and store the
        # result on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Edge feature projected e_dim -> h_dim, added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        # h_dim -> h_dim edge MLP.
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is used for both directions; see f_v in __init__.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def forward(self, count, g, kh_feats):
        """Encode ``g`` and score the variables together with ``kh_feats``.

        :param count: size of the leading axis the variable embeddings are
            reshaped to (``[count, -1, h_dim]``)
        :param g: input graph
        :param kh_feats: extra features concatenated on axis 2; unlike
            ``eval_forword`` this method does not reshape them, so they are
            assumed to already be ``[count, n, kh_dim]`` -- TODO confirm
        :return: output of ``output_module`` on the concatenated features
        """
        var_embeds = self.get_var_embeds(g)
        var_embeds = torch.reshape(var_embeds, [count, -1, self.h_dim])

        features = torch.cat([kh_feats, var_embeds], dim=2)
        return self.output_module(features)

    def get_var_embeds(self, g):
        """Embed nodes, run ``T`` convolution rounds, return variable embeddings."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information has been aggregated into g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def eval_forword(self, count, var_embeds, kh_feats):
        """Score precomputed variable embeddings together with ``kh_feats``.

        :param count: size of the leading axis both inputs are reshaped to
        :param var_embeds: reshaped to ``[count, -1, h_dim]``
        :param kh_feats: reshaped to ``[count, -1, kh_dim]``
        :return: output of ``output_module`` on the concatenated features

        The method name is misspelled; it is kept for existing callers and
        ``eval_forward`` is provided as an alias.
        """
        var_embeds = torch.reshape(var_embeds, [count, -1, self.h_dim])
        kh_feats = torch.reshape(kh_feats, [count, -1, self.kh_dim])
        input_data = torch.cat([kh_feats, var_embeds], dim=2)

        # NOTE(review): debug output left in place to preserve behaviour.
        print("var_embeds:",var_embeds.size())
        print("kh_feats:",kh_feats.size())
        print("input_data:",input_data.size())

        return self.output_module(input_data)

    # Correctly spelled alias for eval_forword.
    eval_forward = eval_forword

class GCNN_Net_new_28(nn.Module):
    """Constraint/variable graph network with an extra tree node type.

    The graph is indexed as ``g.nodes['c']``, ``g.nodes['v']``,
    ``g.nodes['t']``, ``g['v2c']``, ``g['c2v']`` and ``g['t2v']``
    (presumably a DGL heterograph -- confirm against the caller). Each round
    first passes tree information to the variables (``joint_tree``) and then
    runs the two constraint/variable convolutions.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        """Build the network.

        :param h_dim: hidden width used by every layer
        :param c_dim: input feature size of constraint nodes
        :param v_dim: input feature size of variable nodes
        :param t_dim: input feature size of tree nodes
        :param e_dim: input feature size of edges
        :param T: number of convolution rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # tree embedding
        self.tree_embedding = nn.Sequential(nn.Linear(t_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Graph Convolution: feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # NOTE(review): feature_module_trees is constructed but never used in
        # this class (joint_tree applies feature_module_conss to tree nodes).
        # Kept for parameter-order / state_dict compatibility.
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        # edge MLP for the tree -> variable messages
        self.feature_final_trees = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])

        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is constructed but never used in this class
        # (joint_conv always applies f_c). Kept for compatibility.
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # variable update after receiving tree messages
        self.f_t = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])

        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        self.f_s_0 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        self.f_s_1 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters of the module.

        Uses ``Tensor.numel()`` so the count works wherever the parameters
        live; the previous ``.numpy()`` round-trip only works for CPU tensors.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_tree(self, g, dir, t):
        """Pass tree-node information to the variable nodes.

        :param g: graph view exposing node types ``'t'`` and ``'v'``
        :param dir: unused; accepted for symmetry with ``joint_conv``
        :param t: index of the convolution round (selects the layer set)
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): tree nodes go through the constraint projection, not
        # feature_module_trees; left unchanged to keep trained weights valid.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Multiply the source and destination node 'h' features element-wise
        # and store the result on each edge as 'u*v'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Example shapes noted by the original author:
        #   g.nodes['t'].data['h'] : [1, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [1000, 1]
        #   g.edata['u*v']         : [1000, 64]

        # Original author's notes: this step could be improved. The edge
        # values between the search-tree node and the variable nodes are all
        # 1, which leaves room for optimisation, and simply combining the
        # endpoint features may not make real use of the search-tree node.
        # The edge feature is therefore not used here:
        # joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """Run one message-passing step on a single-edge-type graph view.

        :param g: graph view exposing node types ``'c'`` and ``'v'`` and one
            edge type with an ``'h'`` edge feature
        :param dir: ``'c2v'`` updates variable nodes; any other value updates
            constraint nodes
        :param t: index of the convolution round (selects the layer set)
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the source and destination node 'h' features and store the
        # result on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the original author: why are the edge weights
        # all negative?

        # Edge feature projected e_dim -> h_dim, added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        # NOTE(review): f_c is used for both directions; see f_v in __init__.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _encode(self, g):
        """Embed all node types, run ``T`` rounds, return variable embeddings."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        # convolution
        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # All information has been aggregated into g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def forward(self, g):
        """Run the network and return ``g`` with ``'s'``, ``'s_0'``, ``'s_1'`` set."""
        var_embeds = self._encode(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)

        return g

    def eval_forward(self, g):
        """Run the network and return the ``(s_0, s_1)`` variable-node tensors."""
        var_embeds = self._encode(g)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)

        return g.nodes['v'].data['s_0'], g.nodes['v'].data['s_1']

class GCNN_Net_new_29(nn.Module):
    """Constraint/variable graph network with an extra tree node type.

    The graph is indexed as ``g.nodes['c']``, ``g.nodes['v']``,
    ``g.nodes['t']``, ``g['v2c']``, ``g['c2v']`` and ``g['t2v']``
    (presumably a DGL heterograph -- confirm against the caller). Each round
    first passes tree information to the variables (``joint_tree``) and then
    runs the two constraint/variable convolutions.

    ``forward`` writes all three heads; ``eval_forward_sb`` returns ``'s'``
    and ``eval_forward_lp`` returns ``('s_0', 's_1')``.
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        """Build the network.

        :param h_dim: hidden width used by every layer
        :param c_dim: input feature size of constraint nodes
        :param v_dim: input feature size of variable nodes
        :param t_dim: input feature size of tree nodes
        :param e_dim: input feature size of edges
        :param T: number of convolution rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # tree embedding
        self.tree_embedding = nn.Sequential(nn.Linear(t_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Graph Convolution: feature layers
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # NOTE(review): feature_module_trees is constructed but never used in
        # this class (joint_tree applies feature_module_conss to tree nodes).
        # Kept for parameter-order / state_dict compatibility.
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        self.feature_final = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])
        # edge MLP for the tree -> variable messages
        self.feature_final_trees = nn.ModuleList([nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim)) for _ in range(T)])

        self.f_c = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # NOTE(review): f_v is constructed but never used in this class
        # (joint_conv always applies f_c). Kept for compatibility.
        self.f_v = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])
        # variable update after receiving tree messages
        self.f_t = nn.ModuleList([nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1)) for _ in range(T)])

        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        self.f_s_0 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))
        self.f_s_1 = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters of the module.

        Uses ``Tensor.numel()`` so the count works wherever the parameters
        live; the previous ``.numpy()`` round-trip only works for CPU tensors.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_tree(self, g, dir, t):
        """Pass tree-node information to the variable nodes.

        :param g: graph view exposing node types ``'t'`` and ``'v'``
        :param dir: unused; accepted for symmetry with ``joint_conv``
        :param t: index of the convolution round (selects the layer set)
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): tree nodes go through the constraint projection, not
        # feature_module_trees; left unchanged to keep trained weights valid.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Multiply the source and destination node 'h' features element-wise
        # and store the result on each edge as 'u*v'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Example shapes noted by the original author:
        #   g.nodes['t'].data['h'] : [1, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [1000, 1]
        #   g.edata['u*v']         : [1000, 64]

        # Original author's notes: this step could be improved. The edge
        # values between the search-tree node and the variable nodes are all
        # 1, which leaves room for optimisation, and simply combining the
        # endpoint features may not make real use of the search-tree node.
        # The edge feature is therefore not used here:
        # joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        joint = g.edata['u*v']

        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']

        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """Run one message-passing step on a single-edge-type graph view.

        :param g: graph view exposing node types ``'c'`` and ``'v'`` and one
            edge type with an ``'h'`` edge feature
        :param dir: ``'c2v'`` updates variable nodes; any other value updates
            constraint nodes
        :param t: index of the convolution round (selects the layer set)
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the source and destination node 'h' features and store the
        # result on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes noted by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Open question from the original author: why are the edge weights
        # all negative?

        # Edge feature projected e_dim -> h_dim, added to the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']

        g.edata['joint'] = self.feature_final[t](joint)

        # Sum incoming edge messages into 'h_neigh' on the destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']

        # NOTE(review): f_c is used for both directions; see f_v in __init__.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _encode(self, g):
        """Embed all node types, run ``T`` rounds, return variable embeddings."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        # convolution
        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # All information has been aggregated into g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def forward(self, g):
        """Run the network and return ``g`` with ``'s'``, ``'s_0'``, ``'s_1'`` set."""
        var_embeds = self._encode(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)

        return g

    def eval_forward_sb(self, g):
        """Run the network and return the variable-node ``'s'`` tensor."""
        var_embeds = self._encode(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g.nodes['v'].data['s']

    def eval_forward_lp(self, g):
        """Run the network and return the ``(s_0, s_1)`` variable-node tensors."""
        var_embeds = self._encode(g)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)

        return g.nodes['v'].data['s_0'], g.nodes['v'].data['s_1']

class GCNN_Net_new_30(nn.Module):
    """Message-passing network over a heterograph with 'c', 'v' and 't' nodes.

    'c' nodes are constraints, 'v' nodes are variables and 't' nodes come from
    the search tree (per the original author's comments). Each of the ``T``
    rounds runs tree -> variable, variable -> constraint and
    constraint -> variable message passing. Three heads (``f_s``, ``f_s_0``,
    ``f_s_1``) map the final variable embeddings to one value per variable.

    NOTE(review): ``feature_module_trees`` and ``f_v`` are constructed (so they
    are part of the state_dict) but no method of this class reads them:
    ``joint_tree`` transforms the 't' nodes with ``feature_module_conss`` and
    ``joint_conv`` updates both destination types with ``f_c``. This is left
    unchanged so existing checkpoints keep their meaning -- confirm whether it
    is intended.

    :param h_dim: hidden width used by every layer.
    :param c_dim: input feature width of 'c' nodes.
    :param v_dim: input feature width of 'v' nodes.
    :param t_dim: input feature width of 't' nodes.
    :param e_dim: input feature width of the edge feature 'h'.
    :param T: number of message-passing rounds (one set of layers per round).
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def two_layer_mlp(in_dim):
            # (Linear -> ReLU -> Dropout) twice, ending at width h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, width h_dim throughout.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        def score_head():
            # Linear -> ReLU -> Dropout -> Linear producing one value per node.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, 1))

        # Modules are created in the original order so that state_dict layout
        # and seeded initialisation are unchanged.

        # Input embeddings for constraints, variables and tree nodes.
        self.cons_embedding = two_layer_mlp(c_dim)
        self.var_embedding = two_layer_mlp(v_dim)
        self.tree_embedding = two_layer_mlp(t_dim)

        # Graph convolution: per-round feature layers.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        # Per-round edge-message networks.
        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([edge_mlp() for _ in range(T)])

        # Per-round node-update networks; input is concat(h, h_neigh).
        self.f_c = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])
        self.f_v = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])
        self.f_t = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])

        # Output heads.
        self.f_s = score_head()
        self.f_s_0 = score_head()
        self.f_s_1 = score_head()

    def count_parameters(self):
        """Return the total number of scalar parameters in the module.

        Uses ``Tensor.numel`` so it works for parameters on any device and of
        any dtype, without copying them into NumPy arrays.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_tree(self, g, dir, t):
        """One tree -> variable message-passing step on the 't2v' view.

        :param g: graph view containing 't' and 'v' nodes (modified in place).
        :param dir: unused; kept for signature parity with ``joint_conv``.
        :param t: index of the round, selecting which layers to use.
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): tree nodes go through feature_module_conss, not
        # feature_module_trees -- see the class docstring.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Element-wise product of source 'h' and destination 'h', stored on
        # each edge as 'u*v'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Original author's notes: this could probably be improved. The edge
        # values between tree nodes and variable nodes are all 1, which seems
        # unlikely to be ideal, and the edge feature is not used here at all
        # (unlike joint_conv), so the tree node is barely exploited.
        joint = g.edata['u*v']
        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']
        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """One constraint <-> variable message-passing step.

        :param g: graph view containing 'c' and 'v' nodes and an edge feature
            'h' (modified in place).
        :param dir: ``'c2v'`` updates the 'v' nodes; any other value updates
            the 'c' nodes.
        :param t: index of the round, selecting which layers to use.
        """
        # Both node types are linearly transformed in place on every call.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Sum of source 'h' and destination 'h', stored on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Project the edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Original author's open question: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is used for both destination types; f_v is never
        # used -- see the class docstring.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _embed_and_propagate(self, g):
        """Embed all node features, run ``T`` rounds, return variable embeddings.

        Shared by ``forward`` and the ``eval_forward_*`` methods. Writes the
        'h' node data of `g` in place.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        # NOTE(review): the code relies on these relation views sharing node
        # features with `g` -- confirm against the DGL version in use.
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        # Convolution rounds.
        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # Everything has been gathered into g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def forward(self, g):
        """Compute all three heads and return `g`.

        The outputs are stored on the 'v' nodes as 's', 's_0' and 's_1'.
        """
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)
        return g

    def eval_forward_sb(self, g):
        """Compute only the 's' head and return it (also stored on 'v' nodes)."""
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        return g.nodes['v'].data['s']

    def eval_forward_lp(self, g):
        """Compute only the 's_0' and 's_1' heads and return them as a tuple."""
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)
        return g.nodes['v'].data['s_0'], g.nodes['v'].data['s_1']

class GCNN_Net_new_31(nn.Module):
    """Message-passing network over a heterograph with 'c', 'v' and 't' nodes.

    'c' nodes are constraints, 'v' nodes are variables and 't' nodes come from
    the search tree (per the original author's comments). Each of the ``T``
    rounds runs tree -> variable, variable -> constraint and
    constraint -> variable message passing. Three heads (``f_s``, ``f_s_0``,
    ``f_s_1``) map the final variable embeddings to one value per variable.

    NOTE(review): ``feature_module_trees`` and ``f_v`` are constructed (so they
    are part of the state_dict) but no method of this class reads them:
    ``joint_tree`` transforms the 't' nodes with ``feature_module_conss`` and
    ``joint_conv`` updates both destination types with ``f_c``. This is left
    unchanged so existing checkpoints keep their meaning -- confirm whether it
    is intended.

    :param h_dim: hidden width used by every layer.
    :param c_dim: input feature width of 'c' nodes.
    :param v_dim: input feature width of 'v' nodes.
    :param t_dim: input feature width of 't' nodes.
    :param e_dim: input feature width of the edge feature 'h'.
    :param T: number of message-passing rounds (one set of layers per round).
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def two_layer_mlp(in_dim):
            # (Linear -> ReLU -> Dropout) twice, ending at width h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, width h_dim throughout.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        def score_head():
            # Linear -> ReLU -> Dropout -> Linear producing one value per node.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, 1))

        # Modules are created in the original order so that state_dict layout
        # and seeded initialisation are unchanged.

        # Input embeddings for constraints, variables and tree nodes.
        self.cons_embedding = two_layer_mlp(c_dim)
        self.var_embedding = two_layer_mlp(v_dim)
        self.tree_embedding = two_layer_mlp(t_dim)

        # Graph convolution: per-round feature layers.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        # Per-round edge-message networks.
        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([edge_mlp() for _ in range(T)])

        # Per-round node-update networks; input is concat(h, h_neigh).
        self.f_c = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])
        self.f_v = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])
        self.f_t = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])

        # Output heads.
        self.f_s = score_head()
        self.f_s_0 = score_head()
        self.f_s_1 = score_head()

    def count_parameters(self):
        """Return the total number of scalar parameters in the module.

        Uses ``Tensor.numel`` so it works for parameters on any device and of
        any dtype, without copying them into NumPy arrays.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_tree(self, g, dir, t):
        """One tree -> variable message-passing step on the 't2v' view.

        :param g: graph view containing 't' and 'v' nodes (modified in place).
        :param dir: unused; kept for signature parity with ``joint_conv``.
        :param t: index of the round, selecting which layers to use.
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): tree nodes go through feature_module_conss, not
        # feature_module_trees -- see the class docstring.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Element-wise product of source 'h' and destination 'h', stored on
        # each edge as 'u*v'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Original author's notes: this could probably be improved. The edge
        # values between tree nodes and variable nodes are all 1, which seems
        # unlikely to be ideal, and the edge feature is not used here at all
        # (unlike joint_conv), so the tree node is barely exploited.
        joint = g.edata['u*v']
        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']
        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """One constraint <-> variable message-passing step.

        :param g: graph view containing 'c' and 'v' nodes and an edge feature
            'h' (modified in place).
        :param dir: ``'c2v'`` updates the 'v' nodes; any other value updates
            the 'c' nodes.
        :param t: index of the round, selecting which layers to use.
        """
        # Both node types are linearly transformed in place on every call.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Sum of source 'h' and destination 'h', stored on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Project the edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Original author's open question: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is used for both destination types; f_v is never
        # used -- see the class docstring.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _embed_and_propagate(self, g):
        """Embed all node features, run ``T`` rounds, return variable embeddings.

        Shared by ``forward`` and the ``eval_forward_*`` methods. Writes the
        'h' node data of `g` in place.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        # NOTE(review): the code relies on these relation views sharing node
        # features with `g` -- confirm against the DGL version in use.
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        # Convolution rounds.
        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # Everything has been gathered into g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def forward(self, g):
        """Compute all three heads and return `g`.

        The outputs are stored on the 'v' nodes as 's', 's_0' and 's_1'.
        """
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)
        return g

    def eval_forward_sb(self, g):
        """Compute only the 's' head and return it (also stored on 'v' nodes)."""
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        return g.nodes['v'].data['s']

    def eval_forward_lp(self, g):
        """Compute only the 's_0' and 's_1' heads and return them as a tuple."""
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)
        return g.nodes['v'].data['s_0'], g.nodes['v'].data['s_1']

    def eval_forward_all(self, g):
        """Compute all three heads and return them as ``(s, s_0, s_1)``."""
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)
        return g.nodes['v'].data['s'], g.nodes['v'].data['s_0'], g.nodes['v'].data['s_1']

class GCNN_Net_new_32(nn.Module):
    """Message-passing network over a heterograph with 'c', 'v' and 't' nodes.

    'c' nodes are constraints, 'v' nodes are variables and 't' nodes come from
    the search tree (per the original author's comments). Each of the ``T``
    rounds runs tree -> variable, variable -> constraint and
    constraint -> variable message passing. Three heads (``f_s``, ``f_s_0``,
    ``f_s_1``) map the final variable embeddings to one value per variable.

    NOTE(review): ``feature_module_trees`` and ``f_v`` are constructed (so they
    are part of the state_dict) but no method of this class reads them:
    ``joint_tree`` transforms the 't' nodes with ``feature_module_conss`` and
    ``joint_conv`` updates both destination types with ``f_c``. This is left
    unchanged so existing checkpoints keep their meaning -- confirm whether it
    is intended.

    :param h_dim: hidden width used by every layer.
    :param c_dim: input feature width of 'c' nodes.
    :param v_dim: input feature width of 'v' nodes.
    :param t_dim: input feature width of 't' nodes.
    :param e_dim: input feature width of the edge feature 'h'.
    :param T: number of message-passing rounds (one set of layers per round).
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, t_dim=61, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def two_layer_mlp(in_dim):
            # (Linear -> ReLU -> Dropout) twice, ending at width h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        def edge_mlp():
            # Linear -> ReLU -> Dropout -> Linear, width h_dim throughout.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim))

        def score_head():
            # Linear -> ReLU -> Dropout -> Linear producing one value per node.
            return nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, 1))

        # Modules are created in the original order so that state_dict layout
        # and seeded initialisation are unchanged.

        # Input embeddings for constraints, variables and tree nodes.
        self.cons_embedding = two_layer_mlp(c_dim)
        self.var_embedding = two_layer_mlp(v_dim)
        self.tree_embedding = two_layer_mlp(t_dim)

        # Graph convolution: per-round feature layers.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_module_trees = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])

        # Per-round edge-message networks.
        self.feature_final = nn.ModuleList([edge_mlp() for _ in range(T)])
        self.feature_final_trees = nn.ModuleList([edge_mlp() for _ in range(T)])

        # Per-round node-update networks; input is concat(h, h_neigh).
        self.f_c = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])
        self.f_v = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])
        self.f_t = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])

        # Output heads.
        self.f_s = score_head()
        self.f_s_0 = score_head()
        self.f_s_1 = score_head()

    def count_parameters(self):
        """Return the total number of scalar parameters in the module.

        Uses ``Tensor.numel`` so it works for parameters on any device and of
        any dtype, without copying them into NumPy arrays.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_tree(self, g, dir, t):
        """One tree -> variable message-passing step on the 't2v' view.

        :param g: graph view containing 't' and 'v' nodes (modified in place).
        :param dir: unused; kept for signature parity with ``joint_conv``.
        :param t: index of the round, selecting which layers to use.
        """
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # NOTE(review): tree nodes go through feature_module_conss, not
        # feature_module_trees -- see the class docstring.
        g.nodes['t'].data['h'] = self.feature_module_conss[t](g.nodes['t'].data['h'])

        # Element-wise product of source 'h' and destination 'h', stored on
        # each edge as 'u*v'.
        g.apply_edges(fn.u_mul_v('h', 'h', 'u*v'))

        # Original author's notes: this could probably be improved. The edge
        # values between tree nodes and variable nodes are all 1, which seems
        # unlikely to be ideal, and the edge feature is not used here at all
        # (unlike joint_conv), so the tree node is barely exploited.
        joint = g.edata['u*v']
        g.edata['joint'] = self.feature_final_trees[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        h_neigh = g.nodes['v'].data['h_neigh']
        h = g.nodes['v'].data['h']
        g.nodes['v'].data['h'] = self.f_t[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """One constraint <-> variable message-passing step.

        :param g: graph view containing 'c' and 'v' nodes and an edge feature
            'h' (modified in place).
        :param dir: ``'c2v'`` updates the 'v' nodes; any other value updates
            the 'c' nodes.
        :param t: index of the round, selecting which layers to use.
        """
        # Both node types are linearly transformed in place on every call.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Sum of source 'h' and destination 'h', stored on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Project the edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Original author's open question: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is used for both destination types; f_v is never
        # used -- see the class docstring.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def _embed_and_propagate(self, g):
        """Embed all node features, run ``T`` rounds, return variable embeddings.

        Shared by ``forward`` and the ``eval_forward_*`` methods. Writes the
        'h' node data of `g` in place.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g.nodes['t'].data['h'] = self.tree_embedding(g.nodes['t'].data['h'])

        # NOTE(review): the code relies on these relation views sharing node
        # features with `g` -- confirm against the DGL version in use.
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        g_t2v = g['t2v']

        # Convolution rounds.
        for t in range(self.T):
            self.joint_tree(g_t2v, 't2v', t)
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # Everything has been gathered into g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def forward(self, g):
        """Compute all three heads and return `g`.

        The outputs are stored on the 'v' nodes as 's', 's_0' and 's_1'.
        """
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)
        return g

    def eval_forward_sb(self, g):
        """Compute only the 's' head and return it (also stored on 'v' nodes)."""
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        return g.nodes['v'].data['s']

    def eval_forward_lp(self, g):
        """Compute only the 's_0' and 's_1' heads and return them as a tuple."""
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)
        return g.nodes['v'].data['s_0'], g.nodes['v'].data['s_1']

    def eval_forward_all(self, g):
        """Compute all three heads and return them as ``(s, s_0, s_1)``."""
        var_embeds = self._embed_and_propagate(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        g.nodes['v'].data['s_0'] = self.f_s_0(var_embeds)
        g.nodes['v'].data['s_1'] = self.f_s_1(var_embeds)
        return g.nodes['v'].data['s'], g.nodes['v'].data['s_0'], g.nodes['v'].data['s_1']

class GCNN_Net_new_33(nn.Module):
    """Message-passing network over a bipartite graph with 'c' and 'v' nodes.

    'c' nodes are constraints and 'v' nodes are variables. Each of the ``T``
    rounds runs variable -> constraint and then constraint -> variable message
    passing; the head ``f_s`` maps the final variable embeddings to one value
    per variable.

    NOTE(review): ``f_v`` is constructed (so it is part of the state_dict) but
    never read; ``joint_conv`` updates both destination types with ``f_c``.
    This is left unchanged so existing checkpoints keep their meaning --
    confirm whether it is intended.

    :param h_dim: hidden width used by every layer.
    :param c_dim: input feature width of 'c' nodes.
    :param v_dim: input feature width of 'v' nodes.
    :param e_dim: input feature width of the edge feature 'h'.
    :param T: number of message-passing rounds (one set of layers per round).
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        super().__init__()
        self.T = T

        def two_layer_mlp(in_dim):
            # (Linear -> ReLU -> Dropout) twice, ending at width h_dim.
            return nn.Sequential(nn.Linear(in_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                 nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))

        # Modules are created in the original order so that state_dict layout
        # and seeded initialisation are unchanged.

        # Input embeddings for constraints and variables.
        self.cons_embedding = two_layer_mlp(c_dim)
        self.var_embedding = two_layer_mlp(v_dim)

        # Graph convolution: per-round feature layers.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        # Per-round edge-message network.
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)
        ])
        # Per-round node-update networks; input is concat(h, h_neigh).
        self.f_c = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])
        self.f_v = nn.ModuleList([two_layer_mlp(h_dim + h_dim) for _ in range(T)])

        # Output layer.
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters in the module.

        Uses ``Tensor.numel`` so it works for parameters on any device and of
        any dtype, without copying them into NumPy arrays.
        """
        return sum(param.numel() for param in self.parameters())

    def _conv_step(self, g, dir, t):
        """Shared implementation behind ``joint_conv`` and ``joint_conv1``."""
        # Both node types are linearly transformed in place on every call.
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])

        # Sum of source 'h' and destination 'h', stored on each edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Project the edge feature (e_dim -> h_dim) and add the endpoint sum.
        # Original author's open question: why are the edge weights all negative?
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on destination nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is used for both destination types; f_v is never
        # used -- see the class docstring.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv(self, g, dir, t):
        """One constraint <-> variable message-passing step.

        :param g: graph view containing 'c' and 'v' nodes and an edge feature
            'h' (modified in place).
        :param dir: ``'c2v'`` updates the 'v' nodes; any other value updates
            the 'c' nodes.
        :param t: index of the round, selecting which layers to use.
        """
        self._conv_step(g, dir, t)

    def joint_conv1(self, g, dir, t):
        """Same operation as ``joint_conv``; kept as a separate public name."""
        self._conv_step(g, dir, t)

    def forward(self, g):
        """Embed nodes, run ``T`` rounds and store the 's' output on 'v' nodes.

        :param g: heterogeneous graph exposing node types 'c', 'v' and
            relations 'v2c', 'c2v'; it is modified in place.
        :return: the same graph `g`.
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])

        # NOTE(review): the code relies on these relation views sharing node
        # features with `g` -- confirm against the DGL version in use.
        g_v2c = g['v2c']
        g_c2v = g['c2v']

        # Convolution rounds.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)

        # Output: everything has been gathered into g.nodes['v'].data['h'].
        var_embeds = g.nodes['v'].data['h']
        g.nodes['v'].data['s'] = self.f_s(var_embeds)

        return g

class GCNN_Net_new_33_1(nn.Module):
    """
    Message-passing network over a graph with constraint ('c') and variable ('v') nodes.

    Both node types are embedded to ``h_dim`` features, refined by ``T`` rounds of
    'v2c' then 'c2v' message passing, and every 'v' node finally gets a scalar from ``f_s``.
    """
    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """
        :param h_dim: int, hidden feature size used by every layer
        :param c_dim: int, input feature size of the 'c' nodes
        :param v_dim: int, input feature size of the 'v' nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers, one entry per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is never used by joint_conv (f_c serves both directions).
        # It is kept so existing checkpoints still load -- confirm whether the 'v'
        # update was meant to go through f_v.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return: int, total number of scalar elements over all parameters
        """
        # numel() needs no NumPy conversion, so it also works for parameters that
        # live on a non-CPU device or use a dtype NumPy cannot represent.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step along one edge type; all features are updated in place.

        :param g: single-edge-type slice of the graph, as passed by forward (``g['v2c']`` / ``g['c2v']``)
        :param dir: str, 'c2v' updates the 'v' nodes; any other value updates the 'c' nodes
        :param t: int, index of the round whose layers are applied
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the 'h' feature of each edge's source and destination node and
        # store the result on the edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes recorded by the original author (h_dim=64, e_dim=1):
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # NOTE(original author): open question -- why are the edge weights all negative?

        # Edge feature lifted e_dim -> h_dim, combined with the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Each destination node sums its incoming 'joint' messages into 'h_neigh'.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """
        Same step as :meth:`joint_conv`; kept under its old name for existing callers.
        """
        return self.joint_conv(g, dir, t)

    def _propagate(self, g):
        """
        Embed the raw node features and run the ``T`` message-passing rounds in place.

        :param g: graph with node types 'c' and 'v'; their 'h' features are overwritten
        :return: the final 'h' feature of the 'v' nodes
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution: v -> c first, then c -> v, once per round
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # everything has been gathered into g.nodes['v'].data['h']
        return g.nodes['v'].data['h']

    def forward(self, g):
        """
        :param g: graph with node types 'c' and 'v'; modified in place
        :return: the same graph, with the output of ``f_s`` stored in ``g.nodes['v'].data['s']``
        """
        g.nodes['v'].data['s'] = self.f_s(self._propagate(g))
        return g

    def get_var_embeds(self, g):
        """
        :param g: graph with node types 'c' and 'v'; modified in place
        :return: the 'h' feature of the 'v' nodes after message passing (``f_s`` is not applied)
        """
        return self._propagate(g)

class GCNN_Net_new_33_2(nn.Module):
    """
    Message-passing network over a graph with constraint ('c') and variable ('v') nodes.

    Both node types are embedded to ``h_dim`` features, refined by ``T`` rounds of
    'v2c' then 'c2v' message passing, and every 'v' node finally gets a scalar from ``f_s``.
    """
    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """
        :param h_dim: int, hidden feature size used by every layer
        :param c_dim: int, input feature size of the 'c' nodes
        :param v_dim: int, input feature size of the 'v' nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers, one entry per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is never used by joint_conv (f_c serves both directions).
        # It is kept so existing checkpoints still load -- confirm whether the 'v'
        # update was meant to go through f_v.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return: int, total number of scalar elements over all parameters
        """
        # numel() needs no NumPy conversion, so it also works for parameters that
        # live on a non-CPU device or use a dtype NumPy cannot represent.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step along one edge type; all features are updated in place.

        :param g: single-edge-type slice of the graph, as passed by forward (``g['v2c']`` / ``g['c2v']``)
        :param dir: str, 'c2v' updates the 'v' nodes; any other value updates the 'c' nodes
        :param t: int, index of the round whose layers are applied
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the 'h' feature of each edge's source and destination node and
        # store the result on the edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes recorded by the original author (h_dim=64, e_dim=1):
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # NOTE(original author): open question -- why are the edge weights all negative?

        # Edge feature lifted e_dim -> h_dim, combined with the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Each destination node sums its incoming 'joint' messages into 'h_neigh'.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """
        Same step as :meth:`joint_conv`; kept under its old name for existing callers.
        """
        return self.joint_conv(g, dir, t)

    def _propagate(self, g):
        """
        Embed the raw node features and run the ``T`` message-passing rounds in place.

        :param g: graph with node types 'c' and 'v'; their 'h' features are overwritten
        :return: the final 'h' feature of the 'v' nodes
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution: v -> c first, then c -> v, once per round
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # everything has been gathered into g.nodes['v'].data['h']
        return g.nodes['v'].data['h']

    def forward(self, g):
        """
        :param g: graph with node types 'c' and 'v'; modified in place
        :return: the same graph, with the output of ``f_s`` stored in ``g.nodes['v'].data['s']``
        """
        g.nodes['v'].data['s'] = self.f_s(self._propagate(g))
        return g

class GCNN_Net_new_33_3(nn.Module):
    """
    Message-passing network over a graph with constraint ('c') and variable ('v') nodes.

    Both node types are embedded to ``h_dim`` features, refined by ``T`` rounds of
    'v2c' then 'c2v' message passing, and every 'v' node finally gets a scalar from ``f_s``.
    """
    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """
        :param h_dim: int, hidden feature size used by every layer
        :param c_dim: int, input feature size of the 'c' nodes
        :param v_dim: int, input feature size of the 'v' nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding (constraints and variables are modelled separately)
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers, one entry per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is never used by joint_conv (f_c serves both directions).
        # It is kept so existing checkpoints still load -- confirm whether the 'v'
        # update was meant to go through f_v.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return: int, total number of scalar elements over all parameters
        """
        # numel() needs no NumPy conversion, so it also works for parameters that
        # live on a non-CPU device or use a dtype NumPy cannot represent.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step along one edge type; all features are updated in place.

        :param g: single-edge-type slice of the graph, as passed by forward (``g['v2c']`` / ``g['c2v']``)
        :param dir: str, 'c2v' updates the 'v' nodes; any other value updates the 'c' nodes
        :param t: int, index of the round whose layers are applied
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the 'h' feature of each edge's source and destination node and
        # store the result on the edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes recorded by the original author (h_dim=64, e_dim=1):
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # NOTE(original author): open question -- why are the edge weights all negative?

        # Edge feature lifted e_dim -> h_dim, combined with the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Each destination node sums its incoming 'joint' messages into 'h_neigh'.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """
        Same step as :meth:`joint_conv`; kept under its old name for existing callers.
        """
        return self.joint_conv(g, dir, t)

    def _propagate(self, g):
        """
        Embed the raw node features and run the ``T`` message-passing rounds in place.

        :param g: graph with node types 'c' and 'v'; their 'h' features are overwritten
        :return: the final 'h' feature of the 'v' nodes
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution: v -> c first, then c -> v, once per round
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # everything has been gathered into g.nodes['v'].data['h']
        return g.nodes['v'].data['h']

    def forward(self, g):
        """
        :param g: graph with node types 'c' and 'v'; modified in place
        :return: the same graph, with the output of ``f_s`` stored in ``g.nodes['v'].data['s']``
        """
        g.nodes['v'].data['s'] = self.f_s(self._propagate(g))
        return g

class GCNN_Net_new_33_4(nn.Module):
    """
    Message-passing network over a graph with constraint ('c') and variable ('v') nodes.

    Both node types are embedded to ``h_dim`` features, refined by ``T`` rounds of
    'v2c' then 'c2v' message passing, and every 'v' node finally gets a scalar from ``f_s``.
    """
    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """
        :param h_dim: int, hidden feature size used by every layer
        :param c_dim: int, input feature size of the 'c' nodes
        :param v_dim: int, input feature size of the 'v' nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers, one entry per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is never used by joint_conv (f_c serves both directions).
        # It is kept so existing checkpoints still load -- confirm whether the 'v'
        # update was meant to go through f_v.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return: int, total number of scalar elements over all parameters
        """
        # numel() needs no NumPy conversion, so it also works for parameters that
        # live on a non-CPU device or use a dtype NumPy cannot represent.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step along one edge type; all features are updated in place.

        :param g: single-edge-type slice of the graph, as passed by forward (``g['v2c']`` / ``g['c2v']``)
        :param dir: str, 'c2v' updates the 'v' nodes; any other value updates the 'c' nodes
        :param t: int, index of the round whose layers are applied
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the 'h' feature of each edge's source and destination node and
        # store the result on the edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes recorded by the original author (h_dim=64, e_dim=1):
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # NOTE(original author): open question -- why are the edge weights all negative?

        # Edge feature lifted e_dim -> h_dim, combined with the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Each destination node sums its incoming 'joint' messages into 'h_neigh'.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """
        Same step as :meth:`joint_conv`; kept under its old name for existing callers.
        """
        return self.joint_conv(g, dir, t)

    def _propagate(self, g):
        """
        Embed the raw node features and run the ``T`` message-passing rounds in place.

        :param g: graph with node types 'c' and 'v'; their 'h' features are overwritten
        :return: the final 'h' feature of the 'v' nodes
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution: v -> c first, then c -> v, once per round
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # everything has been gathered into g.nodes['v'].data['h']
        return g.nodes['v'].data['h']

    def forward(self, g):
        """
        :param g: graph with node types 'c' and 'v'; modified in place
        :return: the same graph, with the output of ``f_s`` stored in ``g.nodes['v'].data['s']``
        """
        g.nodes['v'].data['s'] = self.f_s(self._propagate(g))
        return g

class GCNN_Net_new_34(nn.Module):
    """
    NoTree policy.

    Scores candidates from their variable features only: a linear embedding
    (CandidateEmbeddingNet) followed by a stack of linear layers whose width shrinks by
    ``dim_reduce_factor`` at every layer (BranchingNet).
    """
    def __init__(self, var_dim=17, node_dim=8, mip_dim=53, hidden_size=64, depth=2, dropout=0.0, dim_reduce_factor=2, infimum=8,
                 norm='none'):
        """
        :param var_dim: int, dimension of variable state
        :param node_dim: int, dimension of node state (stored only, not used to build layers)
        :param mip_dim: int, dimension of mip state (stored only, not used to build layers)
        :param hidden_size: int, hidden size parameter for the branching policy network
        :param depth: int, depth parameter for the branching policy network (stored only)
        :param dropout: float, dropout parameter for the branching policy network (stored only)
        :param dim_reduce_factor: int, Dimension reduce factor of the branching policy network
        :param infimum: int, infimum parameter of the branching policy network
        :param norm: str, normalization type of the branching policy network
        :raises ValueError: if the layer-size schedule cannot terminate or ends in an empty layer
        """
        super().__init__()
        # The sizing loop below multiplies infimum by dim_reduce_factor until it reaches
        # hidden_size; reject settings for which that would never happen.
        if infimum < hidden_size and (infimum <= 0 or dim_reduce_factor <= 1):
            raise ValueError(
                'infimum must be > 0 and dim_reduce_factor must be > 1 when infimum < hidden_size '
                '(got infimum=%s, dim_reduce_factor=%s)' % (infimum, dim_reduce_factor))

        self.dropout = dropout
        self.norm = norm
        norm_layer = get_norm_layer(norm)

        # define the dimensionality of the features and the hidden states
        self.var_dim = var_dim
        self.node_dim = node_dim
        self.mip_dim = mip_dim
        self.hidden_size = hidden_size
        self.depth = depth

        # define CandidateEmbeddingNet
        self.CandidateEmbeddingNet = nn.Sequential(nn.Linear(var_dim, hidden_size))

        # define the BranchingNet: one layer, plus one more for every multiplication
        # needed to grow infimum up to hidden_size
        unit_count = infimum
        input_dim = hidden_size
        self.n_layers = 1
        while unit_count < hidden_size:
            unit_count *= dim_reduce_factor
            self.n_layers += 1
        layers = []
        for i in range(self.n_layers):
            output_dim = int(input_dim / dim_reduce_factor)
            if output_dim < 1:
                # A zero-width layer would make the mean in forward() return NaN.
                raise ValueError(
                    'BranchingNet layer %d would have %d outputs; increase infimum or '
                    'lower dim_reduce_factor' % (i, output_dim))
            layers.append(nn.Linear(input_dim, output_dim))
            # every layer but the last is followed by normalization and ReLU
            if i < self.n_layers - 1:
                layers += [norm_layer(output_dim), nn.ReLU(True)]
            input_dim = output_dim
        self.BranchingNet = nn.Sequential(*layers)

        # do the Xavier initialization for the linear layers
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(tensor=m.weight, gain=nn.init.calculate_gain('relu'))
            elif isinstance(m, (nn.LayerNorm, nn.BatchNorm1d)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, v_feats, cands_state_mat, node_state=None, mip_state=None):
        """
        :param v_feats: tensor of variable features with at least 3 dimensions, the last of
            size ``var_dim`` (the original author noted an example shape of [4, 1000, 17])
        :param cands_state_mat: ignored -- the network is fed ``v_feats`` instead; kept so
            the call signature stays unchanged
        :param node_state: unused
        :param mip_state: unused
        :return: tensor, mean of the BranchingNet output over dim 2, with that dim kept at size 1
        """
        # go through the CandidateEmbeddingNet
        embedded = self.CandidateEmbeddingNet(v_feats)

        # go through the BranchingNet (no TreeState input)
        branch_out = self.BranchingNet(embedded)

        # collapse the feature axis to one score per candidate, keeping the axis
        return branch_out.mean(dim=2, keepdim=True)

class GCNN_Net_new_35(nn.Module):
    """
    Message-passing network over a graph with constraint ('c') and variable ('v') nodes.

    Both node types are embedded to ``h_dim`` features, refined by ``T`` rounds of
    'v2c' then 'c2v' message passing, and every 'v' node finally gets a scalar from ``f_s``.
    """
    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """
        :param h_dim: int, hidden feature size used by every layer
        :param c_dim: int, input feature size of the 'c' nodes
        :param v_dim: int, input feature size of the 'v' nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers, one entry per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is never used by joint_conv (f_c serves both directions).
        # It is kept so existing checkpoints still load -- confirm whether the 'v'
        # update was meant to go through f_v.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return: int, total number of scalar elements over all parameters
        """
        # numel() needs no NumPy conversion, so it also works for parameters that
        # live on a non-CPU device or use a dtype NumPy cannot represent.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step along one edge type; all features are updated in place.

        :param g: single-edge-type slice of the graph, as passed by forward (``g['v2c']`` / ``g['c2v']``)
        :param dir: str, 'c2v' updates the 'v' nodes; any other value updates the 'c' nodes
        :param t: int, index of the round whose layers are applied
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the 'h' feature of each edge's source and destination node and
        # store the result on the edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes recorded by the original author (h_dim=64, e_dim=1):
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # NOTE(original author): open question -- why are the edge weights all negative?

        # Edge feature lifted e_dim -> h_dim, combined with the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Each destination node sums its incoming 'joint' messages into 'h_neigh'.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """
        Same step as :meth:`joint_conv`; kept under its old name for existing callers.
        """
        return self.joint_conv(g, dir, t)

    def _propagate(self, g):
        """
        Embed the raw node features and run the ``T`` message-passing rounds in place.

        :param g: graph with node types 'c' and 'v'; their 'h' features are overwritten
        :return: the final 'h' feature of the 'v' nodes
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution: v -> c first, then c -> v, once per round
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # everything has been gathered into g.nodes['v'].data['h']
        return g.nodes['v'].data['h']

    def forward(self, g):
        """
        :param g: graph with node types 'c' and 'v'; modified in place
        :return: the same graph, with the output of ``f_s`` stored in ``g.nodes['v'].data['s']``
        """
        g.nodes['v'].data['s'] = self.f_s(self._propagate(g))
        return g

    def eval_forward(self, g):
        """
        Same computation as :meth:`forward`, but returns the scores instead of the graph.

        :param g: graph with node types 'c' and 'v'; modified in place
        :return: the tensor stored in ``g.nodes['v'].data['s']``
        """
        g.nodes['v'].data['s'] = self.f_s(self._propagate(g))
        return g.nodes['v'].data['s']

class GCNN_Net_new_36(nn.Module):
    """
    Message-passing network over a graph with constraint ('c') and variable ('v') nodes.

    Both node types are embedded to ``h_dim`` features, refined by ``T`` rounds of
    'v2c' then 'c2v' message passing, and every 'v' node finally gets a scalar from ``f_s``.
    """
    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        """
        :param h_dim: int, hidden feature size used by every layer
        :param c_dim: int, input feature size of the 'c' nodes
        :param v_dim: int, input feature size of the 'v' nodes
        :param e_dim: int, input feature size of the edges
        :param T: int, number of message-passing rounds (one set of layers per round)
        """
        super().__init__()
        self.T = T
        # constraints embedding
        self.cons_embedding = nn.Sequential(nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # variables embedding
        self.var_embedding = nn.Sequential(nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                                           nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph Convolution: feature layers, one entry per round
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is never used by joint_conv (f_c serves both directions).
        # It is kept so existing checkpoints still load -- confirm whether the 'v'
        # update was meant to go through f_v.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # output layers
        self.f_s = nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """
        :return: int, total number of scalar elements over all parameters
        """
        # numel() needs no NumPy conversion, so it also works for parameters that
        # live on a non-CPU device or use a dtype NumPy cannot represent.
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """
        Run one message-passing step along one edge type; all features are updated in place.

        :param g: single-edge-type slice of the graph, as passed by forward (``g['v2c']`` / ``g['c2v']``)
        :param dir: str, 'c2v' updates the 'v' nodes; any other value updates the 'c' nodes
        :param t: int, index of the round whose layers are applied
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the 'h' feature of each edge's source and destination node and
        # store the result on the edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes recorded by the original author (h_dim=64, e_dim=1):
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # NOTE(original author): open question -- why are the edge weights all negative?

        # Edge feature lifted e_dim -> h_dim, combined with the endpoint sum.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Each destination node sums its incoming 'joint' messages into 'h_neigh'.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """
        Same step as :meth:`joint_conv`; kept under its old name for existing callers.
        """
        return self.joint_conv(g, dir, t)

    def _propagate(self, g):
        """
        Embed the raw node features and run the ``T`` message-passing rounds in place.

        :param g: graph with node types 'c' and 'v'; their 'h' features are overwritten
        :return: the final 'h' feature of the 'v' nodes
        """
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # convolution: v -> c first, then c -> v, once per round
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # everything has been gathered into g.nodes['v'].data['h']
        return g.nodes['v'].data['h']

    def forward(self, g):
        """
        :param g: graph with node types 'c' and 'v'; modified in place
        :return: the same graph, with the output of ``f_s`` stored in ``g.nodes['v'].data['s']``
        """
        g.nodes['v'].data['s'] = self.f_s(self._propagate(g))
        return g

    def eval_forward(self, g):
        """
        Same computation as :meth:`forward`, but returns the scores instead of the graph.

        :param g: graph with node types 'c' and 'v'; modified in place
        :return: the tensor stored in ``g.nodes['v'].data['s']``
        """
        g.nodes['v'].data['s'] = self.f_s(self._propagate(g))
        return g.nodes['v'].data['s']

class GCNN_Net_new_37(nn.Module):
    """Constraint/variable graph network with two variable-scoring heads.

    Nodes of type 'c' and 'v' are embedded, ``T`` rounds of ``joint_conv`` are
    run over the 'v2c' and 'c2v' edge types, and the resulting variable
    embeddings are scored either together with externally supplied root
    embeddings (``forward_root_all`` / ``f_s``) or on their own
    (``forword_all`` / ``f_s_all``).

    Args:
        h_dim: hidden width used by every layer.
        c_dim: input feature width of the 'c' nodes.
        v_dim: input feature width of the 'v' nodes.
        e_dim: input feature width of the edges.
        T: number of convolution rounds (one set of layers per round).
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        super().__init__()
        self.T = T
        # Constraint embedding.
        self.cons_embedding = nn.Sequential(
            nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Variable embedding.
        self.var_embedding = nn.Sequential(
            nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph convolution: one set of feature layers per round.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is never used by this class (joint_conv applies f_c
        # for both directions). It is kept so that state_dict keys, parameter
        # counts and the initialisation order stay unchanged.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # Output head over [root embeddings, variable embeddings] (2 * h_dim wide).
        self.f_s = nn.Sequential(
            nn.Linear(h_dim + h_dim, h_dim + h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, 1))
        # Output head over the variable embeddings alone (h_dim wide).
        self.f_s_all = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters as a Python int.

        Uses ``numel()`` so the count works for parameters on any device and
        of any dtype, without copying tensors to NumPy.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Run message-passing round ``t`` on ``g`` and refresh the receiving nodes.

        Args:
            g: graph with node types 'c' and 'v' (each with an 'h' node
                feature) and an 'h' edge feature; the callers in this class
                pass the ``g['v2c']`` / ``g['c2v']`` slices.
            dir: 'c2v' refreshes the 'v' nodes; any other value refreshes 'c'.
            t: index selecting the per-round layers.

        The graph is modified in place; nothing is returned.
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the source node's 'h' and the destination node's 'h' and store
        # the result on the edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes recorded by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Original author's open question: why are the edge weights all negative?

        # Combine the projected edge feature (e_dim -> h_dim) with the
        # endpoint sum, then refine it (h_dim -> h_dim) into the edge message.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the receiving nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both directions; f_v stays unused.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """Alias of ``joint_conv``; the two implementations were identical."""
        return self.joint_conv(g, dir, t)

    def _encode(self, g):
        """Embed the nodes, run ``self.T`` rounds and return the 'v' embeddings."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution: alternate v->c and c->v in every round.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information is now aggregated in g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def forward_root_all(self, root_var_embeds, g):
        """Score variables from root embeddings concatenated with fresh ones.

        Args:
            root_var_embeds: tensor concatenated along dim 1 in front of the
                variable embeddings computed from ``g``; it must have one row
                per 'v' node and supply the remaining ``h_dim`` columns that
                ``f_s`` expects.
            g: graph to encode (modified in place).

        Returns:
            ``g`` with the scores stored in ``g.nodes['v'].data['s']``.
        """
        var_embeds = self._encode(g)
        var_embeds = torch.cat((root_var_embeds, var_embeds), dim=1)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        return g

    def forword_all(self, g):
        """Score variables from their own embeddings using ``f_s_all``.

        Returns ``g`` with the scores stored in ``g.nodes['v'].data['s']``.
        The misspelled name is kept for existing callers.
        """
        var_embeds = self._encode(g)
        g.nodes['v'].data['s'] = self.f_s_all(var_embeds)
        return g

    # Correctly spelled alias; `forword_all` remains available.
    forward_all = forword_all


class GCNN_Net_new_38(nn.Module):
    """Constraint/variable graph network that scores every variable node.

    Nodes of type 'c' and 'v' are embedded, ``T`` rounds of ``joint_conv`` are
    run over the 'v2c' and 'c2v' edge types, and ``f_s`` maps each variable
    embedding to a single score.

    Args:
        h_dim: hidden width used by every layer.
        c_dim: input feature width of the 'c' nodes.
        v_dim: input feature width of the 'v' nodes.
        e_dim: input feature width of the edges.
        T: number of convolution rounds (one set of layers per round).
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        super().__init__()
        self.T = T
        # Constraint embedding.
        self.cons_embedding = nn.Sequential(
            nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Variable embedding.
        self.var_embedding = nn.Sequential(
            nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph convolution: one set of feature layers per round.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is never used by this class (joint_conv applies f_c
        # for both directions). It is kept so that state_dict keys, parameter
        # counts and the initialisation order stay unchanged.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # Output head: one score per variable node.
        self.f_s = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters as a Python int.

        Uses ``numel()`` so the count works for parameters on any device and
        of any dtype, without copying tensors to NumPy.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Run message-passing round ``t`` on ``g`` and refresh the receiving nodes.

        Args:
            g: graph with node types 'c' and 'v' (each with an 'h' node
                feature) and an 'h' edge feature; ``forward`` passes the
                ``g['v2c']`` / ``g['c2v']`` slices.
            dir: 'c2v' refreshes the 'v' nodes; any other value refreshes 'c'.
            t: index selecting the per-round layers.

        The graph is modified in place; nothing is returned.
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the source node's 'h' and the destination node's 'h' and store
        # the result on the edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes recorded by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Original author's open question: why are the edge weights all negative?

        # Combine the projected edge feature (e_dim -> h_dim) with the
        # endpoint sum, then refine it (h_dim -> h_dim) into the edge message.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the receiving nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both directions; f_v stays unused.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """Alias of ``joint_conv``; the two implementations were identical."""
        return self.joint_conv(g, dir, t)

    def _encode(self, g):
        """Embed the nodes, run ``self.T`` rounds and return the 'v' embeddings."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution: alternate v->c and c->v in every round.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information is now aggregated in g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def forward(self, g):
        """Encode ``g`` and store one score per variable in ``g.nodes['v'].data['s']``.

        Returns:
            The same graph ``g`` (modified in place).
        """
        var_embeds = self._encode(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        return g



class GCNN_Net_new_40(nn.Module):
    """Constraint/variable graph network exposing scores and raw embeddings.

    Nodes of type 'c' and 'v' are embedded and ``T`` rounds of ``joint_conv``
    are run over the 'v2c' and 'c2v' edge types. ``forward`` scores each
    variable with ``f_s``; ``get_var_embeds`` returns the variable embeddings
    without scoring them.

    Args:
        h_dim: hidden width used by every layer.
        c_dim: input feature width of the 'c' nodes.
        v_dim: input feature width of the 'v' nodes.
        e_dim: input feature width of the edges.
        T: number of convolution rounds (one set of layers per round).
    """

    def __init__(self, h_dim=64, c_dim=5, v_dim=17, e_dim=1, T=1):
        super().__init__()
        self.T = T
        # Constraint embedding.
        self.cons_embedding = nn.Sequential(
            nn.Linear(c_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Variable embedding.
        self.var_embedding = nn.Sequential(
            nn.Linear(v_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
        # Graph convolution: one set of feature layers per round.
        self.feature_module_conss = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_vars = nn.ModuleList([nn.Linear(h_dim, h_dim) for _ in range(T)])
        self.feature_module_edges = nn.ModuleList([nn.Linear(e_dim, h_dim) for _ in range(T)])
        self.feature_final = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim))
            for _ in range(T)])
        self.f_c = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # NOTE(review): f_v is never used by this class (joint_conv applies f_c
        # for both directions). It is kept so that state_dict keys, parameter
        # counts and the initialisation order stay unchanged.
        self.f_v = nn.ModuleList([
            nn.Sequential(nn.Linear(h_dim + h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1),
                          nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1))
            for _ in range(T)])
        # Output head: one score per variable node.
        self.f_s = nn.Sequential(
            nn.Linear(h_dim, h_dim), nn.ReLU(), nn.Dropout(0.1), nn.Linear(h_dim, 1))

    def count_parameters(self):
        """Return the total number of scalar parameters as a Python int.

        Uses ``numel()`` so the count works for parameters on any device and
        of any dtype, without copying tensors to NumPy.
        """
        return sum(param.numel() for param in self.parameters())

    def joint_conv(self, g, dir, t):
        """Run message-passing round ``t`` on ``g`` and refresh the receiving nodes.

        Args:
            g: graph with node types 'c' and 'v' (each with an 'h' node
                feature) and an 'h' edge feature; the callers in this class
                pass the ``g['v2c']`` / ``g['c2v']`` slices.
            dir: 'c2v' refreshes the 'v' nodes; any other value refreshes 'c'.
            t: index selecting the per-round layers.

        The graph is modified in place; nothing is returned.
        """
        g.nodes['c'].data['h'] = self.feature_module_conss[t](g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.feature_module_vars[t](g.nodes['v'].data['h'])
        # Sum the source node's 'h' and the destination node's 'h' and store
        # the result on the edge as 'u+v'.
        g.apply_edges(fn.u_add_v('h', 'h', 'u+v'))

        # Example shapes recorded by the original author:
        #   g.nodes['c'].data['h'] : [520, 64]
        #   g.nodes['v'].data['h'] : [1000, 64]
        #   g.edata['h']           : [34861, 1]
        #   g.edata['u+v']         : [34861, 64]
        # Original author's open question: why are the edge weights all negative?

        # Combine the projected edge feature (e_dim -> h_dim) with the
        # endpoint sum, then refine it (h_dim -> h_dim) into the edge message.
        joint = self.feature_module_edges[t](g.edata['h']) + g.edata['u+v']
        g.edata['joint'] = self.feature_final[t](joint)

        # Sum the incoming edge messages into 'h_neigh' on the receiving nodes.
        g.update_all(fn.copy_e('joint', 'm'), fn.sum('m', 'h_neigh'))

        dsttype = 'v' if dir == 'c2v' else 'c'
        h_neigh = g.nodes[dsttype].data['h_neigh']
        h = g.nodes[dsttype].data['h']
        # NOTE(review): f_c is applied for both directions; f_v stays unused.
        g.nodes[dsttype].data['h'] = self.f_c[t](torch.cat([h, h_neigh], dim=1))

    def joint_conv1(self, g, dir, t):
        """Alias of ``joint_conv``; the two implementations were identical."""
        return self.joint_conv(g, dir, t)

    def _encode(self, g):
        """Embed the nodes, run ``self.T`` rounds and return the 'v' embeddings."""
        g.nodes['c'].data['h'] = self.cons_embedding(g.nodes['c'].data['h'])
        g.nodes['v'].data['h'] = self.var_embedding(g.nodes['v'].data['h'])
        g_v2c = g['v2c']
        g_c2v = g['c2v']
        # Convolution: alternate v->c and c->v in every round.
        for t in range(self.T):
            self.joint_conv(g_v2c, 'v2c', t)
            self.joint_conv(g_c2v, 'c2v', t)
        # All information is now aggregated in g.nodes['v'].data['h'].
        return g.nodes['v'].data['h']

    def forward(self, g):
        """Encode ``g`` and store one score per variable in ``g.nodes['v'].data['s']``.

        Returns:
            The same graph ``g`` (modified in place).
        """
        var_embeds = self._encode(g)
        g.nodes['v'].data['s'] = self.f_s(var_embeds)
        return g

    def get_var_embeds(self, g):
        """Encode ``g`` (in place) and return the variable embeddings, unscored."""
        return self._encode(g)



if __name__ == '__main__':
    # Ad-hoc timing comparison of GCNN_Net and PD_Net on 20 stored samples.
    import pickle
    import gzip
    import time
    from src.utils import graph_transform
    gcn = GCNN_Net()
    pd = PD_Net(T=2)
    for i in range(20):
        # Load the problem instance. The context manager closes the gzip
        # handle after each sample instead of leaking it.
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # this at trusted sample files.
        sample_path = f'../data/samples/indset_400n_4a_0se/train/sample_{i+1}.pkl'
        with gzip.open(sample_path, 'rb') as sample_file:
            sample = pickle.load(sample_file)
        pd_data = sample['data'][0]
        # Build the pd graph.
        pd_graph = graph_transform(pd_data)
        # perf_counter is monotonic and high-resolution, which suits interval timing.
        t0 = time.perf_counter()
        s1 = gcn(pd_graph)
        t1 = time.perf_counter()
        # Build a fresh graph so the second model does not see features
        # written by the first one.
        test_graph = graph_transform(pd_data)
        t2 = time.perf_counter()
        s2 = pd(test_graph)
        t3 = time.perf_counter()
        print(100*'#')
        print("gcn time:\t",t1-t0)
        print("pd time:\t",t3-t2)
