model                      : our
v2_db                      : False
use_redis                  : False
task                       : class
subtask                    : train
sequence_modeling          : True
data_repr                  : penhance
code_encoder               : codet5
chunk_emb                  : cls
finetune_bert              : True
replace_with_random_weights : False
add_edges_fc_graph         : True
chunk_offset               : 16
max_code_tokens_len        : 64
token_att_masking          : True
preserve_keywords          : True
pk_version                 : 2
bypass_tf_layers           : False
apply_act_conv_first       : False
vis_transformer_att        : False
what_modalities            : programl+src_code
combine_fashion            : share_final_MLPs
feed_p_to_tf               : False
pc_links                   : False
multi_modality             : True
load_db                    : None
load_model                 : None
dataset                    : programl
benchmarks                 : ['machsuite',
                           :  'poly']
tag                        : wmp-d
round_num                  : 13
graph_type                 : 
graph_transformer_option   : None
gae_T                      : False
gae_P                      : False
SSL                        : False
load_pretrained_GNN        : False
class_model_path           : None
all_kernels                : True
sample_finetune            : False
FT_extra                   : False
new_speedup                : True
feature_extract            : False
ignore_kernels             : []
test_kernels               : ['stencil-3d',
                           :  'doitgen-red',
                           :  'gemm-ncubed',
                           :  'fdtd-2d',
                           :  'jacobi-2d',
                           :  'trmm-opt']
val_ratio_in_test_kernels  : 0
val_ratio_in_train_kernels : 0.15
test_ratio_in_train_kernels : 0.15
tvt_split_by               : kernels_inductive
itype_mask_perc            : 0
gtype                      : programl
only_pragma_nodes          : False
encode_full_text           : None
fulltext_dim               : None
MAML                       : False
force_regen                : True
load_encoders_label        : None
encoder_path               : None
model_tag                  : test
activation                 : elu
outlier_removal            : None
no_pragma                  : False
num_layers                 : 8
D                          : 512
target                     : ['perf',
                           :  'util-LUT',
                           :  'util-FF',
                           :  'util-DSP',
                           :  'util-BRAM']
gnn_type                   : transformer
min_allowed_latency        : 100.0
encode_edge                : True
encode_edge_position       : False
ptrans                     : False
jkn_mode                   : max
jkn_enable                 : True
node_attention             : True
node_attention_MLP         : False
separate_P_T               : False
pragma_encoder             : True
pragma_uniform_encoder     : True
epsilon                    : 0.001
normalizer                 : 10000000.0
util_normalizer            : 1
max_number                 : 10000000000.0
norm_method                : speedup-log2
target_preproc             : None
target_convert_back        : True
invalid                    : False
multi_target               : True
activation_type            : elu
margin_loss                : False
save_model                 : True
save_every_epoch           : 10000
encode_log                 : False
baseline_no_graph          : False
target_factor              : 1
target_transform           : None
loss_scale                 : {'perf': 1.0,
                           :  'util-DSP': 1.0,
                           :  'util-BRAM': 1.0,
                           :  'util-LUT': 1.0,
                           :  'util-FF': 1.0}
pairwise_class             : False
batch_size                 : 2
data_loader_num_workers    : 0
device                     : cuda:0
epoch_num                  : 200
debug_iter                 : -1
ignore_testing             : True
ignore_validation          : False
shuffle                    : True
opt_type                   : AdamW
lr                         : 1e-05
max_grad_norm              : None
weight_decay               : 0.01
plot_pred_points           : True
fix_randomness             : True
random_seed                : 123
user                       : xxx
hostname                   : xxxx
exp_name                   : 
ts                         : 2023-05-30T21-59-21.090393

MultiModalityNet(
  (loss_function): CrossEntropyLoss()
  (bert_model): T5Stack(
    (embed_tokens): Embedding(32234, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): ReLU()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (1): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): ReLU()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (2): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): ReLU()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (3): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): ReLU()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (4): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): ReLU()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
      (5): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Dropout(p=0.1, inplace=False)
              (act): ReLU()
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )
      )
    )
    (final_layer_norm): T5LayerNorm()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (conv_first_programl): TransformerConv(158, 512, heads=1)
  (conv_first_src_code): MLP(
    (activation): ELU(alpha=1.0)
    (layers): ModuleList(
      (0): Linear(in_features=512, out_features=256, bias=True)
      (1): Linear(in_features=256, out_features=128, bias=True)
      (2): Linear(in_features=128, out_features=512, bias=True)
    )
  )
  (conv_layers_programl): ModuleList(
    (0): TransformerConv(512, 512, heads=1)
    (1): TransformerConv(512, 512, heads=1)
    (2): TransformerConv(512, 512, heads=1)
    (3): TransformerConv(512, 512, heads=1)
    (4): TransformerConv(512, 512, heads=1)
    (5): TransformerConv(512, 512, heads=1)
    (6): TransformerConv(512, 512, heads=1)
  )
  (jkn_programl): JumpingKnowledge(max)
  (conv_layers_src_code): ModuleList(
    (0): TransformerConv(512, 512, heads=1)
    (1): TransformerConv(512, 512, heads=1)
    (2): TransformerConv(512, 512, heads=1)
    (3): TransformerConv(512, 512, heads=1)
    (4): TransformerConv(512, 512, heads=1)
    (5): TransformerConv(512, 512, heads=1)
    (6): TransformerConv(512, 512, heads=1)
  )
  (jkn_src_code): JumpingKnowledge(max)
  (glob_programl): MyGlobalAttention(gate_nn=Sequential(
    (0): Linear(in_features=512, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=1, bias=True)
  ), nn=None)
  (glob_src_code): MyGlobalAttention(gate_nn=Sequential(
    (0): Linear(in_features=512, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=1, bias=True)
  ), nn=None)
  (base_decoder_programl): MLP(
    (activation): ELU(alpha=1.0)
    (layers): ModuleList(
      (0): Linear(in_features=512, out_features=512, bias=True)
      (1): Linear(in_features=512, out_features=512, bias=True)
      (2): Linear(in_features=512, out_features=512, bias=True)
    )
  )
  (base_decoder_src_code): MLP(
    (activation): ELU(alpha=1.0)
    (layers): ModuleList(
      (0): Linear(in_features=512, out_features=512, bias=True)
      (1): Linear(in_features=512, out_features=512, bias=True)
      (2): Linear(in_features=512, out_features=512, bias=True)
    )
  )
  (decoder_shared): ModuleDict(
    (perf): MLP(
      (activation): ELU(alpha=1.0)
      (layers): ModuleList(
        (0): Linear(in_features=1024, out_features=768, bias=True)
        (1): Linear(in_features=768, out_features=512, bias=True)
        (2): Linear(in_features=512, out_features=256, bias=True)
        (3): Linear(in_features=256, out_features=128, bias=True)
        (4): Linear(in_features=128, out_features=64, bias=True)
        (5): Linear(in_features=64, out_features=2, bias=True)
      )
    )
  )
)
