# Dataset selection and data-loading settings.
dataset_config:
  # Absolute directory path for the dataset.
  path: /dataset/text2graph_data/
  # NOTE(review): presumably a dataset-family tag (chemistry) that selects
  # the matching loader -- confirm against the consuming code.
  type: chem
  # Dataset identifier; currently identical to file_prefix below.
  name: pcqm4m_qed_20_100000
  # NOTE(review): presumably the shared filename prefix of the data files
  # found under `path` -- confirm.
  file_prefix: pcqm4m_qed_20_100000
  # NOTE(review): presumably the number of data-loader worker processes --
  # confirm.
  num_workers: 10
  # NOTE(review): presumably the number of samples per batch -- confirm.
  batch_size: 6

# Optimisation and checkpointing hyper-parameters.
training_params:
  # Same name as the `logging_name` of the evaluation declared under
  # info_flow. NOTE(review): presumably the metric used to decide which
  # checkpoint to keep -- confirm.
  checkpointing_metric: nll
  # NOTE(review): the meaning of the sign is not visible in this file
  # (likely whether lower or higher metric values are better) -- confirm
  # before changing.
  checkpointing_metric_sign: 1
  # Equals 1e-7, written in plain decimal notation.
  regularization_weight: 0.0000001
  # Equals 3e-5. NOTE(review): presumably the optimiser learning rate.
  lrn_rate: 0.00003
  # NOTE(review): presumably Adam-style moment decay coefficients -- confirm.
  beta1: 0.9
  beta2: 0.999
  # NOTE(review): presumably the maximum number of passes over the training
  # data -- confirm.
  max_training_epochs: 1

# Output location and label for this run.
logging_params:
  # NOTE(review): presumably the base directory that receives logs and/or
  # saved models -- confirm.
  logging_dir: /root/models/
  # Run label; the value mirrors the serialization_type (depth) and
  # message_passing_type (edges) set under info_flow.
  run_name: serialize_depth_edges

# Model wiring: each entry under info_flow names a model and lists its
# metadata, its inputs, and the evaluations applied to its outputs.
info_flow:
  SerializedGraphGenerator:
    # Explicit null instead of a bare empty value; the parsed value is the
    # same. NOTE(review): presumably null means no existing model directory
    # is loaded -- confirm against the consuming code.
    model_dir: null
    metadata:
      language_model_name: bigscience/bloom-560m
      # Booleans are written in canonical lowercase form (true/false).
      randomize_sequence: false
      serialization_type: depth
      message_passing_type: edges
      quantize: false
      use_lora: false
    # NOTE(review): each value appears to name the source that provides the
    # input; all six inputs here point at `dataset`.
    inputs:
      input_sequence: dataset
      input_attn_mask: dataset
      text_sequence: dataset
      text_attn_mask: dataset
      graph_sequence: dataset
      graph_attn_mask: dataset
    evaluations:
      CrossEntropyWMissingValues:
        # Same name as training_params.checkpointing_metric.
        logging_name: nll
        weight: 1.0
        is_loss: true
        # NOTE(review): the logits appear to be taken from the model above
        # and the target from the dataset -- confirm.
        inputs:
          graph_sequence_logits: SerializedGraphGenerator
          target_sequence: dataset
  # Grapher:
  #   model_dir:
  #   metadata:
  #     language_model_name: bigscience/bloom-560m
  #     max_nodes: 20
  #     default_seq_len_edge: 8
  #     randomize_sequence: False
  #     add_directions: False
  #   inputs:
  #     input_sequence: dataset
  #     input_attn_mask: dataset
  #     text_sequence: dataset
  #     text_attn_mask: dataset
  #     node_sequence: dataset
  #     node_attn_mask: dataset
  #   evaluations:
  #     GrapherLoss:
  #       logging_name: nll
  #       weight: 1.0
  #       is_loss: True
  #       inputs:
  #         node_sequence_logits: Grapher
  #         edge_matrices_logits: Grapher
  #         target_sequence: dataset
  #         edge_matrices: dataset
