# Run-level settings: naming, logging, sampling cadence and checkpointing.
# NOTE(review): per-key notes are inferred from key names only; confirm
# against the code that consumes this file.
general:
    name: 'pretrain'
    # Passed as a string; 'disabled' presumably switches W&B logging off.
    wandb: 'disabled'
    gpus: 1
    # Both left unset (null) in this config.
    resume: null
    test_only: null
    # Sampling during validation (presumably: how often, how many, how many kept).
    sample_every_val: 15
    samples_to_generate: 512
    samples_to_save: 3
    log_every_steps: 50
    number_chain_steps: 8
    # Same sampling knobs, applied to the final model.
    final_model_samples_to_generate: 5000
    final_model_samples_to_save: 20
    enable_progress_bar: false
    save_model: true
# Model and diffusion-process settings.
# NOTE(review): per-key notes are inferred from key names only; confirm
# against the code that consumes this file.
model:
    # Diffusion family / transition / backbone selectors (string identifiers).
    type: 'discrete'
    transition: 'marginal'
    model: 'graph_dit'
    diffusion_steps: 500
    diffusion_noise_schedule: 'cosine'
    guide_scale: 2
    # Backbone size.
    hidden_size: 1280
    depth: 24
    num_heads: 16
    mlp_ratio: 4
    # Presumably the probability of dropping the condition during training — confirm.
    drop_condition: 0.01
    # Three weights; which loss term each one scales is not visible here.
    lambda_train: [1, 1, 1]
# Optimisation and training-loop settings.
# NOTE(review): per-key notes are inferred from key names only; confirm
# against the code that consumes this file.
train:
    n_epochs: 200
    batch_size: 64
    lr: 0.00002
    # Learning-rate schedule shape (presumably fractions of total steps / of lr).
    warmup_ratio: 0.1
    min_lr_ratio: 0.1
    # Written with an explicit decimal point: YAML 1.1 loaders (e.g. PyYAML)
    # read a bare `1e-5` as the string '1e-5', not as a float.
    warmup_start_lr: 1.0e-5
    clip_grad: 0.1
    num_workers: 0
    # Explicit decimal point for the same reason as warmup_start_lr.
    weight_decay: 1.0e-12
    seed: 0
    # Validation cadence: the step-based interval is unset, the epoch-based one is 2.
    val_check_interval: null
    check_val_every_n_epoch: 2
    noise_weight: null
    context_length: 150
# Tokenizer settings.
# NOTE(review): per-key notes are inferred from key names only; confirm
# against the code that consumes this file.
tokenizer:
    # Integer value; presumably a worker/process count — confirm.
    processor: 5
    vocab_size: 3000
    retrain: false
    vocab_ring_len: 300
    simple_mode: false
    name: 'pretrain'
# Dataset location and loading settings.
# NOTE(review): per-key notes are inferred from key names only; confirm
# against the code that consumes this file.
dataset:
    # Relative path; what it is resolved against depends on the consumer.
    datadir: 'data/'
    task_name: 'pretrain'
    context_name: 'pretrain'
    pin_memory: false
