# Flat key/value run settings; keys are kept in alphabetical order.
# Booleans are written in canonical lowercase (true/false) and unset values
# as null, so YAML 1.1 and YAML 1.2 loaders resolve them to the same types.
# NOTE(review): remarks about what a key controls are inferred from its name
# and value only - confirm against the code that consumes this file.
accum_freq: 1
batch_size: 8
data_key: json
dataset_resampled: false
dataset_type: auto
# Quoted so the embedded colon is always read as part of a plain string.
device: 'cuda:0'
dist_backend: nccl
dist_url: 'env://'
ffn_type: swiglu
hf_model: null
ignore_parse_errors: false
local_rank: 0
log_every_n_steps: 1
# Relative path to a JSON file.
# NOTE(review): presumably resolved against the launch directory - confirm.
model: ../open_lm/model_configs/c4_original-d=96_l=8_h=4.json
model_norm: gain_only_layer_norm
name: random
no_set_device_rank: false
no_skip_tokens: false
positional_embedding_type: rotary
precision: amp_bfloat16
qk_norm: true
# NOTE(review): rank, local_rank and device all name a single process (0)
# while world_size below is 8 - presumably these are overridden per process
# at launch; confirm.
rank: 0
# NOTE(review): relative checkpoint path - confirm it exists where the run
# starts.
resume: checkpoint.pt
seed: 124
seq_len: 2048
target_mask_individual: null
target_mask_left: null
train_data_upsampling_factors: null
train_num_samples: 1000000000
val_batch_size: null
# NOTE(review): 0 together with a null val_batch_size presumably means no
# validation pass - confirm.
val_num_samples: 0
vocab_size: 50432
wandb: false
workers: 6
world_size: 8