{
    "model_path": "./runs/annotation/bert_base_512_lastln_t2t_1000G_bs256_lr_1e-04_linear_fp16/model_2000000/rmt_seglen_512_len42000_maxnsegm_10000_msz_5_bptt-1_lr5e-05_AdamW_cosine_wd1e-04_p10_bs64_it50000/run_1",
    "log_interval": 100,
    "valid_interval": 1000,
    "save_interval": null,
    "save_best": true,
    "use_generate_on_valid": false,
    "init_checkpoint": null,
    "skip_used_data": false,
    "reset_lr": true,
    "reset_iteration": true,
    "reset_optimizer": true,
    "lr": 5e-05,
    "batch_size": 1,
    "iters": 50000,
    "gradient_accumulation_steps": 1,
    "clip_grad_norm": null,
    "clip_grad_value": null,
    "early_stopping_patience": 10,
    "lr_scheduler": "cosine",
    "num_warmup_steps": 1500,
    "num_training_steps": null,
    "use_lr_drop": false,
    "lr_drop_factor": 0.1,
    "lr_drop_patience": 10,
    "lr_drop_threshold": 0.0001,
    "lr_drop_threshold_mode": "rel",
    "lr_drop_cooldown": 0,
    "lr_drop_min_lr": 0.0,
    "lr_drop_eps": 1e-08,
    "optimize_metric": "pr_auc_mean",
    "optimize_mode": "max",
    "data_path": "/home/jovyan/dnalm/downstream_tasks/annotation/data/train_trans250k_full.hdf5",
    "valid_data_path": "/home/jovyan/dnalm/downstream_tasks/annotation/data/val_trans250k_full.hdf5",
    "test_data_path": "/home/jovyan/dnalm/downstream_tasks/annotation/data/val_trans250k_full.hdf5",
    "seed": 43,
    "input_seq_len": 42000,
    "data_n_workers": 2,
    "targets_offset": 5000,
    "targets_len": 5000,
    "model_cfg": "./data/configs/L12-H768-A12-V32k-preln-lastln.json",
    "model_cls": "src.gena_lm.modeling_rmt:RMTEncoderForTokenClassification",
    "backbone_cls": "src.gena_lm.modeling_bert:BertForTokenClassification",
    "backbone_trainable": true,
    "backbone_checkpoint": "/home/jovyan/dnalm/model_hub/bert_base_512_lastln_t2t_1000G_bs256_lr_1e-04_linear_fp16/model_best.pth",
    "input_size": 512,
    "num_mem_tokens": 5,
    "max_n_segments": 10000,
    "bptt_depth": -1,
    "segment_ordering": "regular",
    "tokenizer": "./data/tokenizers/t2t_1000h_multi_32k/",
    "optimizer": "AdamW",
    "weight_decay": 0.0001,
    "ENV": {
        "CUDA_VISIBLE_DEVICES": "0,1,2,3,4,5,6,7"
    },
    "HVD_INIT": false,
    "accelerate": {
        "initialized": true,
        "num_processes": 8,
        "backend": "nccl",
        "distributed_type": "MULTI_GPU",
        "mixed_precision": "no"
    },
    "MACHINE": "distracted-mcnulty-0",
    "COMMIT": "a66279f0cafc0d9844316fa2927634b1546144d4"
}