{
  "no_progress_bar": false,
  "log_interval": 100,
  "log_format": "json",
  "log_file": null,
  "tensorboard_logdir": null,
  "wandb_project": null,
  "azureml_logging": false,
  "seed": 1,
  "cpu": false,
  "tpu": false,
  "bf16": false,
  "memory_efficient_bf16": false,
  "fp16": true,
  "memory_efficient_fp16": false,
  "fp16_no_flatten_grads": false,
  "fp16_init_scale": 4,
  "fp16_scale_window": null,
  "fp16_scale_tolerance": 0.0,
  "on_cpu_convert_precision": false,
  "min_loss_scale": 0.0001,
  "threshold_loss_scale": null,
  "amp": false,
  "amp_batch_retries": 2,
  "amp_init_scale": 128,
  "amp_scale_window": null,
  "user_dir": null,
  "empty_cache_freq": 0,
  "all_gather_list_size": 16384,
  "model_parallel_size": 1,
  "quantization_config_path": null,
  "profile": false,
  "reset_logging": false,
  "suppress_crashes": false,
  "use_plasma_view": false,
  "plasma_path": "/tmp/plasma",
  "criterion": "masked_lm",
  "tokenizer": null,
  "bpe": null,
  "optimizer": "adam",
  "lr_scheduler": "polynomial_decay",
  "scoring": "bleu",
  "task": "p_masked_lm_cluster_resample",
  "num_workers": 0,
  "skip_invalid_size_inputs_valid_test": true,
  "max_tokens": 1024,
  "batch_size": null,
  "required_batch_size_multiple": 8,
  "required_seq_len_multiple": 1,
  "dataset_impl": "fasta",
  "data_buffer_size": 10,
  "train_subset": "train50",
  "valid_subset": "valid50",
  "combine_valid_subsets": null,
  "ignore_unused_valid_subsets": true,
  "validate_interval": 99999,
  "validate_interval_updates": 0,
  "validate_after_updates": 0,
  "fixed_validation_seed": null,
  "disable_validation": false,
  "max_tokens_valid": 1024,
  "batch_size_valid": null,
  "max_valid_steps": null,
  "curriculum": 0,
  "gen_subset": "test",
  "num_shards": 1,
  "shard_id": 0,
  "distributed_world_size": 512,
  "distributed_num_procs": 8,
  "distributed_rank": 0,
  "distributed_backend": "nccl",
  "distributed_init_method": null,
  "distributed_port": 14490,
  "device_id": 0,
  "distributed_no_spawn": false,
  "ddp_backend": "c10d",
  "ddp_comm_hook": "none",
  "bucket_cap_mb": 25,
  "fix_batches_to_gpus": false,
  "find_unused_parameters": false,
  "fast_stat_sync": false,
  "heartbeat_timeout": 3600,
  "broadcast_buffers": false,
  "slowmo_momentum": null,
  "slowmo_algorithm": "LocalSGD",
  "localsgd_frequency": 3,
  "nprocs_per_node": 8,
  "pipeline_model_parallel": false,
  "pipeline_balance": null,
  "pipeline_devices": null,
  "pipeline_chunks": 0,
  "pipeline_encoder_balance": null,
  "pipeline_encoder_devices": null,
  "pipeline_decoder_balance": null,
  "pipeline_decoder_devices": null,
  "pipeline_checkpoint": "never",
  "zero_sharding": "none",
  "no_reshard_after_forward": false,
  "fp32_reduce_scatter": false,
  "cpu_offload": false,
  "use_sharded_state": false,
  "arch": "p_roberta_large",
  "max_epoch": 500,
  "max_update": 0,
  "stop_time_hours": 0,
  "clip_norm": 0.0,
  "use_inf_norm": false,
  "sentence_avg": false,
  "update_freq": [4],
  "lr": [0.0004],
  "stop_min_lr": -1.0,
  "use_bmuf": false,
  "save_dir": "/fsx-protein/halilakin/checkpoints/33layer_lr_poly.sample_ur50_to_90.ngpu512",
  "restore_file": "checkpoint_last.pt",
  "finetune_from_model": null,
  "reset_dataloader": false,
  "reset_lr_scheduler": false,
  "reset_meters": false,
  "reset_optimizer": false,
  "optimizer_overrides": "{}",
  "save_interval": 99999,
  "save_interval_updates": 10000,
  "keep_interval_updates": -1,
  "keep_interval_updates_pattern": -1,
  "keep_last_epochs": -1,
  "keep_best_checkpoints": -1,
  "no_save": false,
  "no_epoch_checkpoints": true,
  "no_last_checkpoints": false,
  "no_save_optimizer_state": false,
  "best_checkpoint_metric": "loss",
  "maximize_best_checkpoint_metric": false,
  "patience": -1,
  "checkpoint_suffix": "",
  "checkpoint_shard_count": 1,
  "load_checkpoint_on_all_dp_ranks": false,
  "write_checkpoints_asynchronously": false,
  "no_mid_epoch_validate": false,
  "encoder_layerdrop": 0,
  "encoder_layers_to_keep": null,
  "quant_noise_pq": 0,
  "quant_noise_pq_block_size": 8,
  "quant_noise_scalar": 0,
  "min_params_to_wrap": 100000000,
  "data": "/fsx-protein/zhongkai/datasets/202104esm2/03_output",
  "sample_break_mode": "eos",
  "tokens_per_sample": 1024,
  "mask_prob": 0.15,
  "leave_unmasked_prob": 0.1,
  "random_token_prob": 0.1,
  "freq_weighted_replacement": false,
  "mask_whole_words": false,
  "mask_multiple_length": 1,
  "mask_stdev": 0.0,
  "shorten_method": "random_crop",
  "shorten_data_split_list": "train50",
  "num_batch_buckets": 0,
  "cluster_resample_fasta_path": "/fsx-protein/zhongkai/datasets/202104esm2/03_output/uniref90.filtered.fasta",
  "cluster_resample_seq_id": 90,
  "cluster_resample_ur50_ur90_ur100_path": "/fsx-protein/zhongkai/datasets/202104esm2/01_inputs/ur50_ur90_ur100.no_ur_id_prefix.csv",
  "adam_betas": "[0.9,0.98]",
  "adam_eps": 1e-08,
  "weight_decay": 0.01,
  "use_old_adam": false,
  "warmup_updates": 2000,
  "force_anneal": null,
  "end_learning_rate": 2e-05,
  "power": 1.0,
  "total_num_update": "450000",
  "pad": 1,
  "eos": 2,
  "unk": 3,
  "max_positions": 1024,
  "activation_fn": "gelu",
  "use_rotary_embeddings": true,
  "encoder_normalize_after": true,
  "preact_normalize": true,
  "token_dropout": true,
  "layer_norm_fp32": true,
  "attention_dropout": 0.0,
  "dropout": 0.0,
  "activation_dropout": 0.0,
  "encoder_attention_heads": 20,
  "encoder_embed_dim": 1280,
  "encoder_ffn_embed_dim": 5120,
  "encoder_layers": 33,
  "no_seed_provided": false,
  "pooler_activation_fn": "tanh",
  "pooler_dropout": 0.0,
  "encoder_normalize_before": false,
  "encoder_learned_pos": false,
  "use_bert_init": false,
  "checkpoint_transformer_block": false,
  "checkpoint_activations": false,
  "effective_attention": false,
  "_name": "p_roberta_large",
  "untie_weights_roberta": false
}