{"local_rank": -1, "model_name_or_path": "gpt2", "dataset_name": "gsm8k", "gradient_checkpointing": false, "per_device_train_batch_size": 8, "per_device_eval_batch_size": 8, "do_train": true, "do_eval": true, "output_dir": "./outputs", "overwrite_output_dir": true, "seed": 555, "num_train_epochs": 4, "evaluation_strategy": "steps", "eval_steps": 100, "logging_steps": 100, "run_name": "fw-top50-ef21-bw-top50-ef21-topk-lazy-pt05-test", "report_to": "wandb", "remove_unused_columns": false, "learning_rate": 5e-05}