{"model": {"gpt2": {"name": "gpt2", "gpt2_type": "gpt2", "from_pretrained": false, "lm_head": true}, "name": "per_token_iql", "dataset": {"name": "wordle_human_dataset", "cache_id": "d_train"}, "alpha": 0.005, "gamma": 0.99, "beta": 0.0, "transition_weight": 0.0, "clip_weight": null, "value_max": null, "value_min": null, "detach_v": false, "detach_q": false, "detach_pi": false, "double_q": true, "seperate_policy": true, "seperate_target": true, "tau": 0.5, "exp_weights": true, "dm_margin": 0.0, "advanced_mlp": false, "cql_temp": 1.0, "load": {"name": "per_token_iql", "checkpoint_path": "outputs/wordle/wordle_bc_test1/model_converted.pkl", "strict_load": false}}, "train_dataset": {"token_reward": {"name": "constant_token_reward", "c": 0.0}, "name": "wordle_human_dataset", "max_len": null, "file_path": "data/wordle/random_human_tweet_data_200.json", "use_true_word": false, "index_file": "data/wordle/human_train_idxs.json", "top_p": null, "cache_id": "d_train"}, "eval_dataset": {"token_reward": {"name": "constant_token_reward", "c": 0.0}, "name": "wordle_human_dataset", "max_len": null, "file_path": "data/wordle/random_human_tweet_data_200.json", "use_true_word": false, "index_file": "data/wordle/human_eval_idxs.json", "top_p": null, "cache_id": "d_eval"}, "evaluator": {"env": {"vocab": {"name": "vocab", "vocab_path": "data/wordle/word_lists/wordle_official_200.txt", "cache_path": null, "fill_cache": true}, "name": "wordle_env"}, "name": "iql_evaluator", "verbose": true, "kind": "beam", "generation_kwargs": {"max_generation_len": 6, "beam_width": 1, "temp": 1.0, "top_k": null, "top_p": null, "exp_adv": true, "adv_weight": 16.0, "adv_clip": 0.0, "include_logits": true, "include_adv": true}}, "train": {"save_checkpoint_dir": "/Users/charliesnell/current_projects/ILQL/ILQL_main/src/utils/../../outputs/wordle/wordle_iql_official_test1/", "optim_state_path": null, "epochs": 1, "dataloader_workers": 1, "bsize": 64, "grad_accum_steps": 16, "log_every": 256, "eval_every": 1024, "save_every": 16384, "max_checkpoints": 1, "eval_bsize": 16, "eval_batches": 1, "lr": 1e-05, "weight_decay": 0.0, "hard_update_every": null, "max_steps": null, "loss": {"v_loss_weight": 1.0, "q_loss_weight": 1.0, "awac_weight": 0.0, "cql_loss_weight": 0.0001, "dm_loss_weight": 0.0, "mc_returns": false}}, "wandb": {"use_wandb": true, "wandb_project": "wordle_iql"}, "system": {"device": "cpu", "num_processes": 1, "use_fp16": false}}