def get_train_hparams():
    """Return the default training hyperparameters as a nested dict.

    The result has one sub-dict per section (``dataset``, ``enc``, ``dec``,
    ``overseer``, ``train``, ``rewards``, ``eval``, ``debug``) plus a
    top-level ``seed``. Every call builds a brand-new dict, so the caller may
    mutate the returned value without affecting later calls.
    """
    # Chat-style prompt with a ``{payload}`` placeholder. The four leading
    # spaces after each newline (and the trailing four spaces) are part of the
    # string value and are spelled out explicitly here.
    prompt_template = (
        "<start_of_turn>user\n"
        "    Fill the below movie review as if you were a movie critic. No explanations or justifications are needed. Just write ONE review.\n"
        "    Movie Review {payload}:\n"
        "    <end_of_turn>\n"
        "    <start_of_turn>model\n"
        "    "
    )

    # One concrete prompt prefix per payload value "0".."3".
    # Earlier alternatives kept for reference:
    #   ["(This is a negative review).", "(This is a positive review)."]
    #   ["(The payload is 0).", "(The payload is 1)."]
    #   ["I am happy.", "I am sad"]
    #   ["", ""]
    prefixes = [prompt_template.replace("{payload}", str(idx)) for idx in range(4)]

    dataset_cfg = {
        # Other dataset names mentioned so far: "imdb", "simpleqa_100", "same_prompt".
        "name": "toy_cv",
        "same_prompt": "This movie was really",
        "payload_prefixes": prefixes,
        # Template that the stripper function uses to remove the payload from
        # the prompt; set to None if no payload is present. Examples:
        #   "Movie Review {payload}:"   -> "Movie Review 0: The movie 'The Titanic' is"
        #   "The payload is {payload}." -> "The payload is 0."
        #   "I am {payload}."           -> "I am happy."
        #   "(This is a {payload} review)."
        "payload_template": prompt_template,
        # NOTE(review): four prefixes are defined above but n_payloads is 2 --
        # confirm which of the two is intended.
        "n_payloads": 2,
        "batch_size": 256,
        # TODO: only applies to CV datasets
        "use_seeded_prompt": False,
        # True splits the train data into two non-shared halves, one for enc
        # and one for dec. The key's spelling is kept as-is because it is
        # presumably looked up verbatim elsewhere.
        "seperate_enc_dec_data": True,
        "max_enc_size": 20000,
        "max_dec_size": 20000,
        "max_eval_size": 20000,
    }

    enc_cfg = {
        # Other options: "lvwerra/gpt2-imdb", "openai-community/gpt2".
        "model_name": "unsloth/gemma-2-2b-it",
        # Here batch_size == mini_batch_size * gradient_accumulation_steps
        # (128 == 16 * 8); presumably that relation is required -- confirm.
        "batch_size": 128,
        "mini_batch_size": 16,
        "gradient_accumulation_steps": 8,
        # Default: 0.2. Set to 0.0 to disable KL control.
        "init_kl_coef": 0.05,
        "do_lora": True,
        "do_quant": False,
        "output_len": {"min": 42, "max": 51},
        "lr": 1.41e-5,
        "vllm_inference": True,
        "target_kl": 50.0,
        # Options: "eager", "flash_attention_2".
        "attn_implementation": "eager",
    }

    dec_cfg = {
        # Other options: "lvwerra/bert-imdb", "lvwerra/distilbert-imdb".
        "model_name": "distilbert/distilbert-base-uncased",
        "new_classification_head": True,
        "batch_size": 256,
        "mini_batch_size": 128,
        "gradient_accumulation_steps": 2,
        "lr": 2e-5,
        "weight_decay": 0.01,
    }

    overseer_cfg = {
        # Options: "string_matching", "reward_model", "llm".
        "type": "llm",
        "llm_model_name": None,
        "use_overseer": True,
        "eval_overseer": True,
        "dec_drop_sus": False,
        # Options: cv_uni_bad, cv_uni_good, cv_gender_og, cv_gender_good.
        "prompt_name": "cv_uni_good",
    }

    train_cfg = {
        "do_update_enc": True,
        "do_update_dec": True,
        "dec_update_params": {
            "head": True,
            "body": True,
        },
        "leak_password": False,
        "do_paraphrase": False,
        "wandb_log": True,
        # Options: "v1_cheat", "v2_no_cheat", "v2_named".
        "train_loop_version": "v2_named",
        "n_dec_updates": 2,
        "n_enc_updates": 5,
        "n_dec_deepfry_steps": 1,
        "n_initial_dec_updates": None,
        "update_enc_first": False,
        "num_epochs": 100000,
        "generation_temp": {
            "dec_training": 1.0,
            "enc_training": 1.0,
            "eval": 1.0,
        },
        "cost_cap": 1.0,
        "checkpoint_id": "temp",
        "save_checkpoints": False,
        "strip_instruct_tune": False,
    }

    rewards_cfg = {
        "use_probs_reward": False,
        "use_task_usefulness_reward": False,
        "use_length_reward": False,
        "clip_overseer_positive_rewards": False,
        "stego_scale": 1.0,
        "overseer_scale": 1.0,
        "task_scale": 0.1,
        "length_scale": 0.05,
        # Put a positive penalty here, or None for no penalty.
        "eos_trick": None,
    }

    eval_cfg = {
        "eval_every_n": 50,
        "n_eval_samples": 100,
        "datasets_to_eval": ["enc_train", "dec_train", "eval"],
    }

    debug_cfg = {
        "override_dec_batch": False,
    }

    # Assemble the sections in their established order.
    return {
        "dataset": dataset_cfg,
        "enc": enc_cfg,
        "dec": dec_cfg,
        "overseer": overseer_cfg,
        "train": train_cfg,
        "rewards": rewards_cfg,
        "eval": eval_cfg,
        "debug": debug_cfg,
        "seed": 42,
    }