{
    "base_dir": "/mnt/sdc1/finetuning",
    "wandb_cache_dir": "{base_dir}/wandb_cache",
    "torch_dtype": "bfloat16",
    "device": "cuda",

    "preprocess": true,

    "popularity_sd": 0.5,
    "popularity_alpha": 0.1,
    "score_sd": 0.1,

    "popularity_west_coast": 1,
    "popularity_central": 2,
    "popularity_east_coast": 3,

    "popularity_ctr": 0,

    "base_model": "HuggingFaceTB/SmolLM2-360M-Instruct",

    "data": "mind-sport-abs",
    "data_path": "{base_dir}/data/{data}_scores_{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",
    "data_path_preprocessed": "{base_dir}/data/{data}_scores_{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}_preprocessed",

    "exp_id": "mind-sport-abs-orthogonal-base",
    "experiment_path": "{base_dir}/experiments/{exp_id}",

    "pad_token": "<|endoftext|>",
    "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",

    "max_input_length": 1024,

    "messages_template": [
        {
            "role": "system",
            "content": "You are an editor of a news website. Your task is to generate a headline for each news article that will attract the most readers. The headline should be less than 30 words. Only respond with the headline."
        },
        {"role": "user", "content": "The news abstract is `{Abstract}`"},
        {"role": "assistant", "content": "{Title}"}
    ],

    "sft": {
        "model": "HuggingFaceTB/SmolLM2-360M-Instruct",
        "quantize_8bit": false,
        "lora_config": {
            "r": 16,
            "lora_alpha": 32,
            "lora_dropout": 0.1,
            "task_type": "CAUSAL_LM"
        },
        "resume_from_checkpoint": false,
        "early_stopping_patience": 3,
        "training_args": {
            "learning_rate": 2e-4,
            "per_device_train_batch_size": 8,
            "num_train_epochs": 1,
            "logging_steps": 100,
            "packing": false,
            "max_seq_length": 512,
            "do_eval": true,
            "optim": "adamw_8bit",
            "weight_decay": 0.01,
            "save_steps": 1000
        },
        "sample_training": false,
        "sample_training_seed": 42,
        "sample_training_size": 20000,

        "training_output_path": "{base_dir}/experiments/{exp_id}/sft_training/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",
        "model_output_path": "{base_dir}/experiments/{exp_id}/sft_models/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}"
    },

    "reward": {

        "train_set": "reward",
        "valid_set": "reward_valid",
        
        "has_peft": true,
        "model": "{base_dir}/experiments/mind-sport-abs-orthogonal-base/sft_models/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",

        "reward_column_name": "score",

        "lora_config": {
            "r": [8],
            "lora_alpha": 32,
            "lora_dropout": 0.1
        },
        "resume_from_checkpoint": false,
        "early_stopping_patience": 3,
        "seeds": [6550],
        "training_model": "sequence_classification",
        "training_args": {
            "learning_rates": [1e-3, 2e-4, 6e-4, 8e-4, 2e-3],
            "per_device_train_batch_size": 8,
            "num_train_epochs": [1],
            "logging_steps": 100,
            "remove_unused_columns": false,
            "eval_steps": 1250,
            "save_steps": 1250,
            "eval_strategy": "steps"
        },
        "trainer_args": {},
        "sample_training": false,
        "sample_training_seed": 42,
        "sample_training_size": 10000,

        "training_output_path": "{base_dir}/experiments/{exp_id}/reward_training/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",
        "model_output_path": "{base_dir}/experiments/{exp_id}/reward_models/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}"
    },

    "ppo": {
        "use_wandb": true,
        "has_peft": true,
        "model": "{base_dir}/experiments/mind-sport-abs-orthogonal-base/sft_models/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",

        "reward_has_peft": true,
        "reward_model_base": "{base_dir}/experiments/mind-sport-abs-orthogonal-base/sft_models/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",
        "reward_model_peft": "{base_dir}/experiments/{exp_id}/reward_models/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",

        "lora_config": {
            "r": 8,
            "lora_alpha": 32,
            "lora_dropout": 0.1,
            "task_type": "CAUSAL_LM"
        },
        "resume_from_checkpoint": false,
        "reward_training_model": "sequence_classification",
        "training_args": {
            "learning_rate": 2e-4,
            "per_device_train_batch_size": 8,
            "num_train_epochs": 1,
            "logging_steps": 100,
            "eval_steps": 100,
            "save_steps": 100,
            "local_rollout_forward_batch_size": 4,
            "missing_eos_penalty": 1.0,
            "gradient_accumulation_steps": 1,
            "stop_token": "eos",
            "report_to": null,
            "kl_coef": 0.02
        },
        "trainer_args": {},
        "sample_training": false,
        "sample_training_seed": 42,
        "sample_training_size": 10000,

        "training_output_path": "{base_dir}/experiments/{exp_id}/ppo_training/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",
        "model_output_path": "{base_dir}/experiments/{exp_id}/ppo_models/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}"
    },

    "generation_has_peft": true,
    "generation_model_base": "{base_dir}/experiments/mind-sport-abs-orthogonal-base/sft_models/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",
    "generation_model_peft": "{base_dir}/experiments/{exp_id}/ppo_models/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",
    "generated_output_path": "{base_dir}/experiments/{exp_id}/generated/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}",


    "sample_generation": true,
    "sample_generation_seed": 42,
    "sample_generation_size": 3000,
    "generation_batch_size": 32,

    "generation_config": {
        "max_new_tokens": 53,
        "do_sample": false,
        "num_return_sequences": 1
    },

    "evaluation_batch_size": 16,
    "evaluated_output_path": "{base_dir}/experiments/{exp_id}/evaluated/{popularity_sd}_{popularity_alpha}_{score_sd}_{popularity_ctr}"
}