{
    "train_dataset_settings": {
        "sources": [
            {
                "name": "uf_train",
                "records_path": "data/uf/train_preferences.jsonl",
                "sample_rate": 1.0
            }
        ],
        "dataset_type": "pair_preferences",
        "chat_settings": {
            "prompt_template": {
                "role_tag_mapping": {
                    "bot": "assistant",
                    "user": "user",
                    "system": "system"
                },
                "prefix_template": "<|start_header_id|>{role}<|end_header_id|>\n\n",
                "suffix_template": "<|eot_id|>"
            },
            "max_tokens_count": 4096,
            "keep_end": true
        }
    },
    "val_dataset_settings": {
        "sources": [
            {
                "name": "uf_val",
                "records_path": "data/uf/val_preferences.jsonl",
                "sample_rate": 1.0
            }
        ],
        "dataset_type": "pair_preferences",
        "chat_settings": {
            "prompt_template": {
                "role_tag_mapping": {
                    "bot": "assistant",
                    "user": "user",
                    "system": "system"
                },
                "prefix_template": "<|start_header_id|>{role}<|end_header_id|>\n\n",
                "suffix_template": "<|eot_id|>"
            },
            "keep_end": true,
            "max_tokens_count": 4096
        }
    },
    "cherry_pick_settings": {
        "generator_transformers_settings": {
            "num_beams": 1,
            "do_sample": true,
            "num_return_sequences": 5,
            "max_new_tokens": 256,
            "temperature": 0.9,
            "top_p": 1.0,
            "top_k": 40,
            "stop_strings": [
                "<|eot_id|>",
                "<|end_of_text|>"
            ],
            "stop_token_ids": [
                128009,
                128001
            ]
        },
        "custom_generation_settings": {
            "remove_prompt": true,
            "skip_special_tokens": true
        },
        "dataset_settings": {
            "sources": [
                {
                    "name": "uf150",
                    "records_path": "data/uf/val_chat150.jsonl",
                    "sample_rate": 1.0
                }
            ],
            "prompt_template": {
                "role_tag_mapping": {
                    "bot": "assistant",
                    "user": "user",
                    "system": "system"
                },
                "prefix_template": "<|start_header_id|>{role}<|end_header_id|>\n\n",
                "suffix_template": "<|eot_id|>"
            },
            "dataset_type": "chat",
            "max_tokens_count": 4096,
            "keep_end": true,
            "only_answer_loss": true
        },
        "metric_settings": [
            {
                "type": "kl",
                "parameters": {
                    "ref_logits_type": "reference",
                    "need_average": [
                        true,
                        false
                    ]
                }
            },
            {
                "type": "length",
                "parameters": {
                    "need_average": [
                        true,
                        false
                    ]
                }
            },
            {
                "type": "self_bleu",
                "parameters": {
                    "need_average": [
                        true,
                        false
                    ]
                }
            },
            {
                "type": "dist_n",
                "parameters": {
                    "need_average": [
                        true,
                        false
                    ]
                }
            },
            {
                "type": "diversity",
                "parameters": {
                    "need_average": [
                        true,
                        false
                    ]
                }
            },
            {
                "type": "perplexity",
                "parameters": {
                    "need_average": [
                        true,
                        false
                    ]
                }
            }
        ]
    },
    "model_settings": {
        "model_path": "models/llama31-8b",
        "model_type": "causal",
        "transformers_settings": {},
        "model_kwargs": {
            "attn_implementation": "flash_attention_2"
        },
        "liger_kernels_settings": {
            "use_rope": true,
            "use_cross_entropy": true,
            "use_geglu": true
        }
    },
    "tokenizer_settings": {},
    "special_tokens_settings": {
        "bos_token": "<|begin_of_text|>",
        "eos_token": "<|eot_id|>",
        "pad_token": "<|finetune_right_pad_id|>"
    },
    "trainer_settings": {
        "eval_strategy": "steps",
        "per_device_train_batch_size": 1,
        "per_device_eval_batch_size": 1,
        "gradient_accumulation_steps": 16,
        "gradient_checkpointing": true,
        "gradient_checkpointing_kwargs": {
            "use_reentrant": false
        },
        "save_only_model": true,
        "adam_beta1": 0.9,
        "adam_beta2": 0.95,
        "adam_epsilon": 1e-12,
        "eval_steps": 0.25,
        "save_strategy": "no",
        "load_best_model_at_end": false,
        "logging_steps": 1,
        "learning_rate": 1e-06,
        "num_train_epochs": 1,
        "lr_scheduler_type": "linear",
        "warmup_ratio": 0.03,
        "fp16": false,
        "bf16": true,
        "optim": "adamw_torch",
        "weight_decay": 0.0,
        "max_grad_norm": 2,
        "save_total_limit": 1,
        "dataloader_num_workers": 18,
        "deepspeed": "configs/train/deepspeed/ds_config_stage_2.json",
        "use_ref_model": true,
        "use_sft_model": false,
        "sync_ref_settings": {
            "sync_ref_model": false,
            "alpha": 0.6,
            "sync_steps": 1
        },
        "loss_settings": {
            "loss_type": "cal_dpo",
            "beta": 0.1
        },
        "logp_type": "cum_log_prob",
        "ce_coef": 0.0,
        "unll_coef": 0.0,
        "lam": 1.0
    },
    "logging_settings": {
        "project_name": "project_name",
        "entity": "entity",
        "run_name": "run_name"
    },
    "log_path": "train_output"
}