{
    "train_dataset_settings": {
        "sources": [
            {
                "name": "train",
                "records_path": "ultra_interact/UltraInteract_sft_Math_CoT_train.jsonl",
                "system_prompt": "Please reason step by step, and put your final answer within \\boxed{}.",
                "sample_rate": 1
            }
        ],
        "dataset_type": "chat",
        "prompt_template": {
            "role_tag_mapping": {
                "bot": "assistant",
                "user": "user",
                "system": "system"
            },
            "prefix_template": "<|im_start|>{role}\n",
            "suffix_template": "<|im_end|>\n"
        },
        "only_answer_loss": true,
        "only_last_replica_loss": false,
        "max_tokens_count": 2048,
        "keep_end": true
    },
    "val_dataset_settings": {
        "sources": [
            {
                "name": "val",
                "records_path": "ultra_interact/UltraInteract_sft_Math_CoT_valid.jsonl",
                "system_prompt": "Please reason step by step, and put your final answer within \\boxed{}.",
                "sample_rate": 1
            }
        ],
        "dataset_type": "chat",
        "prompt_template": {
            "role_tag_mapping": {
                "bot": "assistant",
                "user": "user",
                "system": "system"
            },
            "prefix_template": "<|im_start|>{role}\n",
            "suffix_template": "<|im_end|>\n"
        },
        "only_answer_loss": true,
        "only_last_replica_loss": false,
        "keep_end": true,
        "max_tokens_count": 2048
    },
    "model_settings": {
        "model_path": "models/qwen2.5-7b",
        "model_type": "causal",
        "transformers_settings": {},
        "model_kwargs": {
            "attn_implementation": "flash_attention_2"
        },
        "liger_kernels_settings": {
            "use_rope": true,
            "use_cross_entropy": true,
            "use_geglu": true
        }
    },
    "tokenizer_settings": {
        "use_fast": true
    },
    "special_tokens_settings": {
        "bos_token": "<|endoftext|>",
        "eos_token": "<|im_end|>",
        "pad_token": "<|endoftext|>"
    },
    "trainer_settings": {
        "eval_strategy": "steps",
        "per_device_train_batch_size": 1,
        "per_device_eval_batch_size": 1,
        "gradient_accumulation_steps": 16,
        "gradient_checkpointing": true,
        "gradient_checkpointing_kwargs": {
            "use_reentrant": false
        },
        "adam_beta1": 0.9,
        "adam_beta2": 0.95,
        "adam_epsilon": 1e-12,
        "eval_steps": 0.25,
        "save_strategy": "no",
        "load_best_model_at_end": false,
        "logging_steps": 1,
        "learning_rate": 6e-06,
        "num_train_epochs": 1,
        "lr_scheduler_type": "linear",
        "warmup_ratio": 0.03,
        "fp16": false,
        "bf16": true,
        "optim": "adamw_torch",
        "weight_decay": 0.0,
        "max_grad_norm": 2,
        "save_total_limit": 1,
        "dataloader_num_workers": 12,
        "deepspeed": "configs/exp/deepspeed/ds_config_stage_2.json"
    },
    "logging_settings": {
        "project_name": "project_name",
        "entity": "entity",
        "task_name": "run_name"
    },
    "log_path": "train_output"
}