# Model arguments
# Starting checkpoint. The bracketed value is a placeholder and must be replaced
# with a real path or model id before launching (the name suggests the output
# of the warm-up SFT stage).
model_name_or_path: '[PATH_TO_MODEL_AFTER_WARMUP_SFT]'
model_revision: main
# Attention kernel and weight dtype used when loading the model.
attn_implementation: flash_attention_2
torch_dtype: bfloat16

# Data training arguments
#
# chat_template is a Jinja template stored as ONE YAML double-quoted scalar:
# "\n" is a real newline in the template text, "\\n" reaches Jinja as the
# escape sequence \n, and \" is a literal double quote.
#
# What the template renders:
#   * tools given: a system turn containing the first message's content when
#     its role is "system" (otherwise a default Qwen identity sentence),
#     followed by a "# Tools" section that lists every tool via `tojson`
#     inside <tools></tools> and explains the <tool_call> output format.
#   * no tools: the first system message, or the default system turn
#     'You are a helpful assistant.'.
#   * user, non-first system, and assistant messages without tool_calls:
#     '<|im_start|>' + role + newline + content + '<|im_end|>' + newline.
#     Content may be a string or a list of items; for a list, each item's
#     non-empty `.text` is concatenated.
#   * assistant messages with tool_calls: optional content, then one
#     <tool_call> JSON block per call (`tool_call.function` is unwrapped
#     when defined).
#   * tool messages: wrapped in <tool_response> tags inside a user turn;
#     consecutive tool messages share a single user turn.
#   * add_generation_prompt: appends '<|im_start|>assistant', a newline,
#     '<think>' and a newline, so generation starts inside a <think> block.
#
# NOTE: the scalar continues onto a second physical line (the break sits
# between `==` and "assistant" in an `elif`). YAML folds that break into a
# single space, so the template is unaffected; do not re-wrap the value at a
# point that falls inside a Jinja string literal.
# NOTE(review): a few string literals use a single-escaped \n (a raw newline
# inside the Jinja literal) while most use \\n; both appear to render a
# newline. Confirm before normalising.
chat_template: "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {%- if messages[0]['content'] is string %}{{- messages[0]['content'] }}{%- else %}{%- for item in messages[0]['content'] %}{%- if item.text is defined and item.text %}{{- item.text }}{%- endif %}{%- endfor %}{%- endif %}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\n' }}{%- if messages[0]['content'] is string %}{{- messages[0]['content'] }}{%- else %}{%- for item in messages[0]['content'] %}{%- if item.text is defined and item.text %}{{- item.text }}{%- endif %}{%- endfor %}{%- endif %}{{- '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' }}{%- if message.content is string %}{{- message.content }}{%- else %}{%- for item in message.content %}{%- if item.text is defined and item.text %}{{- item.text }}{%- endif %}{%- endfor %}{%- endif %}{{- '<|im_end|>' + '\\n' }}\n    {%- elif message.role == 
\"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {%- if message.content is string %}{{- '\\n' + message.content }}{%- else %}{{- '\\n' }}{%- for item in message.content %}{%- if item.text is defined and item.text %}{{- item.text }}{%- endif %}{%- endfor %}{%- endif %}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n<think>\n' }}\n{%- endif %}\n"

# Dataset used for the RL stage. The bracketed value is a placeholder and must
# be replaced with a real dataset path or id before launching.
# NOTE(review): the placeholder token spells "GENRATED" (sic); it is kept
# verbatim in case setup scripts or docs substitute on the exact token.
dataset_name: '[PATH_TO_GENRATED_RL_DATASETS]'

# System prompt describing the required output layout: reasoning inside
# <think>...</think>, then the final answer inside <answer>...</answer>.
# Written as a literal block scalar; "|-" strips the trailing newline so the
# value ends right after </answer>. The first line is intentionally long:
# breaking it would insert a newline into the prompt.
system_prompt: |-
  You are a helpful AI Assistant that provides well-reasoned and detailed responses. You first think about the reasoning process as an internal monologue and then provide the user with the answer. Respond in the following format, with the answer included between the <answer> and </answer> tags: <think>
  ...
  </think>
  <answer>
  Your answer
  </answer>

# Gradient accumulation
# Both values are deliberately identical in this config.
# NOTE(review): presumably this means one batch of generations is consumed per
# optimizer step (see TRL GRPOConfig `steps_per_generation`) - confirm, and
# keep the two values in sync when changing either.
steps_per_generation: 16
gradient_accumulation_steps: 16

# GRPO trainer config
bf16: true

# vLLM-backed generation, configured in "colocate" mode with a tensor-parallel
# size of 2 and a 0.3 GPU-memory fraction for vLLM.
# NOTE(review): presumably the remaining GPU memory is left for training since
# vLLM shares the device in colocate mode - confirm against the TRL docs.
use_vllm: true
vllm_mode: colocate
vllm_tensor_parallel_size: 2
vllm_gpu_memory_utilization: 0.3

# Policy-loss settings: beta is 0.0 and the clipping range is asymmetric
# (epsilon 0.2 versus epsilon_high 0.28).
# NOTE(review): presumably the DAPO-style "clip higher" setup with the KL
# penalty disabled - confirm.
beta: 0.0
epsilon: 0.2
epsilon_high: 0.28

# Project-specific switch (not visible in this file how it is consumed);
# presumably enables multi-turn tool calling with the `tools` list - confirm.
enable_multi_turn_tools: true

# Tool schemas offered to the model (JSON-Schema-style function definitions).
# Key order inside each entry is kept as-is: chat_template serialises tools
# with `tojson`, so reordering keys would presumably change the rendered prompt.
tools:
  # Tool: fetch one slice of a single metric's time series.
  - type: function
    function:
      name: get_timeseries_slice
      description: 'Get the current timeseries_slice of one of the time series in a given location, you should call this tool during thinking to better recognize the local fluctuations of a given period'
      parameters:
        type: object
        properties:
          metric_name:
            type: string
            description: 'The name of the metric to get the timeseries slice for'
          start:
            type: integer
            description: 'The start index of the timeseries slice'
          end:
            type: integer
            description: 'The end index of the timeseries slice'
        required:
          - metric_name
          - start
          - end
  # Tool: compare a slice of one metric against a slice of another (or the
  # same) metric. Per the descriptions, start indices are inclusive and end
  # indices are exclusive. Key order is kept unchanged on purpose.
  - type: function
    function:
      name: compare_timeseries_slice
      description: 'Compare two slices of timeseries data from potentially different metrics. Use this tool to analyze relationships, correlations, or differences between different timeseries segments.'
      parameters:
        type: object
        properties:
          metric_name_1:
            type: string
            description: 'The name or identifier of the first timeseries metric to slice. This should match or be contained in the metric names mentioned in the conversation.'
          start_1:
            type: integer
            description: 'The starting index (inclusive) for the first timeseries slice. Must be >= 0.'
          end_1:
            type: integer
            description: 'The ending index (exclusive) for the first timeseries slice. Must be > start_1.'
          metric_name_2:
            type: string
            description: 'The name or identifier of the second timeseries metric to slice. This should match or be contained in the metric names mentioned in the conversation.'
          start_2:
            type: integer
            description: 'The starting index (inclusive) for the second timeseries slice. Must be >= 0.'
          end_2:
            type: integer
            description: 'The ending index (exclusive) for the second timeseries slice. Must be > start_2.'
        required:
          - metric_name_1
          - start_1
          - end_1
          - metric_name_2
          - start_2
          - end_2

# --- Dataset splits and loading ---
dataset_train_split: train
dataset_test_split: test
trust_remote_code: true

# --- Rollout / generation ---
# add_first_think_token is project-specific; presumably it pairs with the
# '<think>' that chat_template's generation prompt appends - confirm.
add_first_think_token: true
temperature: 1.0
num_generations: 8
eval_num_generations: 1
num_iterations: 1
max_prompt_length: 3000
max_completion_length: 5000

# --- Rewards ---
# Three reward functions and three weights, listed in matching order.
# NOTE(review): weights are presumably positional, so re-enabling the
# commented-out `tool_count` entry also needs a fourth reward_weights value.
reward_funcs:
  - format
  - rca_len
  - rlvr_accuracy
  # - tool_count
reward_weights:
  - 1.0
  - 1.0
  - 5.0

# --- Optimisation ---
learning_rate: 2.0e-06
lr_scheduler_type: cosine_with_min_lr
lr_scheduler_kwargs:
  min_lr_rate: 0.1
warmup_ratio: 0.1
per_device_train_batch_size: 2
# NOTE(review): both max_steps and num_train_epochs are set. In Hugging Face
# TrainingArguments a positive max_steps overrides num_train_epochs, so the
# epoch count is presumably ignored - confirm which limit is intended.
max_steps: 500
num_train_epochs: 4
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
seed: 42

# --- Evaluation ---
# Evaluation runs before training starts (eval_on_start) and then every
# eval_steps steps.
do_eval: true
eval_strategy: steps
eval_steps: 10
eval_on_start: true
# NOTE(review): 758 is an unusual batch size; presumably chosen to match the
# size of the test split - confirm it is not a typo.
per_device_eval_batch_size: 758
eval_accumulation_steps: 1

# --- Checkpointing and output ---
# The bracketed output_dir value is a placeholder to replace before launching.
output_dir: '[PATH_TO_OUTPUT_DIR_FOR_DAPO]'
overwrite_output_dir: true
save_strategy: steps
save_steps: 100
save_total_limit: 3
# NOTE(review): per the Hugging Face TrainingArguments docs, save_only_model
# omits optimizer/scheduler state, so such checkpoints cannot be used to
# resume training - confirm this is acceptable.
save_only_model: true

# --- Hugging Face Hub ---
# push_to_hub is false, so hub_model_id / hub_strategy presumably have no
# effect until it is switched on.
push_to_hub: false
hub_model_id: ThinkTime-DAPO-Dev
hub_strategy: every_save

# --- Logging ---
report_to:
  - wandb
log_level: info
log_completions: true
logging_strategy: steps
logging_steps: 1
logging_first_step: true
