# Model arguments
# Model revision, dtype, vocabulary sizes and sequence-length limits.
model_revision: main
torch_dtype: bfloat16
# NOTE(review): the two vocab sizes presumably have to match the text and
# image tokenizers configured under "loading arguments" — confirm.
text_vocab_size: 128256
image_vocab_size: 32768
max_prompt_length: 256
max_completion_length: 512
# NOTE(review): -1 presumably means "no step cap", leaving num_train_epochs
# to determine the run length — confirm against the trainer.
max_steps: -1
## loading arguments
# Entries set to null are placeholders: fill them in before launching a run.
model_path: null  # change to the path of the model
llama_tokenizer_path: null  # change to the path of the llama tokenizer

selftok_config: config/renderer/renderer-eval.yml
selftok_tokenizer_path: null  # change to the path of the selftok tokenizer
port: 56950

## saving arguments
# Entries set to null are placeholders: fill them in before launching a run.
save_strategy: steps
save_steps: 50
output_dir: null  # change to the path of the output directory
overwrite_output_dir: true
save_only_model: true
mox_path: null  # change to the path of the mox directory
# Relative path; the base directory it resolves against depends on the consumer.
image_save_path: ../../../gen_img
###################################

# Data training arguments
dataloader_num_workers: 16
dataset_configs:
- all

## dataset arguments
# The entries below are placeholders: each loads as null (or a one-element
# list containing null) until it is filled in.
dataset_type: null  # geneval or DPG

data_source:
- null  # geneval or DPG

datap:
- null  # change to the path of the dataset

###################################

# GRPO trainer config
bf16: true

## vllm config
# NOTE(review): use_vllm is false, so the two vllm_* settings below are
# presumably ignored by the trainer — confirm.
use_vllm: false
vllm_device: auto
vllm_gpu_memory_utilization: 0.7

## eval config
# Evaluation is switched off. 'no' must stay quoted: unquoted it would load
# as a boolean under YAML 1.1 parsers instead of the string "no".
do_eval: false
eval_strategy: 'no'  # other value noted by the author: steps
eval_steps: 100000

## training config
task_type: 't2i'
per_device_eval_batch_size: 8
per_device_train_batch_size: 32
# NOTE(review): num_generations equals per_device_train_batch_size here;
# the trainer may require a fixed relation between the two — confirm
# before changing either value.
num_generations: 32
num_train_epochs: 10
num_iterations: 1
gradient_accumulation_steps: 1
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false


## lr scheduler setting
lr_scheduler_type: cosine
use_self_lr_scheduler: false
learning_rate: 1.5e-06
# Single definition of warmup_ratio. This section previously also declared
# `warmup_ratio: 0.002` above; duplicate mapping keys are invalid YAML, and
# last-wins parsers already resolved to 0.02, so that value is the one kept.
warmup_ratio: 0.02
# NOTE(review): the keys below look like parameters of the custom schedule
# toggled by use_self_lr_scheduler (currently false) — confirm.
warm_up_steps: 50
convert_steps: 250
convert_lr: 8.0e-7
min_lr: 1.0e-7

###################################

# log config
# logging_dir is a relative path; the base directory it resolves against
# depends on the consumer.
logging_dir: ../../logs
log_level: info
# NOTE(review): strategy "steps" with an interval of 1 presumably logs on
# every training step — confirm.
logging_steps: 1
logging_strategy: steps

###################################

# huggingface setting
hub_model_id: Selftok_Zero
hub_strategy: every_save
# The list item below loads as the string "none", not as YAML null.
report_to:
- none
push_to_hub: false

###################################

# generation config
# NOTE(review): these look like sampling and classifier-free-guidance
# settings; their exact semantics are defined by the consumer and are not
# visible in this file — confirm before tuning.
cfg_type: adaptive
seed: 42
temperature: 0.9
weight: 1.0
generate_with_cfg: true
guidance_scale: 10.0
reverse: true
# NOTE(review): entropy_bound and min_cfg presumably parameterise the
# "adaptive" cfg_type — confirm.
entropy_bound: 2
min_cfg: 1
###################################

# loss config
# NOTE(review): epsilon_low / epsilon_high are presumably the lower and upper
# clipping bounds of the policy-ratio loss, applied when set_epsilon is
# true — confirm against the trainer.
set_epsilon: true
epsilon_low: 0.2
epsilon_high: 0.4

###################################

# reward config
# Entries set to null are placeholders: fill them in before launching a run.
use_clip_score: false
use_std_reward: true
use_api: false
reward_model_path: null  # change to the path of the reward model
# Example: reward_list: ["geneval score", "dpg score"]
reward_list:
- null  # change to the reward list
