# Run configuration: names a model, a loss, a tag, dataset/image locations,
# an output directory, and a length limit.
# NOTE(review): the script that consumes this file is not visible here; the
# per-key notes below are inferred from key names and values — confirm them
# against the loader before relying on them.

# Model identifier; looks like a Hugging Face Hub repo id — TODO confirm.
model: microsoft/Phi-3.5-vision-instruct
# Loss selector; the set of accepted values is defined by the consumer.
loss: digit
# Free-form label; presumably tags this run or dataset — verify against consumer.
tag: refcoco
# The three paths below are relative; presumably resolved against the process
# working directory rather than this file's location — TODO confirm.
# NOTE(review): both dataset paths contain a doubled `data/data` segment;
# confirm this matches the on-disk layout.
data_path: ../data/data/refcoco_mix_base/annotations.json
image_dir: ../data/data/refcoco_mix_base/images
output_dir: ../data/ckpt
# Presumably the maximum sequence length in tokens — TODO confirm units.
max_length: 2048
