# 1. Move your data to the LLaMA-Factory data path.
#
# Replace the BASE_PATH placeholder with the path of this repository before
# running. Every expansion below is quoted so that paths containing spaces or
# glob characters (the placeholder itself contains `[`...`]`) reach the Python
# script as single, literal arguments.
BASE_PATH="[PATH_to_THIS_REPO]"

SRC_PROCESSED_DATA_DIR="${BASE_PATH}/data_construction/data/llamafactory_format"
TGT_LLAMA_FACTORY_DATA_DIR="${BASE_PATH}/training/LLaMA-Factory-main/data"

#######################################
# Run 6_llama_factory_data_register.py for one dataset.
# Globals (read):
#   BASE_PATH, SRC_PROCESSED_DATA_DIR, TGT_LLAMA_FACTORY_DATA_DIR
# Arguments:
#   $1 - value passed as --src_data_name
#   $2 - value passed as --tgt_data_name
#   $3 - value passed as --length
# Returns:
#   The exit status of the python command.
#######################################
register_dataset() {
    python "${BASE_PATH}/data_construction/6_llama_factory_data_register.py" \
        --src_data_dir "${SRC_PROCESSED_DATA_DIR}" \
        --tgt_data_dir "${TGT_LLAMA_FACTORY_DATA_DIR}" \
        --src_data_name "$1" \
        --tgt_data_name "$2" \
        --length "$3" \
        --data_info_file_path "${TGT_LLAMA_FACTORY_DATA_DIR}/dataset_info.json"
}

# short-po / s2l-po / expand-po
SRC_DATA_NAME="supporting_random_shor2long_musique_short_context_example_infer_with_all_text_by_Qwen2.5-7B-Instruct-wo-yarn_all_EM_selector_success_raw"
NEW_NAME="musique_qwen"
LENGTH=8k

register_dataset "${SRC_DATA_NAME}" "${NEW_NAME}" "${LENGTH}"

# long-po
SRC_DATA_NAME="8k_musique_reallong_context_example_infer_with_all_text_by_Qwen2.5-7B-Instruct-wo-yarn_all_EM_selector_success"
NEW_NAME="musique_qwen_reallong"
LENGTH=8k

register_dataset "${SRC_DATA_NAME}" "${NEW_NAME}" "${LENGTH}"