#!/bin/bash
#
# Runs llava/eval/infoseek/embed_doc_infoseek.py once per visible GPU (each
# worker gets its own chunk index), then runs the two merge scripts on the
# resulting per-checkpoint output directories.
#
# Usage: <this script> CKPT [BASE]
#   CKPT  checkpoint name under ${BASE_PATH}/checkpoints (required)
#   BASE  base model name; defaults to llava-next-interleave-qwen-0.5b
#
# Environment:
#   CUDA_VISIBLE_DEVICES  comma-separated GPU ids; defaults to "0"
#
# Exit status: 2 on bad usage, 1 if any embedding worker fails, otherwise the
# status of the merge steps.

set -euo pipefail

if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s CKPT [BASE]\n' "${0##*/}" >&2
  exit 2
fi

CKPT=$1
BASE=${2:-llava-next-interleave-qwen-0.5b}

# One worker per GPU id. Spaces are stripped so "0, 1" parses the same as "0,1".
gpu_list="${CUDA_VISIBLE_DEVICES:-0}"
gpu_list="${gpu_list// /}"
IFS=',' read -ra GPULIST <<< "$gpu_list"
CHUNKS=${#GPULIST[@]}

# Pass --doc_model_init only if BASE is llava-next-interleave-qwen-0.5b.
DOC_INIT_ARGS=()
if [[ "$BASE" == "llava-next-interleave-qwen-0.5b" ]]; then
  DOC_INIT_ARGS=(--doc_model_init)
fi

# Root directory that holds checkpoints/ and dataset/. While this is empty,
# every path below resolves from the filesystem root.
BASE_PATH=''

# Launch all chunks in the background and remember their PIDs so that each
# worker's exit status can be checked individually.
pids=()
for ((IDX = 0; IDX < CHUNKS; IDX++)); do
  # NOTE(review): --model-base is hard-coded and does not follow $BASE, even
  # though BASE can be overridden via $2 -- confirm this is intentional.
  CUDA_VISIBLE_DEVICES="${GPULIST[IDX]}" python llava/eval/infoseek/embed_doc_infoseek.py \
    --model-path "${BASE_PATH}/checkpoints/${CKPT}" \
    --model-base "${BASE_PATH}/checkpoints/llava-next-interleave-qwen-0.5b" \
    --save_path "${BASE_PATH}/dataset/infoseek/doc_embeds/${CKPT}" \
    --document_path "${BASE_PATH}/dataset/infoseek/infoseek_wikipedia/infoseek_kb_wiki_test.json" \
    --image_url_to_id_path "${BASE_PATH}/dataset/infoseek/infoseek_wikipedia/image_url_to_id.json" \
    --doc_use_image \
    --doc_use_table \
    --is_multimodal \
    --num-chunks "$CHUNKS" \
    --chunk-idx "$IDX" \
    --conv qwen_1_5 \
    --batch_size 1 \
    ${DOC_INIT_ARGS[@]+"${DOC_INIT_ARGS[@]}"} &
  pids+=("$!")
done

# A bare `wait` discards the workers' exit codes; wait on each PID instead so
# the merge steps never run on incomplete output.
failed=0
for pid in "${pids[@]}"; do
  if ! wait "$pid"; then
    failed=1
  fi
done
if ((failed)); then
  printf 'error: at least one embedding worker failed; skipping merge\n' >&2
  exit 1
fi

python llava/eval/merge_embeds.py \
  --embed_path "${BASE_PATH}/dataset/infoseek/doc_embeds/${CKPT}/inter_doc_embed"

python llava/eval/merge_doc_mappings.py \
  --mapping_path "${BASE_PATH}/dataset/infoseek/doc_embeds/${CKPT}/inter_doc_mapping"
