# Generate QA responses with lmsys/longchat-13b-16k for each gold_index value
# (0, 4, 9, 14, 19). Each iteration reads
#   qa_data/20_ours/selective_context_20_<gold_index>_2x.json
# and writes
#   qa_predictions/20_ours/selective_context_20_<gold_index>_2x-predictions.jsonl.gz
#
# NOTE(review): gold_index presumably selects the position of the gold
# document among the 20 in the context -- confirm against the data generator.
# NOTE(review): --max-memory-per-gpu 80 is presumably in GB -- confirm against
# the script's --help.
# --num-gpus was previously passed twice with the same value; it is now given
# once so the two copies cannot drift apart on a later edit.
for gold_index in 0 4 9 14 19; do
    python3.9 -u ./scripts/get_qa_responses_from_longchat.py \
        --input-path "qa_data/20_ours/selective_context_20_${gold_index}_2x.json" \
        --num-gpus 1 \
        --max-new-tokens 100 \
        --batch-size 1 \
        --max-memory-per-gpu 80 \
        --model lmsys/longchat-13b-16k \
        --output-path "qa_predictions/20_ours/selective_context_20_${gold_index}_2x-predictions.jsonl.gz"
done

# Run the evaluation script over each predictions file under
# qa_predictions/20_ours/, one per gold_index value. The scores file is written
# next to its input, with "-scores" inserted before the .jsonl.gz extension.
for gold_index in 0 4 9 14 19; do
    # Path stem shared by the predictions file and its scores file.
    predictions_stem="qa_predictions/20_ours/selective_context_20_${gold_index}_2x-predictions"
    python3.9 -u ./scripts/evaluate_qa_responses.py \
        --input-path "${predictions_stem}.jsonl.gz" \
        --output-path "${predictions_stem}-scores.jsonl.gz"
done