
# Launch ./eval_harness.py through torchrun on a single visible GPU, passing
# the model-shape settings below plus a fixed list of evaluation tasks.
#
# Required environment:
#   beam - value forwarded to --beam. It is not assigned in this script, so
#          it must be provided by the caller, e.g. `beam=4 bash <this script>`.

# Model shape forwarded to the harness; presumably must match the checkpoint
# found under ./check_point -- TODO confirm against the training config.
num_layers=12
hidden_size=768
num_attn_heads=12
# NOTE(review): init_std is assigned but never referenced or exported below,
# so it has no effect on the launched command. Kept for reference.
init_std=0.02

# Fail fast with a clear message instead of letting `--beam` reach the
# harness with no value when beam is unset or empty.
: "${beam:?beam must be set in the environment (value for --beam)}"

# Restrict the launched process to GPU index 4.
export CUDA_VISIBLE_DEVICES=4

# --no-load-rng was previously listed twice; it is passed once here.
torchrun --master_port 23999 ./eval_harness.py \
    --task_list arc_easy,arc_challenge,boolqnew,logiqa,sciq,winogrande,piqa,race,social_iqa \
    --load ./check_point \
    --seq-length 2048 \
    --micro-batch-size 1 \
    --tokenizer-model ./pythia \
    --tensor-model-parallel-size 1 \
    --num-layers "${num_layers}" \
    --hidden-size "${hidden_size}" \
    --num-attention-heads "${num_attn_heads}" \
    --max-position-embeddings 2048 \
    --deepspeed \
    --fp16 \
    --no-load-rng \
    --swiglu \
    --use-rotary-position-embeddings \
    --rotary-percent 0.25 \
    --attention-softmax-in-fp32 \
    --inftype position_beam \
    --beam "${beam}"






