#!/usr/bin/env bash
#
# Launches src/imagenet/train_model.py twice, back to back, with identical
# hyperparameters; the two runs differ only in the value passed to
# --position-embeddings ("learnable" first, then "rope").
#
# The training script path is relative, so invoke this from the directory
# that contains src/.
#
# Both runs are always attempted, even if the first one fails. The final
# status is non-zero if either run failed.

# run_training POSITION_EMBEDDINGS
#   Starts one training run with WANDB_MODE=online and CUDA_VISIBLE_DEVICES=3
#   set in the environment of the python process only.
#   Arguments: $1 - value passed to --position-embeddings
#   Returns:   exit status of the python process
#   NOTE(review): --num-routed-experts 0 / --topk 0 presumably disable expert
#   routing for these runs - confirm against train_model.py.
run_training() {
  WANDB_MODE=online CUDA_VISIBLE_DEVICES=3 python src/imagenet/train_model.py \
    --lr 0.0005 --epochs 300 --batch-size 256 --seed 0 \
    --position-embeddings "$1" \
    --num-shared-experts 1 --num-routed-experts 0 --topk 0 \
    --save-dir /root/weights/imagenet --data-path /root/datasets/imagenet
}

failed=0
for position_embeddings in learnable rope; do
  if ! run_training "$position_embeddings"; then
    printf 'training failed for --position-embeddings %s\n' \
      "$position_embeddings" >&2
    failed=1
  fi
done

# The last command sets the script's status without calling `exit`, so the
# file remains safe to source from an interactive shell.
[ "$failed" -eq 0 ]