#!/usr/bin/env bash
#
# Launch the "classification from Scratch" sweep: for every dataset, train both
# models (luna_transformer_mra_head and luna_transformer) with three seeds each.
# All runs are started in the background at once; every dataset/model pair is
# pinned to its own device through CUDA_VISIBLE_DEVICES.
#
# Usage: run from the directory that contains main.py.
#
# Exit status: 0 (and prints "Done") when every run succeeded; 1 when at least
# one run exited non-zero, after listing the failed runs on stderr.
#
# NOTE(review): the previous version passed `--output_dir experiments` and then
# `--output_dir output_ts` on every command line. Only `output_ts` is kept here,
# assuming main.py uses argparse's default "store" action where the last
# occurrence wins -- confirm against main.py's option parser.
# NOTE(review): every run is given the same --records_file; whether main.py
# tolerates concurrent writers to that file is not visible from this script.

set -euo pipefail

# Seeds shared by every dataset/model combination.
readonly SEEDS=(2010 666 1608)

# PIDs and human-readable labels of the launched runs (parallel arrays).
pids=()
labels=()

#######################################
# Start one background training run per seed on a single device.
# Globals:   SEEDS (read), pids (appended), labels (appended)
# Arguments: $1 - value for CUDA_VISIBLE_DEVICES
#            $2 - value for --model
#            $3 - dataset name (used for --data_dir and --name)
#            $4 - value for --num_layers
#            $5 - value for --d_model
#            $6.. - extra arguments forwarded verbatim to main.py
#######################################
launch_seeds() {
  local device=$1 model=$2 dataset=$3 num_layers=$4 d_model=$5
  shift 5
  local seed
  for seed in "${SEEDS[@]}"; do
    # Per-command assignment: the device is visible to this child only and
    # does not linger in the script's own environment.
    CUDA_VISIBLE_DEVICES="$device" python main.py \
      --model "$model" \
      --data_dir "Multivariate_ts/${dataset}" \
      --comment "classification from Scratch" \
      --records_file Classification_records.xls \
      --output_dir output_ts \
      --data_class tsra \
      --pattern TRAIN \
      --val_pattern TEST \
      --epochs 400 \
      --lr 0.001 \
      --optimizer RAdam \
      --pos_encoding learnable \
      --task classification \
      --key_metric accuracy \
      --name "$dataset" \
      --dropout 0.1 \
      --activation gelu \
      --batch_size 128 \
      --num_layers "$num_layers" \
      --num_heads 8 \
      --d_model "$d_model" \
      --dim_feedforward 256 \
      "$@" \
      --seed "$seed" &
    pids+=("$!")
    labels+=("${model} ${dataset} seed=${seed}")
  done
}

#######################################
# Launch both models for one dataset, each model on its own device.
# Arguments: $1 - dataset name
#            $2 - value for --num_layers
#            $3 - value for --d_model
#            $4 - device for luna_transformer_mra_head
#            $5 - device for luna_transformer
#            $6.. - extra arguments forwarded verbatim to main.py
#######################################
run_dataset() {
  local dataset=$1 num_layers=$2 d_model=$3 mra_device=$4 luna_device=$5
  shift 5
  launch_seeds "$mra_device" luna_transformer_mra_head \
    "$dataset" "$num_layers" "$d_model" "$@"
  launch_seeds "$luna_device" luna_transformer \
    "$dataset" "$num_layers" "$d_model" "$@"
}

#           dataset              layers d_model  mra_head device / luna device
run_dataset EthanolConcentration 1 64 \
  MIG-4292f4e1-5ab2-539e-8382-b619c60672e7 \
  MIG-dfac4f62-f2db-55e4-8c7a-25d3a3f7f78a \
  --subsample_factor 2
run_dataset Handwriting 1 128 \
  MIG-e7d7c833-7141-5e89-8c37-5f53cafd315a \
  MIG-eb12e9f1-d7b5-5e98-b84e-ccb8f011d51c
run_dataset Heartbeat 1 64 \
  MIG-37820fec-ed84-55b7-83fc-e1d35afcbf63 \
  MIG-3d887f40-0dd8-55b3-9a73-5dd12bf0a69c
run_dataset JapaneseVowels 3 128 \
  MIG-fcba644e-1d26-5d3c-94b8-556955cfa83f \
  MIG-d8a609a7-c3b6-5af5-a62d-f3443f22e8d9
run_dataset SelfRegulationSCP1 3 128 \
  MIG-7a693c97-8d95-5cf5-a8bf-3ccf78d18749 \
  MIG-7efa83e6-4727-5d65-9d20-a010c7e8ef9b \
  --subsample_factor 2
run_dataset SpokenArabicDigits 3 64 \
  MIG-47d82139-3a34-5a1f-80fe-36af994f32b7 \
  MIG-41bf22e3-b1b2-5122-a54a-308d17608850
run_dataset FaceDetection 3 128 \
  MIG-43efe543-e994-5e75-8cec-1daf478baa59 \
  MIG-6fc53879-8209-5074-a3fc-a5daf4e4499d

# A bare `wait` returns 0 regardless of how the children exited, so wait on
# each PID individually to find out which runs failed.
failed=0
for i in "${!pids[@]}"; do
  if ! wait "${pids[i]}"; then
    printf 'FAILED: %s (pid %s)\n' "${labels[i]}" "${pids[i]}" >&2
    failed=$((failed + 1))
  fi
done

if ((failed > 0)); then
  printf '%d of %d runs failed\n' "$failed" "${#pids[@]}" >&2
  exit 1
fi

echo "Done"