#!/bin/bash
# This file contains commands to run experiments for different models on POPGym environments:
# - RATE (Recurrent Action Transformer with Memory)
# - DT (Decision Transformer)
# - BC-MLP (Behavioral Cloning with MLP)
# - BC-LSTM (Behavioral Cloning with LSTM)

# === RATE ===
# 0
# Train RATE (TrXL arch mode) on the popgym-AutoencodeEasy-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 \
  --data.path-to-dataset=data/POPGym/popgym-AutoencodeEasy-v0/ \
  --start-seed=1 --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=128 --model.d-model=128 \
  --model.dropatt=0.2 --model.dropout=0.2 \
  --model.env-name=popgym-AutoencodeEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 \
  --model.num-mem-tokens=10 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --tensorboard-dir=runs/POPGym/AutoencodeEasy-v0/RATE \
  --text=popgym \
  --training.batch-size=32 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=35 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 1
# Train RATE (TrXL arch mode) on the popgym-AutoencodeHard-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=312 \
  --data.path-to-dataset=data/POPGym/popgym-AutoencodeHard-v0/ \
  --start-seed=1 --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=128 --model.d-model=128 \
  --model.dropatt=0.2 --model.dropout=0.2 \
  --model.env-name=popgym-AutoencodeHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 \
  --model.num-mem-tokens=10 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --tensorboard-dir=runs/POPGym/AutoencodeHard-v0/RATE \
  --text=popgym \
  --training.batch-size=32 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=104 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 2
# Train RATE (TrXL arch mode) on the popgym-AutoencodeMedium-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=210 \
  --data.path-to-dataset=data/POPGym/popgym-AutoencodeMedium-v0/ \
  --start-seed=1 --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=128 --model.d-model=128 \
  --model.dropatt=0.2 --model.dropout=0.2 \
  --model.env-name=popgym-AutoencodeMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 \
  --model.num-mem-tokens=10 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --tensorboard-dir=runs/POPGym/AutoencodeMedium-v0/RATE \
  --text=popgym \
  --training.batch-size=32 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=70 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 3
# Train RATE (TrXL arch mode) on the popgym-BattleshipEasy-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=66 \
  --data.path-to-dataset=data/POPGym/popgym-BattleshipEasy-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=64 --model.d-head=128 --model.d-inner=128 --model.d-model=32 \
  --model.dropatt=0.05 --model.dropout=0.3 \
  --model.env-name=popgym-BattleshipEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/BattleshipEasy-v0/RATE \
  --text=popgym \
  --training.batch-size=256 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=22 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 4
# Train RATE (TrXL arch mode) on the popgym-BattleshipHard-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=147 \
  --data.path-to-dataset=data/POPGym/popgym-BattleshipHard-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=144 --model.d-head=128 --model.d-inner=128 --model.d-model=32 \
  --model.dropatt=0.05 --model.dropout=0.3 \
  --model.env-name=popgym-BattleshipHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/BattleshipHard-v0/RATE \
  --text=popgym \
  --training.batch-size=256 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=49 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 5
# Train RATE (TrXL arch mode) on the popgym-BattleshipMedium-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=102 \
  --data.path-to-dataset=data/POPGym/popgym-BattleshipMedium-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=100 --model.d-head=128 --model.d-inner=128 --model.d-model=32 \
  --model.dropatt=0.05 --model.dropout=0.3 \
  --model.env-name=popgym-BattleshipMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/BattleshipMedium-v0/RATE \
  --text=popgym \
  --training.batch-size=256 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=34 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 6
# Train RATE (TrXL arch mode) on the popgym-ConcentrationEasy-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 \
  --data.path-to-dataset=data/POPGym/popgym-ConcentrationEasy-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=52 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.2 --model.dropout=0 \
  --model.env-name=popgym-ConcentrationEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 \
  --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/ConcentrationEasy-v0/RATE \
  --text=popgym \
  --training.batch-size=256 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=35 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 7
# Train RATE (TrXL arch mode) on the popgym-ConcentrationHard-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 \
  --data.path-to-dataset=data/POPGym/popgym-ConcentrationHard-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=52 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.2 --model.dropout=0 \
  --model.env-name=popgym-ConcentrationHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 \
  --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/ConcentrationHard-v0/RATE \
  --text=popgym \
  --training.batch-size=256 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=35 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 8
# Train RATE (TrXL arch mode) on the popgym-ConcentrationMedium-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=210 \
  --data.path-to-dataset=data/POPGym/popgym-ConcentrationMedium-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=104 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.2 --model.dropout=0 \
  --model.env-name=popgym-ConcentrationMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 \
  --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/ConcentrationMedium-v0/RATE \
  --text=popgym \
  --training.batch-size=256 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=70 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 9
# Train RATE (TrXL arch mode) on the popgym-CountRecallEasy-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 \
  --data.path-to-dataset=data/POPGym/popgym-CountRecallEasy-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=26 --model.d-head=64 --model.d-inner=32 --model.d-model=32 \
  --model.dropatt=0.05 --model.dropout=0.1 \
  --model.env-name=popgym-CountRecallEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 \
  --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=2 --model.n-layer=10 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/CountRecallEasy-v0/RATE \
  --text=popgym \
  --training.batch-size=32 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=18 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 10
# Train RATE (TrXL arch mode) on the popgym-CountRecallHard-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=210 \
  --data.path-to-dataset=data/POPGym/popgym-CountRecallHard-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=16 --model.d-head=64 --model.d-inner=32 --model.d-model=32 \
  --model.dropatt=0.05 --model.dropout=0.1 \
  --model.env-name=popgym-CountRecallHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 \
  --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=2 --model.n-layer=10 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/CountRecallHard-v0/RATE \
  --text=popgym \
  --training.batch-size=32 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=70 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 11
# Train RATE (TrXL arch mode) on the popgym-CountRecallMedium-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 \
  --data.path-to-dataset=data/POPGym/popgym-CountRecallMedium-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=26 --model.d-head=64 --model.d-inner=32 --model.d-model=32 \
  --model.dropatt=0.05 --model.dropout=0.1 \
  --model.env-name=popgym-CountRecallMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 \
  --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=2 --model.n-layer=10 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/CountRecallMedium-v0/RATE \
  --text=popgym \
  --training.batch-size=32 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=35 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 12
# Train RATE (TrXL arch mode) on the popgym-HigherLowerEasy-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 \
  --data.path-to-dataset=data/POPGym/popgym-HigherLowerEasy-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-HigherLowerEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 \
  --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=15 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/HigherLowerEasy-v0/RATE \
  --text=popgym \
  --training.batch-size=32 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=18 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 13
# Train RATE (TrXL arch mode) on the popgym-HigherLowerHard-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=156 \
  --data.path-to-dataset=data/POPGym/popgym-HigherLowerHard-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-HigherLowerHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 \
  --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=15 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/HigherLowerHard-v0/RATE \
  --text=popgym \
  --training.batch-size=32 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=52 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 14
# Train RATE (TrXL arch mode) on the popgym-HigherLowerMedium-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 \
  --data.path-to-dataset=data/POPGym/popgym-HigherLowerMedium-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-HigherLowerMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 \
  --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=15 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/HigherLowerMedium-v0/RATE \
  --text=popgym \
  --training.batch-size=32 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=35 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 15
# Train RATE (TrXL arch mode) on the popgym-LabyrinthEscapeEasy-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 \
  --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeEasy-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=64 --model.d-model=128 \
  --model.dropatt=0.2 --model.dropout=0.3 \
  --model.env-name=popgym-LabyrinthEscapeEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 \
  --model.num-mem-tokens=2 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/LabyrinthEscapeEasy-v0/RATE \
  --text=popgym \
  --training.batch-size=128 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=18 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 16
# Train RATE (TrXL arch mode) on the popgym-LabyrinthEscapeHard-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
# NOTE(review): here max-length (999) != context-length * sections (111 * 3 = 333),
# unlike most RATE commands in this file — confirm these values are intended.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=999 \
  --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeHard-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=64 --model.d-model=128 \
  --model.dropatt=0.2 --model.dropout=0.3 \
  --model.env-name=popgym-LabyrinthEscapeHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 \
  --model.num-mem-tokens=2 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/LabyrinthEscapeHard-v0/RATE \
  --text=popgym \
  --training.batch-size=128 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=111 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 17
# Train RATE (TrXL arch mode) on the popgym-LabyrinthEscapeMedium-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
# NOTE(review): here max-length (834) != context-length * sections (93 * 3 = 279),
# unlike most RATE commands in this file — confirm these values are intended.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=834 \
  --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeMedium-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=64 --model.d-model=128 \
  --model.dropatt=0.2 --model.dropout=0.3 \
  --model.env-name=popgym-LabyrinthEscapeMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 \
  --model.num-mem-tokens=2 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/LabyrinthEscapeMedium-v0/RATE \
  --text=popgym \
  --training.batch-size=128 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=93 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 18
# Train RATE (TrXL arch mode) on the popgym-LabyrinthExploreEasy-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=93 \
  --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreEasy-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=32 \
  --model.dropatt=0.2 --model.dropout=0.1 \
  --model.env-name=popgym-LabyrinthExploreEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 \
  --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=2 --model.n-layer=10 \
  --model.num-mem-tokens=10 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/LabyrinthExploreEasy-v0/RATE \
  --text=popgym \
  --training.batch-size=256 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=31 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 \
  --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 19
# Train RATE (TrXL arch mode) on the popgym-LabyrinthExploreHard-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=378 \
  --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreHard-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=32 \
  --model.dropatt=0.2 --model.dropout=0.1 \
  --model.env-name=popgym-LabyrinthExploreHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 \
  --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=2 --model.n-layer=10 \
  --model.num-mem-tokens=10 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/LabyrinthExploreHard-v0/RATE \
  --text=popgym \
  --training.batch-size=256 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=126 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 \
  --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 20
# Train RATE (TrXL arch mode) on the popgym-LabyrinthExploreMedium-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=219 \
  --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreMedium-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=32 \
  --model.dropatt=0.2 --model.dropout=0.1 \
  --model.env-name=popgym-LabyrinthExploreMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 \
  --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=2 --model.n-layer=10 \
  --model.num-mem-tokens=10 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/LabyrinthExploreMedium-v0/RATE \
  --text=popgym \
  --training.batch-size=256 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=73 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 \
  --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 21
# Train RATE (TrXL arch mode) on the popgym-MineSweeperEasy-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
# NOTE(review): --text=agent_sweep here, while the non-MineSweeper RATE
# commands in this file pass --text=popgym — confirm this is intended.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=15 \
  --data.path-to-dataset=data/POPGym/popgym-MineSweeperEasy-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=16 --model.d-head=64 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-MineSweeperEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=40 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=8 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/MineSweeperEasy-v0/RATE \
  --text=agent_sweep \
  --training.batch-size=128 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=5 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 22
# Train RATE (TrXL arch mode) on the popgym-MineSweeperHard-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
# NOTE(review): --text=agent_sweep here, while the non-MineSweeper RATE
# commands in this file pass --text=popgym — confirm this is intended.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=21 \
  --data.path-to-dataset=data/POPGym/popgym-MineSweeperHard-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=64 --model.d-head=64 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-MineSweeperHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=40 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=8 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/MineSweeperHard-v0/RATE \
  --text=agent_sweep \
  --training.batch-size=128 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=7 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 23
# Train RATE (TrXL arch mode) on the popgym-MineSweeperMedium-v0 dataset,
# start-seed=1 / end-seed=3. Flags are grouped by prefix, in argv order.
# The TensorBoard directory follows the runs/POPGym/<Env>-v0/RATE layout used
# by the other RATE commands in this file (the trailing "0" had been misplaced
# after "RATE" instead of after "-v").
# NOTE(review): --text=agent_sweep here, while the non-MineSweeper RATE
# commands in this file pass --text=popgym — confirm this is intended.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=18 \
  --data.path-to-dataset=data/POPGym/popgym-MineSweeperMedium-v0/ \
  --end-seed=3 \
  --model-mode=RATE \
  --model.act-dim=36 --model.d-head=64 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-MineSweeperMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=40 \
  --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=8 \
  --model.num-mem-tokens=30 --model.padding-idx=-10 \
  --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/MineSweeperMedium-v0/RATE \
  --text=agent_sweep \
  --training.batch-size=128 \
  --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=6 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=3 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 \
  --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 \
  --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 24
# RATE (--model-mode=RATE) on the popgym-MultiarmedBanditEasy-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditEasy-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=10 --model.d-head=128 --model.d-inner=128 --model.d-model=64 \
  --model.dropatt=0.2 --model.dropout=0.3 --model.env-name=popgym-MultiarmedBanditEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=2 --model.n-layer=2 --model.num-mem-tokens=15 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/MultiarmedBanditEasy-v0/RATE --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=67 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 25
# RATE (--model-mode=RATE) on the popgym-MultiarmedBanditHard-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=603 --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditHard-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=30 --model.d-head=128 --model.d-inner=128 --model.d-model=64 \
  --model.dropatt=0.2 --model.dropout=0.3 --model.env-name=popgym-MultiarmedBanditHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=2 --model.n-layer=2 --model.num-mem-tokens=15 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/MultiarmedBanditHard-v0/RATE --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=201 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 26
# RATE (--model-mode=RATE) on the popgym-MultiarmedBanditMedium-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=402 --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditMedium-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=20 --model.d-head=128 --model.d-inner=128 --model.d-model=64 \
  --model.dropatt=0.2 --model.dropout=0.3 --model.env-name=popgym-MultiarmedBanditMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=2 --model.n-layer=2 --model.num-mem-tokens=15 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/MultiarmedBanditMedium-v0/RATE --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=134 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 27
# RATE (--model-mode=RATE) on the popgym-NoisyPositionOnlyCartPoleEasy-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=198 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleEasy-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.2 --model.env-name=popgym-NoisyPositionOnlyCartPoleEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 --model.num-mem-tokens=5 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleEasy-v0/RATE --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=66 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0003 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 28
# RATE (--model-mode=RATE) on the popgym-NoisyPositionOnlyCartPoleHard-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=75 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleHard-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.2 --model.env-name=popgym-NoisyPositionOnlyCartPoleHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 --model.num-mem-tokens=5 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleHard-v0/RATE --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=25 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0003 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 29
# RATE (--model-mode=RATE) on the popgym-NoisyPositionOnlyCartPoleMedium-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=114 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleMedium-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.2 --model.env-name=popgym-NoisyPositionOnlyCartPoleMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 --model.mrv-act=relu \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 --model.num-mem-tokens=5 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleMedium-v0/RATE --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=38 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0003 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 30
# RATE (--model-mode=RATE) on the popgym-NoisyPositionOnlyPendulumEasy-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumEasy-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=1 --model.d-head=32 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.3 --model.env-name=popgym-NoisyPositionOnlyPendulumEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=2 --model.n-layer=10 --model.num-mem-tokens=2 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumEasy-v0/RATE --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=67 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0003 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 31
# RATE (--model-mode=RATE) on the popgym-NoisyPositionOnlyPendulumHard-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumHard-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=1 --model.d-head=32 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.3 --model.env-name=popgym-NoisyPositionOnlyPendulumHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=2 --model.n-layer=10 --model.num-mem-tokens=2 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumHard-v0/RATE --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=67 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0003 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 32
# RATE (--model-mode=RATE) on the popgym-NoisyPositionOnlyPendulumMedium-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumMedium-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=1 --model.d-head=32 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.3 --model.env-name=popgym-NoisyPositionOnlyPendulumMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=70 --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=2 --model.n-layer=10 --model.num-mem-tokens=2 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumMedium-v0/RATE --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=67 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0003 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 33
# RATE (--model-mode=RATE) on the popgym-PositionOnlyCartPoleEasy-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleEasy-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.2 --model.dropout=0 --model.env-name=popgym-PositionOnlyCartPoleEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=10 --model.num-mem-tokens=2 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleEasy-v0/RATE --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=67 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 34
# RATE (--model-mode=RATE) on the popgym-PositionOnlyCartPoleHard-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=603 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleHard-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.2 --model.dropout=0 --model.env-name=popgym-PositionOnlyCartPoleHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=10 --model.num-mem-tokens=2 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleHard-v0/RATE --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=201 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 35
# RATE (--model-mode=RATE) on the popgym-PositionOnlyCartPoleMedium-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=402 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleMedium-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.2 --model.dropout=0 --model.env-name=popgym-PositionOnlyCartPoleMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=300 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=10 --model.num-mem-tokens=2 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleMedium-v0/RATE --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=134 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 36
# RATE (--model-mode=RATE) on the popgym-PositionOnlyPendulumEasy-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumEasy-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=1 --model.d-head=256 --model.d-inner=64 --model.d-model=128 \
  --model.dropatt=0.05 --model.dropout=0 --model.env-name=popgym-PositionOnlyPendulumEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=40 --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=2 --model.num-mem-tokens=5 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/PositionOnlyPendulumEasy-v0/RATE --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=67 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=1 --training.learning-rate=0.0003 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 37
# RATE (--model-mode=RATE) on the popgym-PositionOnlyPendulumHard-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=102 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumHard-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=1 --model.d-head=256 --model.d-inner=64 --model.d-model=128 \
  --model.dropatt=0.05 --model.dropout=0 --model.env-name=popgym-PositionOnlyPendulumHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=40 --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=2 --model.num-mem-tokens=5 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/PositionOnlyPendulumHard-v0/RATE --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=34 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=1 --training.learning-rate=0.0003 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 38
# RATE (--model-mode=RATE) on the popgym-PositionOnlyPendulumMedium-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=153 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumMedium-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=1 --model.d-head=256 --model.d-inner=64 --model.d-model=128 \
  --model.dropatt=0.05 --model.dropout=0 --model.env-name=popgym-PositionOnlyPendulumMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=40 --model.mrv-act=relu \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=2 --model.num-mem-tokens=5 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/PositionOnlyPendulumMedium-v0/RATE --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=51 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=1 --training.learning-rate=0.0003 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 39
# RATE (--model-mode=RATE) on the popgym-RepeatFirstEasy-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 --data.path-to-dataset=data/POPGym/popgym-RepeatFirstEasy-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 --model.env-name=popgym-RepeatFirstEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=40 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=8 --model.num-mem-tokens=30 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/RepeatFirstEasy-v0/RATE --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=18 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=1 --training.learning-rate=0.001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 40
# RATE (--model-mode=RATE) on the popgym-RepeatFirstHard-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=834 --data.path-to-dataset=data/POPGym/popgym-RepeatFirstHard-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 --model.env-name=popgym-RepeatFirstHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=40 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=8 --model.num-mem-tokens=30 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/RepeatFirstHard-v0/RATE --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=93 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=1 --training.learning-rate=0.001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 41
# RATE (--model-mode=RATE) on the popgym-RepeatFirstMedium-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=417 --data.path-to-dataset=data/POPGym/popgym-RepeatFirstMedium-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 --model.env-name=popgym-RepeatFirstMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=40 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=8 --model.num-mem-tokens=30 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/RepeatFirstMedium-v0/RATE --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=47 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=1 --training.learning-rate=0.001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 42
# RATE (--model-mode=RATE) on the popgym-RepeatPreviousEasy-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousEasy-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=4 --model.d-head=128 --model.d-inner=256 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.1 --model.env-name=popgym-RepeatPreviousEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=4 --model.num-mem-tokens=10 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/RepeatPreviousEasy-v0/RATE --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=18 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 43
# RATE (--model-mode=RATE) on the popgym-RepeatPreviousHard-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=156 --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousHard-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=4 --model.d-head=128 --model.d-inner=256 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.1 --model.env-name=popgym-RepeatPreviousHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=4 --model.num-mem-tokens=10 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/RepeatPreviousHard-v0/RATE --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=52 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 44
# RATE (--model-mode=RATE) on the popgym-RepeatPreviousMedium-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousMedium-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=4 --model.d-head=128 --model.d-inner=256 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.1 --model.env-name=popgym-RepeatPreviousMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=2 --model.n-layer=4 --model.num-mem-tokens=10 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/RepeatPreviousMedium-v0/RATE --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=35 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 45
# RATE (--model-mode=RATE) on the popgym-VelocityOnlyCartpoleEasy-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleEasy-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 --model.env-name=popgym-VelocityOnlyCartpoleEasy \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=30 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleEasy-v0/RATE --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=67 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 46
# RATE (--model-mode=RATE) on the popgym-VelocityOnlyCartpoleHard-v0 dataset; seed flags: start=1, end=3.
# NOTE(review): --training.context-length=67 matches the Easy variant, while the Easy/Medium
# variants use max-length = 3 x context-length (201/67, 402/134); here max-length is 597.
# Confirm 67 is intended (value left unchanged).
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=597 --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleHard-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 --model.env-name=popgym-VelocityOnlyCartpoleHard \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=30 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleHard-v0/RATE --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=67 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 47
# RATE (--model-mode=RATE) on the popgym-VelocityOnlyCartpoleMedium-v0 dataset; seed flags: start=1, end=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=402 --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleMedium-v0/ \
  --end-seed=3 --model-mode=RATE \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 --model.env-name=popgym-VelocityOnlyCartpoleMedium \
  --model.ext-len=0 --model.mem-at-end=True --model.mem-len=100 --model.mrv-act=relu \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=30 \
  --model.padding-idx=-10 --model.skip-dec-ffn=True --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleMedium-v0/RATE --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=134 --training.epochs=400 --training.final-tokens=10000000 \
  --training.grad-norm-clip=5 --training.learning-rate=0.0001 --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=3 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True

# === DT ===
# 0: DT on the popgym-AutoencodeEasy-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 --data.path-to-dataset=data/POPGym/popgym-AutoencodeEasy-v0/ \
  --start-seed=1 --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=128 --model.d-model=128 --model.dropatt=0.2 --model.dropout=0.2 \
  --model.env-name=popgym-AutoencodeEasy --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --tensorboard-dir=runs/POPGym/AutoencodeEasy-v0/DT --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=35 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 1: DT on the popgym-AutoencodeHard-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=312 --data.path-to-dataset=data/POPGym/popgym-AutoencodeHard-v0/ \
  --start-seed=1 --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=128 --model.d-model=128 --model.dropatt=0.2 --model.dropout=0.2 \
  --model.env-name=popgym-AutoencodeHard --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --tensorboard-dir=runs/POPGym/AutoencodeHard-v0/DT --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=104 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 2: DT on the popgym-AutoencodeMedium-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=210 --data.path-to-dataset=data/POPGym/popgym-AutoencodeMedium-v0/ \
  --start-seed=1 --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=128 --model.d-model=128 --model.dropatt=0.2 --model.dropout=0.2 \
  --model.env-name=popgym-AutoencodeMedium --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --tensorboard-dir=runs/POPGym/AutoencodeMedium-v0/DT --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=70 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 3: DT on the popgym-BattleshipEasy-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=66 --data.path-to-dataset=data/POPGym/popgym-BattleshipEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=64 --model.d-head=128 --model.d-inner=128 --model.d-model=32 --model.dropatt=0.05 --model.dropout=0.3 \
  --model.env-name=popgym-BattleshipEasy --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/BattleshipEasy-v0/DT --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=22 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 4: DT on the popgym-BattleshipHard-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=147 --data.path-to-dataset=data/POPGym/popgym-BattleshipHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=144 --model.d-head=128 --model.d-inner=128 --model.d-model=32 --model.dropatt=0.05 --model.dropout=0.3 \
  --model.env-name=popgym-BattleshipHard --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/BattleshipHard-v0/DT --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=49 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 5: DT on the popgym-BattleshipMedium-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=102 --data.path-to-dataset=data/POPGym/popgym-BattleshipMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=100 --model.d-head=128 --model.d-inner=128 --model.d-model=32 --model.dropatt=0.05 --model.dropout=0.3 \
  --model.env-name=popgym-BattleshipMedium --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/BattleshipMedium-v0/DT --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=34 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 6: DT on the popgym-ConcentrationEasy-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 --data.path-to-dataset=data/POPGym/popgym-ConcentrationEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=52 --model.d-head=32 --model.d-inner=128 --model.d-model=256 --model.dropatt=0.2 --model.dropout=0 \
  --model.env-name=popgym-ConcentrationEasy --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/ConcentrationEasy-v0/DT --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=35 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 7: DT on the popgym-ConcentrationHard-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 --data.path-to-dataset=data/POPGym/popgym-ConcentrationHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=52 --model.d-head=32 --model.d-inner=128 --model.d-model=256 --model.dropatt=0.2 --model.dropout=0 \
  --model.env-name=popgym-ConcentrationHard --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/ConcentrationHard-v0/DT --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=35 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 8: DT on the popgym-ConcentrationMedium-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=210 --data.path-to-dataset=data/POPGym/popgym-ConcentrationMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=104 --model.d-head=32 --model.d-inner=128 --model.d-model=256 --model.dropatt=0.2 --model.dropout=0 \
  --model.env-name=popgym-ConcentrationMedium --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/ConcentrationMedium-v0/DT --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=70 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 9: DT on the popgym-CountRecallEasy-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 --data.path-to-dataset=data/POPGym/popgym-CountRecallEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=26 --model.d-head=64 --model.d-inner=32 --model.d-model=32 --model.dropatt=0.05 --model.dropout=0.1 \
  --model.env-name=popgym-CountRecallEasy --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/CountRecallEasy-v0/DT --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=18 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=5 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 10: DT on the popgym-CountRecallHard-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=210 --data.path-to-dataset=data/POPGym/popgym-CountRecallHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=16 --model.d-head=64 --model.d-inner=32 --model.d-model=32 --model.dropatt=0.05 --model.dropout=0.1 \
  --model.env-name=popgym-CountRecallHard --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/CountRecallHard-v0/DT --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=70 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=5 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 11: DT on the popgym-CountRecallMedium-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 --data.path-to-dataset=data/POPGym/popgym-CountRecallMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=26 --model.d-head=64 --model.d-inner=32 --model.d-model=32 --model.dropatt=0.05 --model.dropout=0.1 \
  --model.env-name=popgym-CountRecallMedium --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=2 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/CountRecallMedium-v0/DT --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=35 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=5 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.1 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 12: DT on the popgym-HigherLowerEasy-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 --data.path-to-dataset=data/POPGym/popgym-HigherLowerEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-HigherLowerEasy --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/HigherLowerEasy-v0/DT --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=18 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=5 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 13: DT on the popgym-HigherLowerHard-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=156 --data.path-to-dataset=data/POPGym/popgym-HigherLowerHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-HigherLowerHard --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/HigherLowerHard-v0/DT --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=52 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=5 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 14: DT on the popgym-HigherLowerMedium-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 --data.path-to-dataset=data/POPGym/popgym-HigherLowerMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-HigherLowerMedium --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/HigherLowerMedium-v0/DT --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=35 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=5 --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 15: DT on the popgym-LabyrinthEscapeEasy-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=64 --model.d-model=128 --model.dropatt=0.2 --model.dropout=0.3 \
  --model.env-name=popgym-LabyrinthEscapeEasy --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/LabyrinthEscapeEasy-v0/DT --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=18 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=5 --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 16: DT on the popgym-LabyrinthEscapeHard-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=999 --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=64 --model.d-model=128 --model.dropatt=0.2 --model.dropout=0.3 \
  --model.env-name=popgym-LabyrinthEscapeHard --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/LabyrinthEscapeHard-v0/DT --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=111 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=5 --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 17: DT on the popgym-LabyrinthEscapeMedium-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=834 --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=64 --model.d-model=128 --model.dropatt=0.2 --model.dropout=0.3 \
  --model.env-name=popgym-LabyrinthEscapeMedium --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/LabyrinthEscapeMedium-v0/DT --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=93 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=5 --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 18: DT on the popgym-LabyrinthExploreEasy-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=93 --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=32 --model.dropatt=0.2 --model.dropout=0.1 \
  --model.env-name=popgym-LabyrinthExploreEasy --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/LabyrinthExploreEasy-v0/DT --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=31 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 19: DT on the popgym-LabyrinthExploreHard-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=378 --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=32 --model.dropatt=0.2 --model.dropout=0.1 \
  --model.env-name=popgym-LabyrinthExploreHard --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/LabyrinthExploreHard-v0/DT --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=126 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 20: DT on the popgym-LabyrinthExploreMedium-v0 dataset.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=219 --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=32 --model.dropatt=0.2 --model.dropout=0.1 \
  --model.env-name=popgym-LabyrinthExploreMedium --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/LabyrinthExploreMedium-v0/DT --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=73 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=False --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 21: DT on the popgym-MineSweeperEasy-v0 dataset.
# NOTE(review): this command passes --text=agent_sweep, whereas most commands in this file pass --text=popgym - confirm the tag is intended.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=15 --data.path-to-dataset=data/POPGym/popgym-MineSweeperEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=16 --model.d-head=64 --model.d-inner=128 --model.d-model=256 --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-MineSweeperEasy --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/MineSweeperEasy-v0/DT --text=agent_sweep \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=5 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 22: DT on the popgym-MineSweeperHard-v0 dataset.
# NOTE(review): this command passes --text=agent_sweep, whereas most commands in this file pass --text=popgym - confirm the tag is intended.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=21 --data.path-to-dataset=data/POPGym/popgym-MineSweeperHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=64 --model.d-head=64 --model.d-inner=128 --model.d-model=256 --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-MineSweeperHard --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/MineSweeperHard-v0/DT --text=agent_sweep \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=7 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 23: DT on the popgym-MineSweeperMedium-v0 dataset.
# NOTE(review): this command passes --text=agent_sweep, whereas most commands in this file pass --text=popgym - confirm the tag is intended.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=18 --data.path-to-dataset=data/POPGym/popgym-MineSweeperMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=36 --model.d-head=64 --model.d-inner=128 --model.d-model=256 --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-MineSweeperMedium --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 --model.mrv-act=no_act \
  --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 --model.num-mem-tokens=0 --model.padding-idx=-10 --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 --online-inference.desired-return-1=1 --online-inference.episode-timeout=1001 --online-inference.use-argmax=False \
  --start-seed=1 --tensorboard-dir=runs/POPGym/MineSweeperMedium-v0/DT --text=agent_sweep \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 --training.ckpt-epoch=25 \
  --training.context-length=6 --training.epochs=400 --training.final-tokens=10000000 --training.grad-norm-clip=3 --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True --training.lr-end-factor=0.01 --training.online-inference=True --training.sections=1 \
  --training.use-cosine-decay=True --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 24: DT (Decision Transformer) on popgym-MultiarmedBanditEasy: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 \
  --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=10 --model.d-head=128 --model.d-inner=128 --model.d-model=64 \
  --model.dropatt=0.2 --model.dropout=0.3 \
  --model.env-name=popgym-MultiarmedBanditEasy \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/MultiarmedBanditEasy-v0/DT \
  --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=67 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 25: DT (Decision Transformer) on popgym-MultiarmedBanditHard: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=603 \
  --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=30 --model.d-head=128 --model.d-inner=128 --model.d-model=64 \
  --model.dropatt=0.2 --model.dropout=0.3 \
  --model.env-name=popgym-MultiarmedBanditHard \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/MultiarmedBanditHard-v0/DT \
  --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=201 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 26: DT (Decision Transformer) on popgym-MultiarmedBanditMedium: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=402 \
  --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=20 --model.d-head=128 --model.d-inner=128 --model.d-model=64 \
  --model.dropatt=0.2 --model.dropout=0.3 \
  --model.env-name=popgym-MultiarmedBanditMedium \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/MultiarmedBanditMedium-v0/DT \
  --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=134 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 27: DT (Decision Transformer) on popgym-NoisyPositionOnlyCartPoleEasy: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=198 \
  --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.2 \
  --model.env-name=popgym-NoisyPositionOnlyCartPoleEasy \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleEasy-v0/DT \
  --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=66 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 28: DT (Decision Transformer) on popgym-NoisyPositionOnlyCartPoleHard: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=75 \
  --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.2 \
  --model.env-name=popgym-NoisyPositionOnlyCartPoleHard \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleHard-v0/DT \
  --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=25 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 29: DT (Decision Transformer) on popgym-NoisyPositionOnlyCartPoleMedium: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=114 \
  --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=32 --model.d-inner=128 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.2 \
  --model.env-name=popgym-NoisyPositionOnlyCartPoleMedium \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=2 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleMedium-v0/DT \
  --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=38 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 30: DT (Decision Transformer) on popgym-NoisyPositionOnlyPendulumEasy: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 \
  --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=1 --model.d-head=32 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.3 \
  --model.env-name=popgym-NoisyPositionOnlyPendulumEasy \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumEasy-v0/DT \
  --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=67 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 31: DT (Decision Transformer) on popgym-NoisyPositionOnlyPendulumHard: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 \
  --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=1 --model.d-head=32 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.3 \
  --model.env-name=popgym-NoisyPositionOnlyPendulumHard \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumHard-v0/DT \
  --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=67 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 32: DT (Decision Transformer) on popgym-NoisyPositionOnlyPendulumMedium: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 \
  --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=1 --model.d-head=32 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.1 --model.dropout=0.3 \
  --model.env-name=popgym-NoisyPositionOnlyPendulumMedium \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=4 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumMedium-v0/DT \
  --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=67 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 33: DT (Decision Transformer) on popgym-PositionOnlyCartPoleEasy: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 \
  --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.2 --model.dropout=0 \
  --model.env-name=popgym-PositionOnlyCartPoleEasy \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleEasy-v0/DT \
  --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=67 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 34: DT (Decision Transformer) on popgym-PositionOnlyCartPoleHard: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=603 \
  --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.2 --model.dropout=0 \
  --model.env-name=popgym-PositionOnlyCartPoleHard \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleHard-v0/DT \
  --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=201 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 35: DT (Decision Transformer) on popgym-PositionOnlyCartPoleMedium: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=402 \
  --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=256 --model.d-model=256 \
  --model.dropatt=0.2 --model.dropout=0 \
  --model.env-name=popgym-PositionOnlyCartPoleMedium \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleMedium-v0/DT \
  --text=popgym \
  --training.batch-size=32 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=134 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.1 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 36: DT (Decision Transformer) on popgym-PositionOnlyPendulumEasy: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 \
  --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=1 --model.d-head=256 --model.d-inner=64 --model.d-model=128 \
  --model.dropatt=0.05 --model.dropout=0 \
  --model.env-name=popgym-PositionOnlyPendulumEasy \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=4 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/PositionOnlyPendulumEasy-v0/DT \
  --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=67 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=1 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 37: DT (Decision Transformer) on popgym-PositionOnlyPendulumHard: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=102 \
  --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=1 --model.d-head=256 --model.d-inner=64 --model.d-model=128 \
  --model.dropatt=0.05 --model.dropout=0 \
  --model.env-name=popgym-PositionOnlyPendulumHard \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=4 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/PositionOnlyPendulumHard-v0/DT \
  --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=34 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=1 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 38: DT (Decision Transformer) on popgym-PositionOnlyPendulumMedium: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=153 \
  --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=1 --model.d-head=256 --model.d-inner=64 --model.d-model=128 \
  --model.dropatt=0.05 --model.dropout=0 \
  --model.env-name=popgym-PositionOnlyPendulumMedium \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=4 --model.n-head-ca=0 --model.n-layer=2 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/PositionOnlyPendulumMedium-v0/DT \
  --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=51 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=1 \
  --training.learning-rate=0.0003 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.01 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 39: DT (Decision Transformer) on popgym-RepeatFirstEasy: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 \
  --data.path-to-dataset=data/POPGym/popgym-RepeatFirstEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 \
  --model.env-name=popgym-RepeatFirstEasy \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/RepeatFirstEasy-v0/DT \
  --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=18 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=1 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 40: DT (Decision Transformer) on popgym-RepeatFirstHard: start-seed=1, end-seed=3.
# NOTE(review): context-length=93 with max-length=834, whereas most other DT
# commands in this section use context-length = max-length / 3 (here 278);
# confirm this value is intentional.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=834 \
  --data.path-to-dataset=data/POPGym/popgym-RepeatFirstHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 \
  --model.env-name=popgym-RepeatFirstHard \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/RepeatFirstHard-v0/DT \
  --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=93 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=1 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 41: DT (Decision Transformer) on popgym-RepeatFirstMedium: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=417 \
  --data.path-to-dataset=data/POPGym/popgym-RepeatFirstMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 \
  --model.env-name=popgym-RepeatFirstMedium \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=8 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/RepeatFirstMedium-v0/DT \
  --text=popgym \
  --training.batch-size=64 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=139 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=1 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 42: DT (Decision Transformer) on popgym-RepeatPreviousEasy: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=54 \
  --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=128 --model.d-inner=256 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-RepeatPreviousEasy \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=4 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/RepeatPreviousEasy-v0/DT \
  --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=18 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 43: DT (Decision Transformer) on popgym-RepeatPreviousHard: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=156 \
  --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=128 --model.d-inner=256 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-RepeatPreviousHard \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=4 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/RepeatPreviousHard-v0/DT \
  --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=52 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 44: DT (Decision Transformer) on popgym-RepeatPreviousMedium: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=105 \
  --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=4 --model.d-head=128 --model.d-inner=256 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.1 \
  --model.env-name=popgym-RepeatPreviousMedium \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=4 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/RepeatPreviousMedium-v0/DT \
  --text=popgym \
  --training.batch-size=256 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=35 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.01 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=False \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 45: DT (Decision Transformer) on popgym-VelocityOnlyCartpoleEasy: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=201 \
  --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleEasy-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 \
  --model.env-name=popgym-VelocityOnlyCartpoleEasy \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleEasy-v0/DT \
  --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=67 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 46: DT (Decision Transformer) on popgym-VelocityOnlyCartpoleHard: start-seed=1, end-seed=3.
# NOTE(review): context-length=67 with max-length=597, whereas most other DT
# commands in this section use context-length = max-length / 3 (here 199);
# confirm this value is intentional.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=597 \
  --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleHard-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 \
  --model.env-name=popgym-VelocityOnlyCartpoleHard \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleHard-v0/DT \
  --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=67 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True
# 47: DT (Decision Transformer) on popgym-VelocityOnlyCartpoleMedium: start-seed=1, end-seed=3.
python3 src/train.py \
  --arch-mode=TrXL \
  --data.gamma=1 --data.max-length=402 \
  --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleMedium-v0/ \
  --end-seed=3 --model-mode=DT \
  --model.act-dim=2 --model.d-head=64 --model.d-inner=32 --model.d-model=64 \
  --model.dropatt=0.1 --model.dropout=0.2 \
  --model.env-name=popgym-VelocityOnlyCartpoleMedium \
  --model.ext-len=0 --model.mem-at-end=False --model.mem-len=0 \
  --model.mrv-act=no_act --model.n-head=8 --model.n-head-ca=0 --model.n-layer=10 \
  --model.num-mem-tokens=0 --model.padding-idx=-10 \
  --model.skip-dec-ffn=False --model.state-dim=-1 \
  --online-inference.best_checkpoint_metric=ReturnsMean_1.0 \
  --online-inference.desired-return-1=1 \
  --online-inference.episode-timeout=1001 \
  --online-inference.use-argmax=False \
  --start-seed=1 \
  --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleMedium-v0/DT \
  --text=popgym \
  --training.batch-size=128 --training.beta-1=0.9 --training.beta-2=0.999 \
  --training.ckpt-epoch=25 --training.context-length=134 --training.epochs=400 \
  --training.final-tokens=10000000 --training.grad-norm-clip=5 \
  --training.learning-rate=0.0001 \
  --training.log-last-segment-loss-only=True \
  --training.lr-end-factor=0.1 --training.online-inference=True \
  --training.sections=1 --training.use-cosine-decay=True \
  --training.warmup-steps=100 --training.weight-decay=0.001 \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True

# === BC-MLP ===
# 0: BC-MLP on AutoencodeEasy (max-length = context-length = 105, act-dim = 4)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-AutoencodeEasy-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-AutoencodeEasy --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/AutoencodeEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 1: BC-MLP on AutoencodeHard (max-length = context-length = 312, act-dim = 4)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-AutoencodeHard-v0/ --data.max-length=312 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=312 --training.sections=1 \
  --model.env-name=popgym-AutoencodeHard --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/AutoencodeHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 2: BC-MLP on AutoencodeMedium (max-length = context-length = 210, act-dim = 4)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-AutoencodeMedium-v0/ --data.max-length=210 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=210 --training.sections=1 \
  --model.env-name=popgym-AutoencodeMedium --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/AutoencodeMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 3: BC-MLP on BattleshipEasy (max-length = context-length = 66, act-dim = 64)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-BattleshipEasy-v0/ --data.max-length=66 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=66 --training.sections=1 \
  --model.env-name=popgym-BattleshipEasy --model.state-dim=-1 --model.act-dim=64 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/BattleshipEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 4: BC-MLP on BattleshipHard (max-length = context-length = 147, act-dim = 144)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-BattleshipHard-v0/ --data.max-length=147 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=147 --training.sections=1 \
  --model.env-name=popgym-BattleshipHard --model.state-dim=-1 --model.act-dim=144 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/BattleshipHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 5: BC-MLP on BattleshipMedium (max-length = context-length = 102, act-dim = 100)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-BattleshipMedium-v0/ --data.max-length=102 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=102 --training.sections=1 \
  --model.env-name=popgym-BattleshipMedium --model.state-dim=-1 --model.act-dim=100 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/BattleshipMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 6: BC-MLP on ConcentrationEasy (max-length = context-length = 105, act-dim = 52)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-ConcentrationEasy-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-ConcentrationEasy --model.state-dim=-1 --model.act-dim=52 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/ConcentrationEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 7: BC-MLP on ConcentrationHard (max-length = context-length = 105, act-dim = 52)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-ConcentrationHard-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-ConcentrationHard --model.state-dim=-1 --model.act-dim=52 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/ConcentrationHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 8: BC-MLP on ConcentrationMedium (max-length = context-length = 210, act-dim = 104)
# TensorBoard logs go to the ConcentrationMedium-v0 directory, matching the dataset
# and env-name, so they are kept separate from the ConcentrationHard run (# 7).
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-ConcentrationMedium-v0/ --data.max-length=210 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=210 --training.sections=1 \
  --model.env-name=popgym-ConcentrationMedium --model.state-dim=-1 --model.act-dim=104 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/ConcentrationMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 9: BC-MLP on CountRecallEasy (max-length = context-length = 54, act-dim = 26)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-CountRecallEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-CountRecallEasy --model.state-dim=-1 --model.act-dim=26 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/CountRecallEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 10: BC-MLP on CountRecallHard (max-length = context-length = 210, act-dim = 16)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-CountRecallHard-v0/ --data.max-length=210 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=210 --training.sections=1 \
  --model.env-name=popgym-CountRecallHard --model.state-dim=-1 --model.act-dim=16 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/CountRecallHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 11: BC-MLP on CountRecallMedium (max-length = context-length = 105, act-dim = 26)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-CountRecallMedium-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-CountRecallMedium --model.state-dim=-1 --model.act-dim=26 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/CountRecallMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 12: BC-MLP on HigherLowerEasy (max-length = context-length = 54, act-dim = 2)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-HigherLowerEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-HigherLowerEasy --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/HigherLowerEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 13: BC-MLP on HigherLowerHard (max-length = context-length = 156, act-dim = 2)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-HigherLowerHard-v0/ --data.max-length=156 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=156 --training.sections=1 \
  --model.env-name=popgym-HigherLowerHard --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/HigherLowerHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 14: BC-MLP on HigherLowerMedium (max-length = context-length = 105, act-dim = 2)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-HigherLowerMedium-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-HigherLowerMedium --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/HigherLowerMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 15: BC-MLP on LabyrinthEscapeEasy (max-length = context-length = 54, act-dim = 4)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-LabyrinthEscapeEasy --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/LabyrinthEscapeEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 16: BC-MLP on LabyrinthEscapeHard (max-length = context-length = 999, act-dim = 4)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeHard-v0/ --data.max-length=999 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=999 --training.sections=1 \
  --model.env-name=popgym-LabyrinthEscapeHard --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/LabyrinthEscapeHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 17: BC-MLP on LabyrinthEscapeMedium (max-length = context-length = 834, act-dim = 4)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeMedium-v0/ --data.max-length=834 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=834 --training.sections=1 \
  --model.env-name=popgym-LabyrinthEscapeMedium --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/LabyrinthEscapeMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 18: BC-MLP on LabyrinthExploreEasy (max-length = context-length = 93, act-dim = 4)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreEasy-v0/ --data.max-length=93 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=93 --training.sections=1 \
  --model.env-name=popgym-LabyrinthExploreEasy --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/LabyrinthExploreEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 19: BC-MLP on LabyrinthExploreHard (max-length = context-length = 378, act-dim = 4)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreHard-v0/ --data.max-length=378 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=378 --training.sections=1 \
  --model.env-name=popgym-LabyrinthExploreHard --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/LabyrinthExploreHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 20: BC-MLP on LabyrinthExploreMedium (max-length = context-length = 219, act-dim = 4)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreMedium-v0/ --data.max-length=219 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=219 --training.sections=1 \
  --model.env-name=popgym-LabyrinthExploreMedium --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/LabyrinthExploreMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 21: BC-MLP on MineSweeperEasy (max-length = context-length = 15, act-dim = 16)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MineSweeperEasy-v0/ --data.max-length=15 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=15 --training.sections=1 \
  --model.env-name=popgym-MineSweeperEasy --model.state-dim=-1 --model.act-dim=16 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/MineSweeperEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 22: BC-MLP on MineSweeperHard (max-length = context-length = 21, act-dim = 64)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MineSweeperHard-v0/ --data.max-length=21 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=21 --training.sections=1 \
  --model.env-name=popgym-MineSweeperHard --model.state-dim=-1 --model.act-dim=64 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/MineSweeperHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 23: BC-MLP on MineSweeperMedium (max-length = context-length = 18, act-dim = 36)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MineSweeperMedium-v0/ --data.max-length=18 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=18 --training.sections=1 \
  --model.env-name=popgym-MineSweeperMedium --model.state-dim=-1 --model.act-dim=36 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/MineSweeperMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 24: BC-MLP on MultiarmedBanditEasy (max-length = context-length = 201, act-dim = 10)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditEasy-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-MultiarmedBanditEasy --model.state-dim=-1 --model.act-dim=10 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/MultiarmedBanditEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 25: BC-MLP on MultiarmedBanditHard (max-length = context-length = 603, act-dim = 30)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditHard-v0/ --data.max-length=603 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=603 --training.sections=1 \
  --model.env-name=popgym-MultiarmedBanditHard --model.state-dim=-1 --model.act-dim=30 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/MultiarmedBanditHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 26: BC-MLP on MultiarmedBanditMedium (max-length = context-length = 402, act-dim = 20)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditMedium-v0/ --data.max-length=402 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=402 --training.sections=1 \
  --model.env-name=popgym-MultiarmedBanditMedium --model.state-dim=-1 --model.act-dim=20 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/MultiarmedBanditMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 27: BC-MLP on NoisyPositionOnlyCartPoleEasy (max-length = context-length = 198, act-dim = 2)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleEasy-v0/ --data.max-length=198 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=198 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyCartPoleEasy --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleEasy-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 28: BC-MLP on NoisyPositionOnlyCartPoleHard (max-length = context-length = 75, act-dim = 2)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleHard-v0/ --data.max-length=75 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=75 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyCartPoleHard --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleHard-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 29: BC-MLP on NoisyPositionOnlyCartPoleMedium (max-length = context-length = 114, act-dim = 2)
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleMedium-v0/ --data.max-length=114 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=114 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyCartPoleMedium --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleMedium-v0/MLP \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 30
# BC-MLP | popgym-NoisyPositionOnlyPendulumEasy | max-length = context-length = 201 | act-dim = 1
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumEasy-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyPendulumEasy --model.state-dim=-1 --model.act-dim=1 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumEasy-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 31
# BC-MLP | popgym-NoisyPositionOnlyPendulumHard | max-length = context-length = 201 | act-dim = 1
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumHard-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyPendulumHard --model.state-dim=-1 --model.act-dim=1 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumHard-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 32
# BC-MLP | popgym-NoisyPositionOnlyPendulumMedium | max-length = context-length = 201 | act-dim = 1
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumMedium-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyPendulumMedium --model.state-dim=-1 --model.act-dim=1 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumMedium-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 33
# BC-MLP | popgym-PositionOnlyCartPoleEasy | max-length = context-length = 201 | act-dim = 2
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleEasy-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyCartPoleEasy --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleEasy-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 34
# BC-MLP | popgym-PositionOnlyCartPoleHard | max-length = context-length = 603 | act-dim = 2
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleHard-v0/ --data.max-length=603 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=603 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyCartPoleHard --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleHard-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 35
# BC-MLP | popgym-PositionOnlyCartPoleMedium | max-length = context-length = 402 | act-dim = 2
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleMedium-v0/ --data.max-length=402 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=402 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyCartPoleMedium --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleMedium-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 36
# BC-MLP | popgym-PositionOnlyPendulumEasy | max-length = context-length = 201 | act-dim = 1
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumEasy-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyPendulumEasy --model.state-dim=-1 --model.act-dim=1 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/PositionOnlyPendulumEasy-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 37
# BC-MLP | popgym-PositionOnlyPendulumHard | max-length = context-length = 102 | act-dim = 1
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumHard-v0/ --data.max-length=102 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=102 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyPendulumHard --model.state-dim=-1 --model.act-dim=1 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/PositionOnlyPendulumHard-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 38
# BC-MLP | popgym-PositionOnlyPendulumMedium | max-length = context-length = 153 | act-dim = 1
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumMedium-v0/ --data.max-length=153 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=153 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyPendulumMedium --model.state-dim=-1 --model.act-dim=1 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/PositionOnlyPendulumMedium-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 39
# BC-MLP | popgym-RepeatFirstEasy | max-length = context-length = 54 | act-dim = 4
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatFirstEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-RepeatFirstEasy --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/RepeatFirstEasy-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 40
# BC-MLP | popgym-RepeatFirstHard | max-length = context-length = 834 | act-dim = 4
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatFirstHard-v0/ --data.max-length=834 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=834 --training.sections=1 \
  --model.env-name=popgym-RepeatFirstHard --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/RepeatFirstHard-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 41
# BC-MLP | popgym-RepeatFirstMedium | max-length = context-length = 417 | act-dim = 4
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatFirstMedium-v0/ --data.max-length=417 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=417 --training.sections=1 \
  --model.env-name=popgym-RepeatFirstMedium --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/RepeatFirstMedium-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 42
# BC-MLP | popgym-RepeatPreviousEasy | max-length = context-length = 54 | act-dim = 4
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-RepeatPreviousEasy --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/RepeatPreviousEasy-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 43
# BC-MLP | popgym-RepeatPreviousHard | max-length = context-length = 156 | act-dim = 4
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousHard-v0/ --data.max-length=156 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=156 --training.sections=1 \
  --model.env-name=popgym-RepeatPreviousHard --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/RepeatPreviousHard-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 44
# BC-MLP | popgym-RepeatPreviousMedium | max-length = context-length = 105 | act-dim = 4
# context-length is set to data.max-length (105): with --training.sections=1 the
# context has to span the whole trajectory. The previous value (156) did not
# match this run's max-length.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousMedium-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-RepeatPreviousMedium --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/RepeatPreviousMedium-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 45
# BC-MLP | popgym-VelocityOnlyCartpoleEasy | max-length = context-length = 201 | act-dim = 2
# context-length is set to data.max-length (201): with --training.sections=1 the
# context has to span the whole trajectory. The previous value (156) did not
# match this run's max-length.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleEasy-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-VelocityOnlyCartpoleEasy --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleEasy-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 46
# BC-MLP | popgym-VelocityOnlyCartpoleHard | max-length = context-length = 597 | act-dim = 2
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleHard-v0/ --data.max-length=597 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=597 --training.sections=1 \
  --model.env-name=popgym-VelocityOnlyCartpoleHard --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleHard-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 47
# BC-MLP | popgym-VelocityOnlyCartpoleMedium | max-length = context-length = 402 | act-dim = 2
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleMedium-v0/ --data.max-length=402 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=402 --training.sections=1 \
  --model.env-name=popgym-VelocityOnlyCartpoleMedium --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=mlp \
  --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleMedium-v0/MLP --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once

# === BC-LSTM ===
# 0
# BC-LSTM | popgym-AutoencodeEasy | max-length = context-length = 105 | act-dim = 4
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-AutoencodeEasy-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-AutoencodeEasy --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/AutoencodeEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 1
# BC-LSTM | popgym-AutoencodeHard | max-length = context-length = 312 | act-dim = 4
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-AutoencodeHard-v0/ --data.max-length=312 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=312 --training.sections=1 \
  --model.env-name=popgym-AutoencodeHard --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/AutoencodeHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 2
# BC-LSTM | popgym-AutoencodeMedium | max-length = context-length = 210 | act-dim = 4
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-AutoencodeMedium-v0/ --data.max-length=210 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=210 --training.sections=1 \
  --model.env-name=popgym-AutoencodeMedium --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/AutoencodeMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 3
# BC-LSTM | popgym-BattleshipEasy | max-length = context-length = 66 | act-dim = 64
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-BattleshipEasy-v0/ --data.max-length=66 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=66 --training.sections=1 \
  --model.env-name=popgym-BattleshipEasy --model.state-dim=-1 --model.act-dim=64 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/BattleshipEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 4
# BC-LSTM | popgym-BattleshipHard | max-length = context-length = 147 | act-dim = 144
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-BattleshipHard-v0/ --data.max-length=147 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=147 --training.sections=1 \
  --model.env-name=popgym-BattleshipHard --model.state-dim=-1 --model.act-dim=144 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/BattleshipHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 5
# BC-LSTM | popgym-BattleshipMedium | max-length = context-length = 102 | act-dim = 100
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-BattleshipMedium-v0/ --data.max-length=102 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=102 --training.sections=1 \
  --model.env-name=popgym-BattleshipMedium --model.state-dim=-1 --model.act-dim=100 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/BattleshipMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 6
# BC-LSTM | popgym-ConcentrationEasy | max-length = context-length = 105 | act-dim = 52
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-ConcentrationEasy-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-ConcentrationEasy --model.state-dim=-1 --model.act-dim=52 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/ConcentrationEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 7
# BC-LSTM | popgym-ConcentrationHard | max-length = context-length = 105 | act-dim = 52
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-ConcentrationHard-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-ConcentrationHard --model.state-dim=-1 --model.act-dim=52 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/ConcentrationHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 8
# BC-LSTM | popgym-ConcentrationMedium | max-length = context-length = 210 | act-dim = 104
# tensorboard-dir points at ConcentrationMedium-v0 so this run's logs are kept
# apart from the ConcentrationHard run; it previously reused
# runs/POPGym/ConcentrationHard-v0/LSTM.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-ConcentrationMedium-v0/ --data.max-length=210 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=210 --training.sections=1 \
  --model.env-name=popgym-ConcentrationMedium --model.state-dim=-1 --model.act-dim=104 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/ConcentrationMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 9
# BC-LSTM | popgym-CountRecallEasy | max-length = context-length = 54 | act-dim = 26
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-CountRecallEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-CountRecallEasy --model.state-dim=-1 --model.act-dim=26 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/CountRecallEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 10
# BC-LSTM | popgym-CountRecallHard | max-length = context-length = 210 | act-dim = 16
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-CountRecallHard-v0/ --data.max-length=210 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 \
  --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True \
  --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=210 --training.sections=1 \
  --model.env-name=popgym-CountRecallHard --model.state-dim=-1 --model.act-dim=16 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm \
  --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/CountRecallHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 11
# BC-LSTM baseline on popgym-CountRecallMedium: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 105, act-dim = 26.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-CountRecallMedium-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-CountRecallMedium --model.state-dim=-1 --model.act-dim=26 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/CountRecallMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 12
# BC-LSTM baseline on popgym-HigherLowerEasy: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 54, act-dim = 2.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-HigherLowerEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-HigherLowerEasy --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/HigherLowerEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 13
# BC-LSTM baseline on popgym-HigherLowerHard: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 156, act-dim = 2.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-HigherLowerHard-v0/ --data.max-length=156 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=156 --training.sections=1 \
  --model.env-name=popgym-HigherLowerHard --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/HigherLowerHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 14
# BC-LSTM baseline on popgym-HigherLowerMedium: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 105, act-dim = 2.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-HigherLowerMedium-v0/ --data.max-length=105 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 \
  --model.env-name=popgym-HigherLowerMedium --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/HigherLowerMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 15
# BC-LSTM baseline on popgym-LabyrinthEscapeEasy: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 54, act-dim = 4.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-LabyrinthEscapeEasy --model.state-dim=-1 --model.act-dim=4 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/LabyrinthEscapeEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 16
# BC-LSTM baseline on popgym-LabyrinthEscapeHard: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 999, act-dim = 4.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeHard-v0/ --data.max-length=999 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=999 --training.sections=1 \
  --model.env-name=popgym-LabyrinthEscapeHard --model.state-dim=-1 --model.act-dim=4 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/LabyrinthEscapeHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 17
# BC-LSTM baseline on popgym-LabyrinthEscapeMedium: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 834, act-dim = 4.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthEscapeMedium-v0/ --data.max-length=834 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=834 --training.sections=1 \
  --model.env-name=popgym-LabyrinthEscapeMedium --model.state-dim=-1 --model.act-dim=4 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/LabyrinthEscapeMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 18
# BC-LSTM baseline on popgym-LabyrinthExploreEasy: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 93, act-dim = 4.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreEasy-v0/ --data.max-length=93 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=93 --training.sections=1 \
  --model.env-name=popgym-LabyrinthExploreEasy --model.state-dim=-1 --model.act-dim=4 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/LabyrinthExploreEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 19
# BC-LSTM baseline on popgym-LabyrinthExploreHard: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 378, act-dim = 4.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreHard-v0/ --data.max-length=378 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=378 --training.sections=1 \
  --model.env-name=popgym-LabyrinthExploreHard --model.state-dim=-1 --model.act-dim=4 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/LabyrinthExploreHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 20
# BC-LSTM baseline on popgym-LabyrinthExploreMedium: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 219, act-dim = 4.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-LabyrinthExploreMedium-v0/ --data.max-length=219 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=219 --training.sections=1 \
  --model.env-name=popgym-LabyrinthExploreMedium --model.state-dim=-1 --model.act-dim=4 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/LabyrinthExploreMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 21
# BC-LSTM baseline on popgym-MineSweeperEasy: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 15, act-dim = 16.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MineSweeperEasy-v0/ --data.max-length=15 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=15 --training.sections=1 \
  --model.env-name=popgym-MineSweeperEasy --model.state-dim=-1 --model.act-dim=16 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/MineSweeperEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 22
# BC-LSTM baseline on popgym-MineSweeperHard: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 21, act-dim = 64.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MineSweeperHard-v0/ --data.max-length=21 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=21 --training.sections=1 \
  --model.env-name=popgym-MineSweeperHard --model.state-dim=-1 --model.act-dim=64 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/MineSweeperHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 23
# BC-LSTM baseline on popgym-MineSweeperMedium: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 18, act-dim = 36.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MineSweeperMedium-v0/ --data.max-length=18 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=18 --training.sections=1 \
  --model.env-name=popgym-MineSweeperMedium --model.state-dim=-1 --model.act-dim=36 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/MineSweeperMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 24
# BC-LSTM baseline on popgym-MultiarmedBanditEasy: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 201, act-dim = 10.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditEasy-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-MultiarmedBanditEasy --model.state-dim=-1 --model.act-dim=10 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/MultiarmedBanditEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 25
# BC-LSTM baseline on popgym-MultiarmedBanditHard: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 603, act-dim = 30.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditHard-v0/ --data.max-length=603 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=603 --training.sections=1 \
  --model.env-name=popgym-MultiarmedBanditHard --model.state-dim=-1 --model.act-dim=30 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/MultiarmedBanditHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 26
# BC-LSTM baseline on popgym-MultiarmedBanditMedium: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 402, act-dim = 20.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-MultiarmedBanditMedium-v0/ --data.max-length=402 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=402 --training.sections=1 \
  --model.env-name=popgym-MultiarmedBanditMedium --model.state-dim=-1 --model.act-dim=20 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/MultiarmedBanditMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 27
# BC-LSTM baseline on popgym-NoisyPositionOnlyCartPoleEasy: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 198, act-dim = 2.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleEasy-v0/ --data.max-length=198 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=198 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyCartPoleEasy --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 28
# BC-LSTM baseline on popgym-NoisyPositionOnlyCartPoleHard: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 75, act-dim = 2.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleHard-v0/ --data.max-length=75 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=75 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyCartPoleHard --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 29
# BC-LSTM baseline on popgym-NoisyPositionOnlyCartPoleMedium: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 114, act-dim = 2.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyCartPoleMedium-v0/ --data.max-length=114 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=114 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyCartPoleMedium --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyCartPoleMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 30
# BC-LSTM baseline on popgym-NoisyPositionOnlyPendulumEasy: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 201, act-dim = 1.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumEasy-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyPendulumEasy --model.state-dim=-1 --model.act-dim=1 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 31
# BC-LSTM baseline on popgym-NoisyPositionOnlyPendulumHard: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 201, act-dim = 1.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumHard-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyPendulumHard --model.state-dim=-1 --model.act-dim=1 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 32
# BC-LSTM baseline on popgym-NoisyPositionOnlyPendulumMedium: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 201, act-dim = 1.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-NoisyPositionOnlyPendulumMedium-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-NoisyPositionOnlyPendulumMedium --model.state-dim=-1 --model.act-dim=1 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/NoisyPositionOnlyPendulumMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 33
# BC-LSTM baseline on popgym-PositionOnlyCartPoleEasy: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 201, act-dim = 2.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleEasy-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyCartPoleEasy --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 34
# BC-LSTM baseline on popgym-PositionOnlyCartPoleHard: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 603, act-dim = 2.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleHard-v0/ --data.max-length=603 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=603 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyCartPoleHard --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 35
# BC-LSTM baseline on popgym-PositionOnlyCartPoleMedium: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 402, act-dim = 2.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyCartPoleMedium-v0/ --data.max-length=402 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=402 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyCartPoleMedium --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/PositionOnlyCartPoleMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 36
# BC-LSTM baseline on popgym-PositionOnlyPendulumEasy: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 201, act-dim = 1.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumEasy-v0/ --data.max-length=201 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyPendulumEasy --model.state-dim=-1 --model.act-dim=1 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/PositionOnlyPendulumEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 37
# BC-LSTM baseline on popgym-PositionOnlyPendulumHard: --model-mode=BC with a 1-layer,
# unidirectional LSTM backbone; max-length = context-length = 102, act-dim = 1.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumHard-v0/ --data.max-length=102 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=102 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyPendulumHard --model.state-dim=-1 --model.act-dim=1 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/PositionOnlyPendulumHard-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 38
# BC with LSTM backbone on popgym-PositionOnlyPendulumMedium: one section,
# context length equal to data.max-length (153), seed range 1-3.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-PositionOnlyPendulumMedium-v0/ --data.max-length=153 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=153 --training.sections=1 \
  --model.env-name=popgym-PositionOnlyPendulumMedium --model.state-dim=-1 --model.act-dim=1 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False \
  --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/PositionOnlyPendulumMedium-v0/LSTM \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 39
# BC with LSTM backbone on popgym-RepeatFirstEasy: one section,
# context length equal to data.max-length (54), seed range 1-3.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatFirstEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-RepeatFirstEasy --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False \
  --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/RepeatFirstEasy-v0/LSTM \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 40
# BC with LSTM backbone on popgym-RepeatFirstHard: one section,
# context length equal to data.max-length (834), seed range 1-3.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatFirstHard-v0/ --data.max-length=834 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=834 --training.sections=1 \
  --model.env-name=popgym-RepeatFirstHard --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False \
  --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/RepeatFirstHard-v0/LSTM \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 41
# BC with LSTM backbone on popgym-RepeatFirstMedium: one section,
# context length equal to data.max-length (417), seed range 1-3.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatFirstMedium-v0/ --data.max-length=417 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=417 --training.sections=1 \
  --model.env-name=popgym-RepeatFirstMedium --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False \
  --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/RepeatFirstMedium-v0/LSTM \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 42
# BC with LSTM backbone on popgym-RepeatPreviousEasy: one section,
# context length equal to data.max-length (54), seed range 1-3.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousEasy-v0/ --data.max-length=54 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=54 --training.sections=1 \
  --model.env-name=popgym-RepeatPreviousEasy --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False \
  --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/RepeatPreviousEasy-v0/LSTM \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 43
# BC with LSTM backbone on popgym-RepeatPreviousHard: one section,
# context length equal to data.max-length (156), seed range 1-3.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousHard-v0/ --data.max-length=156 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=156 --training.sections=1 \
  --model.env-name=popgym-RepeatPreviousHard --model.state-dim=-1 --model.act-dim=4 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False \
  --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/RepeatPreviousHard-v0/LSTM \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 44
# BC with LSTM backbone on popgym-RepeatPreviousMedium, seed range 1-3.
# With --training.sections=1 the context length is set to --data.max-length (105),
# following the max-length = sections x context-length pattern of the other runs
# in this file (156 is the RepeatPreviousHard value).
python3 src/train.py --wandb.project-name=RATE-POPGym --wandb.wwandb=True --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-RepeatPreviousMedium-v0/ --data.max-length=105 --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=105 --training.sections=1 --model.env-name=popgym-RepeatPreviousMedium --model.state-dim=-1 --model.act-dim=4 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True --tensorboard-dir=runs/POPGym/RepeatPreviousMedium-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 45
# BC with LSTM backbone on popgym-VelocityOnlyCartpoleEasy, seed range 1-3.
# With --training.sections=1 the context length is set to --data.max-length (201),
# following the max-length = sections x context-length pattern of the other runs
# in this file (156 is the RepeatPreviousHard value).
python3 src/train.py --wandb.project-name=RATE-POPGym --wandb.wwandb=True --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleEasy-v0/ --data.max-length=201 --training.learning-rate=0.0003 --training.lr-end-factor=0.1 --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 --training.online-inference=True --training.log-last-segment-loss-only=False --training.use-cosine-decay=False --training.context-length=201 --training.sections=1 --model.env-name=popgym-VelocityOnlyCartpoleEasy --model.state-dim=-1 --model.act-dim=2 --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False --model.reset_hidden_state_batch=True --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleEasy-v0/LSTM --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym --online-inference.use-argmax=False --online-inference.episode-timeout=1001 --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 46
# BC with LSTM backbone on popgym-VelocityOnlyCartpoleHard: one section,
# context length equal to data.max-length (597), seed range 1-3.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleHard-v0/ --data.max-length=597 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=597 --training.sections=1 \
  --model.env-name=popgym-VelocityOnlyCartpoleHard --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False \
  --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleHard-v0/LSTM \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once
# 47
# BC with LSTM backbone on popgym-VelocityOnlyCartpoleMedium: one section,
# context length equal to data.max-length (402), seed range 1-3.
python3 src/train.py \
  --wandb.project-name=RATE-POPGym --wandb.wwandb=True \
  --data.gamma=1.0 --data.path-to-dataset=data/POPGym/popgym-VelocityOnlyCartpoleMedium-v0/ --data.max-length=402 \
  --training.learning-rate=0.0003 --training.lr-end-factor=0.1 \
  --training.beta-1=0.9 --training.beta-2=0.999 --training.weight-decay=0.01 \
  --training.batch-size=128 --training.warmup-steps=100 --training.final-tokens=10_000_000 \
  --training.grad-norm-clip=5.0 --training.epochs=400 --training.ckpt-epoch=50 \
  --training.online-inference=True --training.log-last-segment-loss-only=False \
  --training.use-cosine-decay=False --training.context-length=402 --training.sections=1 \
  --model.env-name=popgym-VelocityOnlyCartpoleMedium --model.state-dim=-1 --model.act-dim=2 \
  --model.d-model=64 --model.dropout=0.2 --model.padding-idx=None \
  --model.backbone=lstm --model.lstm_layers=1 --model.bidirectional=False \
  --model.reset_hidden_state_batch=True \
  --tensorboard-dir=runs/POPGym/VelocityOnlyCartpoleMedium-v0/LSTM \
  --model-mode=BC --start-seed=1 --end-seed=3 --text=popgym \
  --online-inference.use-argmax=False --online-inference.episode-timeout=1001 \
  --online-inference.desired-return-1=1.0 --online-inference.best_checkpoint_metric=success_once