#!/bin/bash

# RATE
# Launches src/train.py with --model-mode=RATE. The flags are collected in an
# array (original order preserved) so they can be grouped and annotated.
rate_args=(
  --arch-mode=TrXL
  # --data.* flags
  --data.gamma=1
  --data.max-length=None
  --data.path-to-dataset=None
  # top-level run flags
  --end-seed=5
  --max-n-final=3
  --min-n-final=1
  --model-mode=RATE
  # --model.* flags
  --model.act-dim=4
  --model.d-head=32
  --model.d-inner=32
  --model.d-model=64
  --model.dropatt=0
  --model.dropout=0.2
  --model.env-name=tmaze
  --model.ext-len=0
  --model.mem-at-end=True
  --model.mem-len=0
  --model.mrv-act=relu
  --model.n-head=4
  --model.n-head-ca=2
  --model.n-layer=10
  --model.num-mem-tokens=15
  --model.padding-idx=-10
  --model.skip-dec-ffn=True
  --model.state-dim=4
  # checkpoint metric, seed start, logging destination and run label
  --online-inference.best_checkpoint_metric=Success_rate
  --start-seed=1
  --tensorboard-dir=runs/TMaze/RATE/T_150
  --text=RATE
  # --training.* flags
  --training.batch-size=128
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.ckpt-epoch=10
  --training.context-length=50
  --training.epochs=200
  --training.final-tokens=10000000
  --training.grad-norm-clip=1
  --training.learning-rate=0.0003
  --training.log-last-segment-loss-only=True
  --training.lr-end-factor=0.01
  --training.online-inference=True
  --training.sections=3
  --training.use-cosine-decay=False
  --training.warmup-steps=1000
  --training.weight-decay=0.01
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
)
python3 src/train.py "${rate_args[@]}"

# DT
# Launches src/train.py with --model-mode=DT. The flags are collected in an
# array (original order preserved) so they can be grouped and annotated.
dt_args=(
  --arch-mode=TrXL
  # --data.* flags
  --data.gamma=1
  --data.max-length=None
  --data.path-to-dataset=None
  # top-level run flags
  --end-seed=5
  --max-n-final=3
  --min-n-final=1
  --model-mode=DT
  # --model.* flags
  --model.act-dim=4
  --model.d-head=32
  --model.d-inner=64
  --model.d-model=64
  --model.dropatt=0.1
  --model.dropout=0.2
  --model.env-name=tmaze
  --model.ext-len=0
  --model.mem-at-end=False
  --model.mem-len=0
  --model.mrv-act=no_act
  --model.n-head=8
  --model.n-head-ca=0
  --model.n-layer=8
  --model.num-mem-tokens=10
  --model.padding-idx=-10
  --model.skip-dec-ffn=False
  --model.state-dim=4
  # checkpoint metric, seed start, logging destination and run label
  --online-inference.best_checkpoint_metric=Success_rate
  --start-seed=1
  --tensorboard-dir=runs/TMaze/DT/T_150
  --text=DT
  # --training.* flags
  --training.batch-size=64
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.ckpt-epoch=10
  --training.context-length=50
  --training.epochs=200
  --training.final-tokens=10000000
  --training.grad-norm-clip=1
  --training.learning-rate=0.0001
  --training.log-last-segment-loss-only=True
  --training.lr-end-factor=0.01
  --training.online-inference=True
  --training.sections=3
  --training.use-cosine-decay=False
  --training.warmup-steps=1000
  --training.weight-decay=0.001
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
)
python3 src/train.py "${dt_args[@]}"

# RMT
# Launches src/train.py with --model-mode=RMT. The flags are collected in an
# array (original order preserved) so they can be grouped and annotated.
rmt_args=(
  --arch-mode=TrXL
  # --data.* flags
  --data.gamma=1
  --data.max-length=None
  --data.path-to-dataset=None
  # top-level run flags
  --end-seed=5
  --max-n-final=3
  --min-n-final=1
  --model-mode=RMT
  # --model.* flags
  --model.act-dim=4
  --model.d-head=32
  --model.d-inner=64
  --model.d-model=64
  --model.dropatt=0.1
  --model.dropout=0.2
  --model.env-name=tmaze
  --model.ext-len=0
  --model.mem-at-end=True
  --model.mem-len=0
  --model.mrv-act=no_act
  --model.n-head=8
  --model.n-head-ca=0
  --model.n-layer=8
  --model.num-mem-tokens=10
  --model.padding-idx=-10
  --model.skip-dec-ffn=False
  --model.state-dim=4
  # checkpoint metric, seed start, logging destination and run label
  --online-inference.best_checkpoint_metric=Success_rate
  --start-seed=1
  --tensorboard-dir=runs/TMaze/RMT/T_150
  --text=RMT
  # --training.* flags
  --training.batch-size=64
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.ckpt-epoch=10
  --training.context-length=50
  --training.epochs=200
  --training.final-tokens=10000000
  --training.grad-norm-clip=1
  --training.learning-rate=0.0001
  --training.log-last-segment-loss-only=True
  --training.lr-end-factor=0.01
  --training.online-inference=True
  --training.sections=3
  --training.use-cosine-decay=False
  --training.warmup-steps=1000
  --training.weight-decay=0.001
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
)
python3 src/train.py "${rmt_args[@]}"

# TrXL
# Launches src/train.py with --model-mode=TrXL. Unlike the neighbouring runs,
# this one passes --model.mem-len=450 and --model.num-mem-tokens=0.
# The flags are collected in an array (original order preserved).
trxl_args=(
  --arch-mode=TrXL
  # --data.* flags
  --data.gamma=1
  --data.max-length=None
  --data.path-to-dataset=None
  # top-level run flags
  --end-seed=5
  --max-n-final=3
  --min-n-final=1
  --model-mode=TrXL
  # --model.* flags
  --model.act-dim=4
  --model.d-head=32
  --model.d-inner=64
  --model.d-model=64
  --model.dropatt=0.1
  --model.dropout=0.2
  --model.env-name=tmaze
  --model.ext-len=0
  --model.mem-at-end=True
  --model.mem-len=450
  --model.mrv-act=no_act
  --model.n-head=8
  --model.n-head-ca=0
  --model.n-layer=8
  --model.num-mem-tokens=0
  --model.padding-idx=-10
  --model.skip-dec-ffn=False
  --model.state-dim=4
  # checkpoint metric, seed start, logging destination and run label
  --online-inference.best_checkpoint_metric=Success_rate
  --start-seed=1
  --tensorboard-dir=runs/TMaze/TrXL/T_150
  --text=TrXL
  # --training.* flags
  --training.batch-size=64
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.ckpt-epoch=10
  --training.context-length=50
  --training.epochs=200
  --training.final-tokens=10000000
  --training.grad-norm-clip=1
  --training.learning-rate=0.0001
  --training.log-last-segment-loss-only=True
  --training.lr-end-factor=0.01
  --training.online-inference=True
  --training.sections=3
  --training.use-cosine-decay=False
  --training.warmup-steps=1000
  --training.weight-decay=0.001
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
)
python3 src/train.py "${trxl_args[@]}"

# LSDT
# Launches src/train.py with --model-mode=LSDT. The flags are collected in an
# array (original order preserved) so they can be grouped and annotated.
lsdt_args=(
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
  # --data.* flags
  --data.gamma=1.0
  --data.path-to-dataset=None
  # --training.* flags
  --training.learning-rate=0.0003
  --training.lr-end-factor=0.1
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.weight-decay=0.01
  --training.batch-size=64
  --training.warmup-steps=100
  --training.final-tokens=10_000_000
  --training.grad-norm-clip=5.0
  --training.epochs=200
  --training.ckpt-epoch=10
  --training.online-inference=True
  --training.log-last-segment-loss-only=False
  --training.use-cosine-decay=False
  --training.context-length=50
  --training.sections=3
  # --model.* flags
  --model.env-name=tmaze
  --model.state-dim=4
  --model.act-dim=4
  --model.n-layer=2
  --model.n-head=2
  --model.d-model=64
  --model.dropout=0.2
  --model.dropatt=0.1
  --model.kernel-size=6
  --model.convdim=32
  --model.padding-idx=-10
  # logging destination, mode, seeds, run label and n-final bounds
  --tensorboard-dir=runs/TMaze/LSDT/T_150
  --model-mode=LSDT
  --start-seed=1
  --end-seed=5
  --text=LSDT
  --min-n-final=1
  --max-n-final=3
  --online-inference.best_checkpoint_metric=Success_rate
)
python3 src/train.py "${lsdt_args[@]}"

# BC-MLP
# Launches src/train.py with --model-mode=BC and --model.backbone=mlp.
# The flags are collected in an array (original order preserved).
bc_mlp_args=(
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
  # --data.* flags
  --data.gamma=1.0
  --data.path-to-dataset=None
  # --training.* flags
  --training.learning-rate=0.0003
  --training.lr-end-factor=0.1
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.weight-decay=0.01
  --training.batch-size=64
  --training.warmup-steps=100
  --training.final-tokens=10_000_000
  --training.grad-norm-clip=1.0
  --training.epochs=50
  --training.ckpt-epoch=10
  --training.online-inference=True
  --training.log-last-segment-loss-only=False
  --training.use-cosine-decay=False
  --training.context-length=50
  --training.sections=3
  # --model.* flags
  --model.env-name=tmaze
  --model.state-dim=4
  --model.act-dim=4
  --model.d-model=64
  --model.dropout=0.2
  --model.padding-idx=-10
  --model.backbone=mlp
  # logging destination, mode, seeds, run label and n-final bounds
  --tensorboard-dir=runs/TMaze/BC_MLP/T_150
  --model-mode=BC
  --start-seed=1
  --end-seed=5
  --text=BC_MLP
  --min-n-final=1
  --max-n-final=3
  --online-inference.best_checkpoint_metric=Success_rate
)
python3 src/train.py "${bc_mlp_args[@]}"

# BC-LSTM
# Launches src/train.py with --model-mode=BC and --model.backbone=lstm.
# The flags are collected in an array (original order preserved).
bc_lstm_args=(
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
  # --data.* flags
  --data.gamma=1.0
  --data.path-to-dataset=None
  # --training.* flags
  --training.learning-rate=0.0003
  --training.lr-end-factor=0.1
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.weight-decay=0.01
  --training.batch-size=64
  --training.warmup-steps=100
  --training.final-tokens=10_000_000
  --training.grad-norm-clip=5.0
  --training.epochs=100
  --training.ckpt-epoch=10
  --training.online-inference=True
  --training.log-last-segment-loss-only=False
  --training.use-cosine-decay=False
  --training.context-length=50
  --training.sections=3
  # --model.* flags
  --model.env-name=tmaze
  --model.state-dim=4
  --model.act-dim=4
  --model.d-model=64
  --model.dropout=0.2
  --model.padding-idx=-10
  --model.backbone=lstm
  --model.lstm_layers=1
  --model.bidirectional=False
  --model.reset_hidden_state_batch=True
  # logging destination, mode, seeds, run label and n-final bounds
  --tensorboard-dir=runs/TMaze/BC_LSTM/T_150
  --model-mode=BC
  --start-seed=1
  --end-seed=5
  --text=BC_LSTM
  --min-n-final=1
  --max-n-final=3
  --online-inference.best_checkpoint_metric=Success_rate
)
python3 src/train.py "${bc_lstm_args[@]}"

# CQL-MLP
# Launches src/train.py with --model-mode=CQL and --model.backbone=mlp.
# The flags are collected in an array (original order preserved).
cql_mlp_args=(
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
  # --data.* flags
  --data.gamma=1.0
  --data.path-to-dataset=None
  # --training.* flags
  --training.learning-rate=0.0003
  --training.lr-end-factor=0.1
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.weight-decay=0.01
  --training.batch-size=64
  --training.warmup-steps=100
  --training.final-tokens=10_000_000
  --training.grad-norm-clip=1.0
  --training.epochs=50
  --training.ckpt-epoch=10
  --training.online-inference=True
  --training.log-last-segment-loss-only=False
  --training.use-cosine-decay=False
  --training.context-length=50
  --training.sections=3
  # --model.* flags
  --model.env-name=tmaze
  --model.state-dim=4
  --model.act-dim=4
  --model.d-model=64
  --model.dropout=0.2
  --model.padding-idx=-10
  --model.backbone=mlp
  # logging destination, mode (plus its cql-alpha), seeds, label, n-final bounds
  --tensorboard-dir=runs/TMaze/CQL_MLP/T_150
  --model-mode=CQL
  --model.cql-alpha=5.0
  --start-seed=1
  --end-seed=5
  --text=CQL_MLP
  --min-n-final=1
  --max-n-final=3
  --online-inference.best_checkpoint_metric=Success_rate
)
python3 src/train.py "${cql_mlp_args[@]}"

# CQL-LSTM
# Launches src/train.py with --model-mode=CQL and --model.backbone=lstm.
# The flags are collected in an array (original order preserved).
cql_lstm_args=(
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
  # --data.* flags
  --data.gamma=1.0
  --data.path-to-dataset=None
  # --training.* flags
  --training.learning-rate=0.0003
  --training.lr-end-factor=0.1
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.weight-decay=0.01
  --training.batch-size=64
  --training.warmup-steps=100
  --training.final-tokens=10_000_000
  --training.grad-norm-clip=5.0
  --training.epochs=100
  --training.ckpt-epoch=10
  --training.online-inference=True
  --training.log-last-segment-loss-only=False
  --training.use-cosine-decay=False
  --training.context-length=50
  --training.sections=3
  # --model.* flags
  --model.env-name=tmaze
  --model.state-dim=4
  --model.act-dim=4
  --model.d-model=64
  --model.dropout=0.2
  --model.padding-idx=-10
  --model.backbone=lstm
  --model.lstm_layers=1
  --model.bidirectional=False
  --model.reset_hidden_state_batch=True
  # logging destination, mode (plus its cql-alpha), seeds, label, n-final bounds
  --tensorboard-dir=runs/TMaze/CQL_LSTM/T_150
  --model-mode=CQL
  --model.cql-alpha=5.0
  --start-seed=1
  --end-seed=5
  --text=CQL_LSTM
  --min-n-final=1
  --max-n-final=3
  --online-inference.best_checkpoint_metric=Success_rate
)
python3 src/train.py "${cql_lstm_args[@]}"

# DLSTM
# Launches src/train.py with --model-mode=DLSTM and --model.backbone=lstm.
# The flags are collected in an array (original order preserved).
dlstm_args=(
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
  # --data.* flags
  --data.gamma=1.0
  --data.path-to-dataset=None
  # --training.* flags
  --training.learning-rate=0.0003
  --training.lr-end-factor=0.1
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.weight-decay=0.01
  --training.batch-size=128
  --training.warmup-steps=100
  --training.final-tokens=10_000_000
  --training.grad-norm-clip=5.0
  --training.epochs=200
  --training.ckpt-epoch=10
  --training.online-inference=True
  --training.log-last-segment-loss-only=False
  --training.use-cosine-decay=False
  --training.context-length=50
  --training.sections=3
  # --model.* flags
  --model.env-name=tmaze
  --model.state-dim=4
  --model.act-dim=4
  --model.d-model=64
  --model.dropout=0.2
  --model.padding-idx=-10
  --model.backbone=lstm
  --model.lstm_layers=1
  --model.bidirectional=False
  --model.reset_hidden_state_batch=True
  # logging destination, mode, seeds, run label and n-final bounds
  --tensorboard-dir=runs/TMaze/DLSTM/T_150
  --model-mode=DLSTM
  --start-seed=1
  --end-seed=5
  --text=DLSTM
  --min-n-final=1
  --max-n-final=3
  --online-inference.best_checkpoint_metric=Success_rate
)
python3 src/train.py "${dlstm_args[@]}"

# DGRU
# Launches src/train.py with --model-mode=DLSTM but --model.backbone=gru; the
# run is labelled DGRU through --text and --tensorboard-dir.
# The flags are collected in an array (original order preserved).
dgru_args=(
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
  # --data.* flags
  --data.gamma=1.0
  --data.path-to-dataset=None
  # --training.* flags
  --training.learning-rate=0.0003
  --training.lr-end-factor=0.1
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.weight-decay=0.01
  --training.batch-size=128
  --training.warmup-steps=100
  --training.final-tokens=10_000_000
  --training.grad-norm-clip=5.0
  --training.epochs=200
  --training.ckpt-epoch=10
  --training.online-inference=True
  --training.log-last-segment-loss-only=False
  --training.use-cosine-decay=False
  --training.context-length=50
  --training.sections=3
  # --model.* flags
  --model.env-name=tmaze
  --model.state-dim=4
  --model.act-dim=4
  --model.d-model=64
  --model.dropout=0.2
  --model.padding-idx=-10
  --model.backbone=gru
  --model.lstm_layers=1
  --model.bidirectional=False
  --model.reset_hidden_state_batch=True
  # logging destination, mode, seeds, run label and n-final bounds
  --tensorboard-dir=runs/TMaze/DGRU/T_150
  --model-mode=DLSTM
  --start-seed=1
  --end-seed=5
  --text=DGRU
  --min-n-final=1
  --max-n-final=3
  --online-inference.best_checkpoint_metric=Success_rate
)
python3 src/train.py "${dgru_args[@]}"

# DMamba
# Launches src/train.py with --model-mode=DMamba and --model.token-mixer=mamba.
# Note that the tensorboard directory component is spelled DMAMBA (upper case).
# The flags are collected in an array (original order preserved).
dmamba_args=(
  --arch-mode=TrXL
  # --data.* flags
  --data.gamma=1
  --data.path-to-dataset=None
  # top-level run flags
  --end-seed=5
  --max-n-final=3
  --min-n-final=1
  --model-mode=DMamba
  # --model.* flags
  --model.act-dim=4
  --model.conv-proj=True
  --model.d-model=64
  --model.dropatt=0
  --model.dropout=0.2
  --model.env-name=tmaze
  --model.n-head=2
  --model.n-layer=2
  --model.padding-idx=-10
  --model.state-dim=4
  --model.token-mixer=mamba
  --model.window-size=30
  # checkpoint metric, seed start, logging destination and run label
  --online-inference.best_checkpoint_metric=Success_rate
  --start-seed=1
  --tensorboard-dir=runs/TMaze/DMAMBA/T_150
  --text=DMamba
  # --training.* flags
  --training.batch-size=128
  --training.beta-1=0.9
  --training.beta-2=0.999
  --training.ckpt-epoch=25
  --training.context-length=50
  --training.epochs=200
  --training.final-tokens=10000000
  --training.grad-norm-clip=1
  --training.learning-rate=0.0003
  --training.log-last-segment-loss-only=True
  --training.lr-end-factor=0.01
  --training.online-inference=True
  --training.sections=3
  --training.use-cosine-decay=False
  --training.warmup-steps=100
  --training.weight-decay=0.1
  # --wandb.* flags
  --wandb.project-name=RATE-T-Maze
  --wandb.wwandb=True
)
python3 src/train.py "${dmamba_args[@]}"