# Launch train_grokk.py on GPU 3 as W&B run "div_p97_frac0.4_lr1e-3_new"
# (dataset=mod_division_dataset, p=97, frac_train=0.4; no other overrides).
# WANDB_API_KEY is taken from the caller's environment: the ${VAR:?} guard
# aborts the launch with a message when it is unset or empty, so the secret
# is not stored in this command.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and revoked in the W&B account.
# NOTE(review): the run name says lr1e-3 but no learning-rate override is
# passed -- presumably that is the config default; confirm.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.4_lr1e-3_new" CUDA_VISIBLE_DEVICES=3 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.4




# Launch train_grokk.py on GPU 4 as W&B run "div_p97_frac0.4_lr1e-3_wd1e-2"
# (dataset=mod_division_dataset, p=97, frac_train=0.4, train.weight_decay=1e-2).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.4_lr1e-3_wd1e-2" CUDA_VISIBLE_DEVICES=4 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.4 \
train.weight_decay=1e-2


# Launch train_grokk.py on GPU 5 as W&B run "div_p97_frac0.4_lr1e-3_wd1"
# (dataset=mod_division_dataset, p=97, frac_train=0.4, train.weight_decay=1).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.4_lr1e-3_wd1" CUDA_VISIBLE_DEVICES=5 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.4 \
train.weight_decay=1


# Small training fractions: vary dataset.frac_train (0.3, 0.2, 0.1)
# Launch train_grokk.py on GPU 3 as W&B run "div_p97_frac0.3_lr1e-3"
# (dataset=mod_division_dataset, p=97, frac_train=0.3).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.3_lr1e-3" CUDA_VISIBLE_DEVICES=3 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.3

# Launch train_grokk.py on GPU 4 as W&B run "div_p97_frac0.2_lr1e-3"
# (dataset=mod_division_dataset, p=97, frac_train=0.2).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.2_lr1e-3" CUDA_VISIBLE_DEVICES=4 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.2

# Launch train_grokk.py on GPU 5 as W&B run "div_p97_frac0.1_lr1e-3"
# (dataset=mod_division_dataset, p=97, frac_train=0.1).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.1_lr1e-3" CUDA_VISIBLE_DEVICES=5 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.1

# Launch train_grokk.py on GPU 3 as W&B run "div_p97_frac0.4_lr1e-3_wd"
# (dataset=mod_division_dataset, p=97, frac_train=0.4; no other overrides).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
# NOTE(review): only the run name carries the "_wd" suffix; no wd-related
# override is passed here -- presumably the difference lives in the code or
# config defaults at the time of the run; confirm.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.4_lr1e-3_wd" CUDA_VISIBLE_DEVICES=3 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.4


# wd with default parameters (no wd.* overrides are passed)
# Launch train_grokk.py on GPU 3 as W&B run "div_p97_frac0.4_wd_default"
# (dataset=mod_division_dataset, p=97, frac_train=0.4; no wd.* overrides).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.4_wd_default" CUDA_VISIBLE_DEVICES=3 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.4

# Launch train_grokk.py on GPU 4 as W&B run "div_p97_frac0.3_wd_default"
# (dataset=mod_division_dataset, p=97, frac_train=0.3; no wd.* overrides).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.3_wd_default" CUDA_VISIBLE_DEVICES=4 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.3

# PCA and other wd.* variations (wd.use_pca, wd.pca_k, wd.resolution, wd.num_pairs)
# Launch train_grokk.py on GPU 3 as W&B run "div_p97_frac0.4_wd_pca_10"
# (dataset=mod_division_dataset, p=97, frac_train=0.4, wd.use_pca=true,
# wd.pca_k=10).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.4_wd_pca_10" CUDA_VISIBLE_DEVICES=3 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.4 wd.use_pca=true wd.pca_k=10

# Launch train_grokk.py on GPU 5 as W&B run
# "div_p97_frac0.4_wd_pca_10_resolution_100"
# (dataset=mod_division_dataset, p=97, frac_train=0.4, wd.use_pca=true,
# wd.pca_k=10, wd.resolution=100).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.4_wd_pca_10_resolution_100" CUDA_VISIBLE_DEVICES=5 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.4 wd.use_pca=true wd.pca_k=10 wd.resolution=100

# Launch train_grokk.py on GPU 5 as W&B run "div_p97_frac0.4_wd_pca_10_pair_10"
# (dataset=mod_division_dataset, p=97, frac_train=0.4, wd.use_pca=true,
# wd.pca_k=10, wd.num_pairs=10).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.4_wd_pca_10_pair_10" CUDA_VISIBLE_DEVICES=5 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.4 wd.use_pca=true wd.pca_k=10 wd.num_pairs=10

# Launch train_grokk.py on GPU 5 as W&B run
# "div_p97_frac0.4_wd_pca_10_pair_10_sharpness"
# (dataset=mod_division_dataset, p=97, frac_train=0.4, wd.use_pca=true,
# wd.pca_k=10, wd.num_pairs=10).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
# NOTE(review): the run name mentions "sharpness" but no sharpness-related
# override is passed -- presumably it is enabled in the code or config
# defaults; confirm.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.4_wd_pca_10_pair_10_sharpness" CUDA_VISIBLE_DEVICES=5 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.4 wd.use_pca=true wd.pca_k=10 wd.num_pairs=10


# Launch train_grokk.py on GPU 3 as W&B run "div_p97_frac0.3_pair_10_sharpness"
# (dataset=mod_division_dataset, p=97, frac_train=0.3, wd.use_pca=false,
# wd.num_pairs=10).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.3_pair_10_sharpness" CUDA_VISIBLE_DEVICES=3 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.3 wd.use_pca=false wd.num_pairs=10

# Launch train_grokk.py on GPU 5 as W&B run
# "div_p97_frac0.3_pair_200_sharpness_every_100"
# (dataset=mod_division_dataset, p=97, frac_train=0.3, wd.use_pca=false,
# wd.num_pairs=200, train.eval_every=100).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.3_pair_200_sharpness_every_100" CUDA_VISIBLE_DEVICES=5 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.3 wd.use_pca=false wd.num_pairs=200 train.eval_every=100

# Launch train_grokk.py on GPU 4 as W&B run "div_p97_frac0.3_pair_200_sharpness"
# (dataset=mod_division_dataset, p=97, frac_train=0.3, wd.use_pca=false,
# wd.num_pairs=200).
# WANDB_API_KEY is read from the caller's environment; the ${VAR:?} guard
# aborts the launch when it is unset or empty instead of hard-coding a secret.
WANDB_API_KEY="${WANDB_API_KEY:?export WANDB_API_KEY before launching}" \
WANDB_NAME="div_p97_frac0.3_pair_200_sharpness" CUDA_VISIBLE_DEVICES=4 \
python train_grokk.py dataset=mod_division_dataset dataset.p=97 dataset.frac_train=0.3 wd.use_pca=false wd.num_pairs=200