# Main training framework for language models

## Training scripts

Below is an example training script; fill in the empty `HF_HOME` and `DUMP` values before running it. We will release all the scripts upon acceptance.

```
# Example pre-training launch.
# Fill in the empty placeholders (HF_HOME, DUMP) before running.

# Expose GPUs 0-3 to the job; NGPU matches the four devices listed.
# NOTE(review): NGPU is presumably read by train.sh -- confirm there.
export NGPU=4
export CUDA_VISIBLE_DEVICES=0,1,2,3

# Hugging Face cache location -- set this to a writable directory.
export HF_HOME=

conda activate rl

MODEL_CONFIG=configs/synchybrid_qlt_pos_ch64_memgate_noconv_340M.json
TOKENIZER=fla-hub/transformer-1.3B-100B
# Folder handed to --job.dump_folder -- must be filled in.
DUMP=

DATASET=HuggingFaceFW/fineweb-edu
DATASET_NAME=sample-100BT

STEPS=28672

# Abort with a clear message while DUMP is still the empty placeholder;
# otherwise --job.dump_folder would be launched without a usable value.
: "${DUMP:?set DUMP to the output folder before launching}"

# Every expansion is quoted so that an empty value or a path containing
# spaces stays a single argument instead of shifting the flags after it.
bash train.sh \
  --job.config_file flame/models/fla.toml \
  --job.dump_folder "${DUMP}" \
  --model.config "${MODEL_CONFIG}" \
  --model.tokenizer_path "${TOKENIZER}" \
  --optimizer.name AdamW \
  --optimizer.eps 1e-15 \
  --optimizer.lr 1e-3 \
  --lr_scheduler.warmup_steps 1024 \
  --lr_scheduler.lr_min 0.1 \
  --lr_scheduler.decay_type cosine \
  --training.batch_size 16 \
  --training.seq_len 2048 \
  --training.gradient_accumulation_steps 4 \
  --training.steps "${STEPS}" \
  --training.max_norm 1.0 \
  --training.skip_nan_inf \
  --training.dataset "${DATASET}" \
  --training.dataset_name "${DATASET_NAME}" \
  --training.dataset_split train \
  --training.num_workers 12 \
  --training.prefetch_factor 2 \
  --training.seed 42 \
  --training.compile \
  --checkpoint.interval 2048 \
  --checkpoint.load_step -1 \
  --checkpoint.keep_latest_k 2 \
  --metrics.log_freq 1
```

## Evaluation scripts

```
# Example zero-shot evaluation launch.
# Fill in the empty placeholders (HF_HOME, CHECKPOINT) before running.

# Environment-module name for the CUDA toolkit.
# NOTE(review): the module name looks site-specific -- adjust for your cluster.
module load cuda/12.2.0-fasrc01

# Evaluate on a single GPU (device 0).
export NGPU=1
export CUDA_VISIBLE_DEVICES=0

# Hugging Face cache location -- set this to a writable directory.
export HF_HOME=

conda activate rl

# Model to evaluate; substituted into pretrained=... below -- must be filled in.
CHECKPOINT=

# Abort with a clear message while CHECKPOINT is still the empty placeholder;
# otherwise the harness would receive the malformed "pretrained=,dtype=bfloat16".
: "${CHECKPOINT:?set CHECKPOINT to the model to evaluate}"

# --model_args is quoted so a checkpoint path containing spaces stays one argument.
python -m evals.harness --model hf \
  --model_args "pretrained=${CHECKPOINT},dtype=bfloat16" \
  --tasks wikitext,lambada_openai,piqa,hellaswag,winogrande,arc_easy,arc_challenge,squad_completion,swde \
  --batch_size 1 \
  --num_fewshot 0 \
  --device cuda \
  --show_config
```
