# Launch src/sparse_codes_training/experiment.py with a fixed configuration:
# base model gpt-neo-125m, reward function hh_reward, the "train" split of
# anthropic/hh-rlhf, one epoch, and the --tied_weights switch enabled.
#
# The script path is relative, so invoke this from the directory that
# contains src/ (presumably the repository root -- confirm).
python3 src/sparse_codes_training/experiment.py \
  --base_model_name gpt-neo-125m \
  --reward_function hh_reward \
  --dataset anthropic/hh-rlhf \
  --tied_weights \
  --num_epochs 1 \
  --split train
