#!/bin/bash
#SBATCH --job-name=Pile-github-Data-Qwen2.5-1.5B-hook-mlp-out # Job name
#SBATCH --partition=gpu # Partition name
#SBATCH --qos=YOUR_QOS_NAME
#SBATCH --gres=gpu:h100:1
#SBATCH --ntasks=1 
#SBATCH --cpus-per-task=8
#SBATCH --time=24:00:00 
#SBATCH --output=slurm_output/Pile-github-Data-Qwen2.5-1.5B-hook-mlp-out-%j.out
#SBATCH --error=slurm_output/Pile-github-Data-Qwen2.5-1.5B-hook-mlp-out-%j.err
#SBATCH --requeue 

# ---------------------------------------------------------------------------
# Job body: print a short banner describing the allocation, activate the
# conda environment, and run the preprocessing script once for a list of
# layers, passing one output HDF5 path per layer.
#
# Note: Slurm opens the --output/--error files itself, so the slurm_output/
# directory named in the #SBATCH header must already exist in the submit
# directory when the job is submitted.
# ---------------------------------------------------------------------------

# Print an error to stderr (captured in the --error file) and abort the job.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

readonly PROJECT_DIR='/path/to/your/project/SAE_Demystified/Pile-Qwen2.5-1.5B-hook-mlp-out-preprocessing'
readonly SCRATCH_DIR='/path/to/your/scratch'

# Layers passed to --layer. The output paths below are derived from this
# list so a directory label can never disagree with the layer it holds.
# NOTE(review): the previous command line requested layers "2 13 26" but
# wrote to directories labelled L2/L12/L26. The layer list is kept as-is and
# the label now follows it (L13). If layer 12 was the intended one, change
# the value here instead.
layers=(2 13 26)

# One train_data.h5 per layer, in the same order as the layers array.
# NOTE(review): the "2048" suffix is carried over verbatim from the original
# paths; it is not tied to --max_length (1024) here — confirm its meaning.
output_files=()
for layer in "${layers[@]}"; do
  output_files+=("${SCRATCH_DIR}/Pile_github-Qwen2.5-1.5B-L${layer}-mlp-out-2048/train_data.h5")
done

printf '%s\n' '-------------------------------'
# Fall back to the current directory when run outside of Slurm.
cd "${SLURM_SUBMIT_DIR:-.}" || die "cannot cd to submit dir '${SLURM_SUBMIT_DIR:-.}'"
printf '%s\n' "${SLURM_SUBMIT_DIR:-}"
printf 'Running on host %s\n' "$(hostname)"
printf 'Time is %s\n' "$(date)"
printf 'SLURM_NODES are %s\n' "${SLURM_NODELIST:-}"
printf '%s\n' '-------------------------------'
printf '\n\n\n'

export PROCS="${SLURM_CPUS_ON_NODE:-}"

# Set the working directory; the script path below is relative to it, so a
# failed cd must stop the job instead of running from the wrong place.
cd "${PROJECT_DIR}" || die "cannot cd to project dir '${PROJECT_DIR}'"

# Fail fast if the environment cannot be set up, rather than running the
# job with whatever python happens to be on PATH.
module load miniconda || die "module load miniconda failed"
conda activate INTERP || die "conda activate INTERP failed"

# Run the Python script (kept as the last command so its exit status is the
# job's exit status).
python core/data_preprocess.py \
  --max_length 1024 \
  --batch_size 64 \
  --layer "${layers[@]}" \
  --dataset timaeus/pile-github \
  --truncate_to_max_length \
  --output_file "${output_files[@]}"
