# Root of the local PopT checkout. This is a placeholder: edit it before
# running. It is exported, so every child process started below sees it.
export REPO_DIR="/path/to/PopT-Draft"

# Create BrainBERT encoded pretraining data 
# output_directory is passed to the writer as data_prep.output_directory and
# is referenced again further down in this file by the manifest step.
output_directory="${REPO_DIR}/saved_examples/all_pretrain_trials"

# Each override is double-quoted so that a REPO_DIR containing spaces or glob
# characters still arrives as exactly one argument per override.
# The \$ in the hydra.run.dir override keeps the shell from expanding
# ${now:...}; the literal text is handed through to the Python program.
python3 -m data.write_nsp_pretraining_data \
    "+preprocessor=multi_elec_spec_pretrained" \
    "++preprocessor.upstream_ckpt=${REPO_DIR}/pretrained_weights/stft_large_pretrained.pth" \
    "+data=pretraining_subject_data_template" \
    "+data_prep=pretrain_multi_subj_multi_chan_template" \
    "++data_prep.task_name=nsp_pretraining" \
    "++data_prep.brain_runs=${REPO_DIR}/trial_selections/all_pretrain_trials.json" \
    "++data_prep.electrodes=${REPO_DIR}/electrode_selections/clean_laplacian.json" \
    "++data_prep.output_directory=${output_directory}" \
    "++data.cached_transcript_aligns=${REPO_DIR}/semantics/saved_aligns" \
    "++data.duration=5" \
    "++hydra.run.dir=outputs/\${now:%Y-%m-%d}/\${now:%H-%M-%S}_all_pretrain_trials"



## Create combined manifest tenc for PopT pretraining 
# Reads from ${output_directory} (assigned earlier in this file) and writes to
# manifest_directory. Overrides are quoted so paths containing spaces or glob
# characters are passed as single arguments.
manifest_directory="${REPO_DIR}/saved_examples/all_pretrain_trials_nsp"
python3 -m data.make_pretrain_replace_manifest \
    "+data_prep=combine_nsp_datasets" \
    "++data_prep.source_dir=${output_directory}" \
    "++data_prep.output_dir=${manifest_directory}" \
    "++data_prep.task=nsp_negative_any"
