#!/bin/bash 

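# hg38: strip the last two fields from every line of each per-chromosome
# chr*_tokenized.txt file, then run seq_split.py against the hg38 FASTA.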

# Directory holding the per-chromosome chr<N>_tokenized.txt inputs; the
# chr<N>_tokens.txt outputs are written next to them.
path='/hg38'
# Abort if the directory is unavailable: otherwise the loop below would read
# and write files in whatever directory the script was launched from.
cd -- "${path}" || { printf 'error: cannot cd to %s\n' "${path}" >&2; exit 1; }

for chr in {1..22} X Y M
do
    file="chr${chr}_tokenized.txt"
    echo "${file}"
    # Fail before the redirection below creates an empty output file for a
    # missing or unreadable input.
    if [[ ! -r "${file}" ]]; then
        printf 'error: cannot read %s/%s\n' "${path}" "${file}" >&2
        exit 1
    fi
    # Drop the last two whitespace-separated fields of every line. NF is
    # clamped at 0 because lines with fewer than two fields (e.g. blank lines)
    # would otherwise set NF negative, which is a fatal error in gawk.
    # Assigning NF rebuilds the record, so remaining fields are joined with
    # single spaces.
    awk '{ NF = (NF > 2) ? NF - 2 : 0; print }' "${file}" > "chr${chr}_tokens.txt" || {
        printf 'error: awk failed on %s/%s\n' "${path}" "${file}" >&2
        exit 1
    }
done

# Reference genome FASTA handed to seq_split.py.
fasta=/datasets/hg38/hg38.fa
# WORK_PATH must be provided by the environment and is the directory from which
# seq_split.py is run. Without the :? guard an unset/empty WORK_PATH turns this
# into a bare `cd`, which silently switches to $HOME.
cd -- "${WORK_PATH:?WORK_PATH must be set to the directory containing seq_split.py}" || {
    printf 'error: cannot cd to %s\n' "${WORK_PATH}" >&2
    exit 1
}
# NOTE(review): --token_path and --output_path are the relative name "hg38",
# so they are presumably resolved against WORK_PATH; confirm this is the same
# directory as the /hg38 token files used earlier in this script.
python seq_split.py --token_path hg38 --fasta "${fasta}" --maxlen 512 --nonoverlap_split --tolerance 0.5 \
                    --output_path hg38
