###################################################################################
###################################################################################


# Session setup for the extract_merge1 experiments: define the code/data
# roots, enter the code directory, activate the virtualenv and export the
# environment variables the Python scripts below rely on.
VIRTUALENV_NAME=extract_merge1

CODE_DIR=/fruitbasket/users/m/project_code/extract_merge1
DATA_DIR=/fruitbasket/users/m/project_data/extract_merge1
export TFDS_DATA_DIR=/fruitbasket/datasets/tensorflow_datasets

# All script paths further down are relative to CODE_DIR.
cd "$CODE_DIR"
# `workon` comes from virtualenvwrapper and must already be loaded in the shell.
workon "$VIRTUALENV_NAME"
# Append CODE_DIR to PYTHONPATH. The previous form ended in a stray "," that
# became part of the last path entry; the ${VAR:+...} form also avoids a
# leading empty entry when PYTHONPATH starts out unset or empty.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${CODE_DIR}"
# Exported a second time after `workon`.
# NOTE(review): presumably in case virtualenv hooks touch it - confirm,
# otherwise the export above is sufficient.
export TFDS_DATA_DIR=/fruitbasket/datasets/tensorflow_datasets

###################################################################################
###################################################################################

# Directory layout of the "ead1" experiment; everything hangs off DATA_DIR.
EXPS_DIR="$DATA_DIR/ead1"
# Expression text files and the CSV datasets derived from them.
DATASETS_DIR="$EXPS_DIR/datasets"
# Output directories of the fine-tuning runs.
MODELS_DIR="$EXPS_DIR/models"
# Fisher files written by scripts1/ogmm/compute_fisher.py.
FISHER_DIR="$EXPS_DIR/fishers"
# Per-example Fisher files and the NMF decompositions computed from them.
PER_EXAMPLE_FISHERS_DIR="$EXPS_DIR/per_example_fishers"

###################################################################################

# EXPRESSIONS_FILE="${DATASETS_DIR}/expressions001.3M.txt"
# EXPRESSIONS_FILE="${DATASETS_DIR}/expressions001.no_rw.3M.txt"
# python scripts1/data_gen/antiderivative/generate_expressions.py \
#     --output_file=$EXPRESSIONS_FILE \
#     --n_examples=3_000_000 \
#     --n_processes=32

# EXPRESSIONS_FILE="${DATASETS_DIR}/expressions001.3M.txt"
# python scripts1/data_gen/antiderivative/generate_expressions.py \
#     --output_file=$EXPRESSIONS_FILE \
#     --n_examples=3_000_000 \
#     --n_processes=1


# generate_expressions() {
#     local identifier=$1
#     #
#     # Limit the process to 20GB of RAM to avoid annoying stuff.
#     ulimit -Sv 20000000
#     python scripts1/data_gen/antiderivative/generate_expressions.py \
#         --output_file="${DATASETS_DIR}/expressions001.3M.${identifier}.txt" \
#         --n_examples=3_000_000 \
#         --n_processes=1

# }


# Run the expression generator for one shard in bounded time slices,
# restarting it after every slice.
#
# Arguments:
#   $1 - shard identifier; the output file is
#        ${DATASETS_DIR}/expressions001.3M.<identifier>.txt
# Globals:
#   DATASETS_DIR (read)
# Side effects:
#   Lowers the soft virtual-memory limit of the calling shell (ulimit -Sv);
#   the limit stays in effect after the function returns.
# Returns:
#   0 as soon as one generator run exits successfully on its own,
#   2 when the identifier is missing,
#   125-127 when the generator could not be started at all.
#   After a timeout or a crash the generator is simply started again, so the
#   loop may have to be interrupted by hand.
generate_expressions() {
    if [ "$#" -lt 1 ]; then
        printf 'usage: generate_expressions IDENTIFIER\n' >&2
        return 2
    fi
    local identifier=$1
    local cycle_duration=5m
    local kill_after=6m
    local flush_every=50
    local rc
    #
    # Cap virtual memory at roughly 20 GB (ulimit -v counts KiB). The value
    # never changes, so it is set once instead of on every iteration.
    ulimit -Sv 20000000
    while true
    do
        # `timeout` signals the generator after $cycle_duration and sends
        # SIGKILL if it is still alive $kill_after later. No --signal is
        # given, so the first signal is timeout's default, SIGTERM.
        # NOTE(review): the original comment said "The SIGINT flushes the
        # output buffer" - confirm whether --signal=INT was intended.
        timeout --kill-after="$kill_after" "$cycle_duration" \
        python scripts1/data_gen/antiderivative/generate_expressions.py \
            --output_file="${DATASETS_DIR}/expressions001.3M.${identifier}.txt" \
            --n_examples=3_000_000 \
            --flush_every="$flush_every" \
            --n_processes=1
        rc=$?
        case "$rc" in
            0)
                # The generator finished by itself (a timeout reports 124).
                return 0
                ;;
            125|126|127)
                # timeout itself failed, or python could not be executed:
                # retrying would only spin without making progress.
                printf 'generate_expressions: cannot start generator (status %s)\n' "$rc" >&2
                return "$rc"
                ;;
        esac
    done
}

# generate_expressions 00
# generate_expressions 01
# generate_expressions 02
# generate_expressions 03

# generate_expressions 04
# generate_expressions 05
# generate_expressions 06
# generate_expressions 07

# generate_expressions 08
# generate_expressions 09
# generate_expressions 10
# generate_expressions 11

###################################################################################


# Run generate_ead_indicator_wolfram3.py on one shard of generated expressions.
#
# Arguments:
#   $1 - shard identifier (selects expressions001.3M.<identifier>.txt)
#   $2 - value for --seconds_per_attempt; also embedded in the output name
#   $3 - value for --max_evaluators
# Globals:
#   DATASETS_DIR, HOME (read)
# Output file:
#   ${DATASETS_DIR}/expressions001_ead.3M.<identifier>.<seconds>s.csv
# Returns:
#   2 when fewer than three arguments are given, otherwise the exit status
#   of the Python script.
wolfram_ead() {
    if [ "$#" -lt 3 ]; then
        printf 'usage: wolfram_ead IDENTIFIER SECONDS MAX_EVALUATORS\n' >&2
        return 2
    fi
    local identifier=$1
    local seconds=$2
    local max_evaluators=$3
    # An earlier version of this launcher called
    # generate_ead_indicator_wolfram.py with --examples_per_chunk=256 and
    # without --flush_every.
    #
    # CUDA_VISIBLE_DEVICES is set to the empty string for this command only,
    # which hides all GPUs from CUDA.
    CUDA_VISIBLE_DEVICES= python scripts1/data_gen/antiderivative/generate_ead_indicator_wolfram3.py \
        --input_file="${DATASETS_DIR}/expressions001.3M.${identifier}.txt" \
        --output_file="${DATASETS_DIR}/expressions001_ead.3M.${identifier}.${seconds}s.csv" \
        --seconds_per_attempt="${seconds}" \
        --max_evaluators="${max_evaluators}" \
        --wolfram_kernel_path="${HOME}/.local/bin/WolframKernel" \
        --examples_per_chunk=8 \
        --flush_every=4
}


# Usage: wolfram_ead <identifier> <seconds_per_attempt> <max_evaluators>
# Only the shard 05 call is active; the commented-out calls are presumably
# earlier runs kept for reference (note that shard 03 used 15 seconds).
# wolfram_ead 00 5 8
# wolfram_ead 01 5 8
# wolfram_ead 02 5 8
# wolfram_ead 03 15 8
# wolfram_ead 04 5 8
wolfram_ead 05 5 8


###################################################################################


# Fine-tune bert-base-uncased on task ead/infix using GPU 0.
#   train split:      expressions001_ead.3M.01.5s
#   validation split: expressions001_ead.3M.00.5s
#   schedule:         35,000 steps, batch size 32, learning rate 2e-5
#   output:           ${MODELS_DIR}/best_base_ead_infix_ds5s01_35k_dev001
# NOTE(review): --from_pt=true presumably loads the base model from PyTorch
# weights - confirm against finetune.py.
CUDA_VISIBLE_DEVICES=0 python ./scripts1/training/finetune.py  \
    --output_path="${MODELS_DIR}/best_base_ead_infix_ds5s01_35k_dev001" \
    --model=bert-base-uncased \
    --from_pt=true \
    --tokenizer=bert-base-uncased \
    --task=ead/infix \
    --split=expressions001_ead.3M.01.5s \
    --val_split=expressions001_ead.3M.00.5s \
    --batch_size=32 \
    --n_steps=35_000 \
    --sequence_length=128 \
    --learning_rate=2e-5 \
    --clipnorm=0.1

###################################################################################

# Write per-example Fishers of the dev001 model to disk.
# MODEL is the model directory name under MODELS_DIR; PER_EXAMPLES_FISHERS is
# the output *file name* (not to be confused with PER_EXAMPLE_FISHERS_DIR).
MODEL=best_base_ead_infix_ds5s01_35k_dev001
PER_EXAMPLES_FISHERS="${MODEL}.ds5s01.no_embeddings.sparse_dynamic_raw.16k.16k.h5"

# 16384 examples from split expressions001_ead.3M.00.5s, with
# --n_fisher_values_per_example=16384 (the "16k.16k" in the file name) and
# embeddings excluded (--include_embeddings=false). Runs on GPU 0.
CUDA_VISIBLE_DEVICES=0 python ./scripts1/data_gen/save_per_example_fishers_to_disk.py  \
    --output_path="${PER_EXAMPLE_FISHERS_DIR}/${PER_EXAMPLES_FISHERS}" \
    --trained_model="${MODELS_DIR}/${MODEL}"  \
    --tokenizer="bert-base-uncased" \
    --from_pt_trained=false \
    --task=ead/infix \
    --n_examples=16384 \
    --batch_size=4 \
    --expectation_wrt_logits=true \
    --flavor=sparse_dynamic_raw \
    --n_fisher_values_per_example=16384 \
    --sequence_length=128 \
    --include_embeddings=false \
    --split=expressions001_ead.3M.00.5s

###################################################################################


# Session setup, repeated here (presumably so this section can be pasted into
# a fresh shell): define the code/data roots, enter the code directory,
# activate the virtualenv and export the variables the Python scripts need.
VIRTUALENV_NAME=extract_merge1

CODE_DIR=/fruitbasket/users/m/project_code/extract_merge1
DATA_DIR=/fruitbasket/users/m/project_data/extract_merge1
export TFDS_DATA_DIR=/fruitbasket/datasets/tensorflow_datasets

# All script paths further down are relative to CODE_DIR.
cd "$CODE_DIR"
# `workon` comes from virtualenvwrapper and must already be loaded in the shell.
workon "$VIRTUALENV_NAME"
# Append CODE_DIR to PYTHONPATH. The previous form ended in a stray "," that
# became part of the last path entry; the ${VAR:+...} form also avoids a
# leading empty entry when PYTHONPATH starts out unset or empty.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${CODE_DIR}"
# Exported a second time after `workon`.
# NOTE(review): presumably in case virtualenv hooks touch it - confirm,
# otherwise the export above is sufficient.
export TFDS_DATA_DIR=/fruitbasket/datasets/tensorflow_datasets


# Directory layout of the "ead1" experiment; everything hangs off DATA_DIR.
EXPS_DIR="$DATA_DIR/ead1"
DATASETS_DIR="$EXPS_DIR/datasets"
MODELS_DIR="$EXPS_DIR/models"
FISHER_DIR="$EXPS_DIR/fishers"
PER_EXAMPLE_FISHERS_DIR="$EXPS_DIR/per_example_fishers"

# Model under analysis and the file names derived from it.
# PER_EXAMPLES_FISHERS is a file name inside PER_EXAMPLE_FISHERS_DIR;
# DECOMP_FILENAME is the NMF output file name built on top of it.
MODEL="best_base_ead_infix_ds5s01_35k_dev001"
PER_EXAMPLES_FISHERS="$MODEL.ds5s01.no_embeddings.sparse_dynamic_raw.16k.16k.h5"
DECOMP_FILENAME="nmf_decomp.per_sub_block.16k.16k.256.$PER_EXAMPLES_FISHERS"

# Shared launcher for ./scripts1/decomp/run_per_subset_nmf.py; the public
# wrappers below differ only in the number of NMF components.
#
# Arguments:
#   $1 - value for CUDA_VISIBLE_DEVICES (GPU to use)
#   $2 - value for --nmf_n_components
#   $3 - value for --nmf_tol
#   $4 - comma-separated sub-block indices for --subset_indices
# Globals (read):
#   PER_EXAMPLE_FISHERS_DIR, PER_EXAMPLES_FISHERS, DECOMP_FILENAME,
#   MODELS_DIR, MODEL
_run_per_subset_nmf_impl () {
    local device=$1
    local n_components=$2
    local tol=$3
    local subset_indices=$4
    #
    CUDA_VISIBLE_DEVICES=$device python ./scripts1/decomp/run_per_subset_nmf.py \
        --output_path="${PER_EXAMPLE_FISHERS_DIR}/${DECOMP_FILENAME}" \
        --per_example_fishers="${PER_EXAMPLE_FISHERS_DIR}/${PER_EXAMPLES_FISHERS}" \
        --n_examples=16384 \
        --start_fisher_index=0 \
        --end_fisher_index=16384 \
        --nmf_n_components="${n_components}" \
        --reduce_threshold=1 \
        --nmf_max_iter=3000 \
        --nmf_tol="${tol}" \
        --pef_embeddings=false \
        --subset_style=per_sub_block \
        --subset_indices="${subset_indices}" \
        --model="${MODELS_DIR}/${MODEL}" \
        --from_pt=false
}

# Run the per-sub-block NMF with 256 components.
# Usage: run_per_subset_nmf DEVICE SUBSET_INDICES
# Returns 2 when an argument is missing, otherwise the script's exit status.
run_per_subset_nmf () {
    if [ "$#" -lt 2 ]; then
        printf 'usage: run_per_subset_nmf DEVICE SUBSET_INDICES\n' >&2
        return 2
    fi
    _run_per_subset_nmf_impl "$1" 256 1e-8 "$2"
}

# Same as run_per_subset_nmf, but with 128 components.
# Usage: run_per_subset_nmf128 DEVICE SUBSET_INDICES
run_per_subset_nmf128 () {
    if [ "$#" -lt 2 ]; then
        printf 'usage: run_per_subset_nmf128 DEVICE SUBSET_INDICES\n' >&2
        return 2
    fi
    _run_per_subset_nmf_impl "$1" 128 1e-8 "$2"
}


# 25 sub-blocks total.
#
# Usage of both launchers: <device> <comma-separated sub-block indices>.
# The 256-component runs spread the sub-blocks over devices 0-3 by
# index modulo 4; sub-blocks 23 and 24 are left out there (see the trailing
# "#,24" / "#,23") and are run with 128 components instead, and sub-blocks
# 0 and 10 are additionally run with 128 components.
# NOTE(review): no call is backgrounded, so executed as a script these run one
# after another; presumably each line was pasted into its own terminal - confirm.
# NOTE(review): every call writes to the same DECOMP_FILENAME, whose name says
# "256" even for the 128-component runs - confirm that this is intended.

run_per_subset_nmf 0 0,4,8,12,16,20 #,24
run_per_subset_nmf 1 1,5,9,13,17,21
run_per_subset_nmf 2 2,6,10,14,18,22
run_per_subset_nmf 3 3,7,11,15,19 #,23

run_per_subset_nmf128 0 24
run_per_subset_nmf128 1 23

run_per_subset_nmf128 0 0
run_per_subset_nmf128 1 10


###################################################################################
###################################################################################


# Build dataset ead_ds_001 with clean_up_ead_datasets.py: shards 02 and 01
# (5 s) are the train files, shard 00 (5 s) is the validation file.
# CUDA_VISIBLE_DEVICES is empty, which hides all GPUs from CUDA.
# NOTE(review): later commands refer to splits ead_ds_001.train and
# ead_ds_001.validation, so the script presumably derives per-split files
# from --output_path - confirm.
CUDA_VISIBLE_DEVICES= python ./scripts1/data_gen/antiderivative/clean_up_ead_datasets.py \
    --train_files="${DATASETS_DIR}/expressions001_ead.3M.02.5s.csv,${DATASETS_DIR}/expressions001_ead.3M.01.5s.csv" \
    --validation_files="${DATASETS_DIR}/expressions001_ead.3M.00.5s.csv" \
    --output_path="${DATASETS_DIR}/ead_ds_001.csv"

###################################################################################


# Fine-tune bert-base-uncased on task ead/infix using GPU 0.
#   train split:      ead_ds_001.train
#   validation split: ead_ds_001.validation
#   schedule:         150,000 steps, batch size 32, learning rate 2e-5
#   output:           ${MODELS_DIR}/best_base_ead_infix_150k_dev002
CUDA_VISIBLE_DEVICES=0 python ./scripts1/training/finetune.py  \
    --output_path="${MODELS_DIR}/best_base_ead_infix_150k_dev002" \
    --model=bert-base-uncased \
    --from_pt=true \
    --tokenizer=bert-base-uncased \
    --task=ead/infix \
    --split=ead_ds_001.train \
    --val_split=ead_ds_001.validation \
    --batch_size=32 \
    --n_steps=150_000 \
    --sequence_length=128 \
    --learning_rate=2e-5 \
    --clipnorm=0.1

###################################################################################

# Write per-example Fishers of the dev002 model to disk.
# MODEL is the model directory name under MODELS_DIR; PER_EXAMPLES_FISHERS is
# the output *file name* (not to be confused with PER_EXAMPLE_FISHERS_DIR).
MODEL=best_base_ead_infix_150k_dev002
PER_EXAMPLES_FISHERS="${MODEL}.val.no_embeddings.sparse_dynamic_raw.16k.16k.h5"

# 16384 examples from split ead_ds_001.validation (the "val" in the file
# name), with --n_fisher_values_per_example=16384 and embeddings excluded.
# Runs on GPU 0.
CUDA_VISIBLE_DEVICES=0 python ./scripts1/data_gen/save_per_example_fishers_to_disk.py  \
    --output_path="${PER_EXAMPLE_FISHERS_DIR}/${PER_EXAMPLES_FISHERS}" \
    --trained_model="${MODELS_DIR}/${MODEL}"  \
    --tokenizer="bert-base-uncased" \
    --from_pt_trained=false \
    --task=ead/infix \
    --n_examples=16384 \
    --batch_size=4 \
    --expectation_wrt_logits=true \
    --flavor=sparse_dynamic_raw \
    --n_fisher_values_per_example=16384 \
    --sequence_length=128 \
    --include_embeddings=false \
    --split=ead_ds_001.validation


###################################################################################

# Model under analysis and the file names derived from it.
# PER_EXAMPLES_FISHERS is a file name inside PER_EXAMPLE_FISHERS_DIR;
# DECOMP_FILENAME is the NMF output file name built on top of it.
MODEL="best_base_ead_infix_150k_dev002"
PER_EXAMPLES_FISHERS="$MODEL.val.no_embeddings.sparse_dynamic_raw.16k.16k.h5"
DECOMP_FILENAME="nmf_decomp.per_sub_block.16k.16k.256.$PER_EXAMPLES_FISHERS"

# Shared launcher for ./scripts1/decomp/run_per_subset_nmf.py; the public
# wrappers below differ only in the number of NMF components and tolerance.
#
# Arguments:
#   $1 - value for CUDA_VISIBLE_DEVICES (GPU to use)
#   $2 - value for --nmf_n_components
#   $3 - value for --nmf_tol
#   $4 - comma-separated sub-block indices for --subset_indices
# Globals (read):
#   PER_EXAMPLE_FISHERS_DIR, PER_EXAMPLES_FISHERS, DECOMP_FILENAME,
#   MODELS_DIR, MODEL
_run_per_subset_nmf_impl () {
    local device=$1
    local n_components=$2
    local tol=$3
    local subset_indices=$4
    #
    CUDA_VISIBLE_DEVICES=$device python ./scripts1/decomp/run_per_subset_nmf.py \
        --output_path="${PER_EXAMPLE_FISHERS_DIR}/${DECOMP_FILENAME}" \
        --per_example_fishers="${PER_EXAMPLE_FISHERS_DIR}/${PER_EXAMPLES_FISHERS}" \
        --n_examples=16384 \
        --start_fisher_index=0 \
        --end_fisher_index=16384 \
        --nmf_n_components="${n_components}" \
        --reduce_threshold=1 \
        --nmf_max_iter=3000 \
        --nmf_tol="${tol}" \
        --pef_embeddings=false \
        --subset_style=per_sub_block \
        --subset_indices="${subset_indices}" \
        --model="${MODELS_DIR}/${MODEL}" \
        --from_pt=false
}

# Run the per-sub-block NMF with 256 components and tolerance 1e-8.
# Usage: run_per_subset_nmf DEVICE SUBSET_INDICES
# Returns 2 when an argument is missing, otherwise the script's exit status.
run_per_subset_nmf () {
    if [ "$#" -lt 2 ]; then
        printf 'usage: run_per_subset_nmf DEVICE SUBSET_INDICES\n' >&2
        return 2
    fi
    _run_per_subset_nmf_impl "$1" 256 1e-8 "$2"
}

# Variant with 32 components and the looser tolerance 1e-6.
# Usage: run_per_subset_nmf32 DEVICE SUBSET_INDICES
run_per_subset_nmf32 () {
    if [ "$#" -lt 2 ]; then
        printf 'usage: run_per_subset_nmf32 DEVICE SUBSET_INDICES\n' >&2
        return 2
    fi
    _run_per_subset_nmf_impl "$1" 32 1e-6 "$2"
}

# The calls below use run_per_subset_nmf256, which was never defined in this
# file (they failed with "command not found"). run_per_subset_nmf already
# runs with 256 components, so the explicit name simply forwards to it.
run_per_subset_nmf256 () {
    run_per_subset_nmf "$@"
}

# Usage of all launchers: <device> <comma-separated sub-block indices>.
# 256-component runs, sub-blocks spread over devices 0-3 by index modulo 4.
run_per_subset_nmf 0 0,4,8,12,16,20,24
run_per_subset_nmf 1 1,5,9,13,17,21
run_per_subset_nmf 2 2,6,10,14,18,22
run_per_subset_nmf 3 3,7,11,15,19,23

# Additional runs of individual sub-blocks on device 0.
run_per_subset_nmf256 0 0
run_per_subset_nmf32 0 24
run_per_subset_nmf256 0 10,11
run_per_subset_nmf256 0 0,12

###################################################################################
###################################################################################


# Build dataset ead_ds_002 with clean_up_ead_datasets.py: shard 04 (5 s) is
# the train file, shard 05 (5 s) is the validation file.
# CUDA_VISIBLE_DEVICES is empty, which hides all GPUs from CUDA.
CUDA_VISIBLE_DEVICES= python ./scripts1/data_gen/antiderivative/clean_up_ead_datasets.py \
    --train_files="${DATASETS_DIR}/expressions001_ead.3M.04.5s.csv" \
    --validation_files="${DATASETS_DIR}/expressions001_ead.3M.05.5s.csv" \
    --output_path="${DATASETS_DIR}/ead_ds_002.csv"

###################################################################################


# Fine-tune bert-base-uncased on task ead/infix using GPU 0.
#   train split:      ead_ds_002.train
#   validation split: ead_ds_002.validation
#   schedule:         75,000 steps, batch size 32, learning rate 2e-5
#   output:           ${MODELS_DIR}/best_base_ead_infix_75k_dev003
CUDA_VISIBLE_DEVICES=0 python ./scripts1/training/finetune.py  \
    --output_path="${MODELS_DIR}/best_base_ead_infix_75k_dev003" \
    --model=bert-base-uncased \
    --from_pt=true \
    --tokenizer=bert-base-uncased \
    --task=ead/infix \
    --split=ead_ds_002.train \
    --val_split=ead_ds_002.validation \
    --batch_size=32 \
    --n_steps=75_000 \
    --sequence_length=128 \
    --learning_rate=2e-5 \
    --clipnorm=0.1

###################################################################################

# Write per-example Fishers of the dev003 model to disk.
# MODEL is the model directory name under MODELS_DIR; PER_EXAMPLES_FISHERS is
# the output *file name* (not to be confused with PER_EXAMPLE_FISHERS_DIR).
MODEL=best_base_ead_infix_75k_dev003
PER_EXAMPLES_FISHERS="${MODEL}.val.no_embeddings_pooler.sparse_dynamic_raw.16k.16k.h5"

# 16384 examples from split ead_ds_002.validation. Unlike the earlier runs
# this one also passes --include_pooler=false, hence "no_embeddings_pooler"
# in the file name. Runs on GPU 0.
CUDA_VISIBLE_DEVICES=0 python ./scripts1/data_gen/save_per_example_fishers_to_disk.py  \
    --output_path="${PER_EXAMPLE_FISHERS_DIR}/${PER_EXAMPLES_FISHERS}" \
    --trained_model="${MODELS_DIR}/${MODEL}"  \
    --tokenizer="bert-base-uncased" \
    --from_pt_trained=false \
    --task=ead/infix \
    --n_examples=16384 \
    --batch_size=4 \
    --expectation_wrt_logits=true \
    --flavor=sparse_dynamic_raw \
    --n_fisher_values_per_example=16384 \
    --sequence_length=128 \
    --include_embeddings=false \
    --include_pooler=false \
    --split=ead_ds_002.validation


###################################################################################


# Session setup, repeated here (presumably so this section can be pasted into
# a fresh shell): define the code/data roots, enter the code directory,
# activate the virtualenv and export the variables the Python scripts need.
VIRTUALENV_NAME=extract_merge1

CODE_DIR=/fruitbasket/users/m/project_code/extract_merge1
DATA_DIR=/fruitbasket/users/m/project_data/extract_merge1
export TFDS_DATA_DIR=/fruitbasket/datasets/tensorflow_datasets

# All script paths further down are relative to CODE_DIR.
cd "$CODE_DIR"
# `workon` comes from virtualenvwrapper and must already be loaded in the shell.
workon "$VIRTUALENV_NAME"
# Append CODE_DIR to PYTHONPATH. The previous form ended in a stray "," that
# became part of the last path entry; the ${VAR:+...} form also avoids a
# leading empty entry when PYTHONPATH starts out unset or empty.
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${CODE_DIR}"
# Exported a second time after `workon`.
# NOTE(review): presumably in case virtualenv hooks touch it - confirm,
# otherwise the export above is sufficient.
export TFDS_DATA_DIR=/fruitbasket/datasets/tensorflow_datasets


# Directory layout of the "ead1" experiment; everything hangs off DATA_DIR.
EXPS_DIR="$DATA_DIR/ead1"
DATASETS_DIR="$EXPS_DIR/datasets"
MODELS_DIR="$EXPS_DIR/models"
FISHER_DIR="$EXPS_DIR/fishers"
PER_EXAMPLE_FISHERS_DIR="$EXPS_DIR/per_example_fishers"

# Model under analysis and the file names derived from it.
# PER_EXAMPLES_FISHERS is a file name inside PER_EXAMPLE_FISHERS_DIR;
# DECOMP_FILENAME is the NMF output file name built on top of it.
MODEL="best_base_ead_infix_75k_dev003"
PER_EXAMPLES_FISHERS="$MODEL.val.no_embeddings_pooler.sparse_dynamic_raw.16k.16k.h5"
DECOMP_FILENAME="nmf_decomp.per_sub_block.16k.16k.256.$PER_EXAMPLES_FISHERS"

# Run ./scripts1/decomp/run_per_subset_nmf.py on the given sub-blocks with a
# caller-chosen number of NMF components. This version replaces the earlier
# two-argument run_per_subset_nmf and additionally passes --pef_pooler=false.
#
# Arguments:
#   $1 - value for CUDA_VISIBLE_DEVICES (GPU to use)
#   $2 - value for --nmf_n_components
#   $3 - comma-separated sub-block indices for --subset_indices
# Globals (read):
#   PER_EXAMPLE_FISHERS_DIR, PER_EXAMPLES_FISHERS, DECOMP_FILENAME,
#   MODELS_DIR, MODEL
# Returns:
#   2 when fewer than three arguments are given (an old-style two-argument
#   call would otherwise start a job with an empty --subset_indices),
#   otherwise the exit status of the Python script.
run_per_subset_nmf () {
    if [ "$#" -lt 3 ]; then
        printf 'usage: run_per_subset_nmf DEVICE N_COMPONENTS SUBSET_INDICES\n' >&2
        return 2
    fi
    local device=$1
    local n_components=$2
    local subset_indices=$3
    #
    CUDA_VISIBLE_DEVICES=$device python ./scripts1/decomp/run_per_subset_nmf.py \
        --output_path="${PER_EXAMPLE_FISHERS_DIR}/${DECOMP_FILENAME}" \
        --per_example_fishers="${PER_EXAMPLE_FISHERS_DIR}/${PER_EXAMPLES_FISHERS}" \
        --n_examples=16384 \
        --start_fisher_index=0 \
        --end_fisher_index=16384 \
        --nmf_n_components="${n_components}" \
        --reduce_threshold=1 \
        --nmf_max_iter=3000 \
        --nmf_tol=1e-8 \
        --pef_embeddings=false \
        --pef_pooler=false \
        --subset_style=per_sub_block \
        --subset_indices="${subset_indices}" \
        --model="${MODELS_DIR}/${MODEL}" \
        --from_pt=false
}

# Usage: run_per_subset_nmf <device> <n_components> <sub-block indices>
# The two commented-out calls cover sub-blocks 0-22 with 256 components
# (presumably already run); the active call runs sub-block 23 with 64
# components on device 0.
# run_per_subset_nmf 0 256 0,4,8,12,16,20,2,6,10,14,18,22
# run_per_subset_nmf 1 256 1,5,9,13,17,21,3,7,11,15,19
run_per_subset_nmf 0 64 23


###################################################################################
###################################################################################


# Train on ead_ds_002 without --model/--from_pt; the architecture is given by
# --hidden_size=768 and --num_hidden_layers=12 instead.
# NOTE(review): as the output name suggests, this presumably starts from
# randomly initialised weights - confirm against finetune.py.
#   schedule: 150,000 steps, batch size 32, learning rate 3e-5
#   output:   ${MODELS_DIR}/best_base_ead_infix_150k.random_init.dev001
CUDA_VISIBLE_DEVICES=0 python ./scripts1/training/finetune.py  \
    --output_path="${MODELS_DIR}/best_base_ead_infix_150k.random_init.dev001" \
    --hidden_size=768 \
    --num_hidden_layers=12 \
    --tokenizer=bert-base-uncased \
    --task=ead/infix \
    --split=ead_ds_002.train \
    --val_split=ead_ds_002.validation \
    --batch_size=32 \
    --n_steps=150_000 \
    --sequence_length=128 \
    --learning_rate=3e-5 \
    --clipnorm=0.1


# Second run without --model/--from_pt (architecture given by --hidden_size
# and --num_hidden_layers). Compared with random_init.dev001 it uses
# batch size 128 instead of 32 and 75,000 steps instead of 150,000.
#   output: ${MODELS_DIR}/best_base_ead_infix_75k.random_init.dev002
CUDA_VISIBLE_DEVICES=0 python ./scripts1/training/finetune.py  \
    --output_path="${MODELS_DIR}/best_base_ead_infix_75k.random_init.dev002" \
    --hidden_size=768 \
    --num_hidden_layers=12 \
    --tokenizer=bert-base-uncased \
    --task=ead/infix \
    --split=ead_ds_002.train \
    --val_split=ead_ds_002.validation \
    --batch_size=128 \
    --n_steps=75_000 \
    --sequence_length=128 \
    --learning_rate=3e-5 \
    --clipnorm=0.1


######################################################
######################################################

# Generate the two "scomp exex" source files with generate_scomp_ds.py
# (--scomp=exex), 1,000,000 examples each. CUDA_VISIBLE_DEVICES is empty,
# which hides all GPUs from CUDA.
#
# File .0 is generated from the ead_ds_002 train expressions.
CUDA_VISIBLE_DEVICES= python ./scripts1/data_gen/antiderivative/generate_scomp_ds.py \
    --scomp=exex \
    --expressions_csv="${DATASETS_DIR}/ead_ds_002.train.csv" \
    --output_file="${DATASETS_DIR}/scomp_exex_001.0.csv" \
    --n_examples=1_000_000

# File .1 is generated from the ead_ds_002 validation expressions.
CUDA_VISIBLE_DEVICES= python ./scripts1/data_gen/antiderivative/generate_scomp_ds.py \
    --scomp=exex \
    --expressions_csv="${DATASETS_DIR}/ead_ds_002.validation.csv" \
    --output_file="${DATASETS_DIR}/scomp_exex_001.1.csv" \
    --n_examples=1_000_000

######################################################

# Build dataset scomp_exex_001 with clean_up_ead_datasets.py:
# scomp_exex_001.0.csv is the train file, scomp_exex_001.1.csv the
# validation file. CUDA_VISIBLE_DEVICES is empty (no GPUs visible to CUDA).
CUDA_VISIBLE_DEVICES= python ./scripts1/data_gen/antiderivative/clean_up_ead_datasets.py \
    --train_files="${DATASETS_DIR}/scomp_exex_001.0.csv" \
    --validation_files="${DATASETS_DIR}/scomp_exex_001.1.csv" \
    --output_path="${DATASETS_DIR}/scomp_exex_001.csv"

######################################################

# Continue training the dev003 model (loaded from MODELS_DIR, with
# --from_pt=false) on the scomp_exex_001 splits using GPU 0.
#   schedule: 15,000 steps, batch size 32, learning rate 2e-5
#   output:   ${MODELS_DIR}/best_base_ead_infix_75k_dev003_to_scomp_exex_001_15k
# The task flag stays ead/infix even though the splits are scomp_exex_001.
CUDA_VISIBLE_DEVICES=0 python ./scripts1/training/finetune.py  \
    --output_path="${MODELS_DIR}/best_base_ead_infix_75k_dev003_to_scomp_exex_001_15k" \
    --model="${MODELS_DIR}/best_base_ead_infix_75k_dev003" \
    --from_pt=false \
    --tokenizer=bert-base-uncased \
    --task=ead/infix \
    --split=scomp_exex_001.train \
    --val_split=scomp_exex_001.validation \
    --batch_size=32 \
    --n_steps=15_000 \
    --sequence_length=128 \
    --learning_rate=2e-5 \
    --clipnorm=0.1

######################################################

# Compute the Fisher of one checkpoint of the scomp-exex fine-tuned model on
# split scomp_exex_001.train, via ./scripts1/ogmm/compute_fisher.py.
#
# Arguments:
#   $1 - value for CUDA_VISIBLE_DEVICES (GPU to use)
#   $2 - epoch; selects the model directory "..._15k_epoch<epoch>" and is
#        embedded in the output file name
#   $3 - value for --n_examples; also embedded in the output file name
# Globals (read):
#   MODELS_DIR, FISHER_DIR
# Returns:
#   2 when fewer than three arguments are given, otherwise the exit status
#   of the Python script.
# NOTE(review): the "_epoch<N>" model directories are presumably checkpoints
# written by the scomp_exex fine-tuning run - confirm against finetune.py.
compute_fisher_exex() {
    if [ "$#" -lt 3 ]; then
        printf 'usage: compute_fisher_exex DEVICE EPOCH N_EXAMPLES\n' >&2
        return 2
    fi
    local device=$1
    local epoch=$2
    local n_examples=$3
    #
    CUDA_VISIBLE_DEVICES=$device python ./scripts1/ogmm/compute_fisher.py \
        --model="${MODELS_DIR}/best_base_ead_infix_75k_dev003_to_scomp_exex_001_15k_epoch${epoch}" \
        --from_pt=false \
        --task=ead/infix \
        --split=scomp_exex_001.train \
        --batch_size=16 \
        --sequence_length=128 \
        --tokenizer=bert-base-uncased \
        --fisher_path="${FISHER_DIR}/best_base_ead_scomp_exex_15k_001.exex_train.epoch${epoch}.${n_examples}_examples.h5" \
        --n_examples="${n_examples}" \
        --skip=500_000
}
    
# Device 0, epoch-9 checkpoint, 8096 examples.
# NOTE(review): 8096 is not a power of two (8192 is) - confirm it is intended.
compute_fisher_exex 0 9 8096

######################################################

# Compute the Fisher of the dev003 model on split ead_ds_002.train, via
# ./scripts1/ogmm/compute_fisher.py.
#
# Arguments:
#   $1 - value for CUDA_VISIBLE_DEVICES (GPU to use)
#   $2 - value for --n_examples; also embedded in the output file name
# Globals (read):
#   MODELS_DIR, FISHER_DIR
# Returns:
#   2 when fewer than two arguments are given, otherwise the exit status of
#   the Python script.
compute_fisher_general() {
    if [ "$#" -lt 2 ]; then
        printf 'usage: compute_fisher_general DEVICE N_EXAMPLES\n' >&2
        return 2
    fi
    local device=$1
    local n_examples=$2
    #
    CUDA_VISIBLE_DEVICES=$device python ./scripts1/ogmm/compute_fisher.py \
        --model="${MODELS_DIR}/best_base_ead_infix_75k_dev003" \
        --from_pt=false \
        --task=ead/infix \
        --split=ead_ds_002.train \
        --batch_size=16 \
        --sequence_length=128 \
        --tokenizer=bert-base-uncased \
        --fisher_path="${FISHER_DIR}/best_base_ead_infix_75k_dev003.ead_ds_002_train.${n_examples}_examples.h5" \
        --n_examples="${n_examples}"
}
    
# Device 0, 8096 examples.
# NOTE(review): 8096 is not a power of two (8192 is) - confirm it is intended.
compute_fisher_general 0 8096


######################################################

# Then run the clean-up script on the datasets to deduplicate train/val examples.
# They need to be made accessible via the datasets task_ri (this may already be done).
#
# Then start running experiments. The simplest baseline is to fine-tune the trained
# EAD model, then compute its Fisher, then merge it with the original model.
#
# There are lots of variations on this, and other methods, to try as well!


###################################################################################
###################################################################################


# Copy one labelled dataset file from the remote server to the local machine.
#
# The remote login name is kept in REMOTE_USER instead of USER: USER is the
# standard login-name variable and is normally exported, so assigning to it
# would change it for every later command in this shell.
REMOTE_USER=m
SERVER=banana

# Exactly one FILENAME is active; the others are kept for convenience.
# FILENAME="expressions001_ead.3M.00.5s.csv"
# FILENAME="expressions001_ead.3M.01.5s.csv"
# FILENAME="expressions001_ead.3M.02.5s.csv"
FILENAME="expressions001_ead.3M.04.5s.csv"

# -a (archive) already implies -r, so the former "-ra" is written as "-a".
# NOTE(review): the --exclude patterns look copied from a directory sync; a
# single-file transfer gives them nothing to match - confirm they are needed.
rsync -a -e ssh \
    --exclude "*/__pycache__" \
    --exclude "*/.git" \
    "${REMOTE_USER}@${SERVER}.cs.unc.edu:/fruitbasket/users/m/project_data/extract_merge1/ead1/datasets/${FILENAME}" \
    "${HOME}/Desktop/projects_data/extract_merge1/antiderivative/datasets/${FILENAME}"


###################################################################################
###################################################################################
#
# Training log from fine-tuning "${MODELS_DIR}/best_base_ead_infix_ds5s01_35k_dev001"
# (10 epochs of 3500 steps each).
#
# Epoch 1/10
# 3500/3500 [==============================] - 752s 209ms/step - loss: 0.2473 - sparse_categorical_accuracy: 0.8973 - val_loss: 0.1126 - val_sparse_categorical_accuracy: 0.9683
# Epoch 2/10
# 3500/3500 [==============================] - 726s 207ms/step - loss: 0.1002 - sparse_categorical_accuracy: 0.9684 - val_loss: 0.0783 - val_sparse_categorical_accuracy: 0.9751
# Epoch 3/10
# 3500/3500 [==============================] - 726s 207ms/step - loss: 0.0887 - sparse_categorical_accuracy: 0.9726 - val_loss: 0.0875 - val_sparse_categorical_accuracy: 0.9756
# Epoch 4/10
# 3500/3500 [==============================] - 729s 208ms/step - loss: 0.0906 - sparse_categorical_accuracy: 0.9737 - val_loss: 0.0798 - val_sparse_categorical_accuracy: 0.9800
# Epoch 5/10
# 3500/3500 [==============================] - 722s 206ms/step - loss: 0.0816 - sparse_categorical_accuracy: 0.9779 - val_loss: 0.0960 - val_sparse_categorical_accuracy: 0.9805
# Epoch 6/10
# 3500/3500 [==============================] - 724s 207ms/step - loss: 0.0824 - sparse_categorical_accuracy: 0.9791 - val_loss: 0.0826 - val_sparse_categorical_accuracy: 0.9814
# Epoch 7/10
# 3500/3500 [==============================] - 723s 206ms/step - loss: 0.0706 - sparse_categorical_accuracy: 0.9814 - val_loss: 0.0700 - val_sparse_categorical_accuracy: 0.9834
# Epoch 8/10
# 3500/3500 [==============================] - 723s 207ms/step - loss: 0.0661 - sparse_categorical_accuracy: 0.9821 - val_loss: 0.0756 - val_sparse_categorical_accuracy: 0.9863
# Epoch 9/10
# 3500/3500 [==============================] - 721s 206ms/step - loss: 0.0611 - sparse_categorical_accuracy: 0.9839 - val_loss: 0.0708 - val_sparse_categorical_accuracy: 0.9819
# Epoch 10/10
# 3500/3500 [==============================] - 724s 207ms/step - loss: 0.0620 - sparse_categorical_accuracy: 0.9855 - val_loss: 0.0602 - val_sparse_categorical_accuracy: 0.9849
