#!/bin/bash
# SLURM batch job "eval_Vanilla_CC3M": generates samples with t2i.generate_t2i
# and then runs t2i.evaluation.eval on them (both inside a Singularity image).
#
# Resources requested below: one exclusive node, 4 tasks and 4 GPUs on that
# node, 4 hours of wall time, charged to account bsc70 under QOS acc_bsccs.
#
# NOTE(review): stdout/stderr go to the relative directory slurm_output/.
# Slurm is not expected to create that directory; make sure it exists in the
# submission directory before running sbatch - confirm on your cluster.
# The fixed file names out.txt/err.txt are reused by every submission.
#
# Keep all #SBATCH lines above the first executable command; sbatch stops
# reading directives once it reaches one.
#SBATCH -J eval_Vanilla_CC3M
#SBATCH --exclusive
#SBATCH --account=bsc70
#SBATCH --qos=acc_bsccs
#SBATCH --output=slurm_output/out.txt
#SBATCH --error=slurm_output/err.txt
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=4
#SBATCH --time=04:00:00
#SBATCH --gres=gpu:4

# Log when the job body starts running.
printf 'START TIME: %s\n' "$(date)"

# Start from a clean module environment and load the container runtime.
# Optional override, currently disabled:
# export SINGULARITY_TMPDIR=/dev/shm/
module purge
module load singularity

# Rendezvous settings for PyTorch distributed: this host acts as the master.
# MASTER_PORT must be free on this node; WORLD_SIZE is taken from the number
# of SLURM tasks allocated to the job.
MASTER_ADDR="$(hostname)"
MASTER_PORT=29500
WORLD_SIZE="$SLURM_NTASKS"
export MASTER_ADDR MASTER_PORT WORLD_SIZE

# NCCL tuning: verbose logs for debugging; peer-to-peer transport is turned
# off (the original author notes this prevents certain deadlocks), and the
# InfiniBand transport is turned off as well.
NCCL_DEBUG=INFO
NCCL_P2P_DISABLE=1
NCCL_IB_DISABLE=1
export NCCL_DEBUG NCCL_P2P_DISABLE NCCL_IB_DISABLE

# Weights & Biases runs in offline mode.
export WANDB_MODE=offline

# Stage 1 - sample generation.
# Runs the t2i.generate_t2i module through `accelerate launch --multi-gpu`
# inside the Singularity image; --nv exposes the host NVIDIA GPUs to the
# container. Checkpoint, VAE, dataset and output locations are passed as
# absolute GPFS paths.
#
# The option list ends on its last flag with no trailing line-continuation,
# so the command does not depend on a following blank line to terminate it.
singularity exec --nv /gpfs/projects/bsc70/heka/singularity/synthetic-data/gm_18_08.sif \
    accelerate launch \
    --multi-gpu \
    -m t2i.generate_t2i \
    --ckpt="/gpfs/projects/bsc70/bsc193242/t2i_models/MMDiT-Vanilla-CFG-CC3M/checkpoints/0400000.pt" \
    --sample-dir="/gpfs/projects/bsc70/bsc193242/t2i_models/MMDiT-Vanilla-CFG-CC3M" \
    --vae-ckpt="/gpfs/projects/bsc70/bsc193242/Models/sd-vae-ft-mse" \
    --dataset-path="/gpfs/projects/bsc70/bsc193242/Data/coco256_features" \
    --per-proc-batch-size=64 \
    --num-fid-samples=40192 \
    --cfg-scale=4.0 \
    --projector-embed-dims="768" \
    --num-steps=50
gen_status=$?

# Stop here when generation failed: anything that runs afterwards would work
# on a missing or stale sample directory. The launcher's exit status is
# propagated so SLURM records the job as failed.
if (( gen_status != 0 )); then
    echo "ERROR: sample generation failed with exit status ${gen_status}; aborting job." >&2
    exit "${gen_status}"
fi

# Stage 2 - evaluation.
# Runs the t2i.evaluation.eval module as a single process
# (`accelerate launch --num-processes=1`) inside the same Singularity image.
# --path_original points at a precomputed .npz statistics file and
# --path_generated at a directory under the generation stage's sample dir.
# NOTE(review): the directory name (size 256, cfg 4.0, seed 0, ode) is
# presumably derived by the generator from its settings - keep it in sync
# with the generation flags.
#
# The option list ends on its last flag with no trailing line-continuation:
# a dangling backslash there would absorb whatever is appended after this
# command (or leave a stray argument at end-of-file).
singularity exec --nv /gpfs/projects/bsc70/heka/singularity/synthetic-data/gm_18_08.sif \
    accelerate launch \
    --num-processes=1 \
    -m t2i.evaluation.eval \
    --path_original="/gpfs/projects/bsc70/bsc193242/Data/fid_stats_mscoco256_val.npz" \
    --path_generated="/gpfs/projects/bsc70/bsc193242/t2i_models/MMDiT-Vanilla-CFG-CC3M/coco-size-256-cfg-4.0-seed-0-ode" \
    --batch_size=64 \
    --dims=2048 \
    --num_workers=8