#!/bin/bash
# This bash script relies on HyperQueue. With HyperQueue, we first create a queue of tasks,
# and then create a SLURM job that 'fetches' each task in the queue; this is efficient and fast.

# first on the login node do:
# module load 2023 (specific module for my cluster)
# module load HyperQueue/0.19.0
# launch the hyperqueue server:
# nohup hq server start &

# submit the task array to the queue; this does not start execution, it only creates a queue of hq_task.sh tasks:
# hq submit --array 0-1 --stdout=none --stderr=none --pin taskset --cpus=1 --time-limit=45min hq_task.sh

# next, you can submit THIS file using sbatch
# sbatch hyperq_job_hpc.sh

# during or after the run, you can check how many tasks are still waiting in the queue (in this example for job 1)
# hq task list 1 | grep "WAITING" | wc -l

# when the job is done, you can stop the hyperqueue server, but you can also keep it running and add a new queue:
# hq server stop

# Slurm allocation for the HyperQueue workers: one node in the 'genoa'
# partition, 24 tasks on that node, 5 minutes of walltime.
# Keep these directives ahead of the first executable command; sbatch stops
# reading #SBATCH lines once it reaches one.
# NOTE(review): the walltime below (5 min) equals the worker idle timeout and is
# much shorter than the 45min task time limit used in the submit example in the
# header of this file -- confirm the intended walltime before real use.
#SBATCH --job-name=HQ_Workers
#SBATCH --partition=genoa
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=24
#SBATCH --time=00:05:00

# Software environment: the cluster-specific 2023 module first, then the same
# HyperQueue version that the header of this file uses to start the server.
module load 2023
module load HyperQueue/0.19.0
# NOTE(review): nothing in this script calls MPI itself; presumably this module
# is loaded for the benefit of the queued tasks -- confirm it is still needed.
module load OpenMPI/4.1.5-GCC-12.3.0

# Launch the HyperQueue workers inside the allocation.
# srun runs the command once per allocated task, i.e. 24 `hq worker start`
# processes on the single node requested above (not one per node).
# NOTE(review): confirm that one worker per task is the intended layout.
#   --overlap            let this step share resources with other job steps
#   --manager slurm      tell the worker it is running under Slurm
#   --idle-timeout=5min  worker shuts down after 5 minutes without tasks
srun --overlap hq worker start --manager slurm --idle-timeout=5min