#!/bin/bash

# Require the GPU device and both CARLA ports. Anything after those three
# arguments is optional and is forwarded to the training script.
if [ "$#" -lt 3 ]; then
    # Usage text is a diagnostic, so it goes to stderr rather than stdout.
    echo "Usage: $0 <gpu_device> <carla_port1> <carla_port2> [additional_training_parameters]" >&2
    exit 1
fi

# Configuration
# The first three positional arguments select the GPU and the two CARLA ports;
# they are shifted away so that only the extra training parameters remain.
GPU_DEVICE=$1
CARLA_PORT1=$2
CARLA_PORT2=$3
shift 3
TRAINING_SCRIPT="dreamerv3/train_hansome.py"
COMMON_PARAMS="--dreamerv3.jax.policy_devices 2 --dreamerv3.jax.train_devices 3"
# Remaining arguments joined into one space-separated string; it is word-split
# again when the training command is executed.
ADDITIONAL_PARAMS="$*"
LOG_FILE="log_${CARLA_PORT1}_${CARLA_PORT2}.log"

# Truncate (or create) the log file before starting. The target is quoted so a
# port value containing whitespace cannot turn this into an ambiguous redirect.
: > "$LOG_FILE"

# Append one line of the form "[YYYY-MM-DD HH:MM:SS] message" to the log file.
# Globals:   LOG_FILE (read) - path of the file that is appended to
# Arguments: $1 - message text
log_with_timestamp() {
    local message=$1
    # LOG_FILE is quoted so that paths containing spaces still work.
    printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$message" >> "$LOG_FILE"
}

# Make sure a CARLA server is running on the given port, starting it if needed.
# Globals:   GPU_DEVICE (read), CARLA_ROOT (read), LOG_FILE (read)
# Arguments: $1 - TCP port the CARLA server should listen on
# Outputs:   nothing on stdout; progress messages and the output of fuser and
#            CARLA are appended to LOG_FILE
# Returns:   0 if CARLA was already running or came up, 1 if the freshly
#            started CARLA process exited before the port opened
launch_carla() {
    local port=$1
    local carla_pid

    # pgrep -f matches against the full command line, so this only finds the
    # launcher that was started for exactly this port with these flags.
    if pgrep -f "CarlaUE4.sh -RenderOffScreen -carla-port=$port -benchmark -fps=10" > /dev/null; then
        return 0
    fi

    log_with_timestamp "CARLA server is not running on port $port. Starting or restarting..."

    # Free the port first. fuser prints the killed PIDs on stdout; send that to
    # the log so it cannot leak into a caller's command substitution.
    fuser -k "${port}/tcp" >> "$LOG_FILE" 2>&1

    # The background server must not inherit our stdout: a caller using
    # $(...) would otherwise block until CARLA exits, because the pipe stays
    # open for as long as CARLA holds it.
    CUDA_VISIBLE_DEVICES=$GPU_DEVICE "$CARLA_ROOT/CarlaUE4.sh" -RenderOffScreen -carla-port="$port" -benchmark -fps=10 >> "$LOG_FILE" 2>&1 &
    carla_pid=$!

    while ! nc -z localhost "$port"; do
        # Stop waiting if the launcher is already gone; the port would never
        # open and the loop would spin forever. This relies on the same
        # assumption as the pgrep check above: the CarlaUE4.sh process stays
        # alive for as long as the server runs.
        if ! kill -0 "$carla_pid" 2> /dev/null; then
            log_with_timestamp "CARLA server exited before opening port $port."
            return 1
        fi
        log_with_timestamp "Waiting for CARLA server to start on port $port..."
        sleep 1
    done
    log_with_timestamp "CARLA server is up and running on port $port."
}

# Bring up both CARLA servers, then launch the training script in the
# background.
# Globals:   CARLA_PORT1, CARLA_PORT2, TRAINING_SCRIPT, COMMON_PARAMS,
#            ADDITIONAL_PARAMS, LOG_FILE (all read)
# Outputs:   prints ONLY the PID of the background training process on stdout,
#            so callers can capture it with $(start_training)
start_training() {
    local training_command

    # Keep everything launch_carla prints or spawns off our stdout. Otherwise
    # that output would be captured as part of the PID, and a background
    # server holding the pipe open would stall the caller's $(...).
    launch_carla "$CARLA_PORT1" >> "$LOG_FILE"
    launch_carla "$CARLA_PORT2" >> "$LOG_FILE"

    training_command="python -u $TRAINING_SCRIPT $COMMON_PARAMS $ADDITIONAL_PARAMS"
    log_with_timestamp "Starting training with command: $training_command"

    # Deliberately unquoted: the parameter strings must be word-split into
    # separate arguments for python.
    # shellcheck disable=SC2086
    $training_command >> "$LOG_FILE" 2>&1 &
    echo $!
}

# Exit handler: stop whatever is using the two CARLA ports and the training
# process, then exit with the status the script was already exiting with.
# Globals:   CARLA_PORT1, CARLA_PORT2, TRAINING_PID (all read)
cleanup() {
    # Must be the first statement: capture the status that triggered the trap
    # before any cleanup command overwrites $?.
    local exit_status=$?

    log_with_timestamp "Cleaning up and exiting..."
    fuser -k "${CARLA_PORT1}/tcp"
    fuser -k "${CARLA_PORT2}/tcp"
    if [ -n "$TRAINING_PID" ]; then
        kill -TERM "$TRAINING_PID" >/dev/null 2>&1
        # Best effort: wait only succeeds when the trainer is a direct child
        # of this shell; otherwise it returns immediately with an error.
        wait "$TRAINING_PID" >/dev/null 2>&1
    fi
    # Preserve the original exit status instead of reporting the status of
    # the last cleanup command.
    exit "$exit_status"
}

# Run cleanup whenever the script exits, so CARLA and the trainer are not
# left behind.
trap cleanup EXIT

# Report whether the training process started by THIS script is still alive.
# Globals: TRAINING_PID (read), TRAINING_SCRIPT (read)
# Returns: 0 if the training process appears to be running, non-zero otherwise
training_is_running() {
    if [[ "$TRAINING_PID" =~ ^[0-9]+$ ]]; then
        # Check our own process. A pgrep on the script name would also match
        # trainers started by other instances of this supervisor (or any
        # process that merely mentions the script path), hiding a crash here.
        kill -0 "$TRAINING_PID" 2> /dev/null
    else
        # No usable PID was captured; fall back to matching by command line.
        pgrep -f "$TRAINING_SCRIPT" > /dev/null
    fi
}

# Initialize and start training
log_with_timestamp "Initializing training..."
TRAINING_PID=$(start_training)

# Main loop: once a minute, restart the trainer if it died and make sure both
# CARLA servers are still up.
while true; do
    if ! training_is_running; then
        log_with_timestamp "Training script crashed. Restarting..."
        TRAINING_PID=$(start_training)
    fi
    launch_carla "$CARLA_PORT1"
    launch_carla "$CARLA_PORT2"
    sleep 60
done
