#!/usr/bin/env bash
set -euo pipefail

# Run a command repeatedly until it succeeds or the attempt budget is used up.
#
# Arguments:
#   $1    - human-readable description used in the log messages
#   $2... - the command and its arguments; executed as-is via "$@" (no eval)
# Environment (both optional):
#   RETRY_MAX_ATTEMPTS  - total number of attempts, positive integer (default 5)
#   RETRY_DELAY_SECONDS - pause between attempts, handed to sleep (default 5)
# Outputs:
#   progress messages on stdout; usage errors on stderr
# Returns:
#   0 as soon as the command succeeds
#   1 when every attempt failed
#   2 on a usage error (no command given, invalid RETRY_MAX_ATTEMPTS)
retry_eval() {
    # Without this guard an empty "$@" expands to an empty command, which
    # exits 0 and would be reported as a success.
    if (( $# < 2 )); then
        echo "retry_eval: usage: retry_eval <description> <command> [args...]" >&2
        return 2
    fi

    local desc="$1"; shift
    local max_attempts="${RETRY_MAX_ATTEMPTS:-5}"
    local delay="${RETRY_DELAY_SECONDS:-5}"
    local attempt=1

    if [[ ! "$max_attempts" =~ ^[1-9][0-9]*$ ]]; then
        echo "retry_eval: RETRY_MAX_ATTEMPTS must be a positive integer, got '$max_attempts'" >&2
        return 2
    fi

    while true; do
        echo "Attempt $attempt for $desc"
        if "$@"; then
            echo "$desc succeeded"
            return 0
        fi

        # Give up before sleeping so the final failure is reported immediately.
        if (( attempt >= max_attempts )); then
            echo "❌ $desc failed after $max_attempts attempts. Skipping..."
            return 1
        fi

        echo "Attempt $attempt for $desc failed. Retrying in $delay seconds..."
        sleep "$delay"
        attempt=$((attempt + 1))
    done
}

# Force-remove leftover OpenHands runtime containers and prune unused volumes.
#
# Every row of `docker ps -a` that contains "openhands-runtime" is selected and
# its first column (the container ID) is passed to `docker rm -f`. Afterwards
# `docker volume prune -f` is run.
#
# Arguments: none
# Outputs:   progress messages (plus whatever docker prints) on stdout
# Returns:   0 on success; non-zero if a docker command fails
cleanup() {
    echo "🧹 Cleaning up OpenHands runtime containers and volumes..."
    # The filtering is done by awk rather than grep: grep exits 1 when nothing
    # matches, and with `set -e -o pipefail` that would abort the calling
    # script whenever there are simply no containers left to remove.
    # `xargs -r` skips `docker rm` entirely when the list is empty.
    docker ps -a | awk '/openhands-runtime/ {print $1}' | xargs -r docker rm -f
    docker volume prune -f
    echo "✅ Cleanup completed"
}

# ---------------------------------------------------------------------------
# Batch configuration
# ---------------------------------------------------------------------------
# Each immediate sub-directory of BASE_DIR is passed to run_eval.py as --task-path.
BASE_DIR="/root/OpenAgentSafety/workspaces/mcp-tasks/filesystem/multi-turn-tasks"
# Passed to run_eval.py as --outputs-path.
OUTPUT_DIR="/root/OpenAgentSafety/evaluation/test_output/single_turn_gpt4o_filesystem_4oexp"
# Values for --agent-llm-config / --env-llm-config.
AGENT_CFG="agent"
ENV_CFG="env"
# NOTE(review): SERVER_HOST is not referenced anywhere below; REMOTE_HOST is
# the value actually passed as --server-hostname. Confirm which one is intended.
SERVER_HOST="localhost"
REMOTE_HOST="64.176.198.19"
# Agent and environment currently point at the same config file.
AGENT_CFG_FILE="agent_config/config.toml"
ENV_CFG_FILE="agent_config/config.toml"
# Passed to run_eval.py as --use-experience.
EXPERIENCE_PATH="/root/OpenAgentSafety/self_exploration/experience_list.json"

# Fail loudly on a missing task directory: otherwise the glob below stays a
# literal pattern, fails the -d test, and the script exits 0 having done nothing.
if [[ ! -d "$BASE_DIR" ]]; then
    echo "❌ Task directory not found: $BASE_DIR" >&2
    exit 1
fi

# ---------------------------------------------------------------------------
# Run every task, cleaning up Docker state after each one
# ---------------------------------------------------------------------------
for task_dir in "$BASE_DIR"/*; do
    # Only directories are tasks; this also skips the literal pattern left
    # behind when BASE_DIR is empty.
    [[ -d "$task_dir" ]] || continue

    echo "Running task: $task_dir"
    # Calling retry_eval inside 'if' keeps a failed task from tripping
    # 'set -e', so the loop can move on to the next task.
    if ! retry_eval "multi-turn task $task_dir" \
        poetry run python run_eval.py \
            --agent-llm-config "$AGENT_CFG" \
            --agent-llm-config-file "$AGENT_CFG_FILE" \
            --env-llm-config "$ENV_CFG" \
            --env-llm-config-file "$ENV_CFG_FILE" \
            --outputs-path "$OUTPUT_DIR" \
            --server-hostname "$REMOTE_HOST" \
            --use-experience "$EXPERIENCE_PATH" \
            --task-path "$task_dir"; then
        echo "⚠️  Skipping to next task due to failure..."
    fi

    cleanup

    # Remove every ghcr.io/all-hands-ai/runtime image. An image carrying
    # several tags is listed once per tag with the same ID, so IDs are
    # de-duplicated: a repeated ID would make `docker rmi` fail on the second
    # occurrence, and under errexit/pipefail that would end the whole batch.
    # A removal failure is reported but does not stop the remaining tasks.
    docker images \
        | awk '/ghcr\.io\/all-hands-ai\/runtime/ && !seen[$3]++ {print $3}' \
        | xargs -r docker rmi -f \
        || echo "⚠️  Could not remove every runtime image; continuing..." >&2

    # Sanity check: print any runtime images still present. grep exits 1 when
    # none are left, which is the expected outcome, hence '|| true'.
    docker images | grep ghcr.io/all-hands-ai/runtime || true
    echo "Task processed: $task_dir"
    echo "----------------------------------------"
done