#!/bin/bash

# Built-in settings. Each variable below can be overridden by the
# command-line flag of the same (lower-case) name, e.g. --data_dir.

# Experiment identity and workflow step
EXP_NAME=''                         # intentionally empty: must come from --exp_name
MODE='all'                          # one of: all, create, submit, resubmit, process
MODEL='gpt-4o'
SEED=42                             # random seed forwarded to job creation

# Input data selection
DATA_DIR='data/flare_subsampled'
TASKS=''                            # empty selects every task found under DATA_DIR
SAMPLE_SIZES='100'
TEST_SETS='test'

# Prompt templates
PROMPT_BEFORE_EXEMPLARS='prompts/prompts_before_exemplars.txt'
PROMPT_AFTER_EXEMPLARS='prompts/zsr_prompt.txt'
PROMPT_TYPE='zsr_prompt'

# Token substitution options forwarded to job creation
TOKEN_SUBSTITUTION_STRATEGY='many2one'
ONE2MANY_TIMESTEP='5'

# Output location and submission parallelism
RESULTS_DIR='results'
MAX_PARALLEL=4

# Print the command-line help text to stdout.
#
# The defaults shown here are written out literally (rather than read from
# the variables) because this function is also called after argument
# parsing, when the variables may already hold user-supplied overrides.
# Keep them in sync with the "Default values" section of this script.
show_usage() {
    printf '%s\n' \
        "Usage: $0 [options]" \
        "Options:" \
        "  --data_dir <dir>           Data directory (default: data/flare_subsampled)" \
        "  --exp_name <name>          Experiment name (required)" \
        "  --tasks <tasks>            Comma-separated list of tasks (empty for all tasks)" \
        "  --model <model>            Model name (default: gpt-4o; must be non-empty for create/all modes)" \
        "  --sample_sizes <sizes>     Comma-separated list of sample sizes (default: 100)" \
        "  --test_sets <sets>         Comma-separated list of test sets (default: test)" \
        "  --prompt_before_exemplars <file> Prompt template file before examples (default: prompts/prompts_before_exemplars.txt)" \
        "  --prompt_after_exemplars <file> Prompt template file after examples (default: prompts/zsr_prompt.txt)" \
        "  --prompt_type <type>         Prompt type (default: zsr_prompt)" \
        "  --results_dir <dir>        Results directory (default: results)" \
        "  --max_parallel <n>         Maximum parallel jobs (default: 4)" \
        "  --mode <mode>              Workflow mode: all, create, submit, resubmit, process (default: all)" \
        "  --seed <seed>              Random seed for reproducibility (default: 42)" \
        "  --token_substitution_strategy <strategy> Token substitution strategy (default: many2one)" \
        "  --one2many_timestep <timestep> Timestep for one2many token substitution (default: 5)"
}

# Parse command line arguments.
#
# Every option takes exactly one value, which is stored in the matching
# upper-case global variable (e.g. --data_dir -> DATA_DIR). An empty value
# is accepted (e.g. --tasks "" selects all tasks).
#
# Exits with status 1 on an unknown option or when an option is given
# without a value. The latter check matters: `shift 2` with only one
# argument left fails without shifting, which would otherwise make the
# loop spin forever.
# -h/--help prints the usage text and exits with status 0.
parse_args() {
    local opt target
    while [[ $# -gt 0 ]]; do
        opt="$1"
        # Map the option name to the global variable that receives its value.
        case "$opt" in
            -h|--help)
                show_usage
                exit 0
                ;;
            --data_dir)                    target=DATA_DIR ;;
            --exp_name)                    target=EXP_NAME ;;
            --tasks)                       target=TASKS ;;
            --model)                       target=MODEL ;;
            --sample_sizes)                target=SAMPLE_SIZES ;;
            --test_sets)                   target=TEST_SETS ;;
            --prompt_before_exemplars)     target=PROMPT_BEFORE_EXEMPLARS ;;
            --prompt_after_exemplars)      target=PROMPT_AFTER_EXEMPLARS ;;
            --prompt_type)                 target=PROMPT_TYPE ;;
            --results_dir)                 target=RESULTS_DIR ;;
            --max_parallel)                target=MAX_PARALLEL ;;
            --mode)                        target=MODE ;;
            --seed)                        target=SEED ;;
            --token_substitution_strategy) target=TOKEN_SUBSTITUTION_STRATEGY ;;
            --one2many_timestep)           target=ONE2MANY_TIMESTEP ;;
            *)
                echo "Unknown parameter: $opt" >&2
                exit 1
                ;;
        esac

        if [[ $# -lt 2 ]]; then
            echo "Error: $opt requires a value" >&2
            exit 1
        fi

        # printf -v assigns to the variable named by $target; since no local
        # of that name exists here, the assignment lands in the global.
        printf -v "$target" '%s' "$2"
        shift 2
    done
}

parse_args "$@"

# An experiment name is mandatory; without one, print the usage text and stop.
[[ -n "$EXP_NAME" ]] || {
    echo "Error: exp_name is required"
    show_usage
    exit 1
}

# Per-experiment working directories, created up front if missing.
BATCH_REQUESTS_DIR="batch_requests/$EXP_NAME"
BATCH_JOBS_DIR="batch_jobs/$EXP_NAME"
mkdir -p "$BATCH_JOBS_DIR" "$BATCH_REQUESTS_DIR"

# List every available task.
#
# A task is any immediate subdirectory of $DATA_DIR; its name is the task
# name.
# Globals: DATA_DIR (read)
# Outputs: the task names on a single line, separated by spaces (an empty
#          line when there are none)
get_all_tasks() {
    local dir name
    local tasks=()
    for dir in "$DATA_DIR"/*/; do
        # When nothing matches, the glob stays literal; -d filters that out.
        [[ -d "$dir" ]] || continue
        # Strip the trailing slash, then everything up to the last slash.
        name="${dir%/}"
        tasks+=("${name##*/}")
    done
    printf '%s\n' "${tasks[*]}"
}

# Validate the requested mode and the parameters it depends on.
if [[ "$MODE" == "create" || "$MODE" == "all" ]]; then
    # Both of these modes run job creation, which needs a model name.
    # The task list may be empty here: that means all tasks are processed.
    if [[ -z "$MODEL" ]]; then
        echo "Error: model is required for create mode"
        show_usage
        exit 1
    fi
elif [[ "$MODE" != "submit" && "$MODE" != "resubmit" && "$MODE" != "process" ]]; then
    # submit, resubmit and process need no model name; anything else is
    # not a known mode.
    echo "Error: Invalid mode. Must be one of: all, create, submit, resubmit, process"
    show_usage
    exit 1
fi

# Build TASK_LIST, the array of tasks the workflow operates on.
if [[ -n "$TASKS" ]]; then
    # Explicit selection: split the comma-separated --tasks value.
    IFS=',' read -ra TASK_LIST <<< "$TASKS"
    echo "Processing specified tasks: ${TASK_LIST[*]}"
else
    # Nothing selected: fall back to every task found in the data directory.
    TASK_LIST=($(get_all_tasks))
    echo "No tasks specified, using all available tasks: ${TASK_LIST[*]}"
fi

# Create one batch job per task by running create_batch_job.sh.
#
# Globals (read): TASK_LIST, EXP_NAME, MODEL, DATA_DIR, SAMPLE_SIZES,
#   TEST_SETS, PROMPT_BEFORE_EXEMPLARS, PROMPT_AFTER_EXEMPLARS, SEED,
#   TOKEN_SUBSTITUTION_STRATEGY, ONE2MANY_TIMESTEP
# Exits the script with status 1 as soon as one creation command fails.
#
# The command is assembled as an argv array and executed directly instead
# of being joined into a string and passed to eval. That way values
# containing spaces stay single arguments, and shell metacharacters in any
# value are never interpreted as code.
create_jobs() {
    echo "Creating batch jobs..."

    local task
    local -a create_cmd
    for task in "${TASK_LIST[@]}"; do
        echo "Creating job for task: $task"

        create_cmd=(
            bash ./src/openai/create_batch_job.sh
            --exp_name "$EXP_NAME"
            --task "$task"
            --model "$MODEL"
            --data_dir "$DATA_DIR"
            --sample_sizes "$SAMPLE_SIZES"
            --test_sets "$TEST_SETS"
            --prompt_before_exemplars "$PROMPT_BEFORE_EXEMPLARS"
            --prompt_after_exemplars "$PROMPT_AFTER_EXEMPLARS"
            --seed "$SEED"
            --token_substitution_strategy "$TOKEN_SUBSTITUTION_STRATEGY"
            --one2many_timestep "$ONE2MANY_TIMESTEP"
        )

        # %q shell-quotes each word so the logged line can be copy-pasted.
        printf 'Executing command:'
        printf ' %q' "${create_cmd[@]}"
        printf '\n'

        if ! "${create_cmd[@]}"; then
            echo "Error: create_batch_job.sh failed" >&2
            exit 1
        fi
    done
}

# Submit freshly created batch jobs via submit_batch_jobs.sh.
#
# Globals (read): BATCH_JOBS_DIR, MAX_PARALLEL
# Exits the script with status 1 when:
#   - $BATCH_JOBS_DIR already holds a *_metadata.json file (taken as a
#     sign that jobs were submitted before), or
#   - failed_jobs.txt exists and the user does not answer y/Y at the prompt.
# Otherwise returns the exit status of submit_batch_jobs.sh.
submit_jobs() {
    echo "Submitting new batch jobs..."

    # Any output from find means at least one metadata file exists.
    local existing_metadata
    existing_metadata="$(find "$BATCH_JOBS_DIR" -maxdepth 1 -name "*_metadata.json")"
    if [[ -n "$existing_metadata" ]]; then
        echo "Error: Found existing metadata files. This suggests jobs are already submitted." >&2
        echo "Use --mode process to check their status," >&2
        echo "or --mode resubmit to retry failed jobs." >&2
        exit 1
    fi

    # Leftover failures from an earlier run: ask before submitting more.
    if [[ -f "$BATCH_JOBS_DIR/failed_jobs.txt" ]]; then
        echo "Warning: There are failed jobs from previous runs."
        echo "Use --mode resubmit to retry failed jobs."
        echo "Continue submitting new jobs? [y/N]"
        local answer
        read -r answer
        if [[ ! "$answer" =~ ^[Yy]$ ]]; then
            exit 1
        fi
    fi

    bash ./src/openai/submit_batch_jobs.sh --max_parallel "$MAX_PARALLEL" --jobs_dir "$BATCH_JOBS_DIR"
}

# Resubmit jobs that failed in an earlier run.
#
# Globals (read): BATCH_JOBS_DIR, MAX_PARALLEL
# Exits the script with status 1 when $BATCH_JOBS_DIR/failed_jobs.txt does
# not exist; otherwise returns the exit status of submit_batch_jobs.sh.
# NOTE(review): this invokes submit_batch_jobs.sh with the same arguments
# as a first-time submit; presumably that script picks up failed_jobs.txt
# on its own -- confirm against submit_batch_jobs.sh.
resubmit_jobs() {
    echo "Resubmitting failed jobs..."
    if [[ ! -f "$BATCH_JOBS_DIR/failed_jobs.txt" ]]; then
        echo "No failed jobs to resubmit"
        exit 1
    fi
    # Quoted so an empty or space-containing value stays one argument.
    bash ./src/openai/submit_batch_jobs.sh --max_parallel "$MAX_PARALLEL" --jobs_dir "$BATCH_JOBS_DIR"
}

# Run the Python results processor for the current experiment.
#
# Globals (read): RESULTS_DIR, EXP_NAME, PROMPT_TYPE
# Returns the exit status of the python command.
process_results() {
    echo "Processing batch results..."

    local -a processor_args=(
        --results_dir "$RESULTS_DIR"
        --exp_name "$EXP_NAME"
        --prompt_type "$PROMPT_TYPE"
    )
    python src/openai/process_batch_results.py "${processor_args[@]}"
}

# Run the workflow step(s) selected by --mode.
# The return status of the step functions is not inspected here; a step
# only stops the script when it calls exit itself.
if [[ "$MODE" == "create" ]]; then
    create_jobs
elif [[ "$MODE" == "submit" ]]; then
    submit_jobs
elif [[ "$MODE" == "resubmit" ]]; then
    resubmit_jobs
elif [[ "$MODE" == "process" ]]; then
    process_results
elif [[ "$MODE" == "all" ]]; then
    # Full pipeline: create, then submit, then process.
    create_jobs
    submit_jobs
    process_results
else
    echo "Error: Invalid mode. Must be one of: all, create, submit, resubmit, process"
    show_usage
    exit 1
fi

echo "Workflow completed!"
