# Default values.
# Only some of these are overridden by the command-line parser below; the rest
# keep the value given here.
LANGUAGE=""  # Comma-separated languages/tasks to run; empty means all
MODEL=""  # Model name(s), comma-separated; must be supplied with --model
SAMPLE_SIZES="100"  # Different sample sizes for evaluation
VALIDATION_SETS="long"  # Types of validation sets
PROMPT_TEMPLATE="io_prompt,zsr_prompt"  # Comma-separated prompt template names (io_prompt, zsr_prompt)
VALIDATION_SIZE="100"  # Number of validation examples
RESULTS_DIR="results"  # Directory to store results
MIN_LENGTH=""  # Minimum sequence length
MAX_PARALLEL=4  # Maximum parallel jobs
MODE="all"  # Mode of execution (create, submit, resubmit, process, all)
ENCODING_TECHNIQUE="many_to_one,one_to_one,one_to_many"  # Comma-separated encoding techniques (many_to_one, one_to_one, one_to_many)
MIN_LONG_VALIDATION_LENGTH=""  # Minimum length for long validation sequences
SEED=42  # Random seed for reproducibility
DATASET="flare_subsampled"  # Dataset name; used as a sub-directory of DATASET_DIR when listing languages
# Comma-separated k values for the one_to_many encoding; empty selects the
# built-in list (2 3 4 5). Initialised explicitly so that a K variable inherited
# from the environment cannot change the run.
K=""
CHECK_TOKENIZER=""

# Print the command-line help text to stdout.
# Defined ahead of the parser because bash looks functions up at call time:
# the parser and the validation checks below all call it.
show_usage() {
    echo "Usage: $0 [options]"
    echo "Options:"
    echo "  --language <tasks>         Comma-separated list of tasks (empty for all languages)"
    echo "  --model <model>            Model name (required)"
    echo "  --prompt_template <file>   Prompt template (required to be one of 'io_prompt' or 'zsr_prompt')"
    echo "  --results_dir <dir>        Directory to store results"
    echo "  --seed <seed>              Random seed for reproducibility (default: 42)"
    echo "  --encoding_technique       Must be one of 'many_to_one' 'one_to_one' 'one_to_many'"
    echo "  --k <values>               Comma-separated k values used with 'one_to_many'"
    echo "  --dataset <dataset>        Dataset name (required, must be 'flare')"
    echo "  -h, --help                 Show this help and exit"
}

# Parse command line arguments.
# Every option except --help takes exactly one value. The value's presence is
# checked before `shift 2`: with a single argument left, `shift 2` fails without
# shifting anything, which would make this loop spin forever on a trailing
# option that has no value.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            show_usage
            exit 0
            ;;
        --language|--model|--prompt_template|--results_dir|--seed|--encoding_technique|--k|--dataset)
            if [[ $# -lt 2 ]]; then
                echo "Error: $1 requires a value" >&2
                show_usage >&2
                exit 1
            fi
            case "$1" in
                --language)           LANGUAGE="$2" ;;            # Comma-separated list of tasks
                --model)              MODEL="$2" ;;               # Model name
                --prompt_template)    PROMPT_TEMPLATE="$2" ;;     # Prompt template name(s)
                --results_dir)        RESULTS_DIR="$2" ;;         # Directory to store results
                --seed)               SEED="$2" ;;                # Random seed
                --encoding_technique) ENCODING_TECHNIQUE="$2" ;;  # Encoding technique(s)
                --k)                  K="$2" ;;                   # k value(s)
                --dataset)            DATASET="$2" ;;             # Dataset name
            esac
            shift 2
            ;;
        *)
            echo "Unknown parameter: $1" >&2
            show_usage >&2
            exit 1
            ;;
    esac
done

# Both values must be non-empty once parsing is done.
if [[ -z "$MODEL" ]]; then
    echo "Error: --model is required" >&2
    show_usage >&2
    exit 1
fi

if [[ -z "$DATASET" ]]; then
    echo "Error: --dataset is required" >&2
    show_usage >&2
    exit 1
fi

# Results for a model live in <results dir>/<model>.
printf -v MODEL_DIR '%s/%s' "$RESULTS_DIR" "$MODEL"
# -p creates missing parent directories and is a no-op when the path exists.
mkdir -p "$MODEL_DIR"


# Root directory holding the datasets; the languages/tasks of a dataset are the
# sub-directories of "$DATASET_DIR/$DATASET".
# NOTE(review): this is empty, so the lookup resolves to "/$DATASET" — confirm
# the intended path.
DATASET_DIR=""

# Print the names of all sub-directories of "$DATASET_DIR/$DATASET" on a single
# line, separated by spaces (an empty line when there are none).
# Globals: DATASET_DIR, DATASET (read)
get_all_languages() {
    local tasks=()
    local dir
    # The variable part is quoted so a path containing spaces is not split into
    # several words; only the trailing */ is left unquoted to act as a glob.
    for dir in "$DATASET_DIR/$DATASET"/*/; do
        # A glob that matches nothing stays literal; the -d test filters it out.
        if [[ -d "$dir" ]]; then
            dir="${dir%/}"          # drop the trailing slash left by the glob
            tasks+=("${dir##*/}")   # keep only the last path component
        fi
    done
    echo "${tasks[@]}"
}

# Languages to process: the --language list, or every language of the dataset.
if [[ -z "$LANGUAGE" ]]; then
    # Nothing requested: discover the languages. get_all_languages prints the
    # names on one line; read -a splits it on whitespace without glob-expanding
    # the names.
    read -ra LANGUAGES_LIST <<< "$(get_all_languages)"
    echo "No tasks specified, using all available languages: ${LANGUAGES_LIST[*]}"
    if [[ ${#LANGUAGES_LIST[@]} -eq 0 ]]; then
        # The workflow loops over this list, so an empty list runs nothing.
        echo "Warning: no languages found under '$DATASET_DIR/$DATASET'; nothing will be run" >&2
    fi
else
    # Split the comma-separated value; IFS is changed for this read only.
    IFS=',' read -ra LANGUAGES_LIST <<< "$LANGUAGE"
    echo "Processing specified languages: ${LANGUAGES_LIST[*]}"
fi

# Models to process: the --model list, or every model the dispatcher accepts.
if [[ -z "$MODEL" ]]; then
    MODELS_LIST=('deepseek-chat' 'deepseek-reasoner')
    echo "No model specified, using all models: ${MODELS_LIST[*]}"
else
    IFS=',' read -ra MODELS_LIST <<< "$MODEL"
    echo "Processing specified models: ${MODELS_LIST[*]}"
fi

# Encoding techniques to process: the --encoding_technique list, or all of them.
if [[ -z "$ENCODING_TECHNIQUE" ]]; then
    ENCODING_TECHNIQUES_LIST=("one_to_one" "one_to_many" "many_to_one")
    echo "No encoding techniques specified, using default encoding techniques: ${ENCODING_TECHNIQUES_LIST[*]}"
else
    IFS=',' read -ra ENCODING_TECHNIQUES_LIST <<< "$ENCODING_TECHNIQUE"
    echo "Processing specified encoding techniques: ${ENCODING_TECHNIQUES_LIST[*]}"
fi

# k values (used only for the one_to_many encoding): the --k list, or 2..5.
if [[ -z "$K" ]]; then
    K_LIST=(2 3 4 5)
else
    IFS=',' read -ra K_LIST <<< "$K"
fi

# Prompt templates to process: the --prompt_template list, or both templates.
if [[ -z "$PROMPT_TEMPLATE" ]]; then
    PROMPT_TEMPLATES_LIST=("io_prompt" "zsr_prompt")
    echo "No prompt templates specified, using default prompt templates: ${PROMPT_TEMPLATES_LIST[*]}"
else
    IFS=',' read -ra PROMPT_TEMPLATES_LIST <<< "$PROMPT_TEMPLATE"
    echo "Processing specified prompt templates: ${PROMPT_TEMPLATES_LIST[*]}"
fi

# Create the results directory tree below "$MODEL_DIR":
#   <prompt template>/<encoding>/<language>           for most encodings
#   <prompt template>/one_to_many/k=<k>/<language>    one sub-tree per k value
# Globals: MODEL_DIR, PROMPT_TEMPLATES_LIST, ENCODING_TECHNIQUES_LIST, K_LIST,
#          LANGUAGES_LIST (read)
# Outputs: a progress line on stdout
create_folders() {
    # Loop and scratch variables are local so the function does not overwrite
    # same-named variables of its caller.
    local prompt_template encoding k language
    local encoding_dir k_dir

    echo "Creating folders..."
    for prompt_template in "${PROMPT_TEMPLATES_LIST[@]}"; do
        for encoding in "${ENCODING_TECHNIQUES_LIST[@]}"; do
            encoding_dir="$MODEL_DIR/$prompt_template/$encoding"
            # Created explicitly so it exists even when the inner lists are empty.
            mkdir -p "$encoding_dir"

            if [[ "$encoding" == "one_to_many" ]]; then
                # one_to_many gets an extra k=<k> level per k value
                for k in "${K_LIST[@]}"; do
                    k_dir="$encoding_dir/k=${k}"
                    mkdir -p "$k_dir"
                    for language in "${LANGUAGES_LIST[@]}"; do
                        mkdir -p "$k_dir/$language"
                    done
                done
            else
                # every other encoding holds the language folders directly
                for language in "${LANGUAGES_LIST[@]}"; do
                    mkdir -p "$encoding_dir/$language"
                done
            fi
        done
    done
}


# Run a single experiment through ./src/deepseek/run_experiment.sh.
# Arguments: $1 language, $2 prompt template, $3 encoding technique, $4 k
# Globals:   MODEL, SEED (read)
# Returns:   the exit status of run_experiment.sh
_run_experiment() {
    bash ./src/deepseek/run_experiment.sh \
        --language "$1" \
        --model "$MODEL" \
        --prompt_template "$2" \
        --encoding_technique "$3" \
        --seed "$SEED" \
        --k "$4"
}

# Run the experiment script once per combination of prompt template, encoding
# technique and language. The 'one_to_many' encoding is run once per value in
# K_LIST; every other encoding is run once with --k 1.
# The script path is relative, so it is resolved against the current directory.
# Globals: MODEL, SEED, PROMPT_TEMPLATES_LIST, ENCODING_TECHNIQUES_LIST,
#          LANGUAGES_LIST, K_LIST (read)
# Outputs: progress lines on stdout, failure reports on stderr
# Returns: 0 when every run exited 0, 1 when at least one run failed
execute_workflow() {
    # Loop variables are local so the caller's variables are left untouched.
    local prompt_template encoding language k
    local failures=0

    echo "Executing $MODEL workflow..."
    for prompt_template in "${PROMPT_TEMPLATES_LIST[@]}"; do
        echo "With prompt template: $prompt_template"
        for encoding in "${ENCODING_TECHNIQUES_LIST[@]}"; do
            echo "With encoding: $encoding"
            for language in "${LANGUAGES_LIST[@]}"; do
                if [[ "$encoding" == "one_to_many" ]]; then
                    for k in "${K_LIST[@]}"; do
                        echo "Running pipeline for task: $language with k=$k"
                        # A failed run is recorded and the remaining runs go on.
                        if ! _run_experiment "$language" "$prompt_template" "$encoding" "$k"; then
                            echo "Error: run failed for task: $language ($prompt_template, $encoding, k=$k)" >&2
                            failures=$((failures + 1))
                        fi
                    done
                else
                    echo "Running pipeline for task: $language"
                    if ! _run_experiment "$language" "$prompt_template" "$encoding" 1; then
                        echo "Error: run failed for task: $language ($prompt_template, $encoding)" >&2
                        failures=$((failures + 1))
                    fi
                fi
            done
        done
    done

    if (( failures > 0 )); then
        echo "Error: $failures experiment run(s) failed" >&2
        return 1
    fi
    return 0
}


# Run the workflow once per requested model.
# create_folders reads MODEL_DIR and execute_workflow reads MODEL, and up to
# here both hold the whole comma-separated --model value. They are re-pointed
# at the current model before each run so that every model gets its own
# results directory and its own --model argument.
for model in "${MODELS_LIST[@]}"; do
    case "$model" in
        deepseek-chat)
            echo "Running DeepSeek-chat workflow..."
            ;;
        deepseek-reasoner)
            echo "Running DeepSeek-reasoner workflow..."
            ;;
        *)
            echo "Error: Invalid model. Must be one of: deepseek-chat, deepseek-reasoner" >&2
            show_usage >&2
            exit 1
            ;;
    esac

    MODEL="$model"
    MODEL_DIR="$RESULTS_DIR/$model"
    create_folders
    execute_workflow
done


echo "Workflow completed!"
