#!/bin/bash

# --- VLMEvalKit checkout and cache locations (placeholders: edit before running) ---
VLMEVALKIT_DIR="Your_VLMEvalKit_path"
HF_HOME="Your_hf_home"
HF_DATASETS_CACHE="${HF_HOME}/datasets"
PYTORCH_KERNEL_CACHE_PATH="Your_cache_path"
export VLMEVALKIT_DIR HF_HOME HF_DATASETS_CACHE PYTORCH_KERNEL_CACHE_PATH

# --- GPU selection and API credentials ---
CUDA_VISIBLE_DEVICES='0'
OPENAI_API_KEY="Your openai_key"
export CUDA_VISIBLE_DEVICES OPENAI_API_KEY

# --- Model under test and root of all outputs ---
BASE_MODEL_NAME_OR_PATH="Your_model_path"
EVAL_OUTPUT_DIR="Your_output_path"

# --- Output layout: everything lives below RESULTS_BASE_DIR ---
RESULTS_BASE_DIR="${EVAL_OUTPUT_DIR}/Intervening_layer_experiments"
RESULTS_DIR="${RESULTS_BASE_DIR}/results"               # CSV result files
LOGS_DIR="${RESULTS_BASE_DIR}/Intervening_layer_logs"   # one sub-directory per run
TEMP_DIR="${RESULTS_BASE_DIR}/temp"                     # scratch space
ANALYSIS_DIR="${RESULTS_BASE_DIR}/analysis"             # analysis output
CACHE_DIR="${RESULTS_BASE_DIR}/cache"                   # cache files

# Create the whole tree in one call
mkdir -p \
    "${RESULTS_DIR}" \
    "${LOGS_DIR}" \
    "${TEMP_DIR}" \
    "${ANALYSIS_DIR}" \
    "${CACHE_DIR}"

# Logs of this invocation go into a directory named after the start time
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
CURRENT_LOG_DIR="${LOGS_DIR}/${TIMESTAMP}"
mkdir -p "${CURRENT_LOG_DIR}"

printf '%s\n' \
    "📁 File structure initialization completed:" \
    "   Result files: ${RESULTS_DIR}" \
    "   Log files: ${CURRENT_LOG_DIR}" \
    "   Temporary files: ${TEMP_DIR}" \
    "   Analysis results: ${ANALYSIS_DIR}" \
    "   Cache files: ${CACHE_DIR}"

# Empty the scratch and cache directories and make sure both exist afterwards.
#
# Globals:
#   TEMP_DIR  (read) - scratch directory whose contents are removed
#   CACHE_DIR (read) - cache directory whose contents are removed
# Outputs:
#   Status messages on stdout.
# Notes:
#   Aborts the script (via ${VAR:?}) if either variable is empty or unset,
#   because the glob would otherwise expand to "/*".
#   Dot-files are not matched by the "*" glob and therefore survive.
cleanup_all_cache() {
    echo "🧹 Cleaning all possible caches..."

    # Clean result files (optional, as needed)
    # rm -f "${RESULTS_DIR}"/*.csv

    # Clean temporary files and cache; "--" stops option parsing for odd file names
    rm -rf -- "${TEMP_DIR:?TEMP_DIR must be set before cleaning}"/*
    rm -rf -- "${CACHE_DIR:?CACHE_DIR must be set before cleaning}"/*

    # Recreate directories in case they did not exist yet
    mkdir -p "${TEMP_DIR}" "${CACHE_DIR}"

    echo "✅ Cache cleanup completed"
}

# --- Sweep definition: every layer is combined with every module and task ---
LAYERS=({1..31})
MODULES=("self_attn")
tasks=("MMStar" "MMBench_DEV_EN" "MMMU_DEV_VAL")

# --- GPU count: number of comma-separated entries in CUDA_VISIBLE_DEVICES ---
IFS=',' read -r -a GPUS_ARRAY <<< "${CUDA_VISIBLE_DEVICES}"
NPROC_PER_NODE="${#GPUS_ARRAY[@]}"
echo "Using ${NPROC_PER_NODE} GPU(s) for evaluation"

# --- Enter the VLMEvalKit checkout; nothing below works from anywhere else ---
if ! cd "${VLMEVALKIT_DIR}"; then
    echo "Error: Cannot switch to VLMEvalKit directory: ${VLMEVALKIT_DIR}"
    exit 1
fi

echo "Starting to evaluate fusion models with VLMEvalKit (traversing all layers)..."

# --- Function: Find available port ---
# Pick a port for the torchrun rendezvous that no socket is currently bound to.
#
# Starts at a random port in [54321, 55320] and walks upwards until the
# socket listing no longer shows the port; wraps back to a fresh random start
# if the walk runs past 65535.
#
# Outputs:
#   The chosen port number on stdout; a warning on stderr when no socket
#   listing tool is available.
# Notes:
#   Uses netstat when installed, otherwise ss (both accept -tuln). Without
#   either tool the random port is returned unchecked. The port is not
#   reserved, so another process may still grab it before it is used.
find_free_port() {
    local base_port=54321
    local port=$((base_port + RANDOM % 1000))
    local port_lister=""

    if command -v netstat > /dev/null 2>&1; then
        port_lister="netstat"
    elif command -v ss > /dev/null 2>&1; then
        port_lister="ss"
    else
        echo "Warning: neither netstat nor ss found; port ${port} is not verified to be free" >&2
    fi

    # ":<port> " matches the local-address column of both tools' output
    while [ -n "${port_lister}" ] && "${port_lister}" -tuln 2>/dev/null | grep -q ":${port} "; do
        port=$((port + 1))
        if [ "${port}" -gt 65535 ]; then
            port=$((base_port + RANDOM % 1000))
        fi
    done

    echo "${port}"
}

# --- Function: Check if result file exists ---
# Arguments:
#   $1 - weight tag embedded in the file name
#   $2 - layer
#   $3 - module
#   $4 - dataset
# Globals:
#   RESULTS_DIR (read) - directory that is searched
# Returns:
#   0 when llava_<weight>_eval_<layer>_<module>_<dataset>.csv is a regular
#   file in RESULTS_DIR, 1 otherwise.
check_result_exists() {
    local weight="$1" layer="$2" module="$3" dataset="$4"
    local csv_path="${RESULTS_DIR}/llava_${weight}_eval_${layer}_${module}_${dataset}.csv"

    # The status of the file test is the function's return value
    [[ -f "${csv_path}" ]]
}

# Clean cache before starting
cleanup_all_cache

# Render the arguments as a compact JSON array of strings, e.g. ["a","b"].
# With no arguments the result is []. Values are emitted verbatim (no JSON
# escaping), which is sufficient for the plain identifiers used in this script.
# A helper is needed because "${array[*]}" joins on the FIRST character of IFS
# only, so a multi-character separator such as '","' cannot be produced that way.
json_string_array() {
    local joined="" item
    for item in "$@"; do
        joined+="${joined:+,}\"${item}\""
    done
    printf '[%s]' "${joined}"
}

# --- Create experiment configuration file ---
# NOTE(review): `weights` is not assigned anywhere in the visible script, so
# "weights_value" is written as an empty string unless it comes from the environment.
cat > "${CURRENT_LOG_DIR}/experiment_config.json" << EOF
{
    "timestamp": "$(date -Iseconds)",
    "base_model": "${BASE_MODEL_NAME_OR_PATH}",
    "weights_value": "${weights}",
    "layers": [$(IFS=','; echo "${LAYERS[*]}")],
    "modules": $(json_string_array "${MODULES[@]}"),
    "tasks": $(json_string_array "${tasks[@]}"),
    "total_experiments": $((${#LAYERS[@]} * ${#MODULES[@]} * ${#tasks[@]})),
    "gpu_config": "${CUDA_VISIBLE_DEVICES}",
    "nproc_per_node": ${NPROC_PER_NODE}
}
EOF

# --- Main evaluation loop ---
# Counters shared by the sweep loop and the final statistics
total_experiments=$((${#LAYERS[@]} * ${#MODULES[@]} * ${#tasks[@]}))
current_experiment=0
success_count=0
skip_count=0
fail_count=0

# Derive the displayed layer range from LAYERS so the banner cannot drift
# away from the layers that are actually swept.
layer_range="none"
if (( ${#LAYERS[@]} > 0 )); then
    layer_range="${LAYERS[0]}-${LAYERS[${#LAYERS[@]} - 1]}"
fi

# NOTE(review): `weights` is not assigned anywhere in the visible script, so
# the "Weight Parameter" line is empty unless it comes from the environment.
echo ""
echo "🔬 Intervening_layer Experiment Configuration"
echo "==============================================="
echo "Base Model: ${BASE_MODEL_NAME_OR_PATH}"
echo "Weight Parameter: ${weights}"
echo "Total Experiments: ${total_experiments}"
echo "Layer Range: ${layer_range} (Total ${#LAYERS[@]} layers)"
echo "Test Modules: ${MODULES[*]}"
echo "Test Datasets: ${tasks[*]}"
echo "==============================================="
echo "📁 File Storage Structure:"
echo "   Result Files: ${RESULTS_DIR}"
echo "   Log Files: ${CURRENT_LOG_DIR}"
echo "   Analysis Results: ${ANALYSIS_DIR}"
echo "==============================================="

# Record start time (epoch seconds) for elapsed/remaining estimates
start_time=$(date +%s)

# Create log files; each starts with a one-line header and is appended to later
main_log="${CURRENT_LOG_DIR}/main_experiment.log"
failed_log="${CURRENT_LOG_DIR}/failed_experiments.log"
success_log="${CURRENT_LOG_DIR}/successful_experiments.log"
progress_log="${CURRENT_LOG_DIR}/progress.log"

echo "# Cut Layer Experiment Main Log - $(date)" > "${main_log}"
echo "# Failed Experiments Record - $(date)" > "${failed_log}"
echo "# Successful Experiments Record - $(date)" > "${success_log}"
echo "# Progress Record - $(date)" > "${progress_log}"

# Format a non-negative number of seconds as HH:MM:SS.
# Unlike `date -d@N -u +%H:%M:%S`, the hour field does not wrap at 24, so
# multi-day sweeps are reported correctly, and no GNU-only date flag is needed.
format_duration() {
    local total_seconds="${1:-0}"
    printf '%02d:%02d:%02d\n' \
        "$((total_seconds / 3600))" \
        "$(((total_seconds % 3600) / 60))" \
        "$((total_seconds % 60))"
}

# Traverse all layers x modules x tasks, running one evaluation per combination.
# Each iteration ends in exactly one of: skipped (result CSV already present),
# failed (torchrun exited non-zero), success (exit 0 and result CSV present),
# or a warning (exit 0 but no result CSV).
for layer in "${LAYERS[@]}"; do
    for module in "${MODULES[@]}"; do
        for task in "${tasks[@]}"; do
            # Incremented first, so it is >= 1 wherever it is used as a divisor below
            current_experiment=$((current_experiment + 1))

            echo ""
            echo "🔬 Experiment ${current_experiment}/${total_experiments}"
            echo "==============================================="
            echo "🔹 Layer: ${layer}"
            echo "🔹 Module: ${module}"
            echo "🔹 Task: ${task}"
            echo "🔹 Time: $(date '+%Y-%m-%d %H:%M:%S')"

            # Record to main log
            echo "$(date '+%Y-%m-%d %H:%M:%S') - Experiment ${current_experiment}/${total_experiments}: Layer${layer}, Module${module}, Task${task}" >> "${main_log}"

            # Check if result already exists
            # NOTE(review): `weights` is not assigned in the visible script; if it is
            # empty the expected file name is llava__eval_<layer>_<module>_<task>.csv.
            if check_result_exists "${weights}" "${layer}" "${module}" "${task}"; then
                echo "⏭️  Result already exists, skipping experiment"
                skip_count=$((skip_count + 1))
                echo "$(date '+%Y-%m-%d %H:%M:%S') - Skipped: Layer${layer}, Module${module}, Task${task} - Result already exists" >> "${main_log}"

                # Display progress; remaining time is a linear extrapolation of the pace so far
                progress=$((current_experiment * 100 / total_experiments))
                elapsed_time=$(($(date +%s) - start_time))
                estimated_total_time=$((elapsed_time * total_experiments / current_experiment))
                remaining_time=$((estimated_total_time - elapsed_time))

                echo "📊 Progress: ${progress}% | Success: ${success_count} | Skipped: ${skip_count} | Failed: ${fail_count}"
                echo "⏱️  Elapsed Time: $(format_duration "${elapsed_time}") | Estimated Remaining: $(format_duration "${remaining_time}")"

                # Record progress
                echo "$(date '+%Y-%m-%d %H:%M:%S') - Progress: ${progress}% (${current_experiment}/${total_experiments}) | Success: ${success_count} | Skipped: ${skip_count} | Failed: ${fail_count}" >> "${progress_log}"
                continue
            fi

            # Find available port
            port=$(find_free_port)
            echo "🌐 Using port: ${port}"

            # Create temporary working directory for current experiment
            experiment_temp_dir="${TEMP_DIR}/exp_${layer}_${module}_${task}_${current_experiment}"
            mkdir -p "${experiment_temp_dir}"

            echo "🚀 Starting experiment execution..."
            echo "$(date '+%Y-%m-%d %H:%M:%S') - Starting execution: Layer${layer}, Module${module}, Task${task}" >> "${main_log}"

            # Execute evaluation, save stdout and stderr to a dedicated log file
            experiment_log="${CURRENT_LOG_DIR}/experiment_${layer}_${module}_${task}.log"

            torchrun --nproc-per-node="${NPROC_PER_NODE}" --master-port="${port}" run.py \
                --data "${task}" \
                --model "${BASE_MODEL_NAME_OR_PATH}" \
                --verbose \
                --work-dir "${experiment_temp_dir}" \
                --cut_layer "${layer}" \
                --cut_module "${module}" \
                > "${experiment_log}" 2>&1

            eval_exit_code=$?

            # Check execution result
            if [ "${eval_exit_code}" -ne 0 ]; then
                echo "❌ Experiment failed"
                echo "   Error code: ${eval_exit_code}"
                echo "   Detailed log: ${experiment_log}"

                # Record failure
                fail_info="$(date '+%Y-%m-%d %H:%M:%S') - Layer${layer}, Module${module}, Task${task}, Error code${eval_exit_code}"
                echo "${fail_info}" >> "${failed_log}"
                echo "${fail_info}" >> "${main_log}"
                fail_count=$((fail_count + 1))
            else
                echo "✅ Experiment execution completed"

                # Verify if result file was generated
                # NOTE(review): nothing in this loop copies output from the work dir
                # into RESULTS_DIR before the work dir is deleted below; unless run.py
                # writes the CSV there itself, this check cannot succeed - confirm.
                if check_result_exists "${weights}" "${layer}" "${module}" "${task}"; then
                    echo "✅ Result file confirmed to be generated"
                    success_count=$((success_count + 1))
                    success_info="$(date '+%Y-%m-%d %H:%M:%S') - Layer${layer}, Module${module}, Task${task} - Success"
                    echo "${success_info}" >> "${success_log}"
                    echo "${success_info}" >> "${main_log}"
                else
                    # NOTE(review): this outcome increments no counter, so
                    # success + skipped + failed can be less than the total.
                    echo "⚠️  Warning: Experiment completed but expected result file not found"
                    echo "$(date '+%Y-%m-%d %H:%M:%S') - Warning: Layer${layer}, Module${module}, Task${task} - Result file not found" >> "${main_log}"
                fi
            fi

            # Clean temporary directory
            if [ -d "${experiment_temp_dir}" ]; then
                rm -rf "${experiment_temp_dir}"
            fi

            # Calculate and display progress
            progress=$((current_experiment * 100 / total_experiments))
            elapsed_time=$(($(date +%s) - start_time))
            estimated_total_time=$((elapsed_time * total_experiments / current_experiment))
            remaining_time=$((estimated_total_time - elapsed_time))

            echo "📊 Progress: ${progress}% (${current_experiment}/${total_experiments})"
            echo "📈 Success: ${success_count} | Skipped: ${skip_count} | Failed: ${fail_count}"
            echo "⏱️  Elapsed Time: $(format_duration "${elapsed_time}")"
            echo "⏱️  Estimated Remaining: $(format_duration "${remaining_time}")"
            echo "⏱️  Estimated Completion: $(date -d@$(($(date +%s) + remaining_time)))"

            # Record progress
            echo "$(date '+%Y-%m-%d %H:%M:%S') - Progress: ${progress}% (${current_experiment}/${total_experiments}) | Success: ${success_count} | Skipped: ${skip_count} | Failed: ${fail_count} | Remaining Time: $(format_duration "${remaining_time}")" >> "${progress_log}"

            # Brief wait to ensure resource release
            echo "⏳ Waiting for resource release..."
            sleep 5
        done
    done
done

# --- Experiment completion statistics ---
end_time=$(date +%s)
total_time=$((end_time - start_time))

# Derived figures are computed once and reused for the console and the JSON file.
# Skipped experiments count towards the success rate.
# Guard the divisions so an empty sweep reports zeros instead of aborting
# with "division by 0".
if (( total_experiments > 0 )); then
    success_rate=$(( (success_count + skip_count) * 100 / total_experiments ))
    average_time=$(( total_time / total_experiments ))
else
    success_rate=0
    average_time=0
fi

# HH:MM:SS without wrapping at 24 hours (date -u +%H:%M:%S would show 25h as 01:00:00)
total_duration_formatted=$(printf '%02d:%02d:%02d' \
    "$((total_time / 3600))" \
    "$(((total_time % 3600) / 60))" \
    "$((total_time % 60))")

echo ""
echo "🎉 All Cut Layer experiments completed!"
echo "==============================================="
echo "📊 Experiment Statistics:"
echo "   Total Experiments: ${total_experiments}"
echo "   Successful Experiments: ${success_count}"
echo "   Skipped Experiments: ${skip_count}"
echo "   Failed Experiments: ${fail_count}"
echo "   Success Rate: ${success_rate}%"
echo ""
echo "⏱️  Time Statistics:"
echo "   Total Duration: ${total_duration_formatted}"
echo "   Average Time Per Experiment: ${average_time} seconds"
echo "   Start Time: $(date -d@${start_time})"
echo "   End Time: $(date -d@${end_time})"
echo ""

# Record final statistics as JSON next to this run's logs
final_stats="${CURRENT_LOG_DIR}/final_statistics.json"
cat > "${final_stats}" << EOF
{
    "experiment_completed": "$(date -Iseconds)",
    "total_experiments": ${total_experiments},
    "successful_experiments": ${success_count},
    "skipped_experiments": ${skip_count},
    "failed_experiments": ${fail_count},
    "success_rate": ${success_rate},
    "total_duration_seconds": ${total_time},
    "total_duration_formatted": "${total_duration_formatted}",
    "average_time_per_experiment": ${average_time},
    "start_time": "$(date -d@${start_time} -Iseconds)",
    "end_time": "$(date -d@${end_time} -Iseconds)"
}
EOF

echo "📁 File locations:"
echo "   Result files: ${RESULTS_DIR}"
echo "   Main log file: ${main_log}"
echo "   Detailed log directory: ${CURRENT_LOG_DIR}"
echo "   Final statistics: ${final_stats}"

# Only point at the failure log when something actually failed
if (( fail_count > 0 )); then
    echo "   Failure record: ${failed_log}"
fi

echo "==============================================="

# --- Automatically run result analysis ---
# The analysis script is looked up inside ANALYSIS_DIR and receives the result
# directory and the analysis directory as its two arguments.
echo ""
echo "🔍 Starting experiment result analysis..."

analysis_script="${ANALYSIS_DIR}/analyze_cut_layer_results.py"

if ! command -v python3 > /dev/null 2>&1; then
    # No interpreter: tell the user how to run the analysis by hand
    echo "⚠️  Python3 not found, skipping automatic analysis"
    echo "   You can manually run: python3 ${analysis_script} ${RESULTS_DIR} ${ANALYSIS_DIR}"
else
    python3 "${analysis_script}" "${RESULTS_DIR}" "${ANALYSIS_DIR}"
    analysis_exit_code=$?

    case "${analysis_exit_code}" in
        0)
            echo "✅ Result analysis completed"
            echo "📊 Analysis results saved in: ${ANALYSIS_DIR}"
            ;;
        *)
            echo "⚠️  Error occurred during result analysis, exit code: ${analysis_exit_code}"
            ;;
    esac
fi

# --- Display final result overview ---
echo ""
echo "📋 Generated file overview:"

# Print the base names of at most the first five paths given as arguments,
# followed by a "... and N more files" line when there are more than five.
list_first_files() {
    local -r limit=5
    local path shown=0
    for path in "$@"; do
        if (( shown >= limit )); then
            break
        fi
        printf '     - %s\n' "${path##*/}"
        shown=$((shown + 1))
    done
    if (( $# > limit )); then
        echo "     - ... and $(($# - limit)) more files"
    fi
}

# Collect files with globs instead of parsing `ls`: names with whitespace stay
# intact and sub-directories of ANALYSIS_DIR count as one entry each rather
# than being expanded into their contents. nullglob makes an unmatched pattern
# expand to nothing; its previous state is restored afterwards.
nullglob_was_set=0
if shopt -q nullglob; then
    nullglob_was_set=1
fi
shopt -s nullglob
result_files=("${RESULTS_DIR}"/llava_*_eval_*.csv)
log_files=("${CURRENT_LOG_DIR}"/*.log)
analysis_files=("${ANALYSIS_DIR}"/*)
if (( nullglob_was_set == 0 )); then
    shopt -u nullglob
fi

# Display result files
result_files_count=${#result_files[@]}
if (( result_files_count > 0 )); then
    echo "   📊 Result files (${result_files_count} items):"
    list_first_files "${result_files[@]}"
else
    echo "   ⚠️  No result files found"
fi

# Display log files: the four bookkeeping logs by name, the rest as a count
log_files_count=${#log_files[@]}
if (( log_files_count > 0 )); then
    echo "   📝 Log files (${log_files_count} items):"
    echo "     - Main log: $(basename "${main_log}")"
    echo "     - Failure record: $(basename "${failed_log}")"
    echo "     - Success record: $(basename "${success_log}")"
    echo "     - Progress record: $(basename "${progress_log}")"
    if (( log_files_count > 4 )); then
        echo "     - ... and $((log_files_count - 4)) more experiment logs"
    fi
fi

# Display analysis files
analysis_files_count=${#analysis_files[@]}
if (( analysis_files_count > 0 )); then
    echo "   📈 Analysis files (${analysis_files_count} items):"
    list_first_files "${analysis_files[@]}"
fi

# Closing summary: where to look next and how the output tree is laid out.
echo ""
echo "🎊 All Cut Layer experiments completed!"
echo ""
echo "📖 Quick view of results:"
echo "   Summary data: cat ${ANALYSIS_DIR}/cut_layer_summary.csv"
echo "   Analysis report: cat ${ANALYSIS_DIR}/cut_layer_analysis_report.md"
echo "   Experiment statistics: cat ${final_stats}"
echo ""
echo "📁 Complete file structure:"
echo "   ${RESULTS_BASE_DIR}/"
echo "   ├── results/          # CSV result files"
# The log directory name is taken from LOGS_DIR so the printed tree always
# matches the directory that was really created.
echo "   ├── ${LOGS_DIR##*/}/   # All log files"
echo "   │   └── ${TIMESTAMP}/ # Logs for this experiment"
echo "   ├── analysis/         # Analysis results and reports"
echo "   ├── temp/             # Temporary files (cleaned)"
echo "   └── cache/            # Cache files (cleaned)"