#!/bin/bash

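# Launches the Genetic Algorithm (GA) permutation-optimization run
# (src/main_ga_optimization.py) as a detached background job via nohup;
# the job's stdout/stderr are written to a log file inside OUTPUT_DIR.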

# --- Configuration ---

# GPU index; the launch command passes it as CUDA_VISIBLE_DEVICES.
gpu_id=0

# Dataset selection (should match the experiment setup).
# Alternative base names: relu, square_mod19, index
# DATASET_BASE_NAME="square_mod19"
DATASET_BASE_NAME="relu"
TARGET_LEN=5
INPUT_PREFIX_LEN=${TARGET_LEN}
# Dataset sub-path under the base name; derived from TARGET_LEN.
# Alternative layout: DATASET_NAME="n=${TARGET_LEN}_m=2/data"
DATASET_NAME="n=${TARGET_LEN}/data"
# Path prefix of the .train and .test files.
DATASET_PATH_PREFIX="data/data/small/${DATASET_BASE_NAME}/${DATASET_NAME}"

# Model shape (should be consistent with the pre-trained or intended model).
GPT2_N_LAYER=1
GPT2_N_HEAD=1
GPT2_N_EMBD=512
MAX_SEQ_LENGTH=128

# TrainingArguments (minimal set needed for the GA evaluation context).
N_EPOCHS=1
PER_DEVICE_EVAL_BATCH_SIZE=256 # adjust based on GPU memory
DATALOADER_NUM_WORKERS=0       # can be increased if I/O is a bottleneck
FP16=false                     # set to true only on fp16-capable GPUs

# Genetic-algorithm parameters.
GA_SEED=42
POPULATION_SIZE=128 # individuals (permutations) per population
NUM_GENERATIONS=10  # generations to run
TOURNAMENT_SIZE=2
PERMUTATION_SELECT_NUM=1
CROSSOVER_PROB=0.7  # initial crossover probability
MUTATION_PROB=0.4   # initial mutation probability
MUTATION_INDPB=0.2  # independent per-gene (permutation element) mutation probability
DECAY_START_GENERATION=0
DECAY_END_GENERATION_RATIO=0.8

# Hyper-parameter suffix shared by the output directory and the WandB run name.
ga_hparam_tag="cross=${CROSSOVER_PROB}_mut=${MUTATION_PROB}_mut_indpb=${MUTATION_INDPB}_tournament=${TOURNAMENT_SIZE}_n_epochs=${N_EPOCHS}"

# Results directory. DATASET_NAME contains a '/', so this is a nested path.
OUTPUT_DIR="data/results/small/${DATASET_BASE_NAME}/ga_optimization_${DATASET_NAME}_tl=${TARGET_LEN}_pop=${POPULATION_SIZE}_gen=${NUM_GENERATIONS}_${ga_hparam_tag}_decay_linear"

# WandB logging.
WANDB_PROJECT="ga_permutation_optimization"
# WANDB_ENTITY="your_wandb_entity" # Optional: specify your wandb entity
WANDB_RUN_NAME="ga_${DATASET_NAME}_tl${TARGET_LEN}_pop${POPULATION_SIZE}_gen${NUM_GENERATIONS}_${ga_hparam_tag}"

# Create the output directory before launching: the log redirection below
# needs it to exist. Abort instead of starting a run that cannot write its log.
mkdir -p -- "${OUTPUT_DIR}" || {
    echo "Failed to create output directory: ${OUTPUT_DIR}" >&2
    exit 1
}
log_file="${OUTPUT_DIR}/ga_optimization_results.log"

# --- Activate Virtual Environment (if any) ---
# source /path/to/your/venv/bin/activate

# --- Run the GA optimization script ---
# The job is started with nohup and '&', so this script returns immediately
# while the job keeps running; its stdout and stderr both go to ${log_file}.
# The train batch size deliberately reuses PER_DEVICE_EVAL_BATCH_SIZE.
# To set a WandB entity, add: --wandb_entity "${WANDB_ENTITY}" \
CUDA_VISIBLE_DEVICES="${gpu_id}" nohup python3 src/main_ga_optimization.py \
    --dataset_name "${DATASET_NAME}" \
    --dataset_path_prefix "${DATASET_PATH_PREFIX}" \
    --target_len "${TARGET_LEN}" \
    --input_prefix_len "${INPUT_PREFIX_LEN}" \
    --gpt2_n_head "${GPT2_N_HEAD}" \
    --gpt2_n_layer "${GPT2_N_LAYER}" \
    --gpt2_n_embd "${GPT2_N_EMBD}" \
    --max_seq_length "${MAX_SEQ_LENGTH}" \
    --output_dir "${OUTPUT_DIR}" \
    --per_device_eval_batch_size "${PER_DEVICE_EVAL_BATCH_SIZE}" \
    --per_device_train_batch_size "${PER_DEVICE_EVAL_BATCH_SIZE}" \
    --fp16 "${FP16}" \
    --dataloader_num_workers "${DATALOADER_NUM_WORKERS}" \
    --num_train_epochs "${N_EPOCHS}" \
    --do_train false \
    --do_eval false \
    --remove_unused_columns false \
    --permutation_select_num "${PERMUTATION_SELECT_NUM}" \
    --permutation_type "random" \
    --population_size "${POPULATION_SIZE}" \
    --num_generations "${NUM_GENERATIONS}" \
    --initial_crossover_prob "${CROSSOVER_PROB}" \
    --initial_mutation_prob "${MUTATION_PROB}" \
    --decay_start_generation "${DECAY_START_GENERATION}" \
    --decay_end_generation_ratio "${DECAY_END_GENERATION_RATIO}" \
    --mutation_indpb "${MUTATION_INDPB}" \
    --tournament_size "${TOURNAMENT_SIZE}" \
    --ga_seed "${GA_SEED}" \
    --wandb_project_ga "${WANDB_PROJECT}" \
    --wandb_run_name_ga "${WANDB_RUN_NAME}" > "${log_file}" 2>&1 &
# PID of the background job, reported so the run can be monitored or killed.
ga_pid=$!

# The job is still running at this point, so report a launch, not completion.
echo "GA optimization started in the background (PID: ${ga_pid})."
echo "Follow progress with: tail -f ${log_file}"
echo "Results and logs saved in: ${OUTPUT_DIR}"