# Data-generation driver for the random-automaton intervention experiments.
#
# For every (NUM_STATES, NUM_SYMBOLS) configuration this script:
#   1. generates one automaton for SEED,
#   2. preprocesses it once per TOP_SEED,
#   3. lifts it, builds a sampler and samples datasets for each intervention
#      target over a sweep of intervention counts,
#   4. samples "vanilla" datasets for a larger range of TOP_SEEDs.
#
# Requires bash (arrays, [[ ]], (( ))). The src/intervention_sampling/*.py
# paths are relative, so run it from the directory that contains src/.

# Sweep of intervention counts: START, START+STEP, ... up to END (inclusive).
INTERVENTION_START=50
INTERVENTION_END=1050
INTERVENTION_STEP=200
BASE_DIR="experiments_random_final"

ACCEPTANCE_PROB=0.3
NUM_SAMPLES=2000

SEED=123

export CUDA_VISIBLE_DEVICES=0

# Only one machine per config!

for NUM_STATES in 40; do
    for NUM_SYMBOLS in 10; do

        # All artefacts of one configuration live under this identifier.
        IDENTIFIER="${NUM_STATES}st_${NUM_SYMBOLS}sym_redo"
        AUTOMATON_OUTPUT_DIR="${BASE_DIR}/data/${IDENTIFIER}/machine/${SEED}"

        mkdir -p "${AUTOMATON_OUTPUT_DIR}"

        echo "Generating automaton topology for seed ${SEED}"
        python src/intervention_sampling/generate_automaton_topology.py \
            --output_dir "${AUTOMATON_OUTPUT_DIR}" \
            --save_automaton \
            --num_states "${NUM_STATES}" \
            --num_symbols "${NUM_SYMBOLS}" \
            --accept_prob "${ACCEPTANCE_PROB}" \
            --seed "${SEED}" \
            --topology_seed "${SEED}"

        # ------------------------------------------------------------------
        # Intervention datasets
        # ------------------------------------------------------------------
        for TOP_SEED in {1..10}; do

            # Keyed by IDENTIFIER so that different configurations never share
            # (or reuse) a topology directory.
            TOPOLOGY_OUTPUT_DIR="${BASE_DIR}/data/${IDENTIFIER}/topology/${SEED}/${TOP_SEED}"
            mkdir -p "${TOPOLOGY_OUTPUT_DIR}"

            echo "Preprocessing weighted automaton for seed ${SEED}"
            python src/intervention_sampling/preprocess_weighted_automaton.py \
                --input_dir "${AUTOMATON_OUTPUT_DIR}" \
                --output_dir "${TOPOLOGY_OUTPUT_DIR}" \
                --accept_prob "${ACCEPTANCE_PROB}" \
                --seed "${TOP_SEED}"

            # Interventions
            # verify with state
            for INTERVENTION in state; do

                # Number of targets to iterate over for this intervention type.
                case "${INTERVENTION}" in
                    symbol) NUM_TGTS=${NUM_SYMBOLS} ;;
                    state) NUM_TGTS=10 ;; # ${NUM_STATES}
                    vanilla) NUM_TGTS=1 ;;
                    *)
                        # Without this guard NUM_TGTS would silently keep the
                        # value of the previous intervention type.
                        echo "Unsupported intervention '${INTERVENTION}', skipping" >&2
                        continue
                        ;;
                esac

                for ((TARGET = 0; TARGET < NUM_TGTS; TARGET++)); do

                    # State 0 is never used as an intervention target.
                    if [[ "${INTERVENTION}" == "state" && "${TARGET}" -eq 0 ]]; then
                        continue
                    fi

                    # Reset on every pass so an intervention without a target
                    # flag (vanilla) cannot inherit one from a previous pass.
                    target_args=()
                    case "${INTERVENTION}" in
                        symbol) target_args=(--target_symbol "${TARGET}") ;;
                        state) target_args=(--target_state "${TARGET}") ;;
                        # Left here until we do arcs (not reachable until the
                        # NUM_TGTS selection above gains an "arc" entry).
                        arc) target_args=(--target_transition "${TARGET}") ;;
                    esac

                    for SEMIRING in alo; do

                        # semiring_args go to the lifting script,
                        # semiring_flag to the sampler-creation script.
                        case "${SEMIRING}" in
                            alo)
                                semiring_args=(--at_least_once_semiring --intervention_count 1 --validation_num_occurrences 1)
                                semiring_flag=(--at_least_once_semiring)
                                ;;
                            binning)
                                semiring_args=(--intervention_count "${NUM_SAMPLES}" --validation_num_occurrences "${NUM_SAMPLES}")
                                semiring_flag=()
                                ;;
                        esac

                        LIFTED_OUTPUT_DIR="${BASE_DIR}/data/${SEMIRING}/${IDENTIFIER}/lifted/${SEED}/${TOP_SEED}/${INTERVENTION}/${TARGET}"
                        SAMPLER_OUTPUT_DIR="${BASE_DIR}/data/${SEMIRING}/${IDENTIFIER}/sampler/${SEED}/${TOP_SEED}/${INTERVENTION}/${TARGET}"

                        mkdir -p "${LIFTED_OUTPUT_DIR}" "${SAMPLER_OUTPUT_DIR}"

                        echo "Lifting weighted automaton for ${INTERVENTION}, target ${TARGET}, seed ${SEED}"
                        python src/intervention_sampling/lift_weighted_automaton.py \
                            --input_dir "${TOPOLOGY_OUTPUT_DIR}" \
                            --output_dir "${LIFTED_OUTPUT_DIR}" \
                            --intervention_type "${INTERVENTION}" \
                            "${semiring_args[@]}" \
                            "${target_args[@]}"

                        echo "Lifted machine (${INTERVENTION}, target ${TARGET}, seed ${SEED})"

                        echo "Creating sampler for ${INTERVENTION}, target ${TARGET}, seed ${SEED}"
                        python src/intervention_sampling/create_sampler.py \
                            --input_dir "${LIFTED_OUTPUT_DIR}" \
                            --output_dir "${SAMPLER_OUTPUT_DIR}" \
                            --max_occ_count "${NUM_SAMPLES}" \
                            --seed "${TOP_SEED}" \
                            "${semiring_flag[@]}"

                        echo "Created sampler for ${INTERVENTION}, target ${TARGET}, seed ${SEED}"

                        DATA_OUTPUT="${BASE_DIR}/data/datasets/${SEMIRING}/${IDENTIFIER}/${SEED}/${TOP_SEED}/${INTERVENTION}/${TARGET}"

                        for ((i = INTERVENTION_START; i <= INTERVENTION_END; i += INTERVENTION_STEP)); do
                            train_dir="${DATA_OUTPUT}/train/${i}"
                            train_out="${train_dir}/main.tok"
                            val_dir="${DATA_OUTPUT}/validation/${i}"
                            val_out="${val_dir}/main.tok"
                            # NOTE(review): the test path has no ${i} component,
                            # so every intervention count targets the same
                            # test_output file - confirm this is intended.
                            test_dir="${DATA_OUTPUT}/test/"
                            test_out="${test_dir}/main.tok"

                            mkdir -p "${train_dir}" "${val_dir}" "${test_dir}"

                            echo "Sampling data for ${INTERVENTION}, target ${TARGET}, seed ${SEED}, intervention count ${i}"
                            python src/intervention_sampling/sample_and_prepare_data.py \
                                --input_dir "${SAMPLER_OUTPUT_DIR}" \
                                --dataset_size "${NUM_SAMPLES}" \
                                --num_val "${NUM_SAMPLES}" \
                                --num_test 1000 \
                                --sampling_seed "${i}" \
                                --intervention_count "${i}" \
                                --output_type text \
                                --training_output "${train_out}" \
                                --validation_output "${val_out}" \
                                --test_output "${test_out}"
                        done
                    done
                done
            done
        done

        # ------------------------------------------------------------------
        # Vanilla datasets: one dataset per TOP_SEED in 1..400. The value of
        # i (1250) names the train/validation sub-directories and is passed
        # as the sampling seed; no --intervention_count is given.
        # ------------------------------------------------------------------
        i=1250
        INTERVENTION=vanilla

        # Set explicitly so this stage does not depend on whatever the last
        # iteration of the intervention loops above happened to leave behind.
        # NOTE(review): assumes the lifting script needs no target argument for
        # the vanilla intervention type - confirm.
        SEMIRING=alo
        TARGET=0
        target_args=()
        semiring_args=(--at_least_once_semiring --intervention_count 1 --validation_num_occurrences 1)
        semiring_flag=(--at_least_once_semiring)

        for TOP_SEED in {1..400}; do

            TOPOLOGY_OUTPUT_DIR="${BASE_DIR}/data/${IDENTIFIER}/topology/${SEED}/${TOP_SEED}"
            # Reuse a topology directory created earlier (e.g. by the
            # intervention stage for TOP_SEED 1..10); otherwise create it.
            if [[ -d "${TOPOLOGY_OUTPUT_DIR}" ]]; then
                echo "File exists."
            else
                mkdir -p "${TOPOLOGY_OUTPUT_DIR}"
                echo "Preprocessing weighted automaton for seed ${SEED}"
                python src/intervention_sampling/preprocess_weighted_automaton.py \
                    --input_dir "${AUTOMATON_OUTPUT_DIR}" \
                    --output_dir "${TOPOLOGY_OUTPUT_DIR}" \
                    --accept_prob "${ACCEPTANCE_PROB}" \
                    --seed "${TOP_SEED}"
            fi

            LIFTED_OUTPUT_DIR="${BASE_DIR}/data/${SEMIRING}/${IDENTIFIER}/lifted/${SEED}/${TOP_SEED}/${INTERVENTION}/${TARGET}"
            SAMPLER_OUTPUT_DIR="${BASE_DIR}/data/${SEMIRING}/${IDENTIFIER}/sampler/${SEED}/${TOP_SEED}/${INTERVENTION}/${TARGET}"

            mkdir -p "${LIFTED_OUTPUT_DIR}" "${SAMPLER_OUTPUT_DIR}"

            echo "Lifting weighted automaton for ${INTERVENTION}, target ${TARGET}, seed ${SEED}"
            python src/intervention_sampling/lift_weighted_automaton.py \
                --input_dir "${TOPOLOGY_OUTPUT_DIR}" \
                --output_dir "${LIFTED_OUTPUT_DIR}" \
                --intervention_type "${INTERVENTION}" \
                "${semiring_args[@]}" \
                "${target_args[@]}"

            echo "Lifted machine (${INTERVENTION}, target ${TARGET}, seed ${SEED})"

            echo "Creating sampler for ${INTERVENTION}, target ${TARGET}, seed ${SEED}"
            # NOTE(review): the intervention stage seeds the sampler with
            # TOP_SEED; here SEED is used - confirm this is intended.
            python src/intervention_sampling/create_sampler.py \
                --input_dir "${LIFTED_OUTPUT_DIR}" \
                --output_dir "${SAMPLER_OUTPUT_DIR}" \
                --max_occ_count "${NUM_SAMPLES}" \
                --seed "${SEED}" \
                "${semiring_flag[@]}"

            echo "Created sampler for ${INTERVENTION}, target ${TARGET}, seed ${SEED}"

            DATA_OUTPUT="${BASE_DIR}/data/datasets/vanilla/${IDENTIFIER}/${SEED}/${TOP_SEED}"

            train_dir="${DATA_OUTPUT}/train/${i}"
            train_out="${train_dir}/main.tok"
            val_dir="${DATA_OUTPUT}/validation/${i}"
            val_out="${val_dir}/main.tok"
            test_dir="${DATA_OUTPUT}/test/"
            test_out="${test_dir}/main.tok"

            mkdir -p "${train_dir}" "${val_dir}" "${test_dir}"

            echo "Sampling data for ${INTERVENTION}, target ${TARGET}, seed ${SEED}, intervention count ${i}"
            python src/intervention_sampling/sample_and_prepare_data.py \
                --input_dir "${SAMPLER_OUTPUT_DIR}" \
                --dataset_size "${NUM_SAMPLES}" \
                --num_val "${NUM_SAMPLES}" \
                --num_test 1000 \
                --sampling_seed "${i}" \
                --output_type text \
                --training_output "${train_out}" \
                --validation_output "${val_out}" \
                --test_output "${test_out}"

        done
    done
done
