#!/bin/bash
#
# Summarize the accuracies recorded in the per-seed log files of one
# experiment configuration.
#
# Usage: <script> DATASET NUM_TRAIN TASK M [PURIFICATION [GT]]
#
# Log files are selected with the glob (expanded relative to the working
# directory):
#   ${DATASET}_seed*_num_train${NUM_TRAIN}_m${M}_${TASK}<suffix>.log
# where <suffix> is
#   ""                     if PURIFICATION is empty or omitted,
#   "_purification"        if PURIFICATION is set and GT is empty or omitted,
#   "_purification_no_gt"  if both PURIFICATION and GT are set.
#
# From every matching file the value after the LAST "Best acc: " and the
# LAST "Best classifier acc: " is taken. The script prints the mean of the
# former, and the mean and population standard deviation (divisor = number
# of files) of the latter.
#
# Exits with status 1 if arguments are missing, bc is unavailable, no file
# matches, or a file lacks a numeric value for either metric.

# Not referenced anywhere below: files are discovered through the seed* glob.
# shellcheck disable=SC2034
seeds=(1234 9640 3914)

# Print the arguments as one error line on stderr and abort with status 1.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

# Print the value that follows the last occurrence of LABEL ($1) in FILE ($2):
# the text up to the next space, with one '%' removed. Prints nothing when
# LABEL does not occur in FILE.
last_metric() {
    local label=$1 file=$2
    grep -hF -- "$label" "$file" | tail -n 1 | sed "s/.*${label}//; s/ .*//; s/%//"
}

# Succeed if $1 is a plain decimal number that bc can parse.
is_number() {
    [[ $1 =~ ^-?([0-9]+\.?[0-9]*|\.[0-9]+)$ ]]
}

main() {
    if (( $# < 4 )); then
        die "Usage: ${0##*/} DATASET NUM_TRAIN TASK M [PURIFICATION [GT]]"
    fi
    command -v bc >/dev/null || die "bc is required but was not found in PATH"

    local dataset=$1 num_train=$2 task=$3 m=$4
    local purification=${5:-} gt=${6:-}

    # Choose the log-name suffix from the two optional arguments.
    # NOTE(review): a non-empty GT selects the *_no_gt* logs; this polarity is
    # kept exactly as it was -- confirm it is the intended meaning.
    local suffix=""
    if [[ -n $purification ]]; then
        if [[ -z $gt ]]; then
            suffix="_purification"
        else
            suffix="_purification_no_gt"
        fi
    fi
    local fnames="${dataset}_seed*_num_train${num_train}_m${m}_${task}${suffix}.log"

    # Expand the pattern once into an array. nullglob makes "no match" an
    # empty array instead of the literal pattern; the empty IFS prevents word
    # splitting so arguments containing spaces still form a single pattern.
    local -a files=()
    local saved_ifs=$IFS
    shopt -s nullglob
    IFS=
    # shellcheck disable=SC2206  # glob expansion is intended here
    files=( $fnames )
    IFS=$saved_ifs
    shopt -u nullglob

    local num_files=${#files[@]}
    if (( num_files == 0 )); then
        die "No files found matching pattern: $fnames"
    fi

    local total_acc=0 total_class_acc=0
    local -a accuracies=()
    local file acc class_acc

    for file in "${files[@]}"; do
        printf 'Processing file: %s\n' "$file"
        class_acc=$(last_metric "Best classifier acc: " "$file")
        acc=$(last_metric "Best acc: " "$file")

        # An empty or malformed value would make bc fail and silently reset
        # the running totals, so stop instead of reporting a wrong average.
        is_number "$acc" \
            || die "No numeric 'Best acc: ' value in $file (got '$acc')"
        is_number "$class_acc" \
            || die "No numeric 'Best classifier acc: ' value in $file (got '$class_acc')"

        total_acc=$(bc <<<"$total_acc + $acc")
        total_class_acc=$(bc <<<"$total_class_acc + $class_acc")
        accuracies+=("$class_acc")
    done

    local avg_acc avg_class_acc
    avg_acc=$(bc -l <<<"$total_acc / $num_files")
    avg_class_acc=$(bc -l <<<"$total_class_acc / $num_files")

    # Population standard deviation of the classifier accuracies.
    local sum_squared_diff=0 value diff std_dev
    for value in "${accuracies[@]}"; do
        diff=$(bc -l <<<"$value - $avg_class_acc")
        sum_squared_diff=$(bc -l <<<"$sum_squared_diff + $diff * $diff")
    done
    # Keep bc's full -l precision here: forcing scale=2 would truncate the
    # variance to two decimals BEFORE the square root (e.g. 0.0067 -> 0.00).
    # Rounding to two decimals is left to printf below.
    std_dev=$(bc -l <<<"sqrt($sum_squared_diff / $num_files)")

    printf "Average acc: %.2f\n" "$avg_acc"
    printf "Average class acc: %.2f ± %.2f\n" "$avg_class_acc" "$std_dev"
}

main "$@"