#!/bin/bash
#
# M76 - Unified Log, Commit, and Case Mapper
#
# This script directly generates a complete mapping of verbatim log files
# to their initial commit hash and the associated case study ID from that commit.

# --- Configuration ---
LOG_SOURCE_DIR=".ai-cli-log"
OUTPUT_CSV="case-study/M76_Quantitative_Study_of_the_CHAC_Workbench/output/log_to_case_map.csv"

# --- Helpers ---

# Print a message to stderr and abort the script with status 1.
die() {
    printf 'Error: %s\n' "$*" >&2
    exit 1
}

# Print $1 as a single CSV field (no trailing newline).
# Values containing a comma, double quote, or line break are wrapped in
# double quotes with embedded quotes doubled; anything else is printed
# unchanged, so ordinary paths produce the same rows as before.
csv_field() {
    local value=$1
    local dq='"'
    case "$value" in
        *,* | *\"* | *$'\n'* | *$'\r'*)
            value=${value//$dq/$dq$dq}
            printf '"%s"' "$value"
            ;;
        *)
            printf '%s' "$value"
            ;;
    esac
}

# Print the hash of the oldest commit that added the path $1.
# Prints nothing when git reports no such commit (or is not usable here).
find_initial_commit() {
    # git log lists newest first, so the last line is the earliest addition.
    git log --diff-filter=A --pretty=format:%H -- "$1" | tail -n 1
}

# Given a newline-separated list of paths in $1, print the case study ID
# (the path component directly under case-study/ that starts with A or M
# followed by digits) of the first matching path. Prints nothing when no
# path matches. Always returns 0.
extract_case_study_id() {
    local files=$1
    local case_study_dir
    case_study_dir=$(grep -o -E 'case-study/[AM][0-9]+[^/]+' <<<"$files" | head -n 1)
    if [[ -n "$case_study_dir" ]]; then
        printf '%s\n' "${case_study_dir##*/}"
    fi
    return 0
}

# --- Main ---

# Map every *.txt file under LOG_SOURCE_DIR to the commit that first added
# it and to the case study touched by that commit, writing one CSV row per
# log file to OUTPUT_CSV.
main() {
    local -a logfiles=()
    local logfile commit_hash files_in_commit case_study_id out_dir
    local total=0 processed_count=0 file_count=0

    echo "Starting Unified Log Mapping..."
    echo "Log Source: $LOG_SOURCE_DIR"
    echo "---------------------------------"

    # Make sure the destination exists; otherwise every later append fails.
    out_dir=$(dirname "$OUTPUT_CSV")
    mkdir -p "$out_dir" || die "cannot create output directory: $out_dir"

    # Create the final CSV header
    printf '%s\n' 'log_file,commit_hash,case_study_id' > "$OUTPUT_CSV" \
        || die "cannot write to $OUTPUT_CSV"

    # Collect the log files up front so the progress display can show the
    # real total. NUL-delimited reading keeps paths with spaces intact.
    if [[ -d "$LOG_SOURCE_DIR" ]]; then
        while IFS= read -r -d '' logfile; do
            logfiles+=("$logfile")
        done < <(find "$LOG_SOURCE_DIR" -name '*.txt' -print0)
    else
        # Kept non-fatal: a missing source directory has always produced a
        # header-only CSV rather than an error exit.
        printf 'Warning: log source directory not found: %s\n' "$LOG_SOURCE_DIR" >&2
    fi
    total=${#logfiles[@]}

    for logfile in "${logfiles[@]}"; do
        processed_count=$((processed_count + 1))
        printf 'Processing file %d/%d... (%s)\r' "$processed_count" "$total" "$logfile"

        # 1. Find the initial commit hash for the log file
        commit_hash=$(find_initial_commit "$logfile")

        if [[ -z "$commit_hash" ]]; then
            printf '\nWarning: Could not find initial commit for %s\n' "$logfile" >&2
            printf '%s,COMMIT_NOT_FOUND,CASE_NOT_FOUND\n' "$(csv_field "$logfile")" >> "$OUTPUT_CSV"
            continue
        fi

        # 2. Find the associated case study in that commit
        files_in_commit=$(git show --name-only --pretty="" "$commit_hash")
        case_study_id=$(extract_case_study_id "$files_in_commit")

        if [[ -n "$case_study_id" ]]; then
            printf '%s,%s,%s\n' "$(csv_field "$logfile")" "$commit_hash" \
                "$(csv_field "$case_study_id")" >> "$OUTPUT_CSV"
            file_count=$((file_count + 1))
        else
            # This is an important case to log for later review
            printf '%s,%s,CASE_NOT_FOUND\n' "$(csv_field "$logfile")" "$commit_hash" >> "$OUTPUT_CSV"
        fi
    done

    printf '\n---------------------------------\n'
    echo "Mapping complete."
    echo "Successfully mapped $file_count out of $processed_count log files."
    echo "Final map saved to: $OUTPUT_CSV"
}

main "$@"
