#!/bin/bash
#
# M76 - High-Fidelity Log, Commit, and Case Mapper (v2)
#
# This script generates a complete mapping of verbatim log files to their
# initial commit hash and associated case study ID. It intelligently handles
# commits that may be associated with multiple case studies.

# --- Configuration ---
LOG_SOURCE_DIR=".ai-cli-log"
OUTPUT_CSV="case-study/M76_Quantitative_Study_of_the_CHAC_Workbench/output/log_to_case_map_v2.csv"

# --- Helpers ---

# extract_case_ids
#   Reads repository paths on stdin (one per line) and prints the unique case
#   study directory names found in them, one per line, sorted.
#   A match is a path fragment of the form "case-study/A<digits>..." or
#   "case-study/M<digits>..."; only the directory name after "case-study/" is
#   printed. Prints nothing at all when no path matches, so callers can count
#   results reliably.
extract_case_ids() {
    grep -o -E 'case-study/[AM][0-9]+[^/]+' | sed 's|^case-study/||' | sort -u
}

# join_case_ids ID...
#   Prints the given IDs joined with ';' (no trailing separator).
join_case_ids() {
    local IFS=';'
    printf '%s\n' "$*"
}

# csv_row FIELD...
#   Prints one CSV line with every field wrapped in double quotes. Embedded
#   double quotes are doubled so unusual file names cannot corrupt the row.
csv_row() {
    local dq='"' field row=""
    for field in "$@"; do
        field=${field//$dq/$dq$dq}
        row+="${row:+,}\"${field}\""
    done
    printf '%s\n' "$row"
}

# main
#   Maps every "*.txt" file under LOG_SOURCE_DIR to the commit that first added
#   it and to the case study directories touched by that commit, writing one
#   row per log file to OUTPUT_CSV with a status of:
#     CLEAR      - exactly one case study in the commit
#     AMBIGUOUS  - several case studies (IDs joined with ';')
#     NOT_FOUND  - no commit found, or no case study in the commit
#   Returns non-zero if the output file cannot be created.
main() {
    local -a logfiles=() case_ids=()
    local logfile commit_hash case_id
    local total_files=0 processed_count=0 file_count=0

    echo "Starting High-Fidelity Log Mapping..."
    echo "Log Source: $LOG_SOURCE_DIR"
    echo "---------------------------------"

    # Make sure the destination exists before doing any expensive git work.
    if ! mkdir -p -- "$(dirname -- "$OUTPUT_CSV")"; then
        echo "Error: cannot create output directory for $OUTPUT_CSV" >&2
        return 1
    fi

    # Create the final CSV header
    if ! echo "log_file,commit_hash,case_study_id,status" > "$OUTPUT_CSV"; then
        echo "Error: cannot write to $OUTPUT_CSV" >&2
        return 1
    fi

    # Collect the log files once. Process substitution keeps this loop (and the
    # counters below) in the current shell; NUL delimiters keep odd names intact.
    if [[ -d "$LOG_SOURCE_DIR" ]]; then
        while IFS= read -r -d '' logfile; do
            logfiles+=("$logfile")
        done < <(find "$LOG_SOURCE_DIR" -name "*.txt" -print0)
    else
        echo "Warning: log source directory not found: $LOG_SOURCE_DIR" >&2
    fi
    total_files=${#logfiles[@]}

    # --- Main Loop ---
    for logfile in "${logfiles[@]}"; do
        processed_count=$((processed_count + 1))
        printf 'Processing file %d/%d... (%s)\r' "$processed_count" "$total_files" "$logfile"

        # 1. Find the initial commit hash for the log file
        #    (git log lists newest first, so the last line is the oldest add).
        commit_hash=$(git log --diff-filter=A --pretty=format:%H -- "$logfile" | tail -n 1)

        if [[ -z "$commit_hash" ]]; then
            printf '\nWarning: Could not find initial commit for %s\n' "$logfile" >&2
            csv_row "$logfile" "COMMIT_NOT_FOUND" "" "NOT_FOUND" >> "$OUTPUT_CSV"
            continue
        fi

        # 2. Find all unique associated case studies in that commit.
        #    Counting array elements (not lines of a possibly empty string)
        #    makes "no case study" a real zero.
        case_ids=()
        while IFS= read -r case_id; do
            if [[ -n "$case_id" ]]; then
                case_ids+=("$case_id")
            fi
        done < <(git show --name-only --pretty="" "$commit_hash" | extract_case_ids)

        # 3. Apply decision logic
        case ${#case_ids[@]} in
            0)
                # No case study found in the commit
                csv_row "$logfile" "$commit_hash" "" "NOT_FOUND" >> "$OUTPUT_CSV"
                ;;
            1)
                # Clear, one-to-one mapping
                csv_row "$logfile" "$commit_hash" "${case_ids[0]}" "CLEAR" >> "$OUTPUT_CSV"
                file_count=$((file_count + 1))
                ;;
            *)
                # Ambiguous mapping, multiple cases found
                csv_row "$logfile" "$commit_hash" "$(join_case_ids "${case_ids[@]}")" "AMBIGUOUS" >> "$OUTPUT_CSV"
                ;;
        esac
    done

    printf '\n---------------------------------\n'
    echo "Mapping complete."
    echo "Successfully mapped $file_count log files with CLEAR status."
    echo "Final map saved to: $OUTPUT_CSV"
}

main "$@"
