import json
import os
import re # Import the re module for regular expressions

def _collect_scores(scores_data, result_obj, current_id):
    """Append each usable aspect score in *result_obj* to *scores_data*.

    Args:
        scores_data: Mapping of aspect name to the list of integer scores
            gathered so far; mutated in place.
        result_obj: Dict that may map aspect names to ``{"score": ...}``
            objects.
        current_id: Identifier of the entry, used only in warning messages.
    """
    for aspect, collected in scores_data.items():
        aspect_obj = result_obj.get(aspect)
        # Only an object that actually carries a "score" key contributes;
        # anything else (missing aspect, bare number, string, ...) is ignored.
        if not isinstance(aspect_obj, dict) or "score" not in aspect_obj:
            continue
        try:
            collected.append(int(aspect_obj["score"]))
        except (ValueError, TypeError):
            # TypeError covers scores such as null or a list, which int()
            # rejects with a different exception than a malformed string.
            print(f"Warning: Non-integer score found for {aspect} in entry {current_id}. Skipping.")


def calculate_averages(json_file_path):
    """Compute the average score per aspect from a JSON file of entries.

    The file is expected to hold a JSON array of objects. For each entry the
    scores are taken from ``entry["result"]`` when it is a string (parsed as
    JSON), otherwise from ``entry["parsed_result"]`` when it is a dict.
    Entries offering neither, or whose ``result`` string is not a valid JSON
    object, are skipped with a warning.

    Args:
        json_file_path: Path of the JSON file to read (UTF-8).

    Returns:
        A dict mapping each aspect ("helpfulness", "clarity", "factuality",
        "depth", "engagement") to its mean score rounded to 4 decimals, or
        ``None`` when no score was found for that aspect, plus an
        ``"overall_average"`` key holding the rounded mean of the unrounded
        per-aspect means (``None`` if no aspect had scores). Returns ``None``
        if the file is missing, is not valid JSON, or does not contain a
        JSON array.
    """
    scores_data = {
        "helpfulness": [],
        "clarity": [],
        "factuality": [],
        "depth": [],
        "engagement": []
    }

    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: File not found at {json_file_path}")
        return None
    except json.JSONDecodeError as e:
        print(f"Error: Could not decode JSON from {json_file_path}. Please check overall JSON file structure. Error: {e}")
        return None

    if not isinstance(data, list):
        print(f"Error: Expected a JSON array of entries in {json_file_path}.")
        return None

    for entry in data:
        if not isinstance(entry, dict):
            print("Warning: Found an entry that is not a JSON object. Skipping.")
            continue

        current_id = entry.get('id', 'N/A')
        if isinstance(entry.get("result"), str):
            result_str = entry["result"]
            # Keep the try body to the one call that can raise the decode error.
            try:
                result_obj = json.loads(result_str)
            except json.JSONDecodeError as e:
                print(f"Warning: 'result' field in entry {current_id} is not a valid JSON string. Skipping. Error: {e}")
                continue
            if not isinstance(result_obj, dict):
                print(f"Warning: 'result' field in entry {current_id} does not contain a JSON object. Skipping.")
                continue
            _collect_scores(scores_data, result_obj, current_id)
        elif isinstance(entry.get("parsed_result"), dict):
            _collect_scores(scores_data, entry["parsed_result"], current_id)
        else:
            print(f"Warning: Neither 'result' (as string) nor 'parsed_result' (as dict) found or valid in entry {current_id}. Skipping.")

    calculated_averages = {}
    overall_scores = []

    for aspect, scores in scores_data.items():
        if scores:
            avg_score = sum(scores) / len(scores)
            calculated_averages[aspect] = round(avg_score, 4)
            # Keep the unrounded mean so the overall figure is not built
            # from already-rounded values.
            overall_scores.append(avg_score)
        else:
            calculated_averages[aspect] = None  # No scores found for this aspect

    if overall_scores:
        calculated_averages["overall_average"] = round(sum(overall_scores) / len(overall_scores), 4)
    else:
        calculated_averages["overall_average"] = None

    return calculated_averages

def save_to_json_file(data, output_file_path):
    """Serialize *data* as indented JSON into *output_file_path*.

    The file is written as UTF-8 with a 4-space indent and non-ASCII
    characters left unescaped. A confirmation line is printed on success;
    if an OSError occurs the error is reported with a printed message
    rather than raised.

    Args:
        data: JSON-serializable object to write.
        output_file_path: Destination path; an existing file is overwritten.
    """
    dump_options = {"indent": 4, "ensure_ascii": False}
    try:
        with open(output_file_path, mode='w', encoding='utf-8') as out_handle:
            json.dump(data, out_handle, **dump_options)
        print(f"Results saved to {output_file_path}")
    except OSError:  # IOError is an alias of OSError in Python 3
        print(f"Error: Could not write to file {output_file_path}")

if __name__ == "__main__":
    # Path of the input JSON file; the summary is written next to it.
    input_json_file = ''
    output_json_file = os.path.join(
        os.path.dirname(input_json_file),
        'calculated_averages.json',
    )

    averages = calculate_averages(input_json_file)

    # calculate_averages returns None when the input could not be loaded;
    # in that case there is nothing to print or save.
    if averages:
        print("\nCalculated Averages:")
        for name, avg in averages.items():
            print(f"  {name}: {avg}")

        save_to_json_file(averages, output_json_file)