
import json
import os
import csv
from collections import defaultdict
import re

# Matches the case-study prefix at the start of a source filename: "A" or "M"
# followed by one or more digits (e.g. "A05", "M75").
_CASE_STUDY_ID_PATTERN = re.compile(r"^(A\d+|M\d+)")

# Case-study IDs that are reported under a different name in the relevance
# report. The source-filename mapping report always keeps the raw ID.
_RELEVANCE_ID_ALIASES = {"A05": "A05_literature_review"}


def _write_csv_report(output_path, fieldnames, rows):
    """Write *rows* (a list of dicts) to *output_path* as a CSV file with a header.

    The parent directory is created when the path has one. Raises OSError if
    the directory or the file cannot be created or written.
    """
    output_dir = os.path.dirname(output_path)
    # dirname() is "" for a bare filename; os.makedirs("") would raise, so
    # only create a directory when there actually is one in the path.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    with open(output_path, 'w', newline='', encoding='utf-8-sig') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def analyze_assets_dependency_free(input_dir, relevance_output_path, mapping_output_path):
    """
    Analyzes JSON asset files for two purposes:
    1. Calculates the average relevance of concepts for each case study.
    2. Creates a mapping of case study IDs to their source filenames.
    Uses only standard Python libraries.

    Each ``*.json`` file in ``input_dir`` is expected to hold a JSON array of
    record objects. A record contributes only if its ``source_filename`` is a
    string starting with ``A<digits>`` or ``M<digits>``; that prefix is the
    case study ID. Entries of ``relevance_to_m75_elements`` contribute when
    they are objects with a usable ``term`` and a numeric ``relevance``.
    Anything that does not fit this shape is skipped instead of aborting the
    whole run. Progress, warnings and errors are reported with ``print``.

    Args:
        input_dir (str): The directory containing the JSON block files.
        relevance_output_path (str): The path to save the concept relevance CSV file.
        mapping_output_path (str): The path to save the source filename mapping CSV file.

    Returns:
        None. Results are written to the two CSV files; a report is not
        created when there is no data for it.
    """
    aggregated_data = defaultdict(lambda: defaultdict(list))
    source_mapping = set()  # unique (case_study, source_filename) pairs

    try:
        # Sorted so that processing order (and therefore the order of tied
        # rows in the relevance report) does not depend on os.listdir().
        json_files = sorted(f for f in os.listdir(input_dir) if f.endswith('.json'))
    except FileNotFoundError:
        print(f"Error: Input directory not found at '{input_dir}'")
        return
    except OSError as e:
        # e.g. the path is a regular file or is not readable.
        print(f"Error: Could not list input directory '{input_dir}'. Error: {e}")
        return

    if not json_files:
        print(f"Error: No JSON files found in '{input_dir}'")
        return

    for filename in json_files:
        file_path = os.path.join(input_dir, filename)
        try:
            # utf-8-sig reads plain UTF-8 and also tolerates a leading BOM.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                data = json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Warning: Could not read or parse {filename}. Skipping. Error: {e}")
            continue

        if not isinstance(data, list):
            print(f"Warning: {filename} does not contain a JSON array of records. Skipping.")
            continue

        for record in data:
            if not isinstance(record, dict):
                continue

            source_filename = record.get("source_filename")
            if not isinstance(source_filename, str):
                continue

            match = _CASE_STUDY_ID_PATTERN.match(source_filename)
            if not match:
                continue
            case_study_id = match.group(1)

            # The mapping report keeps the raw ID; only the relevance report
            # uses the aliased name.
            source_mapping.add((case_study_id, source_filename))
            relevance_id = _RELEVANCE_ID_ALIASES.get(case_study_id, case_study_id)

            # "or []" also covers an explicit JSON null for this key.
            relevance_elements = record.get("relevance_to_m75_elements") or []
            if not isinstance(relevance_elements, list):
                continue

            for element in relevance_elements:
                if not isinstance(element, dict):
                    continue
                term = element.get("term")
                relevance = element.get("relevance")
                # Lists/dicts are the only unhashable values JSON can yield,
                # and they cannot be used as a grouping key.
                if term is None or isinstance(term, (list, dict)):
                    continue
                # Non-numeric scores cannot be averaged.
                if not isinstance(relevance, (int, float)):
                    continue
                aggregated_data[relevance_id][term].append(relevance)

    # --- 1. Process and write the relevance report ---
    final_results = [
        {
            "case_study": case_study,
            "concept": concept,
            "average_relevance": round(sum(scores) / len(scores), 2),
        }
        for case_study, concepts in aggregated_data.items()
        for concept, scores in concepts.items()
    ]

    if final_results:
        # Group by case study, most relevant concept first.
        final_results.sort(key=lambda item: (item['case_study'], -item['average_relevance']))
        try:
            _write_csv_report(
                relevance_output_path,
                ["case_study", "concept", "average_relevance"],
                final_results,
            )
        except OSError as e:
            print(f"Error: Failed to write relevance report at '{relevance_output_path}'. Error: {e}")
        else:
            print(f"Relevance analysis complete. Results saved to '{relevance_output_path}'")
    else:
        print("No relevance data processed. Relevance report will not be created.")

    # --- 2. Process and write the source filename mapping report ---
    if source_mapping:
        # Sorting the (case_study, source_filename) tuples orders by case
        # study first, then by filename.
        mapping_list = [
            {"case_study": case_study, "source_filename": source_filename}
            for case_study, source_filename in sorted(source_mapping)
        ]
        try:
            _write_csv_report(
                mapping_output_path,
                ["case_study", "source_filename"],
                mapping_list,
            )
        except OSError as e:
            print(f"Error: Failed to write mapping file at '{mapping_output_path}'. Error: {e}")
        else:
            print(f"Source file mapping complete. Results saved to '{mapping_output_path}'")
    else:
        print("No source mapping data processed. Mapping file will not be created.")


if __name__ == '__main__':
    # Script entry point. All paths are resolved against the current working
    # directory, which is treated as the project root, so the script has to be
    # launched from there.
    cwd = os.getcwd()

    analyze_assets_dependency_free(
        input_dir=os.path.join(
            cwd, "case-study/A08_Analyze_and_Integrate_A07_Asset_Package/asset_analysis_outputs"
        ),
        relevance_output_path=os.path.join(
            cwd, "case-study/A08_Analyze_and_Integrate_A07_Asset_Package/case_study_concept_relevance.csv"
        ),
        mapping_output_path=os.path.join(
            cwd, "case-study/A08_Analyze_and_Integrate_A07_Asset_Package/case_study_source_filename_mapping.csv"
        ),
    )
