import json
import os
from collections import Counter


def load_json_file(filepath):
    """Read the UTF-8 encoded JSON document at *filepath* and return the parsed value.

    Errors from ``open`` (e.g. a missing file) and from ``json.load``
    (malformed JSON) are not caught here and propagate to the caller.
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        parsed = json.load(handle)
    return parsed


def _summarize_scores(profile_items, denominator=None):
    """Tally the non-zero scores found in *profile_items*.

    Each item's ``'score'`` is coerced with ``int``; a missing or zero score
    is treated as "unscored" and ignored.

    Args:
        profile_items: Iterable of dicts that may carry a ``'score'`` entry.
        denominator: Number used as the "out of" value in the distribution
            strings. Defaults to the number of scored items.

    Returns:
        A ``(scored_count, average, distribution)`` tuple. ``average`` is 0
        when nothing is scored. ``distribution`` maps each score to a
        ``"count/denominator(percent%)"`` string, most frequent score first
        (ties keep first-encountered order).
    """
    scores = []
    for item in profile_items:
        score = int(item.get('score', 0))
        if score:
            scores.append(score)

    scored_count = len(scores)
    if denominator is None:
        denominator = scored_count

    average = sum(scores) / scored_count if scored_count else 0

    # The comprehension body only runs when at least one score exists, which
    # implies denominator >= scored_count > 0, so the division is safe.
    distribution = {
        score: f"{count}/{denominator}({(count / denominator) * 100:.1f}%)"
        for score, count in Counter(scores).most_common()
    }
    return scored_count, average, distribution


def calculate_factor_statistics(factor_data=None, feature_data=None):
    """Compute per-user and per-factor score statistics.

    Args:
        factor_data: Optional iterable of per-user dicts with ``'user_id'``
            and ``'factorization'`` (a dict holding ``'selected_factors'``
            and a ``'factors'`` mapping of factor name to a list of dicts
            with ``'item_id'``). When ``None`` it is loaded from
            ``<parent of this file's directory>/result/lamp3_only_factor.json``.
        feature_data: Optional iterable of per-user dicts with ``'user_id'``
            and ``'profile'`` (a list of dicts with ``'item_id'`` and
            ``'score'``). When ``None`` it is loaded from
            ``<parent of this file's directory>/result/lamp3_feature.json``.

    Returns:
        A dict keyed by user id. Each value holds ``total_profile_samples``,
        ``average_score``, ``score_distribution`` and a ``factors`` dict with
        ``total_samples``, ``average_score`` and ``score_distribution`` per
        selected factor. Users in *factor_data* without a profile entry in
        *feature_data* are skipped.

    Missing or null ``factorization``, ``selected_factors``, ``factors``,
    per-factor feature lists and ``profile`` values are treated as empty
    rather than raising.
    """
    if factor_data is None or feature_data is None:
        current_dir = os.path.dirname(os.path.abspath(__file__))
        result_dir = os.path.join(os.path.dirname(current_dir), 'result')
        if factor_data is None:
            factor_data = load_json_file(
                os.path.join(result_dir, 'lamp3_only_factor.json'))
        if feature_data is None:
            feature_data = load_json_file(
                os.path.join(result_dir, 'lamp3_feature.json'))

    # Index profiles by user id; records without a truthy id are ignored.
    user_profiles = {}
    for user_data in feature_data:
        user_id = user_data.get('user_id')
        if user_id:
            user_profiles[user_id] = user_data.get('profile') or []

    statistics = {}
    for user_data in factor_data:
        user_id = user_data.get('user_id')
        if user_id not in user_profiles:
            continue

        user_profile = user_profiles[user_id]
        user_total_samples = len(user_profile)

        # The overall distribution is expressed relative to *all* profile
        # items (scored or not), while the average covers scored items only.
        _, user_average_score, overall_score_stats = _summarize_scores(
            user_profile, denominator=user_total_samples)

        user_stats = {
            "total_profile_samples": user_total_samples,
            "average_score": round(user_average_score, 2),
            "score_distribution": overall_score_stats,
            "factors": {}
        }

        factorization = user_data.get('factorization') or {}
        selected_factors = factorization.get('selected_factors') or []
        factor_features = factorization.get('factors') or {}

        for factor in selected_factors:
            features = factor_features.get(factor) or []

            # A set gives O(1) membership tests while scanning the profile.
            factor_item_ids = {
                feature.get('item_id')
                for feature in features
                if feature.get('item_id')
            }
            factor_profile_items = [
                profile_item for profile_item in user_profile
                if profile_item.get('item_id') in factor_item_ids
            ]

            # Per-factor distributions are relative to scored items only.
            total_samples, factor_average_score, score_stats = (
                _summarize_scores(factor_profile_items))

            user_stats["factors"][factor] = {
                "total_samples": total_samples,
                "average_score": round(factor_average_score, 2),
                "score_distribution": score_stats
            }

        statistics[user_id] = user_stats

    return statistics


def save_statistics(statistics):
    """Write *statistics* as indented JSON and print the destination path.

    The output goes to ``result/lamp3_factor.json`` under the parent of the
    directory that contains this file.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    result_dir = os.path.join(project_root, 'result')
    output_file = os.path.join(result_dir, 'lamp3_factor.json')

    with open(output_file, mode='w', encoding='utf-8') as sink:
        json.dump(statistics, sink, indent=2)

    print(f"Factor statistics saved to {output_file}")


if __name__ == "__main__":
    # Script entry point: compute the statistics, then persist them.
    save_statistics(calculate_factor_statistics())