import pandas as pd

def calculate_label_statistics(csv_path):
    """Count distinct journals and total records for every field label.

    The ``Field`` column may hold several labels separated by ``;``. Each
    row is counted once for every label it carries. Labels are stripped of
    surrounding whitespace, and empty fragments (for example the one left
    by a trailing ``;``) as well as missing values are ignored.

    Args:
        csv_path: Path to a CSV file containing the columns ``Field``,
            ``Journal`` and ``Complete_Records``.

    Returns:
        A DataFrame with the columns ``Field``, ``Journal_Count`` (number
        of distinct journals per label) and ``Record_Count`` (sum of
        ``Complete_Records`` per label), one row per label, ordered by
        label.
    """
    df = pd.read_csv(csv_path)

    # One row per (record, label) pair; ``assign`` leaves ``df`` untouched.
    exploded = df.assign(Field=df['Field'].str.split(';')).explode('Field')

    # Normalise labels so that "A; B" counts towards "B" rather than " B".
    exploded['Field'] = exploded['Field'].str.strip()

    # Drop missing labels and empty fragments produced by stray separators.
    has_label = exploded['Field'].notna() & (exploded['Field'] != '')
    exploded = exploded[has_label]

    # Compute both statistics in a single pass over the label groups.
    label_statistics = (
        exploded.groupby('Field')
        .agg(
            Journal_Count=('Journal', 'nunique'),
            Record_Count=('Complete_Records', 'sum'),
        )
        .reset_index()
    )

    return label_statistics

def main(csv_path='Dataset/statistics/classified.csv',
         output_path='Dataset/statistics/journal_label.csv'):
    """Compute per-label statistics, print them and save them as CSV.

    Args:
        csv_path: Path of the input CSV passed to
            ``calculate_label_statistics``. Defaults to the project's
            classified-journal file.
        output_path: Path the resulting statistics table is written to.
            Defaults to the project's journal-label file.
    """
    label_statistics = calculate_label_statistics(csv_path)

    # Print the full table without the DataFrame index.
    print("\n=== 标签统计信息 ===")
    print(label_statistics.to_string(index=False))

    # Persist the same table as CSV, again without the index column.
    label_statistics.to_csv(output_path, index=False)
    print(f"\n结果已保存到: {output_path}")

# Run the statistics pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main() 