import json
import openpyxl

# Path of the JSON file whose records are summarized by this script.
INPUT = "./single_turn_slogan_validation_set_20240322_standard_53B_chat_answer_bc2_33b_V913_final.json"
# How a record that carries several answer scores is counted:
#   0: every answer score is recorded as its own response
#   1: the answer scores of a record are averaged into a single value
TYPE = 0

# JSON text is UTF-8 encoded; pass the encoding explicitly so that the
# non-ASCII tag values are decoded the same way regardless of the platform's
# locale default.
with open(INPUT, "r", encoding="utf-8") as f:
    data = json.load(f)

# Number of records accepted by the validity filter of the accumulation loop.
valid_num = 0

# Outer keys shared by both statistics tables, in iteration (output) order.
# Granularity tags: "汉字" (Chinese characters), "英文单词" (English words).
_LIDU_TAGS = ("汉字", "英文单词")
# Relation tags: "大于" (greater than), "小于" (less than),
# "严格等于" (strictly equal).
_RELATION_TAGS = ("大于", "小于", "严格等于")

# granularity tag -> relation tag -> {}.  The innermost dicts start empty and
# are filled per "字数" tag by the accumulation loop (with score lists here).
score_info = {
    lidu: {relation: {} for relation in _RELATION_TAGS}
    for lidu in _LIDU_TAGS
}

# Same layout as score_info; the innermost dicts later hold the number of
# scores equal to 1.0 per "字数" tag.
perfect_info = {
    lidu: {relation: {} for relation in _RELATION_TAGS}
    for lidu in _LIDU_TAGS
}

# Collect the "count" reward scores of every valid record, grouped by the
# record's "粒度" (granularity), "关系" (relation) and "字数" (word count) tags.
for d in data:
    content_rewards = d["rewards"]["content"]
    # A record is ignored when its first "content" reward is 0.  An empty
    # "content" list is treated the same way instead of raising IndexError.
    if not content_rewards or content_rewards[0] == 0:
        continue
    count_rewards = d["rewards"]["count"]
    # The record is also ignored unless both reward lists have equal length.
    if len(count_rewards) != len(content_rewards):
        continue

    valid_num += 1
    lidu_tag = d["tags"]["粒度"]
    relation_tag = d["tags"]["关系"]
    count_tag = d["tags"]["字数"]

    # Create the per-tag entries on first use.
    scores = score_info[lidu_tag][relation_tag].setdefault(count_tag, [])
    perfect_counts = perfect_info[lidu_tag][relation_tag]
    perfect_counts.setdefault(count_tag, 0)

    if TYPE == 0:
        # Every answer score counts as a separate response.
        new_scores = count_rewards
    elif TYPE == 1:
        # One averaged score per record; count_rewards is non-empty here
        # because it has the same length as the non-empty content_rewards.
        new_scores = [sum(count_rewards) / len(count_rewards)]
    else:
        # Any other TYPE records no scores, as before.
        new_scores = []

    scores.extend(new_scores)
    # A score of exactly 1.0 is counted as a perfect score.
    perfect_counts[count_tag] += sum(1 for score in new_scores if score == 1.0)

# Replace every raw score list in score_info with a two-element summary
# [sum of scores, number of scores]; keys and their order are unchanged.
for relation_groups in score_info.values():
    for count_groups in relation_groups.values():
        count_groups.update(
            {tag: [sum(scores), len(scores)] for tag, scores in count_groups.items()}
        )

# print("测试文件：", INPUT)      
# print("有效且保留的回答个数：", valid_num)
# print("TYPE：", TYPE)
# for lidu_type in score_info.keys():
#     for relation_type in score_info[lidu_type].keys():
#         print(lidu_type," ",relation_type,"个数：",sum([score_info[lidu_type][relation_type][count_tag][1] for count_tag in score_info[lidu_type][relation_type].keys()]))
#         print(lidu_type," ",relation_type,"均分：",sum([score_info[lidu_type][relation_type][count_tag][0] for count_tag in score_info[lidu_type][relation_type].keys()])/\
#                                                    sum([score_info[lidu_type][relation_type][count_tag][1] for count_tag in score_info[lidu_type][relation_type].keys()]) )
#         print(lidu_type," ",relation_type,"满分：",float(sum([perfect_info[lidu_type][relation_type][count_tag] for count_tag in perfect_info[lidu_type][relation_type].keys()]))/\
#                                                    sum([score_info[lidu_type][relation_type][count_tag][1] for count_tag in score_info[lidu_type][relation_type].keys()]) )
#         for count_tag in score_info[lidu_type][relation_type].keys():
#             print(lidu_type," ",relation_type," ",count_tag,"个数：",score_info[lidu_type][relation_type][count_tag][1])
#             print(lidu_type," ",relation_type," ",count_tag,"均分：",score_info[lidu_type][relation_type][count_tag][0]/score_info[lidu_type][relation_type][count_tag][1])
#             print(lidu_type," ",relation_type," ",count_tag,"满分：",float(perfect_info[lidu_type][relation_type][count_tag])/score_info[lidu_type][relation_type][count_tag][1])
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or None when denominator is zero."""
    return numerator / denominator if denominator else None


# Write the statistics to an Excel sheet.  Header columns: "类型" (type),
# "关系" (relation), "字数" (word count), "个数" (number of scores),
# "均分" (mean score), "满分" (share of scores equal to 1.0).
workbook = openpyxl.Workbook()
sheet = workbook.active
sheet.append(["类型", "关系", "字数", "个数", "均分", "满分"])
for lidu_type, relation_groups in score_info.items():
    for relation_type, count_groups in relation_groups.items():
        perfect_counts = perfect_info[lidu_type][relation_type]
        # Each value of count_groups is a [sum of scores, number of scores] pair.
        total_score = sum(entry[0] for entry in count_groups.values())
        total_count = sum(entry[1] for entry in count_groups.values())
        total_perfect = sum(perfect_counts.values())

        # Summary row for the whole (type, relation) pair.  A pair without any
        # recorded score has total_count == 0; its ratio cells are left empty
        # (None) instead of raising ZeroDivisionError.
        sheet.append([
            lidu_type,
            relation_type,
            "",
            total_count,
            _safe_ratio(total_score, total_count),
            _safe_ratio(total_perfect, total_count),
        ])

        # One detail row per word-count tag.
        for count_tag, (score_sum, score_count) in count_groups.items():
            sheet.append([
                lidu_type,
                relation_type,
                count_tag,
                score_count,
                _safe_ratio(score_sum, score_count),
                _safe_ratio(perfect_counts[count_tag], score_count),
            ])
workbook.save('0_0_count.xlsx')