import argparse
import ast

import pandas as pd

# Command-line interface: --log_file is the path of the evaluation log to summarise.
parser = argparse.ArgumentParser()
parser.add_argument("--log_file", type=str, required=True)

args = parser.parse_args()

log_file = args.log_file

with open(log_file, "r") as f:
    lines = f.readlines()


def _metric_value(line, label):
    """Return the number left in ``line`` after removing ``label``, rounded to two decimals.

    Raises:
        ValueError: If the remaining text is not a valid float.
    """
    return round(float(line.replace(label, "").strip()), 2)


cnt = 0  # number of "=== " header lines seen so far
acc_list = []  # Accuracy / Success Rate values of the group being read
novelty_list = []
wacc_list = []  # latest acc_list entry * novelty
validty_list = []
avg_acc = []  # one mean of acc_list per completed group of three sections
similarity_list = []
wsr_list = []  # similarity * latest acc_list entry

final_data = []  # (name, value) rows in the order they are written to the CSV

name = ""
for line in lines:
    if "=== " in line:
        # A header that arrives after a multiple of three sections closes that group:
        # emit its mean and start collecting the next group from scratch.
        if cnt % 3 == 0 and cnt != 0:
            # A group without any Accuracy/Success Rate line has nothing to average;
            # skip the row instead of dividing by zero.
            if acc_list:
                avg_acc.append(round(sum(acc_list) / len(acc_list), 2))
                final_data.append(("avg_acc", avg_acc[-1]))
            acc_list = []
            novelty_list = []
            wacc_list = []
            validty_list = []
            similarity_list = []
            wsr_list = []
        name = line.replace("=== Evaluating ", "").replace(" ===\n", "").strip()
        cnt += 1
        print(line)
    elif "Accuracy: " in line:
        acc = _metric_value(line, "Accuracy: ")
        acc_list.append(acc)
        final_data.append((f"{name}_acc", acc))
    elif "Novelty: " in line:
        novelty = _metric_value(line, "Novelty: ")
        novelty_list.append(novelty)
        final_data.append((f"{name}_novelty", novelty))
        # Weighted accuracy uses the most recent acc_list entry, so an Accuracy or
        # Success Rate line must precede this one within the group (IndexError otherwise).
        wacc_list.append(round(acc_list[-1] * novelty, 2))
        final_data.append((f"{name}_wacc", wacc_list[-1]))
    elif "Validty: " in line:
        # The spelling "Validty" is the label matched in the log; it is kept as-is.
        validty = _metric_value(line, "Validty: ")
        validty_list.append(validty)
        final_data.append((f"{name}_validty", validty))
    elif "Success Rate: " in line:
        success_rate = _metric_value(line, "Success Rate: ")
        # Success rates share acc_list with accuracies and count towards the group mean.
        acc_list.append(success_rate)
        final_data.append((f"{name}_success_rate", success_rate))
    elif "Similarity: " in line:
        similarity = _metric_value(line, "Similarity: ")
        similarity_list.append(similarity)
        # Same ordering requirement as for Novelty above.
        wsr_list.append(round(similarity * acc_list[-1], 2))
        final_data.append((f"{name}_similarity", similarity))
        final_data.append((f"{name}_wsr", wsr_list[-1]))

# Close the last group only when it is complete (a multiple of three sections);
# a trailing partial group is not averaged. The acc_list check avoids a
# ZeroDivisionError for an empty log or a group without accuracy lines.
if cnt % 3 == 0 and acc_list:
    avg_acc.append(round(sum(acc_list) / len(acc_list), 2))
    final_data.append(("avg_acc", avg_acc[-1]))

# Overall mean of the per-group means; omitted when no group produced a mean.
if avg_acc:
    all_avg_acc = round(sum(avg_acc) / len(avg_acc), 2)
    final_data.append(("all_avg_acc", all_avg_acc))
else:
    all_avg_acc = None

df = pd.DataFrame(final_data, columns=["name", "value"])

# Derive the CSV path from the log path: swap a trailing ".log" for ".csv", otherwise
# append ".csv". The output path therefore always differs from the input path, so the
# log that was just read is never overwritten.
if log_file.endswith(".log"):
    save_file = log_file[: -len(".log")] + ".csv"
else:
    save_file = log_file + ".csv"
df.to_csv(save_file, index=False)

def extract_evaluation_data(log_file_path):
    """
    Extract the ``index_true`` value recorded for each evaluation section of a log file.

    A line starting with ``=== Evaluating`` opens a section; its name is the text
    between ``Evaluating`` and the following ``===``. A later line starting with
    ``index_true:`` supplies the value for the section that is currently open.
    ``index_true:`` lines that appear before any section header are ignored.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        dict: Maps each section name to its parsed ``index_true`` value, or to
        ``None`` when the section has no ``index_true`` line.

    Raises:
        ValueError: If an ``index_true`` payload is not a plain Python literal.
        SyntaxError: If an ``index_true`` payload is not valid Python syntax.
    """
    evaluation_dict = {}
    current_eval = None

    with open(log_file_path, 'r') as f:
        for line in f:
            line = line.strip()

            # A header line starts a new evaluation section.
            if line.startswith('=== Evaluating'):
                current_eval = line.split('Evaluating')[1].strip().split('===')[0].strip()
                evaluation_dict[current_eval] = None

            # An index_true line belongs to the section that is currently open.
            elif line.startswith('index_true:'):
                if current_eval:
                    # Parse the textual list with ast.literal_eval rather than eval():
                    # it accepts only literals, so text in the log can never be
                    # executed as code.
                    index_list = ast.literal_eval(line.split('index_true:')[1].strip())
                    evaluation_dict[current_eval] = index_list

    return evaluation_dict

# Example usage: print the index_true value found for each evaluation section.
result = extract_evaluation_data(log_file_path=log_file)
print(result)