import argparse
import re

# Matches one record: a result path, the success-rate line, and the counts
# line.  The fractional part of the rate is optional so that whole-number
# rates such as "100%" are accepted as well as "12.50%".
_RECORD_PATTERN = re.compile(
    r"(?P<path>.+)\n"
    r"jailbreak success rate:\s(?P<success_rate>\d+(?:\.\d+)?)%\n"
    r"success:\s(?P<success>\d+)\s+all:\s(?P<total>\d+)"
)
# Path component that directly follows a directory ending in "attack_results".
_SELF_NAME_PATTERN = re.compile(r"attack_results/([^/]+)/")
# First run of digits followed by "B" in the path (for example "72B").
_PARAM_SIZE_PATTERN = re.compile(r"(\d+B)")
# Placeholder used for any field that cannot be derived from the path.
_NOT_AVAILABLE = "N/A"


def process_file_info(input_lines):
    """Parse groups of result lines into structured records.

    Args:
        input_lines: Iterable of line groups.  Each group is a sequence of
            strings that is joined with newlines and matched against the
            layout ``<path>`` / ``jailbreak success rate: <rate>%`` /
            ``success: <n> all: <m>``.

    Returns:
        A list with one dict per matching group, holding the keys
        ``model_name``, ``self_name``, ``param_size``, ``num_after_param``,
        ``success_rate`` (float), ``success`` and ``total`` (digit strings
        exactly as captured).  Groups that do not match are skipped.  Fields
        that cannot be derived from the path are set to ``"N/A"``.
    """
    result = []

    for info in input_lines:
        match = _RECORD_PATTERN.match("\n".join(info))
        if match is None:
            continue

        path = match.group("path")
        parts = path.split("/")
        # Both positional lookups below need at least two components; a path
        # without any "/" would otherwise raise IndexError.
        has_directories = len(parts) > 1

        # Model name: text before the first "_" of the second path component
        # (e.g. the "qwen2.5" in "qwen2.5_attack_results").
        model_name = parts[1].split("_")[0] if has_directories else _NOT_AVAILABLE

        # Self name: the component after ".../attack_results/"; it may be a
        # number or a name containing spaces.
        self_match = _SELF_NAME_PATTERN.search(path)
        self_name = self_match.group(1) if self_match else _NOT_AVAILABLE

        # Parameter size, e.g. "72B".
        size_match = _PARAM_SIZE_PATTERN.search(path)
        param_size = size_match.group(1) if size_match else _NOT_AVAILABLE

        # Second-to-last path component; per the original note this is the
        # number that follows the parameter size (e.g. the "0" after "72B").
        num_after_param = parts[-2] if has_directories else _NOT_AVAILABLE

        result.append({
            "model_name": model_name,
            "self_name": self_name,
            "param_size": param_size,
            "num_after_param": num_after_param,
            "success_rate": float(match.group("success_rate")),
            "success": match.group("success"),
            "total": match.group("total"),
        })

    return result

def read_file_in_chunks(file_path, chunk_size=3):
    """Yield the file's lines in consecutive groups of ``chunk_size``.

    Args:
        file_path: Path of the text file to read.
        chunk_size: Number of physical lines per group; must be at least 1.

    Yields:
        Lists of ``chunk_size`` strings with surrounding whitespace stripped.
        When the file ends part-way through a group, the remaining slots are
        filled with empty strings.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    with open(file_path, 'r') as file:
        while True:
            raw_lines = [file.readline() for _ in range(chunk_size)]
            # readline() returns '' only at end of file; a blank line still
            # carries its newline.  Testing the raw text (before stripping)
            # keeps a blank line from being mistaken for the end of the file.
            if not raw_lines[0]:
                break
            yield [line.strip() for line in raw_lines]

def process_best_results(results):
    """Keep, for each configuration, the record with the highest success rate.

    Records are grouped by the combination of ``model_name``, ``self_name``,
    ``param_size`` and ``num_after_param``.  When two records of the same
    group have equal success rates, the one seen first is kept.  The returned
    list follows the order in which each group first appeared.
    """
    group_fields = ("model_name", "self_name", "param_size", "num_after_param")
    winners = {}

    for record in results:
        group = tuple(record[field] for field in group_fields)
        incumbent = winners.get(group)

        # Store the record when the group is new or it strictly beats the
        # current holder.
        if incumbent is None or record["success_rate"] > incumbent["success_rate"]:
            winners[group] = record

    return list(winners.values())

def main(argv=None):
    """Command-line entry point: print the best result per configuration.

    Reads the file given by ``--file`` in three-line chunks, parses each
    chunk, keeps the record with the highest success rate for every
    configuration, and prints those records one per line.

    Args:
        argv: Optional list of command-line arguments.  When ``None``,
            argparse reads them from ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="Process file with 3-line data chunks.")
    # --file is mandatory: without it there is nothing to read, and passing
    # None on to open() would fail with an unhelpful TypeError.
    parser.add_argument('--file', required=True, help="The path to the file to be processed.")
    args = parser.parse_args(argv)

    # Parse every chunk of the file into a record.
    all_results = process_file_info(read_file_in_chunks(args.file))

    # Keep only the best success rate for each configuration and print it.
    for item in process_best_results(all_results):
        print(item)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
