import json
from typing import List, Dict
import os

def load_jsonl_data(file_path: str) -> List[Dict]:
    """Load the records of a JSONL file.

    Each non-blank line is stripped of surrounding whitespace and parsed
    as one JSON document.

    Args:
        file_path: Path of the JSONL file; it is read as UTF-8.

    Returns:
        The parsed records in file order. If opening, decoding or parsing
        fails, the error is printed and the records parsed before the
        failure are returned (an empty list if nothing was parsed).
    """
    records = []
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                if not stripped:
                    # Blank / whitespace-only lines carry no record.
                    continue
                records.append(json.loads(stripped))
    except Exception as e:
        # Best-effort loader: report the problem and keep what was read.
        print(f"加载文件 {file_path} 时出错: {e}")
    return records

# Directories whose *.jsonl files are sampled. The parent directory name of
# each entry (second-to-last path component) is used as the model name.
dirs = [
    "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/baseline-8k/valid",
    "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/dapo-add1k-remove-upper-refined-repetition-penalty-max9k-redo/valid",
]

# Zero-based position of the record taken from every JSONL file.
index = 2000
samples = []
for result_dir in dirs:
    # Depends only on the directory, so compute it once per directory.
    model_name = result_dir.split("/")[-2]

    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # order of the exported samples reproducible between runs.
    for file_name in sorted(os.listdir(result_dir)):
        if not file_name.endswith(".jsonl"):
            continue

        file_path = os.path.join(result_dir, file_name)
        data = load_jsonl_data(file_path)

        # load_jsonl_data() returns a short or empty list when the file is
        # unreadable or truncated; skip such files instead of aborting the
        # whole export with an IndexError.
        if len(data) <= index:
            print(f"跳过文件 {file_path}: 只有 {len(data)} 条记录, 缺少索引 {index}")
            continue

        record = data[index]
        # NOTE: the optional "answer" / "data_source" fields were disabled
        # (commented out) in the original script and are still not exported.
        samples.append(
            {
                "id": index,
                "model": f"{model_name}-{record['step']}",
                "prompt": record["input"],
                "output": record["output"],
                "correctness": record["score"],
            }
        )

# ensure_ascii=False writes non-ASCII characters verbatim, so the encoding is
# pinned to UTF-8 instead of relying on the platform's locale default.
with open("/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/openai/samples_redo.json", "w", encoding="utf-8") as f:
    json.dump(samples, f, ensure_ascii=False, indent=2)

