import wandb
import pandas as pd

# --- Batch processing configuration ---
# List every experiment that should be processed here.
# Each entry is a dict with three keys, all read by the processing loop below:
#   "run_path":     path of the source run, passed to `wandb.Api().run(...)`
#   "step_offset":  integer added to every original `_step` value
#   "new_run_name": display name given to the newly created run
# Commented-out entries are disabled and will not be processed.
EXPERIMENTS_TO_PROCESS = [
    # {
    #     "run_path": "astrid_tuning_llm/verl-qwen3-4b-base/1o6qud6h",
    #     "step_offset": 60,
    #     "new_run_name": "add1k-new-60steps-continue-12k"
    # },
    # {
    #     "run_path": "astrid_tuning_llm/verl-qwen3-4b-base/5xptihva", 
    #     "step_offset": 60,
    #     "new_run_name": "add1k-new-60steps-continue-overlong-filter"
    # },
    # {
    #     "run_path": "astrid_tuning_llm/verl-qwen3-4b-base/xw82ja08",
    #     "step_offset": 60, 
    #     "new_run_name": "add1k-new-60steps-continue"
    # },
    # {
    #     "run_path": "astrid_tuning_llm/verl-qwen3-4b-base/0gvs1fpt",
    #     "step_offset": 60,
    #     "new_run_name": "add1k-new-60steps-continue-max16k"
    # },
    # {
    #     "run_path": "astrid_tuning_llm/verl-qwen3-4b-base/m0ur9f39",
    #     "step_offset": 130,
    #     "new_run_name": "add1k-60step-overlong70step-add-round2"
    # },
    {
        "run_path": "astrid_tuning_llm/verl-qwen3-4b-base/85jv43oc",
        "step_offset": 150,
        "new_run_name": "add1k-new-60steps-overlong70step-addround2-20step-continue"
    }
]
# --- Script start ---


def shift_row(row, step_offset):
    """Translate one history row into ``(new_step, metrics)``.

    Args:
        row: Mapping of column name to value for a single logged step
            (a plain dict or a pandas Series both work).
        step_offset: Integer added to the row's original ``_step``.

    Returns:
        A ``(new_step, metrics)`` tuple, or ``None`` when the row must be
        skipped: it has no usable ``_step``, or nothing is left to log once
        internal columns and missing values are removed.
    """
    raw_step = row.get("_step")
    if raw_step is None or pd.isna(raw_step):
        return None

    # Drop W&B internal columns (leading underscore) and missing scalar
    # values (None / NaN) so that metrics which were not logged at this step
    # are not re-uploaded as NaN points. Non-scalar values (dicts, lists) are
    # passed through untouched.
    metrics = {
        key: value
        for key, value in row.items()
        if not key.startswith("_")
        and not (pd.api.types.is_scalar(value) and pd.isna(value))
    }
    if not metrics:
        return None

    return int(raw_step) + step_offset, metrics


def process_experiment(api, run_path, step_offset, new_run_name):
    """Copy the history of ``run_path`` into a new run with shifted steps.

    Args:
        api: A ``wandb.Api`` instance used to fetch the source run.
        run_path: Path of the source run, passed to ``api.run``.
        step_offset: Integer added to every original ``_step``.
        new_run_name: Name of the run created to hold the shifted data.

    Returns:
        The number of rows uploaded to the new run.

    Raises:
        Any exception raised while fetching or uploading is propagated to
        the caller; the new run is always finished before it propagates.
    """
    # 1. Fetch the source run and its history.
    print(f"正在从 '{run_path}' 下载历史数据...")
    original_run = api.run(run_path)

    # scan_history() iterates over every logged row, whereas history() only
    # returns a sampled subset (500 rows by default), which would silently
    # truncate long runs.
    history_rows = list(original_run.scan_history())
    print(f"成功下载 {len(history_rows)} 个数据点。")

    # 2. Create the destination run. Using the run as a context manager
    #    guarantees it is finished even if the upload fails midway, so a
    #    half-written run never stays active for the next experiment.
    uploaded = 0
    with wandb.init(
        project=original_run.project,
        entity=original_run.entity,
        name=new_run_name,
        config=original_run.config,  # copy the source run's config
    ) as new_run:
        print(f"创建了一个新的实验: {new_run.url}")

        # 3. Re-log every row at its shifted step.
        print("正在上传修正后的数据...")
        for row in history_rows:
            shifted = shift_row(row, step_offset)
            if shifted is None:
                continue
            new_step, metrics = shifted
            new_run.log(metrics, step=new_step)
            uploaded += 1

    return uploaded


def main():
    """Process every entry of ``EXPERIMENTS_TO_PROCESS`` and print a summary."""
    print("正在连接 W&B API...")
    api = wandb.Api()

    total_experiments = len(EXPERIMENTS_TO_PROCESS)
    print(f"准备处理 {total_experiments} 个实验...")

    succeeded = 0
    for i, exp_config in enumerate(EXPERIMENTS_TO_PROCESS, 1):
        run_path = exp_config["run_path"]
        step_offset = exp_config["step_offset"]
        new_run_name = exp_config["new_run_name"]

        print(f"\n{'='*60}")
        print(f"正在处理第 {i}/{total_experiments} 个实验: {new_run_name}")
        print(f"原始实验路径: {run_path}")
        print(f"步数偏移: {step_offset}")
        print(f"{'='*60}")

        # One failing experiment must not abort the rest of the batch.
        try:
            process_experiment(api, run_path, step_offset, new_run_name)
        except wandb.errors.CommError as e:
            print(f"❌ 错误：找不到实验 '{run_path}'。请检查您的 entity, project, 和 run_id 是否正确。")
            print(f"错误详情: {e}")
            continue
        except Exception as e:
            print(f"❌ 处理实验 '{new_run_name}' 时发生未知错误: {e}")
            continue

        succeeded += 1
        print(f"✅ 实验 '{new_run_name}' 处理完成！")

    # Report real outcomes instead of claiming every experiment was processed.
    failed = total_experiments - succeeded
    print(f"\n{'='*60}")
    print(f"🎉 批量处理完成！共 {total_experiments} 个实验，成功 {succeeded} 个，失败 {failed} 个。")
    print("请刷新您的 W&B 项目页面，新的实验应该已经出现了。")
    print(f"{'='*60}")


if __name__ == "__main__":
    main()