import os

# Dataset paths: images, segmentation labels and the split list files all
# live under root_dir.
root_dir = '/mnt/data/lsy/ZZQ/mmseg_busi_voc_v2'
image_dir = os.path.join(root_dir, 'JPEGImages')
label_dir = os.path.join(root_dir, 'SegmentationClass')
list_dir = os.path.join(root_dir, 'ImageSets/Segmentation')

# Split list files to check (one sample id per line).
list_files = ['train_80.txt', 'val.txt', 'test.txt']


def _read_ids(list_path):
    """Return the sample ids stored in *list_path*, one per line.

    Surrounding whitespace is stripped from every line. Blank lines are
    skipped so that a trailing empty line is not treated as a sample with
    an empty id (which would otherwise be reported as missing).
    """
    with open(list_path, 'r', encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        return [img_id for img_id in stripped if img_id]


# For every list file: keep only the ids whose image AND label both exist on
# disk, report the rest, and write the survivors to "<name>_cleaned.txt" next
# to the original list. The original list file is never modified.
for list_file in list_files:
    list_path = os.path.join(list_dir, list_file)
    if not os.path.exists(list_path):
        print(f'[⚠️] {list_file} 不存在，跳过')
        continue

    ids = _read_ids(list_path)

    valid_ids = []
    missing = 0

    for img_id in ids:
        # Both the image and its label are looked up with a .png extension.
        img_path = os.path.join(image_dir, f'{img_id}.png')
        label_path = os.path.join(label_dir, f'{img_id}.png')  # labels are usually png
        if os.path.exists(img_path) and os.path.exists(label_path):
            valid_ids.append(img_id)
        else:
            print(f'[❌] 缺失: {img_id}')
            missing += 1

    # Save the cleaned list as a new file.
    new_list_file = os.path.join(list_dir, f'{os.path.splitext(list_file)[0]}_cleaned.txt')
    with open(new_list_file, 'w', encoding='utf-8') as f:
        f.writelines(f'{vid}\n' for vid in valid_ids)

    print(f'[✅] {list_file}: {len(valid_ids)} valid, {missing} missing. Saved to {new_list_file}')
    