"""
This Python script helps fix the dataset source: it extracts the records for which some attributes could not be retrieved while spidering.
"""
import pandas as pd
import os
from datetime import datetime
import argparse
import json
def extract_empty_records(input_path: str, output_path: str) -> "pd.DataFrame | None":
    """Extract the rows of a CSV file that contain at least one missing value.

    The input is loaded with ``pd.read_csv`` and every row in which any
    column is missing (NaN) is written to ``output_path`` without the index
    column.

    Args:
        input_path: Path of the CSV file to scan.
        output_path: Path of the CSV file that receives the incomplete rows.
            Missing parent directories are created before writing.

    Returns:
        The DataFrame holding the rows with missing values, or ``None`` when
        every row is complete. In the latter case nothing is written and a
        message is printed.
    """
    df = pd.read_csv(input_path)

    # Keep only the rows where at least one column is missing.
    empty_records = df[df.isna().any(axis=1)]

    if empty_records.empty:
        print("No empty records found!")
        return None

    # to_csv raises OSError if the target directory does not exist, so create
    # it first. Only path-like targets are handled here; anything else (for
    # example a file-like buffer) is passed through to to_csv untouched.
    if isinstance(output_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    empty_records.to_csv(output_path, index=False)
    return empty_records

# Usage example
if __name__ == "__main__":
    # Example invocation with hard-coded sample paths: read the input CSV and
    # write its rows that contain missing values to the second path.
    extract_empty_records(
        input_path='./CNS_cover/Data/science_0221.csv',
        output_path='./CNS_cover/Data/science_empty_records.csv',
    )