# Post-processes the content returned by GPT and keeps only the useful fields.
import json
import re
from utils import read_jsonl, save_jsonl
import argparse

# Command-line interface: both options are mandatory string paths.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--json-path",
    dest="json_path",
    type=str,
    required=True,
)  # handed to read_jsonl as the input location
parser.add_argument(
    "--output-path",
    dest="output_path",
    type=str,
    required=True,
)  # handed to save_jsonl as the output location
args = parser.parse_args()

def filter_decomposed(records):
    """Keep the records whose ``gpt_decom`` JSON has a non-empty ``内容`` entry.

    Each record's ``"gpt_decom"`` value is parsed as JSON. When the parsed
    object has a ``"内容"`` entry of non-zero length, that entry is stored on
    the record under ``"decom_dic"`` and the record is kept. Kept records are
    mutated in place (the same objects are returned, not copies).

    Args:
        records: Iterable of dict-like records to inspect.

    Returns:
        A ``(kept, error_count, empty_count)`` tuple where ``kept`` is the list
        of retained records in input order, ``error_count`` is the number of
        records that could not be processed (missing key, invalid JSON,
        unexpected payload type) and ``empty_count`` is the number of records
        whose ``"内容"`` entry had length zero.
    """
    kept = []
    error_count = 0
    empty_count = 0
    for record in records:
        try:
            content = json.loads(record["gpt_decom"])
            extracted = content["内容"]
            # len() (rather than truthiness) is deliberate: a value without a
            # length, such as a number or null, is counted as an error, not as
            # an empty field.
            is_empty = len(extracted) == 0
        except (KeyError, TypeError, ValueError):
            # KeyError: a required key is absent.
            # TypeError: the payload is not a string / mapping / sized value.
            # ValueError: invalid JSON (json.JSONDecodeError is a subclass).
            # KeyboardInterrupt and SystemExit are intentionally not caught so
            # that the run can still be aborted.
            error_count += 1
        else:
            if is_empty:
                empty_count += 1
                continue
            record["decom_dic"] = {"内容": extracted}
            kept.append(record)
    return kept, error_count, empty_count


if __name__ == '__main__':
    print("-----------------------")
    print("Slogan post gpt process.")
    data = []
    # read_jsonl is called with a list to fill rather than returning one.
    read_jsonl(args.json_path, data)

    new_data, error_count, empty_count = filter_decomposed(data)
    print("error count", error_count)
    print("empty count", empty_count)
    print("left data", len(new_data))

    save_jsonl(new_data, args.output_path)
    