from meta_researcher.tool.tools.search_engine.log import logger
import re

import pandas as pd
from bs4 import BeautifulSoup, Tag
from bs4.filter import SoupStrainer

# Registry of site-specific post-processors, keyed by URL substring pattern.
# Populated by the ``register_website_postprocess`` decorator below.
CONTENT_PROCESS_MAP = {}
# URL substring patterns flagged as "dynamic". Seeded from the two AUTH_* lists
# below and extended by ``register_website_postprocess(..., dynamic_website=True)``.
DYNAMIC_WEBSITES = set()

# URL substrings to exclude. The consumer is not in this file —
# presumably any URL containing one of these is skipped; verify against caller.
URL_BLACK_LISTS = [
    "pdf",
    "douyin.com",
    "taobao",
    "toutiao.com",
    "weibo",
    "xiaomiui.net",
    "ask.zol.com.cn",
    "tieba.baidu.com",
    "detail.zol.com.cn",
    "mobile.zol.com.cn",
    "www.bilibili.com/video",
    "car.autohome.com.cn/",
    "dev.mi.com",
    "iot.mi.com",
    "www.mi.com/global",
    "carwith.link.mi.com",
    "ngabbs.com",
    "aiqicha.baidu.com",
]

# Stock-quote hosts; all of them are added to DYNAMIC_WEBSITES below.
AUTH_STOCK_WEBSITES = [
    "quote.eastmoney.com",
    "q.stock.sohu.com",
    "finance.sina.com.cn",
    "www.hexun.com",
    "cn.investing.com",
    "www.cninfo.com.cn",
]

# Weather hosts; all of them are added to DYNAMIC_WEBSITES below.
AUTH_WEATHER_WEBSITES = [
    "www.weather.com.cn",
    "m.weather.com.cn",
    "www.msn.cn",
    "tianqi.moji.com",
    "www.tianqi.com",
]
DYNAMIC_WEBSITES.update(AUTH_STOCK_WEBSITES)
DYNAMIC_WEBSITES.update(AUTH_WEATHER_WEBSITES)


def find_all_tag(html: BeautifulSoup, filter: list[dict | list]):
    """Return every tag in ``html`` that satisfies at least one filter entry.

    Each entry of ``filter`` is either:

    * a list, passed to ``SoupStrainer`` as its first positional argument, or
    * a dict, in which case every ``key: value`` pair becomes its own
      ``SoupStrainer(attrs={key: value})``.

    A tag is selected as soon as any one of the resulting strainers matches,
    so the keys of a dict entry are alternatives, not a conjunction.
    """
    strainers: list[SoupStrainer] = []
    for entry in filter:
        if isinstance(entry, list):
            strainers.append(SoupStrainer(entry))
            continue
        strainers.extend(
            SoupStrainer(attrs={attr: wanted}) for attr, wanted in entry.items()
        )

    def _matches_any(tag: Tag):
        return any(strainer.matches_tag(tag) for strainer in strainers)

    return html.find_all(_matches_any)


def remove_html_content(html: BeautifulSoup, filter: list):
    """Detach every tag matched by ``filter`` from ``html`` and return ``html``.

    The tree is modified in place; the return value is the same object that
    was passed in.
    """
    unwanted_tags = find_all_tag(html, filter)
    for tag in unwanted_tags:
        tag.extract()
    return html


def get_html_content(html: BeautifulSoup, filter: list, min_match=1):
    """Collect the tags matched by ``filter`` into a fresh document.

    When fewer than ``min_match`` tags match, ``html`` is returned untouched.
    Otherwise the matched tags are appended, in match order, to a new empty
    ``BeautifulSoup`` object, which is returned.
    """
    matched = find_all_tag(html, filter)
    if len(matched) < min_match:
        return html

    container = BeautifulSoup()
    for tag in matched:
        container.append(tag)
    return container


def extract_main_content(raw_content, url: list[str] | str | None = None, process_map: dict | None = None):
    """Extract the main text of a web page.

    Args:
        raw_content: Raw HTML markup of the page.
        url: URL(s) of the page, used to select a site-specific handler.
            A single string is accepted and treated as a one-element list.
            Defaults to ``[""]``.
        process_map: Mapping of URL substring pattern -> handler
            ``func(soup, raw_content, url)``. The first pattern (in mapping
            order) contained in any of the URLs wins. Defaults to an empty
            mapping, i.e. only the generic clean-up is applied.

    Returns:
        A tuple ``(text, special_url, soup)``:

        * ``text`` - the extracted, stripped text;
        * ``special_url`` - the matched pattern, or ``""`` if none matched;
        * ``soup`` - the processed tree, or ``None`` when the handler returned
          a ready-made string (which is then used as ``text`` directly).

    Any exception raised during processing is logged and extraction falls back
    to whatever tree was available at that point.
    """
    # Avoid mutable default arguments; build fresh defaults per call.
    if url is None:
        url = [""]
    elif isinstance(url, str):
        # A bare string would otherwise be iterated character by character
        # and never match any pattern.
        url = [url]
    if process_map is None:
        process_map = {}

    soup = BeautifulSoup(raw_content, features="html.parser")
    special_url = ""
    try:
        for p, func in process_map.items():
            if any(p in u for u in url):
                special_url = p
                soup = func(soup, raw_content, url)
                if isinstance(soup, str):
                    # Handler produced the final text itself.
                    return soup.strip(), special_url, None
                break

        # Prefer <article> elements when the page has any.
        soup = get_html_content(soup, [["article"]])

        flags = re.I
        general_filter = [
            ["script", "style", "nav", "head", "img", "footer", "iframe", "header"],
            {
                "class": [
                    "reprint",
                    "prev-next articles",
                    "progress-bar",
                    "toc-title",
                    "post-info",
                    re.compile("topbar", flags=flags),
                    re.compile("nav", flags=flags),
                    re.compile("header", flags=flags),
                    re.compile("sidebar", flags=flags),
                    re.compile("footer", flags=flags),
                    re.compile("feedback", flags=flags),
                    re.compile("share", flags=flags),
                    re.compile("breadcrumb", flags=flags),
                ],
                "id": [
                    re.compile("nav", flags=flags),
                    re.compile("breadcrumb", flags=flags),
                    "footer",
                ],
            },
        ]
        soup = remove_html_content(soup, general_filter)

    except Exception as e:
        # Best effort: keep going with the tree we have so far.
        logger.warning(f"error of extract main content: {e}")

    try:
        text = soup.get_text(separator="", strip=True)  # type: ignore
    except Exception as e:
        # e.g. a handler returned something that is not a tree.
        logger.warning(f"error of get text from main content: {e}")
        text = ""
    return text.strip(), special_url, soup


def register_website_postprocess(host: list[str] | str, dynamic_website=False):
    """Decorator factory that registers a handler in ``CONTENT_PROCESS_MAP``.

    Args:
        host: One URL pattern or a list of them. Each pattern is stripped of
            surrounding whitespace before being used as the registry key.
        dynamic_website: When true, every pattern is also added to
            ``DYNAMIC_WEBSITES``.

    Returns:
        A decorator that records the decorated object under each pattern and
        returns it unchanged.

    NOTE(review): on a duplicate pattern a warning is logged, but the new
    handler still replaces the previous one - confirm this is intended.
    """

    def _register(cls):
        patterns = host if not isinstance(host, str) else [host]
        for raw_pattern in patterns:
            key = raw_pattern.strip()
            if key in CONTENT_PROCESS_MAP:
                logger.warning(f"Cannot register duplicate website postprocess ({raw_pattern})")
            CONTENT_PROCESS_MAP[key] = cls
            if dynamic_website:
                DYNAMIC_WEBSITES.add(key)
        return cls

    return _register


@register_website_postprocess(host="juejin.cn")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Juejin: strip the article meta box and the author info block.
    """
    unwanted = {"class": ["meta-box", "author-info-block"]}
    remove_html_content(body, [unwanted])
    return body


@register_website_postprocess(host="chejiahao.autohome.com.cn")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Autohome - Chejiahao: strip page chrome, tags, share widgets and
    author info from the article page.
    """
    noisy_classes = [
        "topbar",
        "infoHeader",
        "footer_auto",
        "articleTag",
        "statement",
        "carType",
        "tagBot",
        "share",
        "default-all",
        "great_video_wrapper",
        "rightBox",
        "homeRight",
        "defaultAll",
        "pswp",
        "look_full_text",
        "repotr",
        re.compile("AuthorInfo"),
    ]
    noisy_ids = ["floatDiv", "pubilc"]
    return remove_html_content(body, [{"class": noisy_classes, "id": noisy_ids}])


@register_website_postprocess(host="www.autohome.com.cn")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Autohome - car Q&A: return the first element whose class matches
    ``core_content__``; fall back to the whole page when there is none.
    """
    candidates = body.find_all(class_=re.compile("core_content__"))
    if len(candidates) == 0:
        return body
    return candidates[0]


@register_website_postprocess(host="www.yiche.com/baike")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Yiche encyclopedia: keep the title and content elements (at least two
    matches required), then strip promotion and tag boxes.
    https://m.yiche.com/baike/28145514.htm
    """
    content = get_html_content(body, [{"class": ["tit", "content"]}], min_match=2)
    promo = {"class": ["endPopularize", "tag-box"]}
    return remove_html_content(content, [promo])


@register_website_postprocess(host="skip_it.www.xiaomiev.com/car-config")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Xiaomi EV configuration table.

    Parses every HTML table in ``raw_content`` with pandas, splits the header
    cells of the first table into product name and price at the ``¥`` sign,
    and returns all tables concatenated as one Markdown table (a string).

    Raises ``ValueError`` if a header cell does not contain exactly one ``¥``.
    """
    from io import BytesIO, StringIO

    # pandas >= 2.1 deprecates passing literal HTML to read_html; wrap it in
    # a file-like object instead.
    if isinstance(raw_content, str):
        source = StringIO(raw_content)
    elif isinstance(raw_content, bytes):
        source = BytesIO(raw_content)
    else:
        source = raw_content
    tables = pd.read_html(source)  # type: ignore

    head = tables[0]
    col = head.shape[1]

    products = []
    prices = []
    for i in range(1, col):
        # Header cell looks like "<product>¥<price>".
        product, price = head.iloc[0, i].split("¥")  # type: ignore
        products.append(product)
        prices.append("¥" + price)  # type: ignore

    # First row keeps only the prices; product names become column headers.
    head.iloc[0, 1:] = prices  # type: ignore

    all_tables = pd.concat(tables)
    all_tables.columns = ["参数"] + products

    rets = "".join(
        [
            "Xiaomi SU7 参数配置表\n",
            "注：● 表示标配 ○ 表示选配 - 表示无此配置\n",
            "\n" + all_tables.to_markdown(index=False),
        ]
    )

    # Compact the padding that to_markdown emits.
    rets = re.sub(r"[ ]{3,}", "  ", rets)
    rets = re.sub(r"-{4,}", "----", rets)
    return rets


@register_website_postprocess(host="www.mi.com/service")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Xiaomi service pages: strip site info, breadcrumbs, logos and modals.
    """
    chrome_classes = [
        "site-info",
        "info-text",
        "breadcrumbs",
        re.compile("logo"),
        re.compile("modal"),
    ]
    return remove_html_content(body, [{"class": chrome_classes}])


@register_website_postprocess(host="news.qq.com/rain")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Tencent News: strip the article top block, the image carousel and
    elements whose id matches ``Author``.
    """
    unwanted = {
        "class": ["article-top-content", "image-carousel"],
        "id": [re.compile("Author")],
    }
    return remove_html_content(body, [unwanted])


@register_website_postprocess(host="www.sohu.com/a")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Sohu: keep the title / detail / article elements when at least two
    of them are found; otherwise keep the whole page.
    https://www.sohu.com/a/719751035_121734548
    """
    wanted = {"class": ["content-main--title", "content-main-detail", "article"]}
    return get_html_content(body, [wanted], min_match=2)


@register_website_postprocess(host="www.163.com/dy/article")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    163 news: keep the post title and post body (at least two matches
    required, otherwise the page is returned as is).
    https://www.163.com/dy/article/HVM1QFU40552ZFRL.html
    """
    wanted = {"class": ["post_title", "post_body"]}
    return get_html_content(body, [wanted], min_match=2)


@register_website_postprocess(host="www.xchuxing.com/article")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Xchuxing: keep the ``content-main`` element, then strip type badges,
    tags and edit/status lines.
    https://www.xchuxing.com/article/142618
    """
    content = get_html_content(body, [{"class": ["content-main"]}], min_match=1)
    unwanted_classes = [
        "original-type",
        "hot-type",
        "flex",
        "cate-tags",
        "last-edit",
        "acticle-status",
    ]
    return remove_html_content(content, [{"class": unwanted_classes}])


@register_website_postprocess(host="finance.sina.com.cn")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Sina News: keep the main title and the left article column (at least
    two matches required), then strip promo, keyword and related blocks.
    https://finance.sina.com.cn/tob/2024-12-17/doc-inczuerp4811691.shtml
    """
    wanted = {"class": ["main-title", "article-content-left"]}
    content = get_html_content(body, [wanted], min_match=2)
    unwanted_classes = [
        "wx-info",
        "keywords",
        "hero-items",
        re.compile("related"),
        "appendQr_normal_txt",
        "vip-class",
        "app-viplive",
        "most-read",
    ]
    return remove_html_content(content, [{"class": unwanted_classes}])


# @register_website_postprocess(host="stock.finance.sina.com.cn", dynamic_website=True)
# def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
#     """
#     https://stock.finance.sina.com.cn/hkstock/quotes/01810.html
#     """
#     filter = [
#         {
#             "class": ["stock_detail"],
#         }
#     ]
#     body = get_html_content(body, filter, min_match=1)
#     filter = [
#         {
#             "class": ["Financeapp-pics", "appCheckLink", "self_select", "deta02", "deta04", "deta05", "suigu_content"],
#         }
#     ]
#     remove_html_content(body, filter)
#     return body


@register_website_postprocess(host="club.autohome.com.cn")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Autohome forum: strip toolbars, pagination, floating widgets and any
    element whose class matches ``bbs|reply|comment``.
    """
    unwanted_classes = [
        "toolbar",
        "post-handle",
        "post-site",
        "pagination-container",
        "go-top-wrap",
        "diversion-fixed-wrap",
        re.compile("bbs|reply|comment"),
    ]
    return remove_html_content(body, [{"class": unwanted_classes}])


@register_website_postprocess(host=["auto.zol.com.cn"])
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    ZOL forum: keep the ``article-content`` element, then strip
    ``crawl-none`` blocks and the listed metadata ids.
    https://auto.zol.com.cn/928/9287934.html
    """
    content = get_html_content(body, [{"id": ["article-content"]}], min_match=1)
    metadata_ids = [
        "description",
        "author",
        "url",
        "isBasedOnUrl",
        "wordCount",
        "genre",
        "isOriginal",
        "indexUrl",
    ]
    unwanted = {"class": ["crawl-none"], "id": metadata_ids}
    return remove_html_content(content, [unwanted])


@register_website_postprocess(host="post.smzdm.com")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    SMZDM posts: strip elements with class ``xilie``.
    https://post.smzdm.com/p/66006070/
    """
    return remove_html_content(body, [{"class": ["xilie"]}])


@register_website_postprocess(host="www.smzdm.com/zy/detail")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    SMZDM detail pages: strip crumbs, the listed content modules and the
    ``feed-side`` element.
    https://www.smzdm.com/zy/detail/avzlp51/
    """
    unwanted = {
        "class": ["crumbs", "actical-detail", "module-content", "see-all"],
        "id": ["feed-side"],
    }
    return remove_html_content(body, [unwanted])


@register_website_postprocess(host="www.smzdm.com/p")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    SMZDM deal pages: keep elements matching ``articleid=True`` or the
    listed classes, then strip ``<a>`` tags and ``baoliao-tips``.
    https://www.smzdm.com/p/94954346/
    """
    wanted = {
        "articleid": True,
        "class": ["baoliao-block", "introduce-item"],
    }
    content = get_html_content(body, [wanted])
    unwanted = [["a"], {"class": ["baoliao-tips"]}]
    return remove_html_content(content, unwanted)


@register_website_postprocess(host="www.thepaper.cn")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    The Paper: keep the main title and text (at least two matches
    required), then strip the praise icon and ``index_beian`` elements.
    https://www.thepaper.cn/newsDetail_forward_29670465
    """
    wanted = {"class": ["custom-text", "main-title"]}
    content = get_html_content(body, [wanted], min_match=2)
    unwanted = {"class": ["praiseIcon", re.compile("index_beian")]}
    return remove_html_content(content, [unwanted])


@register_website_postprocess(host="jingyan.baidu.com")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Baidu Jingyan: strip info, thumbnail, prompt, origin-notice and
    read-whole-mask elements.
    """
    unwanted_classes = [
        "exp-info",
        "wgt-thumbs",
        "prompt",
        "origin-notice",
        "read-whole-mask",
    ]
    return remove_html_content(body, [{"class": unwanted_classes}])


@register_website_postprocess(host="www.xiaomitong123.com/baike")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Xiaomitong encyclopedia: keep the ``news-article-box`` element, then
    strip the info line and the page-turn links.
    https://www.xiaomitong123.com/baike/12133.html
    """
    content = get_html_content(body, [{"class": ["news-article-box"]}], min_match=1)
    return remove_html_content(content, [{"class": ["info", "turn-pages"]}])


@register_website_postprocess(host="news.mydrivers.com")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    MyDrivers (Kuai Keji): keep the news title and info elements (at
    least two matches required), then strip repost, url and report blocks.
    https://news.mydrivers.com/1/1019/1019616.htm
    """
    wanted = {"class": ["news_bt", "news_info"]}
    content = get_html_content(body, [wanted], min_match=2)
    return remove_html_content(content, [{"class": ["zhuanzai", "url", "jubao_text"]}])


@register_website_postprocess(host="www.maigoo.com/goomai")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    www.maigoo.com: keep ``mod_desc`` and ``md_citiao_cols`` elements
    when at least two are found; otherwise keep the whole page.
    """
    wanted_classes = ["mod_desc", "md_citiao_cols"]
    return get_html_content(body, [{"class": wanted_classes}], min_match=2)


@register_website_postprocess(host="ithome.com")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    ITHome: strip the listed info/ad/related classes and the ``top``,
    ``nav`` and ``fls`` ids.
    """
    unwanted = {
        "class": ["cv", "info", "newserror", "related_post", "ad-tips"],
        "id": ["top", "nav", "fls"],
    }
    return remove_html_content(body, [unwanted])


@register_website_postprocess(host="product.pconline.com.cn/itbk")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    PConline IT encyclopedia: keep the article head and body, then strip
    the publication date.
    https://product.pconline.com.cn/itbk/top/1606/16060944.html
    """
    content = get_html_content(body, [{"class": ["art-hd", "art-bd"]}])
    return remove_html_content(content, [{"class": ["pubDate"]}])


@register_website_postprocess(host="www.chenglix.com")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Chengli: keep elements with class ``info-content-body`` when present.
    https://www.chenglix.com/xingyezixun/619415.html
    """
    article = {"class": "info-content-body"}
    return get_html_content(body, [article])


@register_website_postprocess(host="www.chiphell.com")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    www.chiphell.com: strip ``<a>`` / ``<ignore_js_op>`` tags and the
    listed forum chrome classes and ids.
    https://www.chiphell.com/thread-2658957-1-1.html
    """
    unwanted_tags = ["a", "ignore_js_op"]
    unwanted_attrs = {
        "class": ["pls", "pti", "pob", "pi", "pstatus"],
        "id": ["toptb", "qmenu_menu", "hd", "ft", "pt", "pgt", "f_pst"],
    }
    return remove_html_content(body, [unwanted_tags, unwanted_attrs])


@register_website_postprocess(host="www.cnblogs.com")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Cnblogs: strip the post description plus the footer, sidebar and
    navigator elements.
    """
    unwanted = {
        "class": ["postDesc"],
        "id": ["footer", "sideBar", "navigator"],
    }
    return remove_html_content(body, [unwanted])


@register_website_postprocess(host="baijiahao.baidu.com")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Baijiahao: keep elements with ``data-testid="article"``, then strip
    the report button.
    https://baijiahao.baidu.com/s?id=1797748124440091903&wfr=spider&for=pc
    """
    content = get_html_content(body, [{"data-testid": "article"}])
    return remove_html_content(content, [{"data-testid": ["report-btn"]}])


@register_website_postprocess(host="www.gov.cn")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Government website (www.gov.cn): strip print/editor controls,
    breadcrumbs, header/footer chrome and the listed ids.
    https://www.gov.cn/zhengce/zhengceku/202307/content_6891752.htm
    """
    unwanted_classes = [
        "pages_print",
        "editor",
        "mhide",
        "related",
        "BreadcrumbNav",
        "header",
        "siteurl",
        "footer_wrap",
        "back_top",
        re.compile("yh"),
    ]
    unwanted_ids = ["pageBreak", "div_div"]
    return remove_html_content(body, [{"class": unwanted_classes, "id": unwanted_ids}])


@register_website_postprocess(host="zhidao.baidu.com/question")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Baidu Zhidao: keep the question title and elements whose id matches
    ``best-content``, then strip the ``wgt-best-mask`` overlay.
    https://zhidao.baidu.com/question/220675203.html
    """
    wanted = {"class": ["ask-title"], "id": [re.compile("best-content")]}
    content = get_html_content(body, [wanted])
    return remove_html_content(content, [{"class": ["wgt-best-mask"]}])


@register_website_postprocess(host="aikahao.xcar.com.cn/item")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Xcar Aikahao: keep the detail paragraphs and title when at least two
    such elements are found; otherwise keep the whole page.
    https://aikahao.xcar.com.cn/item/1715122.html
    """
    wanted = {"class": ["detail_list_p", "detail_title"]}
    return get_html_content(body, [wanted], min_match=2)


@register_website_postprocess(host="cul.sohu.com/a")
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    cul.sohu.com: keep the ``content-main`` element, then strip the
    "see more", statement, return-home and comment blocks.
    """
    content = get_html_content(body, [{"class": ["content-main"]}], min_match=1)
    unwanted = {
        "class": ["content-main-desc--see", "statement", "return-home", "comment-box"],
    }
    return remove_html_content(content, [unwanted])


@register_website_postprocess(host="www.xiaomi.cn/post", dynamic_website=True)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Pass-through handler: returns ``body`` unchanged. It is registered with
    ``dynamic_website=True``.

    https://www.xiaomi.cn/post/2435262
    """
    return body


@register_website_postprocess(host="www.xiaomiyoupin.com/detail", dynamic_website=True)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Xiaomi Youpin product detail: keep the ``sku-container`` element,
    then strip promotion, service, address, size, count and button rows.
    https://www.xiaomiyoupin.com/detail?gid=139923
    """
    content = get_html_content(body, [{"class": ["sku-container"]}], min_match=1)
    unwanted_classes = [
        "text-content",
        "promotion-box",
        "service-line",
        "address-line",
        "size-line",
        "count-line",
        "btn-line",
        "market-price",
    ]
    return remove_html_content(content, [{"class": unwanted_classes}])


@register_website_postprocess(host="www.mi.com/shop", dynamic_website=True)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Xiaomi shop product page: keep the ``product-con`` element, then
    strip ``<del>`` tags and the address / after-sale blocks.
    https://www.mi.com/shop/buy/detail?product_id=20559&cfrom=search
    """
    content = get_html_content(body, [{"class": ["product-con"]}], min_match=1)
    unwanted = [
        ["del"],
        {"class": ["text-content", "product-address", "after-sale-info"]},
    ]
    return remove_html_content(content, unwanted)


@register_website_postprocess(host="www.mi.com", dynamic_website=True)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Xiaomi product pages: strip site info, breadcrumbs and hidden modals.
    https://www.mi.com/prod/redmi-turbo-4
    """
    unwanted = {"class": ["site-info", "breadcrumbs", "modal-hide"]}
    return remove_html_content(body, [unwanted])


@register_website_postprocess(host="xiaoai.mi.com", dynamic_website=True)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Pass-through handler: returns ``body`` unchanged. It is registered with
    ``dynamic_website=True``.

    https://xiaoai.mi.com/?from=aihome.run
    """
    return body


@register_website_postprocess(host="www.dongchedi.com/article", dynamic_website=True)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Dongchedi articles: strip the ``article-bottom`` block.
    https://www.dongchedi.com/article/7450350796065849893
    """
    return remove_html_content(body, [{"class": ["article-bottom"]}])


@register_website_postprocess(host="www.weather.com.cn/weather", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    weather.com.cn forecast pages: strip the listed header, sidebar,
    event and news blocks.
    https://www.weather.com.cn/weather/101200101.shtml
    """
    unwanted_classes = [
        "article-bottom",
        "weather_li_head",
        "around",
        "hd-img",
        "greatEvent",
        "right-div",
        "btn",
        "livezs",
        "tq_zx",
    ]
    return remove_html_content(body, [{"class": unwanted_classes}])


@register_website_postprocess(host="tianqi.2345.com", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    tianqi.2345.com: strip pop-ups, QR code box, location switcher and
    the listed side/bottom blocks.
    https://tianqi.2345.com/today-57494.htm
    """
    unwanted_classes = [
        "unuse-explorer-pop",
        "fix-qrcode-box",
        "location-change",
        "mess-box",
        "mod-left-wrap",
        "bottom-info",
        "right-div",
    ]
    return remove_html_content(body, [{"class": unwanted_classes}])


@register_website_postprocess(host="www.icauto.com.cn", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    icauto.com.cn: keep the ``article-body-main`` element when present,
    then strip the listed header, footer and recommendation blocks.
    https://www.icauto.com.cn/ask/cars/144460.html
    https://www.icauto.com.cn/car/0/8069.html
    """
    content = get_html_content(body, [{"class": ["article-body-main"]}])
    unwanted_classes = [
        "topwarp",
        "foot",
        "ask-tuijian",
        "tb",
        "col-c",
        "ask-module-top-title",
    ]
    return remove_html_content(content, [{"class": unwanted_classes}])


@register_website_postprocess(host="news.yiche.com", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Yiche news: keep the ``news-detail`` element, then strip its
    position, SEO-tips, describe and icon sub-blocks.
    https://news.yiche.com/hao/wenzhang/89008247/
    """
    content = get_html_content(body, [{"class": ["news-detail"]}], min_match=1)
    unwanted_classes = [
        "news-detail-position",
        "news-detail-seotips",
        "news-detail-describe",
        "news-detail-icons",
    ]
    return remove_html_content(content, [{"class": unwanted_classes}])


@register_website_postprocess(host="baike.baidu.com/item", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Baidu Baike: strip elements whose class matches ``topToolsWrap`` or
    equals ``copyright``, and elements with ``data-tag="ref"``.
    https://baike.baidu.com/item/2024%E5%B9%B4%E7%BE%8E%E5%9B%BD%E6%80%BB%E7%BB%9F%E9%80%89%E4%B8%BE/62264630
    """
    unwanted = {
        "class": [re.compile("topToolsWrap"), "copyright"],
        "data-tag": ["ref"],
    }
    return remove_html_content(body, [unwanted])


@register_website_postprocess(host="web.vip.miui.com", dynamic_website=True)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Pass-through handler: returns ``body`` unchanged. It is registered with
    ``dynamic_website=True``.

    https://web.vip.miui.com/page/info/mio/mio/detail?postId=824796
    """
    return body


@register_website_postprocess(host="wh.bendibao.com", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Wuhan Bendibao: keep the top title and article content when at least
    two such elements are found; otherwise keep the whole page.
    https://wh.bendibao.com/news/2024124/183936.shtm
    """
    wanted = {"class": ["top-title", "article-content"]}
    return get_html_content(body, [wanted], min_match=2)


@register_website_postprocess(host="www.guancha.cn", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Guancha: keep elements with class ``all-txt`` when at least one is
    found; otherwise keep the whole page.
    https://www.guancha.cn/sports/2022_06_17_645050.shtml
    """
    wanted = {"class": ["all-txt"]}
    return get_html_content(body, [wanted], min_match=1)


@register_website_postprocess(host="tradingeconomics.com", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Trading Economics: strip elements with class ``table`` and the
    ``sidebar`` element.
    https://zh.tradingeconomics.com/russia/gdp-growth-annual
    """
    unwanted = {"class": ["table"], "id": ["sidebar"]}
    return remove_html_content(body, [unwanted])


@register_website_postprocess(host="www.pcauto.com.cn", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    PCauto: keep the Q&A detail / article text elements, then strip
    ``notice`` blocks.
    https://www.pcauto.com.cn/ask/678604.html
    """
    wanted = {"class": ["ask-detail-content", "artText"]}
    content = get_html_content(body, [wanted], min_match=1)
    return remove_html_content(content, [{"class": ["notice"]}])


@register_website_postprocess(host="user.guancha.cn/main", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Guancha user posts: keep ``<h1>`` tags and the
    ``article-txt-content`` element when at least one of them is found.
    https://user.guancha.cn/main/content?id=1363406
    """
    heading_tags = ["h1"]
    article = {"class": ["article-txt-content"]}
    return get_html_content(body, [heading_tags, article], min_match=1)


@register_website_postprocess(host="www.csai.cn", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    csai.cn: keep the ``title`` and ``wenzhang_main`` elements when at
    least two matches are found; otherwise keep the whole page.
    https://www.csai.cn/licai/1422880.html
    """
    wanted = {"id": ["title", "wenzhang_main"]}
    return get_html_content(body, [wanted], min_match=2)


@register_website_postprocess(host="pconline.com", dynamic_website=False)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    PConline: keep ``<h2>`` tags and the ``context-box`` element when at
    least two matches are found; otherwise keep the whole page.
    https://mobile.pconline.com.cn/1778/17781672.html
    """
    heading_tags = ["h2"]
    article = {"class": ["context-box"]}
    return get_html_content(body, [heading_tags, article], min_match=2)


@register_website_postprocess(host="zhihu.com", dynamic_website=True)
def _process_webpage(body: BeautifulSoup, raw_content=None, url=None):
    """
    Pass-through handler: returns ``body`` unchanged. It is registered with
    ``dynamic_website=True``.

    https://zhuanlan.zhihu.com/p/1890804221336614802
    """
    return body
