import asyncio
import contextlib
import io
import os
import threading
import time
import typing
from dataclasses import dataclass
from urllib.parse import urlparse

from bs4 import BeautifulSoup
from markitdown import MarkItDown, StreamInfo

# Per-thread storage: get_markitdown() keeps one MarkItDown instance per
# thread in the ``markit`` attribute of this object.
thread_local = threading.local()


def get_markitdown() -> MarkItDown:
    """Return the calling thread's MarkItDown converter, creating it on first use.

    One converter is cached per thread in ``thread_local.markit``. Before a new
    converter is built, onnxruntime's ``get_default_session_options`` hook is
    replaced (process-wide) with a wrapper that pins both the inter-op and the
    intra-op thread counts to 1.
    NOTE(review): presumably this keeps ONNX sessions created inside MarkItDown
    single-threaded -- confirm against the MarkItDown/onnxruntime versions used.

    Returns:
        The MarkItDown instance owned by the current thread.
    """
    # Fast path: the thread already has a converter, so there is no need to
    # import onnxruntime or re-apply the global patch on every call.
    cached = getattr(thread_local, "markit", None)
    if cached is not None:
        return typing.cast(MarkItDown, cached)

    import onnxruntime as ort

    pybind_state = ort.capi._pybind_state  # type:ignore
    # If the hook was already patched (by another thread), this call returns
    # the same options object, so re-patching stays idempotent.
    session_options = pybind_state.get_default_session_options()

    def get_default_session_options_new():
        session_options.inter_op_num_threads = 1
        session_options.intra_op_num_threads = 1
        return session_options

    # The patch must be in place before MarkItDown() is constructed.
    pybind_state.get_default_session_options = get_default_session_options_new

    converter = MarkItDown()
    thread_local.markit = converter
    return converter


# Origin of the IMDb site.
# NOTE(review): neither `base` nor `headers` is referenced in the visible part
# of this file -- presumably used by code elsewhere; confirm before removing.
base = "https://www.imdb.com"
# Browser-like request headers (the User-Agent identifies as Firefox 140 on
# Linux x86_64) with caching disabled via Pragma / Cache-Control.
headers = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:140.0) Gecko/20100101 Firefox/140.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Alt-Used": "www.imdb.com",
    "Upgrade-Insecure-Requests": "1",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "cross-site",
    "Priority": "u=0, i",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
}


def format_imdb(text):
    """Prune an IMDb page down to its main content and convert it with MarkItDown.

    The page is parsed with ``html.parser`` and narrowed to the element with
    class ``ipc-page-background``. Selected widgets are then removed, links are
    flattened to their text, and images, SVGs and buttons are dropped. The
    remaining markup is passed to MarkItDown as an HTML stream.

    Args:
        text: HTML markup of the page.

    Returns:
        ``str()`` of the MarkItDown conversion result.

    Raises:
        AssertionError: if no ``ipc-page-background`` element is present.
    """
    page = BeautifulSoup(text, features="html.parser")
    root = page.find(class_="ipc-page-background")
    assert root is not None

    # Attribute name -> values whose matching elements are removed, in order.
    attribute_removals = (
        (
            "data-testid",
            (
                "hero__photo-link",
                "hero__video-link",
                "hero-media__slate",
                "hero-rating-bar__user-rating",
                "hero-subnav-bar-left-block",
                "hero-subnav-bar-right-block",
                "plot-l",
                "plot-xs_to_m",
                "videos-section",
            ),
        ),
        (
            "cel_widget_id",
            (
                "StaticFeature_FAQ",
                "StaticFeature_Contribution",
                "DynamicFeature_TopPicks",
            ),
        ),
    )
    for attr_name, attr_values in attribute_removals:
        for attr_value in attr_values:
            for node in root.find_all(attrs={attr_name: attr_value}):
                node.decompose()

    for class_name in ("right-rail-more-to-explore", "ipc-responsive-button__text"):
        for node in root.find_all(class_=class_name):
            node.decompose()

    # Flatten links to their text before the media/button elements are removed.
    for anchor in root.find_all("a"):
        anchor.replace_with(anchor.text)

    for tag_name in ("img", "svg", "button"):
        for node in root.find_all(tag_name):
            node.decompose()

    for node in root.find_all(attrs={"id": "ProUpsellLink"}):
        node.decompose()

    # Serialize the tree as-is; it is deliberately not prettified.
    stream = io.BytesIO(root.encode())
    converter = get_markitdown()
    info = StreamInfo(extension="html", mimetype="text/html")
    converted = converter.convert_stream(stream, stream_info=info)
    return str(converted)


@dataclass
class Url:
    """Value object holding a single URL string."""

    # The URL text.
    url: str


@dataclass
class DNT:
    """Field-less marker type.

    NOTE(review): the meaning of the abbreviation "DNT" is not established in
    the visible part of this file -- confirm with the code that produces it.
    """


@dataclass
class NetworkError:
    """Field-less marker type; it is a plain dataclass, not an exception.

    NOTE(review): presumably returned as a result value when a network request
    fails -- confirm against the callers.
    """


def is_movie(i):
    """Return False when ``i.text`` mentions "podcast" (any letter case), else True.

    Args:
        i: any object exposing a string ``text`` attribute.
    """
    lowered = i.text.lower()
    if "podcast" in lowered:
        return False
    return True


async def find_imdb_page(session, year, name):
    """Look up a movie's IMDb page through the Brave web-search API.

    Sends the query ``site:imdb.com {name} ({year}) movie`` and returns the
    first result whose URL contains the substring ``"title"``. When the API
    answers with a ``RATE_LIMITED`` error response, the request is retried
    after a one-second pause, for as long as the rate limit persists.

    Args:
        session: HTTP client whose ``get(...)`` returns an async context
            manager yielding a response with an awaitable ``json()``
            (presumably an ``aiohttp.ClientSession`` -- confirm with callers).
        year: release year interpolated into the query.
        name: movie title interpolated into the query.

    Returns:
        A ``(url, title)`` tuple for the first matching result, or ``None``
        when the API returned a payload without ``web.results`` that is not a
        rate-limit error.

    Raises:
        KeyError: if ``BRAVE_KEY`` is not set in the environment, or the
            payload is malformed (it is printed first when the error check
            itself fails).
        AssertionError: if results were returned but none has ``"title"`` in
            its URL.
    """
    # A loop instead of recursion: the previous response is released before
    # the retry and a long rate-limit period cannot exhaust the call stack.
    while True:
        async with session.get(
            "https://api.search.brave.com/res/v1/web/search",
            params={"q": f"site:imdb.com {name} ({year}) movie"},
            headers={
                "Accept": "application/json",
                "Accept-Encoding": "gzip",
                "X-Subscription-Token": os.environ["BRAVE_KEY"],
            },
        ) as req:
            payload = await req.json()

        try:
            results = payload["web"]["results"]
        except KeyError:
            results = None

        if results is not None:
            for result in results:
                url = result["url"]
                if "title" in url:
                    return url, result["title"]
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError(
                f"no IMDb title URL in search results for {name!r} ({year})"
            )

        # No result list: inspect the payload itself for a rate-limit error.
        try:
            rate_limited = (
                payload["type"] == "ErrorResponse"
                and payload["error"]["code"] == "RATE_LIMITED"
            )
        except (KeyError, TypeError):
            print(payload)
            raise

        if not rate_limited:
            return None

        # Non-blocking pause; time.sleep() would stall the whole event loop.
        await asyncio.sleep(1)



def movie_fn(movie: int) -> str:
    """Render ``movie`` as a decimal string zero-padded to at least 7 digits."""
    padded = format(movie, "07d")
    return padded
