import hashlib
import json
import os
import re
import shutil
import sys
import time
import traceback
import urllib.parse
from contextlib import contextmanager
from typing import Literal

import requests
import logging
# Configure the root logger when this module is imported: INFO level, with
# each record rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s - %(levelname)s - %(message)s")
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def get_basename_from_url(path_or_url: str) -> str:
    """Return the last path component of a local path or URL.

    The path part of the input is extracted with ``urllib.parse.urlparse``
    (so query strings and fragments are dropped), reduced to its final
    component, percent-decoded and stripped of surrounding whitespace.

    Args:
        path_or_url: A POSIX path, a Windows drive path, or a URL.

    Returns:
        The basename. When the path part has no final component (for
        example a URL ending in "/"), the last non-blank "/"-separated
        segment of the whole input is used instead. An input without any
        non-blank segment (such as "" or "/") yields "".
    """
    if re.match(r"^[A-Za-z]:\\", path_or_url):
        # "C:\\a\\b\\c" -> "C:/a/b/c"
        path_or_url = path_or_url.replace("\\", "/")

    # "/mnt/a/b/c" -> "c"
    # "https://github.com/here?k=v" -> "here"
    # "https://github.com/" -> ""
    basename = urllib.parse.urlparse(path_or_url).path
    basename = os.path.basename(basename)
    basename = urllib.parse.unquote(basename)
    basename = basename.strip()

    # "https://github.com/" -> "" -> "github.com"
    if not basename:
        segments = [x.strip() for x in path_or_url.split("/") if x.strip()]
        # Inputs like "" or "/" have no usable segment at all; return ""
        # instead of failing with IndexError on an empty list.
        basename = segments[-1] if segments else ""

    return basename


def get_file_type(path: str):
    """Return the lower-cased extension of ``path`` if it is a supported type.

    The extension is whatever follows the last "." in the basename of
    ``path`` (a local path or URL). Returns ``None`` when that text is not
    one of the specially supported file types.
    """
    # Specially supported file types
    supported_types = (
        "pdf",
        "docx",
        "doc",
        "pptx",
        "txt",
        "html",
        "csv",
        "tsv",
        "xlsx",
        "xls",
    )
    name = get_basename_from_url(path)
    extension = name.rsplit(".", 1)[-1].lower()
    if extension not in supported_types:
        return None
    return extension

def hash_sha256(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of ``text`` encoded as UTF-8."""
    return hashlib.sha256(text.encode()).hexdigest()

def is_http_url(path_or_url: str) -> bool:
    """Return True if the string starts with "https://" or "http://".

    The check is a case-sensitive prefix match; no further URL validation
    is performed.
    """
    return path_or_url.startswith(("https://", "http://"))




def print_traceback(is_error: bool = True):
    """Log the exception currently being handled, limited to 3 stack entries.

    Args:
        is_error: Log at ERROR level when True, at WARNING level otherwise.
    """
    exc_type, exc_value, exc_tb = sys.exc_info()
    formatted = traceback.format_exception(exc_type, exc_value, exc_tb, limit=3)
    message = "".join(formatted)
    emit = logger.error if is_error else logger.warning
    emit(message)

def read_text_from_file(path: str) -> str:
    """Read a text file, detecting its encoding when it is not valid UTF-8.

    The file is first decoded as UTF-8. If that fails, the decode error is
    logged as a warning and ``charset_normalizer`` is asked to guess the
    encoding.

    Args:
        path: Path of the file to read.

    Returns:
        The decoded file content.
    """
    try:
        with open(path, encoding="utf-8") as file:
            return file.read()
    except UnicodeDecodeError:
        print_traceback(is_error=False)

    # Imported lazily so the dependency is only needed for non-UTF-8 files.
    from charset_normalizer import from_path

    best_match = from_path(path).best()
    if best_match is None:
        # No plausible encoding was found. str(None) would hand the literal
        # text "None" back as the file content, so fall back to a lossy
        # UTF-8 decode that keeps every readable character instead.
        with open(path, encoding="utf-8", errors="replace") as file:
            return file.read()
    return str(best_match)





def sanitize_windows_file_path(file_path: str) -> str:
    """Return the first variant of ``file_path`` that exists on this system.

    Variants are tried in this order:
      1. the path unchanged (Linux and macOS);
      2. the path without a leading "/" (native Windows, "/C:/..." form);
      3. the "/mnt/<drive>/..." form of a drive path (Windows + WSL);
      4. the path from step 2 with "/" replaced by backslashes.

    If none of them exists, the original ``file_path`` is returned.
    """
    if os.path.exists(file_path):
        return file_path

    # Native Windows: drop the leading "/" of "/C:/...".
    stripped = file_path[1:] if file_path.startswith("/") else file_path
    if os.path.exists(stripped):
        return stripped

    # Windows + WSL: "C:/rest" is looked up as "/mnt/c/rest".
    if re.match(r"^[A-Za-z]:/", stripped):
        drive_letter = stripped[0].lower()
        remainder = stripped[3:]
        wsl_candidate = f"/mnt/{drive_letter}/{remainder}"
        if os.path.exists(wsl_candidate):
            return wsl_candidate

    # Native Windows: use backslash separators.
    backslashed = stripped.replace("/", "\\")
    return backslashed if os.path.exists(backslashed) else file_path

def sanitize_chrome_file_path(file_path: str) -> str:
    """Resolve a browser-style file reference to a path that exists locally.

    An existing ``file_path`` is returned unchanged. Otherwise the input is
    treated as a "file:///..." URL: its percent-decoded path component is
    passed through ``sanitize_windows_file_path``. If that result does not
    exist either, the raw input is sanitized the same way and returned.
    """
    if os.path.exists(file_path):
        return file_path

    # Dealing with "file:///...":
    decoded_path = urllib.parse.unquote(urllib.parse.urlparse(file_path).path)
    candidate = sanitize_windows_file_path(decoded_path)
    if not os.path.exists(candidate):
        candidate = sanitize_windows_file_path(file_path)
    return candidate




def save_url_to_local_work_dir(url: str, save_dir: str, save_filename: str = "") -> str:
    """Copy a local file or download an HTTP(S) URL into ``save_dir``.

    Args:
        url: An "http://" / "https://" URL, or a local path or "file:///"
            reference, which is resolved with ``sanitize_chrome_file_path``.
        save_dir: Directory the file is written to.
        save_filename: Target file name; defaults to the basename of ``url``.

    Returns:
        The path of the saved file inside ``save_dir``.

    Raises:
        ValueError: If the HTTP response status is not 200.
    """
    if not save_filename:
        save_filename = get_basename_from_url(url)
    new_path = os.path.join(save_dir, save_filename)

    is_remote = is_http_url(url)
    local_source = "" if is_remote else sanitize_chrome_file_path(url)
    # If the local source already IS the destination file, deleting the
    # destination below would destroy the source and the copy would then
    # fail. There is nothing to do in that case.
    if (
        not is_remote
        and os.path.exists(local_source)
        and os.path.exists(new_path)
        and os.path.samefile(local_source, new_path)
    ):
        logger.info(f"{local_source} is already located at {new_path}. Skipping copy.")
        return new_path

    # Remove any previous file of the same name before writing the new one.
    if os.path.exists(new_path):
        os.remove(new_path)
    logger.info(f"Downloading {url} to {new_path}...")
    start_time = time.time()
    if not is_remote:
        url = local_source
        shutil.copy(url, new_path)
    else:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        }
        # NOTE(review): no timeout is passed, so a stalled server can block
        # this call indefinitely; the whole body is also held in memory.
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            with open(new_path, "wb") as file:
                file.write(response.content)
        else:
            raise ValueError("Can not download this file. Please check your network or the file link.")
    end_time = time.time()
    logger.info(f"Finished downloading {url} to {new_path}. Time spent: {end_time - start_time} seconds.")
    return new_path