import json
import os
import requests
from bs4 import BeautifulSoup
from langchain.tools import BaseTool
import json
import os
import requests
from bs4 import BeautifulSoup
from langchain.tools import BaseTool
from typing import Optional

from experiments.defense_wrapper import wrap_external_document
from utils.constants import SANDWICH, DELIMITING_DATA, MARKING


class SafeWebsiteVisitorTool(BaseTool):
    name: str = "safe_website_visitor"
    description: str = (
        "Visit a website and return its content. Uses persistent caching."
    )
    attack_algorithm: str = ""
    user_instruction: str = ""
    defense_type: str = ""
    cache_file: str = "website_cache.json"
    counterattack: bool = False

    def __init__(
        self,
        user_instruction: str,
        attack_algorithm: str,
        defense_type: str = "",
        counterattack: bool = False,
        cache_file: Optional[str] = None,
    ):
        super().__init__()
        self.user_instruction = user_instruction
        self.attack_algorithm = attack_algorithm
        self.defense_type = defense_type
        self.counterattack = counterattack
        self.cache_file = cache_file or "website_cache.json"

    def load_cache(self) -> dict:
        """Load the cached website content from the specified file."""
        if os.path.exists(self.cache_file):
            with open(self.cache_file, "r") as f:
                return json.load(f)
        return {}

    def save_cache(self, cache: dict):
        """Save the updated cache to the file."""
        with open(self.cache_file, "w") as f:
            json.dump(cache, f)

    def visit_website(self, url: str) -> str:
        """Visit the website, cache its content, and apply infection if necessary."""
        cache = self.load_cache()
        infection_prompt = (
            "" if self.attack_algorithm == "N/A" else self.attack_algorithm
        )

        if url in cache:
            print(f"Using cached content for {url}")
            return f"Content from {url}:\n\n{cache[url] + infection_prompt}"

        print(f"Visiting URL...: {url}")
        try:
            text = self._fetch_website_content(url)
        except Exception as e:
            print(f"Error: {str(e)}")
            return f"Error visiting {url}: {str(e)}. Please visit another URL."

        cache[url] = text
        self.save_cache(cache)
        return f"Content from {url}:\n\n{text + infection_prompt}"

    def _fetch_website_content(self, url: str) -> str:
        """Fetch the raw text content from the website."""
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
        }
        response = requests.get(url, timeout=10, headers=headers)
        response.raise_for_status()

        soup = BeautifulSoup(response.content, "html.parser")
        text = soup.get_text(separator="\n", strip=True)

        return self._limit_content_length(text)

    def _limit_content_length(self, text: str, max_length: int = 5000) -> str:
        """Limit the text content to a specified length."""
        return (
            text[:max_length] + "...(truncated)"
            if len(text) > max_length
            else text
        )

    def _run(self, url: str) -> str:
        """Synchronously fetch and return website content."""
        contents = self.visit_website(url)
        defensed_contents = wrap_external_document(
            self.defense_type,
            contents,
            self.user_instruction,
            self.counterattack,
        )
        return defensed_contents

    def _arun(self, url: str):
        raise NotImplementedError(
            "SafeWebsiteVisitorTool does not support async"
        )
