import os 
import base64
import time
import json
import logging
import random
import threading
import uuid
import shutil
from selenium import webdriver
from selenium.webdriver.common.by import By
import openai
from webagent.azure_gpt_4o import Openai

# Retry policy for judge API calls: how many attempts get_response makes,
# and the pause in seconds it takes after a rate-limit error.
API_MAX_RETRY = 10
API_RETRY_SLEEP = 5

# Endpoint configuration handed to the client; a single empty entry here.
GPT5_API = [{}]

# Module-wide client through which every judge request is issued.
oai_client = Openai(apis=GPT5_API)

# Prompt template for the single-screenshot judge. It is filled in with
# str.format, which substitutes {topic} and {user_instruction}; the doubled
# braces around the JSON example are format escapes that render as literal
# "{" and "}". The rubric asks for three sub-scores (20 + 50 + 30) plus a
# "total_score", which is the only field judge_website_static_single reads.
# The text is sent to the model verbatim, so edits here change judge behavior.
judge_single_most_strict_revised = """
You are an expert evaluator tasked with assessing the quality of an HTML webpage generated by a large language model. You will be given a screenshot of the rendered HTML webpage and the original user instructions.

You will be provided with:
    - The general topic of the generated webpage: {topic}
    - The original user instruction: {user_instruction}
    - Image A, representing the output of the model to evaluate

Your objective is to assign precise, rigorous scores, using the full 0–100 range. Only award high scores for webpages that are absolutely flawless, meeting all design and functional expectations. Penalize harshly for even the smallest imperfections—there is zero tolerance for errors.

Key Evaluation Areas:

1. Instructional Alignment (20 points)  
   Evaluate how closely the webpage follows the user's instructions. Only this in aspect, your criteria can be relatively low, since we expect some flexibility in interpretation and should more pay more attention in another two aspects (Visual Design and Aesthetics and Structural Coherence and Usability below).  

   Score levels and their explanations: 
   - Good alignment (10–20): The webpage almost matches the user’s instructions. 
   - Severe misalignment (0–9): The page fails to meet basic requirements. Major elements are missing or misrepresented.

2. Visual Design and Aesthetics (50 points)  
   Assess the overall professionalism and polish of the design. Only award high marks for designs that look flawless, balanced, and intentional.  

   Some golden rules you should obey when scoring: 
    - Always cherish **detailed, refined, and innovative design**. A highly refined design is always better than a plain one, which means we value pages with **highly rich design elements** more than simple and plain designs. This includes an exquisite transparent dynamic background, elements or special effects floating in the background, gradient color text, rich yet beautiful color matching, and so on.
    - NO PLACEHOLDERS! Always cherish **real images and expressive (real or abstract) icons**, instead of placeholders. A website with rich, real, and appropriate images or icons should score higher(85 or above), while a website with placeholders or broken images should score below 50. Abstract modern icon are also preferable, but they should be well-designed and are NOT placeholders. 
    - Simplicity is not a lack of content. A simple design can still be rich and engaging if it uses space, color, and typography effectively. 
    - The overall impression is important. Make sure the webpage has NO broken/partially visible words or elements. NO partially loaded elements.

    Score levels and their explanations: 
   - Perfect design (40-50): The design is exceptionally professional, with a well-executed color palette, typography, and spacing. The page has a polished and intentional feel.  
   - Minor flaws (20-39): The design is good, but there are small issues (e.g., slight inconsistency in font sizes or spacing). These should still impact the score significantly.  
   - Significant flaws (10–19): The design has major issues (e.g., poor readability, awkward layout, or jarring color choices).  
   - Unacceptable design (0–9): The page is unprofessional, with severe flaws such as overlapping text, unreadable fonts, or broken layouts / images.

3. Structural Coherence and Usability (30 points)  
   The page must have a logical and intuitive structure. Even the smallest structural mistake (misalignment, broken flow, or inconsistent layout) will severely affect the score.  

   Key scoring rules:
    - Overall impression comes first. This stresses the importance of adopting a modern, concise, refined framework. Encourage websites to use modern, beautiful design frameworks instead of simple, mediocre designs. Webpages with appropriate use framework can score above 85, while those with poor or no framework should score below 50. 
    - Highlight the integrity of the overall structure. Check carefully whether the page has a complete structural layout, with no missing elements or broken sections. If the page has any broken sections, it should score below 50. 

   - Flawless structure (20–30): The page has a perfect structure: well-organized, logical flow, and easy navigation.  
   - Minor structural issues (15–19): The structure is good, but there are small usability issues (e.g., slightly misaligned sections or awkward navigation).  
   - Major structural problems (10–14): The page has significant usability flaws, such as broken layouts or confusing content organization.  
   - Unusable structure (0–9): The page has severe structural issues, making it difficult to use or navigate effectively.

Fine-Grained Scoring Guidelines:

- Strict threshold for high scores: Only give scores above 90 if the webpage is absolutely flawless. If there is even a minor issue (e.g., a single broken element, misalignment, or poorly chosen font), do not award high marks. Scores 95+ should be reserved for near perfection.
- Minor flaws are heavily penalized: If the webpage has any noticeable flaw (such as text overlapping an image, improper spacing, or a lack of balance), this will result in low overall score!  (e.g., 10–30)
- Zero tolerance for bad design: If the webpage looks unprofessional (e.g., excessive white space, unaligned content/text, unreadable text, or poor contrast), the overall score should be 0-30!

Example Evaluation:

For a webpage with:
- Perfect alignment with instructions (everything is present and correct),
- Excellent visual design, but with slightly misaligned text,
- Clear structure with one misaligned image,

You might score:
- Instructional Alignment: 20/20 (perfect alignment with instructions),
- Visual Design: 35/50 (good design but minor flaw—misaligned text),
- Structural Coherence: 20/30 (minor misalignment of an image),
- Total Score: 75/100 (not good, but OK).

Final Output Format(alignment_score, aesthetic_score, structure_score are just the abbreviation of Instructional Alignment score, Visual Design and Aesthetics score, and Structural Coherence and Usability score):

{{
  "alignment_score": <score out of 20>,
  "aesthetics_score": <score out of 50>,
  "structure_score": <score out of 30>,
  "total_score": <sum out of 100>,
  "feedback": "<brief summary of strengths and weaknesses, with justification for the scores>"
}}

Strict Scoring Principles:
- Minor mistakes are penalized severely. A single misplaced element, broken layout, or poor design choice will dramatically affect the score.
- High scores (90+) should only be given for perfect webpages with no errors. If there is any imperfection, the score should drop significantly.
- No mercy for bad design. Webpages that are visually unappealing or hard to use must receive low scores (0–9) regardless of other factors.
"""

def get_response(messages, max_tokens=1024):
    """Query the GPT-5 judge client, retrying when the API call fails.

    Makes up to ``API_MAX_RETRY`` attempts at ``oai_client.gpt5_call`` with
    ``reasoning_effort="low"``. A rate-limit error is followed by a pause of
    ``API_RETRY_SLEEP`` seconds; bad-request and ``KeyError`` failures are
    printed and retried immediately.

    Args:
        messages: Conversation passed through as the client's ``conv``
            argument.
        max_tokens: Token limit forwarded to the client.

    Returns:
        Whatever ``gpt5_call`` returned on the first successful attempt, or
        an empty string when every attempt failed. The callers in this file
        fail to parse an empty string as JSON and fall back to their default
        score.
    """
    # Bound up front so that exhausting the retries returns a value instead
    # of raising UnboundLocalError at the ``return`` below.
    output = ""
    for _ in range(API_MAX_RETRY):
        try:
            output = oai_client.gpt5_call(
                conv=messages,
                max_tokens=max_tokens,
                reasoning_effort="low",
            )
            break
        except openai.RateLimitError as e:
            print(type(e), e)
            time.sleep(API_RETRY_SLEEP)
        except openai.BadRequestError as e:
            print(type(e), e)
        except KeyError as e:
            # The exception must be bound here: without ``as e`` the print
            # itself raised NameError and aborted the retry loop.
            print(type(e), e)

    return output

def encode_image(image_path):
    """Return the file at ``image_path`` as a base64 string.

    The file is read in binary mode and the base64 bytes are decoded as
    UTF-8. When nothing exists at ``image_path`` the miss is logged at INFO
    level and an empty string is returned instead.
    """
    if not os.path.exists(image_path):
        logging.info(f"Image not found: {image_path}")
        return ""

    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def judge_website_static_single(html, category, instruction, window_width=1920, window_height=1200):
    """Render ``html`` in headless Chrome and have the GPT judge score a screenshot.

    The HTML is written into a scratch directory unique to this call, loaded
    through a ``file://`` URL and screenshotted. The screenshot is sent,
    together with the ``judge_single_most_strict_revised`` prompt, to
    ``get_response``. The scratch directory is removed before returning.

    Args:
        html: HTML document source to render.
        category: Topic substituted into the prompt's ``{topic}`` field.
        instruction: User instruction substituted into the prompt's
            ``{user_instruction}`` field.
        window_width: Browser window width passed to ``set_window_size``.
        window_height: Browser window height passed to ``set_window_size``.

    Returns:
        The ``total_score`` field of the judge's JSON reply as an ``int``.
        Falls back to 50 when rendering fails, no screenshot was produced,
        or the reply cannot be parsed.
    """
    # Create unique identifiers for concurrent execution
    thread_id = threading.current_thread().ident
    unique_id = str(uuid.uuid4())[:8]
    timestamp_ns = int(time.time_ns())

    # Chrome options optimized for concurrency
    chrome_flags = (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        # Enable GPU and WebGL support for headless mode
        "--enable-gpu",
        "--use-gl=swiftshader",  # Use software rendering for WebGL
        "--enable-webgl",
        "--enable-accelerated-2d-canvas",
        "--disable-gpu-sandbox",
        "--disable-extensions",
        "--disable-plugins",
        "--disable-javascript-harmony-shipping",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-features=TranslateUI",
        "--disable-ipc-flooding-protection",
        # Additional WebGL and media support
        "--disable-web-security",
        "--allow-running-insecure-content",
        "--ignore-gpu-blacklist",
        "--enable-features=VaapiVideoDecoder",
        "--disable-features=VizDisplayCompositor",
        "--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    )
    options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        options.add_argument(flag)

    # Create unique base directory for this instance
    base_dir = f"./webagent/tmp/{timestamp_ns}_{thread_id}_{unique_id}"

    try:
        os.makedirs(base_dir, exist_ok=True)

        # Use unique user data directory to avoid conflicts
        user_data_dir = os.path.join(base_dir, "chrome_user_data")
        options.add_argument(f"--user-data-dir={user_data_dir}")

        # Add random remote debugging port to avoid conflicts
        debug_port = random.randint(9222, 29999)
        options.add_argument(f"--remote-debugging-port={debug_port}")

        html_tmp_file = os.path.abspath(os.path.join(base_dir, "tmp.html"))
        with open(html_tmp_file, "w", encoding="utf-8") as f:
            f.write(html)

        # Create webdriver and take screenshot
        driver = None
        try:
            driver = webdriver.Chrome(options=options)
            driver.set_window_size(window_width, window_height)
            driver.get(f"file://{html_tmp_file}")

            # Fixed wait for the page to load before looking for alerts
            time.sleep(1.0)

            # Handle any alert dialogs (like WebGL not supported)
            try:
                alert = driver.switch_to.alert
                # Read the text BEFORE accepting: once the alert is dismissed
                # it can no longer be queried, and the resulting exception
                # used to skip both the log line and the settle delay.
                alert_text = alert.text
                alert.accept()
                print("Alert dismissed:", alert_text)
                time.sleep(0.5)  # Give a moment after dismissing alert
            except Exception:
                # Best effort: usually there is simply no alert on the page.
                pass

            # Take screenshot
            img_path = os.path.join(base_dir, 'screenshot.png')
            driver.save_screenshot(img_path)

            # Encode image to base64
            img_base64 = encode_image(img_path)

        finally:
            if driver:
                try:
                    driver.quit()
                except Exception as e:
                    logging.warning(f"Failed to quit Chrome driver: {e}")

        # Without a screenshot there is nothing to judge; skip the API call
        # (an empty image payload would only burn the retry budget) and
        # return the same default the failure paths below use.
        if not img_base64:
            logging.error("Error in judge_website_static_single: no screenshot was captured")
            return 50

        # Prepare messages for GPT evaluation
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": [
                {"type": "text", "text": f"{judge_single_most_strict_revised.format(topic=category, user_instruction=instruction)}. \nThe Image of the answer of the assistant you need to judge is: "},
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}}
            ]}
        ]

        response = get_response(messages)
        response = response.strip()
        # The model may wrap its JSON in a Markdown code fence, with or
        # without a "json" language tag.
        if response.startswith('```'):
            response = response.replace('```json', '').replace('```', '').strip()
        print(response)
        try:
            result = json.loads(response)
            return int(result['total_score'])
        except (ValueError, KeyError, TypeError):
            print("❗warning: failed to parse the response, returning 50...")
            return 50

    except Exception as e:
        logging.error(f"Error in judge_website_static_single: {e}")
        return 50
    finally:
        # Clean up temporary files; ignore_errors also covers the case where
        # the directory was never created.
        shutil.rmtree(base_dir, ignore_errors=True)





def judge_website_batch(html_list, instructions, API_INFO=None, timeout_seconds=300, window_width=1920, window_height=1200):
    """
    Batch version of judge_website_static_single that reuses a single browser instance.

    Each HTML string is written to its own file in a scratch directory unique
    to this call, loaded in the shared browser, screenshotted and sent to
    ``get_response``. The matching ``instructions`` entry is sent verbatim as
    the text part of the prompt.
    NOTE(review): unlike judge_website_static_single, the judge prompt
    template is not applied here - presumably callers pass an already
    formatted prompt; confirm against callers.

    Args:
        html_list: List of HTML strings to evaluate
        instructions: List of prompt strings, one per entry of html_list
        API_INFO: Unused; kept for compatibility
        timeout_seconds: Unused (no timeout is enforced); kept for compatibility
        window_width: Browser window width
        window_height: Browser window height

    Returns:
        List with one score per HTML, in input order. Each entry is the
        ``total_score`` value from the judge's JSON reply (not type-cast), or
        50 when the HTML is blank, no screenshot was produced, the reply
        cannot be parsed, or processing that item failed. If the browser
        cannot be set up at all, every entry is 50.

    Raises:
        ValueError: If html_list and instructions differ in length.
    """
    if len(html_list) != len(instructions):
        raise ValueError("html_list and instructions must have the same length")

    # Nothing to score: do not pay for a browser launch.
    if not html_list:
        return []

    # Create unique identifiers for concurrent execution
    thread_id = threading.current_thread().ident
    unique_id = str(uuid.uuid4())[:8]
    timestamp_ns = int(time.time_ns())

    # Chrome options optimized for concurrency
    chrome_flags = (
        "--headless",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        # Enable GPU and WebGL support for headless mode
        "--enable-gpu",
        "--use-gl=swiftshader",  # Use software rendering for WebGL
        "--enable-webgl",
        "--enable-accelerated-2d-canvas",
        "--disable-gpu-sandbox",
        "--disable-extensions",
        "--disable-plugins",
        "--disable-javascript-harmony-shipping",
        "--disable-background-timer-throttling",
        "--disable-backgrounding-occluded-windows",
        "--disable-renderer-backgrounding",
        "--disable-features=TranslateUI",
        "--disable-ipc-flooding-protection",
        # Additional WebGL and media support
        "--disable-web-security",
        "--allow-running-insecure-content",
        "--ignore-gpu-blacklist",
        "--enable-features=VaapiVideoDecoder",
        "--disable-features=VizDisplayCompositor",
        "--user-agent=Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    )
    options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        options.add_argument(flag)

    # Create unique base directory for this instance
    base_dir = f"./webagent/tmp/{timestamp_ns}_{thread_id}_{unique_id}"
    results = []

    try:
        os.makedirs(base_dir, exist_ok=True)

        # Use unique user data directory to avoid conflicts
        user_data_dir = os.path.join(base_dir, "chrome_user_data")
        options.add_argument(f"--user-data-dir={user_data_dir}")

        # Add random remote debugging port to avoid conflicts
        debug_port = random.randint(9222, 29999)
        options.add_argument(f"--remote-debugging-port={debug_port}")

        # Create single webdriver instance for batch processing
        driver = None
        try:
            driver = webdriver.Chrome(options=options)
            driver.set_window_size(window_width, window_height)

            # Process each HTML in sequence
            for i, (html, instruction) in enumerate(zip(html_list, instructions)):
                try:
                    # Skip empty HTML (execution failures). A plain score is
                    # appended, like every other path, so the result list
                    # stays homogeneous (this used to append a tuple).
                    if not html.strip():
                        results.append(50)
                        continue

                    # Create temporary HTML file for this item
                    html_tmp_file = os.path.abspath(os.path.join(base_dir, f"tmp_{i}.html"))
                    with open(html_tmp_file, "w", encoding="utf-8") as f:
                        f.write(html)

                    # Load HTML in browser
                    driver.get(f"file://{html_tmp_file}")

                    # Fixed wait for the page to load before looking for alerts
                    time.sleep(1.0)

                    # Handle any alert dialogs (like WebGL not supported)
                    try:
                        alert = driver.switch_to.alert
                        # Read the text BEFORE accepting: once dismissed the
                        # alert can no longer be queried, and the resulting
                        # exception used to skip the log and settle delay.
                        alert_text = alert.text
                        alert.accept()
                        print(f"Alert dismissed for HTML {i}: {alert_text}")
                        time.sleep(0.5)  # Give a moment after dismissing alert
                    except Exception:
                        # Best effort: usually there is simply no alert.
                        pass

                    # Take screenshot
                    img_path = os.path.join(base_dir, f'screenshot_{i}.png')
                    driver.save_screenshot(img_path)

                    # Encode image to base64
                    img_base64 = encode_image(img_path)

                    # Without a screenshot there is nothing to judge; skip
                    # the API call and use the default score.
                    if not img_base64:
                        logging.error(f"Error processing HTML {i}: no screenshot was captured")
                        results.append(50)
                        continue

                    # Prepare messages for GPT evaluation
                    messages = [
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": [
                            {"type": "text", "text": f"{instruction}. \nThe Image of the answer of the assistant you need to judge is: "},
                            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}}
                        ]}
                    ]

                    # Get response from GPT
                    response = get_response(messages)
                    try:
                        # Same Markdown-fence handling as the single-page
                        # judge, so fenced JSON replies are not discarded.
                        response = response.strip()
                        if response.startswith('```'):
                            response = response.replace('```json', '').replace('```', '').strip()
                        result = json.loads(response)
                        aesthetic_score = result['total_score']
                    except (ValueError, KeyError, TypeError, AttributeError):
                        aesthetic_score = 50  # Default score on parsing failure

                    results.append(aesthetic_score)

                    print(f"✅ Processed HTML {i+1}/{len(html_list)}: aesthetic_score={aesthetic_score}")

                except Exception as e:
                    logging.error(f"Error processing HTML {i}: {e}")
                    results.append(50)  # Default scores on error

        finally:
            if driver:
                try:
                    driver.quit()
                except Exception as e:
                    logging.warning(f"Failed to quit Chrome driver: {e}")

    except Exception as e:
        logging.error(f"Error in judge_website_batch: {e}")
        # Return default scores for all items on critical error
        results = [50] * len(html_list)

    finally:
        # Clean up temporary files; ignore_errors also covers the case where
        # the directory was never created.
        shutil.rmtree(base_dir, ignore_errors=True)

    return results


def judge_website_static(html, instruction, category="website"):
    """
    Compatibility entry point that delegates to judge_website_static_single.

    Only the argument order differs: this function takes the instruction
    before the category, and the category is optional.

    Args:
        html: HTML string to evaluate
        instruction: Instruction string for evaluation
        category: Category of the website (default: "website")

    Returns:
        The value returned by judge_website_static_single for these arguments
    """
    score = judge_website_static_single(html, category, instruction)
    return score