import glob
import os
import re

# Root directory that check_html_files() searches recursively for
# render_<digit>*.html files. Alternative directories are kept below,
# commented out, so they can be swapped back in.
# html_dir = "results/gemini-2.0-flash-001/p_run-reddit-2025-03-23-164656/"
# html_dir = "experiments/gemini-2.0-flash-001/webk"
html_dir = "experiments/debugging_gemini/16-Apr"

# TODO (presumably: add a check for renders containing the agent output below -- confirm):
# stop [ERROR: Invalid operation: The `response.text` quick accessor requires the response to contain a valid `Part`, but none were returned. The candidate's [finish_reason](http://ai.google.dev/api/generate-content#finishreason) is 8.]


def check_html_files(directory: "str | None" = None, expected_username: str = "MarvelsGrantMan136") -> None:
    """Scan rendered HTML files and print login-related findings.

    Recursively searches ``directory`` for files named
    ``render_<digit>*.html`` and, for each file (in sorted path order),
    prints one line per finding:

    * the value of the first ``csrf_token`` form field, if any;
    * the presence of the text ``Log in`` anywhere in the file;
    * at least one username inside ``<strong class="no-mobile">`` that
      differs from ``expected_username``.

    Files that cannot be opened or decoded as UTF-8 are reported and skipped.

    Args:
        directory: Root directory to search. ``None`` (the default) falls
            back to the module-level ``html_dir``.
        expected_username: Username the pages are expected to display.

    Returns:
        None. All findings are written to standard output.
    """
    root = html_dir if directory is None else directory

    # Both patterns are loop-invariant, so compile them once up front.
    # The username is taken to be the text inside <strong class="no-mobile">.
    username_pattern = re.compile(r'<strong\s+class="no-mobile">\s*([^<>]+?)\s*</strong>')
    csrf_token_pattern = re.compile(r'name="csrf_token" value="([^"]+)"')

    # Escape the root so glob metacharacters in the directory name itself
    # (e.g. "[" or "*") are matched literally instead of as wildcards.
    search_pattern = os.path.join(glob.escape(root), "**/render_[0-9]*.html")

    # Sorting makes the report order independent of filesystem enumeration.
    for filepath in sorted(glob.glob(search_pattern, recursive=True)):
        try:
            with open(filepath, "r", encoding="utf-8") as f:
                content = f.read()
        except (OSError, UnicodeDecodeError) as e:
            # Best effort: report the unreadable file and keep scanning.
            print(f"Error reading {filepath}: {e}")
            continue

        csrf_token = csrf_token_pattern.search(content)
        if csrf_token:
            print(f"CSRF token found in {filepath}: {csrf_token.group(1)}")

        if "Log in" in content:
            print(f"Login field found in {filepath}")

        # any() over an empty list is False, so files without a username
        # element are never reported as a mismatch.
        usernames = username_pattern.findall(content)
        if any(user.strip() != expected_username for user in usernames):
            print(f"Username mismatch in {filepath}")


# Run the scan only when this file is executed as a script, not on import.
if __name__ == "__main__":
    check_html_files()
