#!/usr/bin/env python3
"""
Sanitize citations in every *.tex file under latex/content/ and re-compile the PDF.
All paths are anonymized via ANON_PROJECT_ROOT.
"""

import re
import json
import tqdm  # type: ignore
import argparse
import requests
import random
import os
from loguru import logger
from concurrent.futures import ThreadPoolExecutor
from create_util import *
from create_cite import *

# ---------- Arg Parsing ----------
# Both options are used as plain strings: each one becomes a single path
# component when the working directory is assembled with os.path.join().
parser = argparse.ArgumentParser(description="Sanitize citations and re-compile PDF")
parser.add_argument(
    "--topics",
    default='["attack", "large reasoning model"]',
    help="JSON-like list of topic keywords.",
)
parser.add_argument(
    "--topic_description",
    # The default must be a str. argparse passes non-string defaults through
    # unchanged, so the former list default reached os.path.join() and raised
    # TypeError whenever this flag was omitted.
    # NOTE(review): the value mirrors the --topics default; confirm that this
    # is the intended default sub-topic directory name.
    default='["attack", "large reasoning model"]',
    help="Short description of the sub-topic.",
)
args = parser.parse_args()

# ---------- Anonymous Base Directory ----------
# The two CLI values are used as directory names; only --topics is stripped.
folder_name = args.topics.strip()
topic_description = args.topic_description
# The root is read from the environment, with a relative fallback when unset.
BASE_DIR = os.getenv("ANON_PROJECT_ROOT", "./anonymous_root")
# Layout: <BASE_DIR>/<folder_name>/<topic_description>/latex/<bib file>
WORK_DIR = os.path.join(BASE_DIR, folder_name, topic_description)
BIB_FILE = os.path.join(os.path.join(WORK_DIR, "latex"), "iclr2025_conference.bib")

# ---------- Process every .tex file ----------
CONTENT_DIR = os.path.join(WORK_DIR, "latex", "content")
# Only direct children of CONTENT_DIR whose name ends in ".tex" are handled.
tex_names = [entry for entry in os.listdir(CONTENT_DIR) if entry.endswith(".tex")]
for tex_name in tex_names:
    tex_path = os.path.join(CONTENT_DIR, tex_name)
    with open(tex_path, mode="r", encoding="utf-8") as handle:
        tex_source = handle.read()

    # Step 1: pull missing references into the shared bib file.
    get_cite_from_llm_and_arxiv(tex_source, BIB_FILE)
    # Step 2: remove malformed / duplicated cite commands. The helper gets the
    # text exactly as it was read above, together with the file's path.
    delete_cite(tex_source, tex_path)

logger.info("All citations sanitized.")

# ---------- Re-compile PDF ----------
# The PDF helper is imported only here, after every citation pass has finished.
from create_pdf import create_pdf

# Same positional arguments as before: topic folder, then sub-topic folder.
pdf_target = (folder_name, topic_description)
create_pdf(*pdf_target)