#!/usr/bin/env python3
"""
Generate an ICLR-style Related Work section and compile the PDF.
All paths are anonymized via ANON_PROJECT_ROOT.
"""

import re
import json
import tqdm  # type: ignore
import argparse
import requests  # type: ignore
import random
import os
from loguru import logger  # type: ignore
from concurrent.futures import ThreadPoolExecutor
from create_util import *
from sentence_transformers import SentenceTransformer, util

# ----------  Arg Parsing  ----------
# Parse the topic / model options from the command line.
parser_manager = ArgParser()
args = parser_manager.add_topic_args().add_model_args().parse()

topic: str = args.topic
topic_description: str = args.topic_description
model_name: str = args.model_name

# ----------  Anonymous Base Directory  ----------
# The root comes from the ANON_PROJECT_ROOT environment variable and falls
# back to a local directory when the variable is unset.
BASE_DIR = os.environ.get("ANON_PROJECT_ROOT", "./anonymous_root")
WORK_DIR = os.path.join(BASE_DIR, topic, topic_description)
RELATED_WORK_TEX = os.path.join(WORK_DIR, "latex", "content", "related_work.tex")

# ----------  Skip if Already Generated  ----------
# A file of 100 bytes or fewer is treated as missing/incomplete and is
# regenerated by the rest of the script.
if os.path.exists(RELATED_WORK_TEX) and os.path.getsize(RELATED_WORK_TEX) > 100:
    logger.info("Related Work already generated; skipping.")
    # `exit()` is a helper injected by the `site` module for interactive use
    # and is not guaranteed to exist (e.g. `python -S`, frozen builds).
    # Raising SystemExit gives the same successful exit status without it.
    raise SystemExit(0)

# ----------  Semantic Similarity Helper  ----------
def get_top_n_similar_items(reference_data, topic_desc, n=40):
    """
    Return the top-N items whose titles are most similar to ``topic_desc``.

    Similarity is the cosine similarity between Sentence-Transformer
    ("all-MiniLM-L6-v2") embeddings of ``topic_desc`` and of each item title.

    Args:
        reference_data: Collection of dict-like items (each must support
            ``.get``). Only items with a truthy ``"title"`` are ranked.
        topic_desc: Text the titles are compared against.
        n: Maximum number of items to return.

    Returns:
        A list of at most ``n`` items sorted by descending similarity. Every
        ranked item is mutated in place to carry a float
        ``"similarity_score"``. Returns ``[]`` when there is nothing to rank:
        no data, an empty topic description, or no item with a title.
    """
    if not reference_data or not topic_desc:
        return []

    # Filter before touching the model: when no item has a title there is
    # nothing to rank, so skip the (expensive) model load and avoid encoding
    # an empty batch.
    valid_items = [it for it in reference_data if it.get("title")]
    if not valid_items:
        return []

    model = SentenceTransformer("all-MiniLM-L6-v2")
    topic_emb = model.encode(topic_desc, convert_to_tensor=True)
    title_embs = model.encode(
        [it["title"] for it in valid_items], convert_to_tensor=True
    )

    # One query against all titles: row 0 holds one score per valid item,
    # in the same order as ``valid_items``.
    scores = util.cos_sim(topic_emb, title_embs)[0]
    for item, score in zip(valid_items, scores):
        item["similarity_score"] = score.item()

    valid_items.sort(key=lambda x: x["similarity_score"], reverse=True)
    return valid_items[:n]

# ----------  Load Corpus  ----------
# paper_info.json sits in the topic directory under BASE_DIR.
META_JSON = os.path.join(BASE_DIR, topic, "paper_info.json")
with open(META_JSON, encoding="utf-8") as f:
    reference = json.load(f)

# Rank the corpus against the topic description, then keep only the fields
# that are handed to the prompt (in this fixed key order).
top_40 = get_top_n_similar_items(reference, topic_description, n=40)
reference_datas = [
    {field: it[field] for field in ("citation_key", "title", "summary")}
    for it in top_40
]

# ----------  Prompt for Related Work  ----------
prompt = f"""
Write the Related Work section in ICLR style.

Guidelines:
- Focus on academic siblings: alternative attempts to solve the same problem.
- Compare and contrast assumptions / methods vs. ours.
- 2–4 paragraphs; each starts with a bold category summary.
- Use \\cite, \\citet, \\citep correctly; avoid bare "et al."
- Output only LaTeX code inside ```latex```.

#### Reference Corpus ####
{reference_datas}
"""

# ----------  Generate Related Work  ----------
latex_output = ""
while len(latex_output) < 10:
    model = ChatAgent(model_name)
    latex_output = model.chat_with_latex_retry(prompt)

os.makedirs(os.path.dirname(RELATED_WORK_TEX), exist_ok=True)
with open(RELATED_WORK_TEX, "w", encoding="utf-8") as f:
    f.write(latex_output)

logger.info(f"Related Work written to {RELATED_WORK_TEX}")

# ----------  Compile PDF  ----------
from create_pdf import create_pdf
create_pdf(topic, topic_description)