
import os
import json
import openai

class AggregationAgent:
    """Turn a directory of per-paper JSON files into a Markdown leaderboard.

    The agent reads every ``*.json`` file in ``extracted_dir``, embeds the
    parsed objects in a single prompt, sends that prompt to a chat-completion
    endpoint through the legacy module-level ``openai`` API, and writes the
    model's reply verbatim to ``leaderboard_md``.
    """

    def __init__(self, extracted_dir, leaderboard_md, api_key, api_base, model="qwen2.5-14b-instruct"):
        """Store the configuration; no I/O happens here.

        Args:
            extracted_dir: Directory scanned for ``*.json`` input files.
            leaderboard_md: Path of the Markdown file to write.
            api_key: Value assigned to ``openai.api_key`` before each request.
            api_base: Value assigned to ``openai.api_base`` before each request.
            model: Model name passed to the chat-completion call.
        """
        self.extracted_dir = extracted_dir
        self.leaderboard_md = leaderboard_md
        self.api_key = api_key
        self.api_base = api_base
        self.model = model

    def load_all_jsons(self):
        """Load every ``*.json`` entry of ``extracted_dir`` in sorted name order.

        Entries that cannot be opened or parsed are reported with a
        ``[WARN]`` line on stdout and skipped, so one bad file never aborts
        the whole run.

        Returns:
            A list with the parsed content of each readable file.

        Raises:
            OSError: If ``extracted_dir`` itself cannot be listed.
        """
        loaded = []
        for file in sorted(os.listdir(self.extracted_dir)):
            if not file.endswith('.json'):
                continue
            path = os.path.join(self.extracted_dir, file)
            try:
                # open() sits inside the try so that an unreadable entry
                # (e.g. a directory named "x.json" or a permission error)
                # is skipped exactly like a malformed file.
                with open(path, 'r', encoding='utf-8') as f:
                    loaded.append(json.load(f))
            except Exception as e:  # deliberate best-effort: warn and go on
                print(f"[WARN] Failed to load {file}: {e}")
        return loaded

    def build_user_prompt(self, jsons):
        """Build the user message sent to the model.

        Args:
            jsons: Iterable of JSON-serialisable objects, one per paper.

        Returns:
            The instruction header, one ``--- Paper N ---`` section per
            object (pretty-printed, non-ASCII characters kept as-is), and a
            closing instruction to output only the Markdown table.
        """
        header = (
            "Based on the following structured information of all papers, please automatically generate a Markdown format leaderboard table. "
            "The table header should be: Title | Metrics | Results | Model | Github | Experiment Settings.\n"
            "Each row represents one paper, including:\n"
            "- Title: paper title\n"
            "- Metrics: selected_table_metrics field, separated by commas\n"
            "- Results: selected_table_core_results field, key-value pairs, separated by commas\n"
            "- Model: selected_table_settings_model_size\n"
            "- Github: github field, use '-' if not available\n"
            "- Experiment Settings: merge selected_table_settings_training_strategy and selected_table_settings_hyperparameter_selection, wrap if too long\n"
            "Please automatically deduplicate, merge, and sort. The table should be concise and clear.\n"
            "Below is the structured information (JSON) of all papers:\n"
        )
        footer = "\nPlease output only the Markdown table, do not output any other content."
        # Join once instead of growing the string with += for every paper.
        sections = "".join(
            f"\n--- Paper {i} ---\n{json.dumps(js, ensure_ascii=False, indent=2)}\n"
            for i, js in enumerate(jsons, start=1)
        )
        return header + sections + footer

    def call_llm(self, prompt):
        """Send ``prompt`` to the configured chat model and return its reply.

        Note that this mutates module-level ``openai`` settings
        (``api_key``, ``api_base`` and, if unset, ``requestssession``).

        Args:
            prompt: The user message.

        Returns:
            The ``content`` of the first choice in the response.
        """
        # Imported here because the file has no top-level import of requests.
        import requests

        openai.api_key = self.api_key
        openai.api_base = self.api_base
        # Install a session only when none is configured, so repeated calls
        # do not keep allocating sessions that are never closed.
        if getattr(openai, "requestssession", None) is None:
            openai.requestssession = requests.Session()
        system_prompt = (
            "You are an AI assistant skilled in information aggregation and table generation. Please output a high-quality Markdown table according to the user's requirements."
        )
        response = openai.ChatCompletion.create(
            model=self.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}
            ],
            temperature=0.2,
            max_tokens=4096
        )
        return response['choices'][0]['message']['content']

    def generate_leaderboard(self):
        """Run the full pipeline: load inputs, query the model, write the file.

        The model's reply is written unmodified to ``leaderboard_md``
        (UTF-8), and an ``[INFO]`` line is printed on success.
        """
        jsons = self.load_all_jsons()
        prompt = self.build_user_prompt(jsons)
        markdown = self.call_llm(prompt)
        # Create the target directory if needed so a finished LLM call is
        # not thrown away by a FileNotFoundError on write.
        out_dir = os.path.dirname(self.leaderboard_md)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(self.leaderboard_md, 'w', encoding='utf-8') as f:
            f.write(markdown)
        print(f"[INFO] Leaderboard generated: {self.leaderboard_md}")

if __name__ == "__main__":
    # Script entry point: build the leaderboard from the default extraction
    # directory using the endpoint configured below.
    settings = {
        "extracted_dir": "autoleaderboard/extracted_content",
        "leaderboard_md": "autoleaderboard/leaderboard.md",
        "api_key": "EMPTY",
        "api_base": "http://localhost:8000/v1",
    }
    agent = AggregationAgent(**settings)
    agent.generate_leaderboard()