import json
import os
import sys
import gzip
import concurrent.futures as cfuts
import nbformat

# Add project root to path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "../../")))

from tqdm import tqdm
from openai import OpenAI
from agent.datascience import DSagent
import globals
import logger
import utils

# Initialise the project logger once at import time, before any agent work.
# NOTE(review): log_filename presumably names the file this run logs to —
# confirm against logger.init.
logger.init(log_filename="ds1000_run.log")


def extract_code_from_notebook(nb_path):
    """Return the sources of all code cells in a notebook, joined by newlines.

    Args:
        nb_path: Filesystem path of the notebook to read.

    Returns:
        The code-cell sources concatenated with "\\n", in notebook order.
        An empty string is returned when the path does not exist or when
        reading/parsing fails (the failure is printed, not raised).
    """
    if os.path.exists(nb_path):
        try:
            notebook = nbformat.read(nb_path, as_version=4)
            sources = [
                cell.source
                for cell in notebook.cells
                if cell.cell_type == 'code'
            ]
            return "\n".join(sources)
        except Exception as e:
            # Best-effort helper: report the problem and fall through to "".
            print(f"Error reading notebook {nb_path}: {e}")
    return ""

def run_openai(model, temperature):
    """Run DSagent on every loaded DS-1000 problem and save the answers.

    Each entry of the module-level ``ds1000`` list is submitted to a
    single-worker thread pool. The results are sorted by problem id and
    written, one JSON object per line, to ``data/{model}-answers.jsonl``.

    Args:
        model: Label used only to build the output file name.
        temperature: Accepted for interface compatibility; this function
            does not use it.

    Raises:
        Exception: Anything raised while constructing ``DSagent`` propagates
            through ``Future.result()``. Errors from ``agent.act`` are caught
            and recorded as an empty answer instead.
    """
    # Configure summary settings
    if utils.module_config is None:
        utils.load_module_config()
    utils.module_config.enable_summary = True
    utils.module_config.summary_type = "answer"

    def run_func(prompt, problem_id, metadata):
        """Solve one problem and return its answer record."""
        # Use DSagent instead of the OpenAI API directly
        agent = DSagent()

        try:
            # Using complexity="simple" for code snippet generation.
            # agent.act returns a (plan, summary) tuple; the summary is
            # used as the code/answer.
            _, code = agent.act(requirement=prompt, complexity="simple")
        except Exception as e:
            # Best effort: one failing problem must not abort the whole run.
            print(f"Error running agent for id {problem_id}: {e}")
            code = ""

        # The metadata is passed in by the caller rather than looked up as
        # ds1000[problem_id]: a problem id is not guaranteed to equal the
        # record's position in the (possibly truncated) list.
        return {'id': problem_id, 'code': [code], 'metadata': metadata}

    ds1000_responses = []
    # Use max_workers=1 to avoid concurrency issues with global state in DSagent
    with cfuts.ThreadPoolExecutor(max_workers=1) as executor:
        futs = []
        for p in ds1000:
            metadata = p['metadata']
            problem_id = int(metadata['problem_id'])
            futs.append(
                executor.submit(run_func, p['prompt'], problem_id, metadata)
            )

        for f in tqdm(cfuts.as_completed(futs), total=len(futs)):
            ds1000_responses.append(f.result())
    ds1000_responses.sort(key=lambda x: int(x['id']))

    # Ensure data directory exists
    os.makedirs('data', exist_ok=True)

    with open(f'data/{model}-answers.jsonl', 'w') as f:
        for r in ds1000_responses:
            f.write(json.dumps(r) + '\n')


# Load ds1000 data
data_path = os.path.join(os.path.dirname(__file__), "ds1000.jsonl.gz")
# fallback to data/ if not found in current dir (legacy support)
if not os.path.exists(data_path):
    data_path = "data/ds1000.jsonl.gz"

# Only the first MAX_PROBLEMS records of the archive are loaded.
MAX_PROBLEMS = 5

ds1000 = []
if os.path.exists(data_path):
    # Stream the archive inside a context manager so the handle is always
    # closed, and stop as soon as the cap is reached instead of parsing the
    # whole file first and discarding the result.
    with gzip.open(data_path, "rt") as f:
        for i, line in enumerate(f):
            if i >= MAX_PROBLEMS:
                break
            ds1000.append(json.loads(line))
else:
    print(f"Warning: Data file not found at {data_path}")

# Runs at import time, as before; 'autods' only names the output file.
run_openai('autods', 0)
