import json
import random
import re
import sys
import os
import pandas as pd
import traceback
import numpy as np

# Add the src directory to the Python path to allow importing tools
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from tools.emulators import (
    query_lat_and_lon,
    future_temperature,
    diy_greenhouse,
    location_summary,
    history_temperature,
    diy_aerosol,
    is_land_or_sea,
    diy_aerosol_mean,
    diff_diy_aerosol_mean,
    diy_greenhouse_summary
)

# Map tool names to their actual function implementations.
# The keys are the same strings that a template lists under `tools_required`;
# the generation code looks each emulator up here by name before calling it.
# NOTE(review): query_lat_and_lon, diy_aerosol_mean and diy_greenhouse_summary
# are registered but are not invoked anywhere in the code visible in this file.
TOOL_MAP = {
    "query_lat_and_lon": query_lat_and_lon,
    "future_temperature": future_temperature,
    "diy_greenhouse": diy_greenhouse,
    "location_summary": location_summary,
    "history_temperature": history_temperature,
    "diy_aerosol": diy_aerosol,
    "is_land_or_sea": is_land_or_sea,
    "diy_aerosol_mean": diy_aerosol_mean,
    "diff_diy_aerosol_mean": diff_diy_aerosol_mean,
    "diy_greenhouse_summary": diy_greenhouse_summary,
}

# --- Robust City Data Loading ---
# Module-level city cache, filled in by load_city_data().
# CITY_DATA_DF: the city table as a pandas DataFrame (None until a load is attempted).
# CITIES_LIST: the unique city names taken from that table; an empty list means
# "no cities available" and makes question generation skip.
CITY_DATA_DF = None
CITIES_LIST = []

def load_city_data(file_path):
    """
    Load the city CSV into the module-level CITY_DATA_DF and CITIES_LIST.

    Both globals are always left in a consistent state: either the loaded table
    together with its unique city names, or an empty DataFrame together with an
    empty list when anything goes wrong (file missing, no 'city' column, read
    error). Failures are reported with a "FATAL:" message on stdout rather than
    raised, so importing the module never fails because of the data file.

    Args:
        file_path: Path to the CSV file. A relative path is resolved against
            the directory containing this source file; an absolute path is
            used as given (os.path.join discards the base in that case).

    Returns:
        None. The result is published through the two module globals.
    """
    global CITY_DATA_DF, CITIES_LIST

    # Start from the "nothing loaded" state so that every early return below
    # leaves the two globals in agreement with each other.
    CITY_DATA_DF = pd.DataFrame()
    CITIES_LIST = []

    try:
        # Construct an absolute path to the data file to avoid relative path issues.
        base_dir = os.path.dirname(os.path.abspath(__file__))
        full_path = os.path.join(base_dir, file_path)

        if not os.path.exists(full_path):
            print(f"FATAL: City data file not found at {full_path}")
            return

        city_df = pd.read_csv(full_path)
        # Normalize column names to lowercase for easier access.
        city_df.columns = [x.lower() for x in city_df.columns]

        if 'city' not in city_df.columns:
            # Do not publish a table without a 'city' column: lookups index
            # that column directly and would fail with a KeyError.
            print(f"FATAL: 'city' column not found in {full_path}")
            return

        # Drop rows where the city name is missing.
        city_df = city_df.dropna(subset=['city'])

        # Publish only after every step has succeeded.
        CITY_DATA_DF = city_df
        CITIES_LIST = city_df['city'].unique().tolist()
        print(f"Successfully loaded {len(CITIES_LIST)} unique city names from {full_path}")

    except Exception as e:
        # Top-level boundary: report and fall back to the empty state.
        print(f"FATAL: An error occurred while loading city data: {e}")
        CITY_DATA_DF = pd.DataFrame()
        CITIES_LIST = []

# Load city data at script startup.
# This runs at import time; the relative path is resolved by load_city_data()
# against the directory that contains this file.
load_city_data("tools/Climate_offline/data/worldcities.csv")

# Scenario identifiers; one is chosen at random per generated question and
# passed to the emulator tools as their `setting` argument.
# NOTE(review): presumably the SSP (Shared Socioeconomic Pathway) scenarios
# supported by the emulators - confirm against the tools' documentation.
CLIMATE_SETTINGS = ["ssp126", "ssp245", "ssp370", "ssp585"]

class SafeDict(dict):
    """
    Dictionary for use with str.format_map that tolerates unknown fields.

    Looking up a key that is not present does not raise KeyError; instead the
    lookup yields the key wrapped in braces (e.g. 'name' -> '{name}'), so an
    unresolved placeholder remains visible in the formatted text. The missing
    key is not inserted into the dictionary.
    """

    def __missing__(self, key):
        # Doubled braces are literal braces; the inner pair receives the key.
        return "{{{}}}".format(key)

def get_coords_for_city(city_name):
    """
    Look up a city's coordinates in the module-level CITY_DATA_DF.

    The lookup is local (it does not call the external query tool) and never
    raises for "not available" situations: it returns None when no city data
    is loaded, when the table lacks the 'city', 'lat' or 'lng' columns, when
    `city_name` is not a string, or when no row matches.

    Args:
        city_name: Name of the city; compared case-insensitively against the
            'city' column.

    Returns:
        A dict {"latitude": ..., "longitude": ...} taken from the 'lat' and
        'lng' values of the first matching row, or None. The values are
        returned as stored in the DataFrame (typically numpy scalars), so
        callers that need native floats must convert them.
    """
    if CITY_DATA_DF is None or CITY_DATA_DF.empty:
        return None

    # A non-string name cannot match anything and has no .lower().
    if not isinstance(city_name, str):
        return None

    # Guard against a table that was loaded without the expected columns;
    # indexing a missing column would raise KeyError.
    if not {'city', 'lat', 'lng'}.issubset(CITY_DATA_DF.columns):
        return None

    # Case-insensitive search for the city.
    wanted = city_name.lower()
    city_info = CITY_DATA_DF[CITY_DATA_DF['city'].str.lower() == wanted]
    if city_info.empty:
        return None

    # Several rows may share a name; use the first match found.
    first_match = city_info.iloc[0]
    return {
        "latitude": first_match['lat'],
        "longitude": first_match['lng']
    }

# Characters that disqualify a city name from being used in generated text:
# hyphen, single quote and right single quotation mark.
_SPECIAL_NAME_CHARS = ("-", "'", "’")

# Lower-case substrings that mark a tool's string output as an error message.
_ERROR_MARKERS = ("must be between", "error", "provided")

# Numeric token inside free text: optional sign, decimal or integer mantissa,
# optional exponent. The sign is only accepted when it is not glued to a
# preceding digit, so "2030-2050" yields 2030 and 2050 (a range), while
# "is -5" yields -5.
_NUMBER_PATTERN = re.compile(r"(?:(?<!\d)[-+])?(?:\d*\.\d+|\d+)(?:[eE][-+]?\d+)?")

# tool_results key -> placeholder names that receive that value, in order.
# Later derived values may overwrite an alias set here (e.g. aerosol_temp_change).
_RESULT_ALIASES = (
    ('hist_temp', ('hist_temp',)),
    ('future_temp', ('future_temp', 'local_temp_without_aerosols')),
    ('aerosol_temp', (
        'aerosol_temp', 'local_temp_with_aerosols', 'aerosol_adjusted_temp',
        'diy_aerosol_temp', 'diy_aerosol_future_temp', 'local_temp', 'adjusted_temp',
    )),
    # aerosol_temp_change is an alias for templates that use it for the global change.
    ('global_temp_diff_val', ('global_temp_diff', 'aerosol_temp_diff', 'aerosol_temp_change')),
    ('greenhouse_temp', (
        'greenhouse_temp', 'greenhouse_adjusted_temp', 'new_temp', 'combined_temp_change',
    )),
    ('land_sea_str', ('land_sea_current', 'land_sea', 'land_sea_result')),
    ('location_summary_result', ('location_summary_result', 'location_summary')),
)

# (minuend, subtrahend, target placeholders): target = round(minuend - subtrahend, 2).
# Evaluated in order; a rule is applied only when both inputs are available.
# Order matters: the greenhouse rule overwrites the aerosol rule for the
# modified_hist_* targets, and the local difference overwrites the global
# alias of aerosol_temp_change.
_DERIVED_DIFFS = (
    ('local_temp_with_aerosols', 'local_temp_without_aerosols',
     ('local_temp_diff', 'aerosol_temp_change', 'local_temp_difference', 'future_diy_temp_diff')),
    ('future_temp', 'hist_temp',
     ('hist_future_temp_diff', 'future_hist_diff', 'future_temp_change')),
    ('aerosol_temp', 'hist_temp',
     ('modified_hist_future_temp_diff', 'modified_hist_diff')),
    ('greenhouse_temp', 'hist_temp',
     ('modified_hist_future_temp_diff', 'modified_hist_diff')),
    ('modified_hist_future_temp_diff', 'hist_future_temp_diff', ('net_temp_change',)),
    ('greenhouse_temp', 'aerosol_temp', ('temp_diff_aerosol_greenhouse',)),
    ('local_temp_diff', 'global_temp_diff', ('diff_global_local',)),
    ('greenhouse_temp', 'aerosol_temp_diff', ('total_temp_diff',)),
)


def _pick_city_name():
    """Pick a random city name, preferring one without special characters."""
    city_name = None
    for _ in range(1000):  # Safety bound so an all-special list cannot loop forever
        city_name = random.choice(CITIES_LIST)
        if not any(ch in city_name for ch in _SPECIAL_NAME_CHARS):
            return city_name
    print("Warning: Could not find a city name without special characters after 1000 attempts. Using the last one as fallback.")
    return city_name


def _random_params(city_name):
    """Build the random input placeholders (years, scenario, deltas) for one question."""
    year = random.randint(2030, 2090)
    return {
        'city_name': city_name,
        'year': year,
        'year_future': year,
        'year_hist': random.randint(1980, 2014),
        'setting': random.choice(CLIMATE_SETTINGS),
        'delta_CO2': round(random.uniform(5, 50), 2),
        'delta_CH4': round(random.uniform(5, 50), 2),
        'delta_SO2': round(random.uniform(-20, 20), 2),
        'delta_BC': round(random.uniform(-20, 20), 2),
        # For templates that have separate greenhouse gas deltas
        'delta_CO2_greenhouse': round(random.uniform(5, 50), 2),
        'delta_CH4_greenhouse': round(random.uniform(5, 50), 2),
    }


def _to_float(value, tool_name):
    """Convert a tool output to a finite float, or print why not and return None."""
    if value is None:
        print(f"Skipping: Tool '{tool_name}' returned a None value.")
        return None

    parsed = None
    if isinstance(value, (float, int, np.number)):
        parsed = float(value)
    elif isinstance(value, str):
        # Check if this is an error message first
        lowered = value.lower()
        if any(marker in lowered for marker in _ERROR_MARKERS):
            print(f"Skipping: Tool '{tool_name}' returned an error message: {value}")
            return None
        numbers = _NUMBER_PATTERN.findall(value)
        if numbers:
            try:
                # The last number in the text is taken as the result.
                parsed = float(numbers[-1])
            except (ValueError, TypeError):
                parsed = None

    if parsed is None:
        print(f"Skipping: Tool '{tool_name}' returned a non-numeric value or could not parse one: {value}")
        return None
    if not np.isfinite(parsed):
        # NaN/inf would otherwise be written into the answer text as "nan"/"inf".
        print(f"Skipping: Tool '{tool_name}' returned a non-finite value: {value}")
        return None
    return parsed


def _run_tools(tools_required, params):
    """Call every required emulator; return the results dict, or None if a numeric tool fails."""
    tool_results = {}
    lon = params['longitude']
    lat = params['latitude']
    setting = params['setting']
    year_future = params.get('year_future', params['year'])
    modify_points = params['modify_points']
    # Deltas are generated on a 5..50 / -20..20 scale and divided by 100 before
    # being passed on - presumably percent -> fraction; confirm with the emulator API.
    delta_so2 = params['delta_SO2'] / 100
    delta_bc = params['delta_BC'] / 100
    delta_co2 = params.get('delta_CO2_greenhouse', params['delta_CO2']) / 100
    delta_ch4 = params.get('delta_CH4_greenhouse', params['delta_CH4']) / 100

    def record(key, tool_name, raw_value):
        # Store a rounded temperature under `key`; False means "abort this pair".
        temp = _to_float(raw_value, tool_name)
        if temp is None:
            return False
        tool_results[key] = round(temp, 2)
        print(f"DEBUG: {tool_name} returned: {tool_results[key]}")
        return True

    # NOTE: the tools return 2-tuples; which element carries the value differs
    # per tool and is kept exactly as in the original call sites.
    if 'history_temperature' in tools_required:
        val, _ = TOOL_MAP['history_temperature'](longitude=lon, latitude=lat, year=params['year_hist'])
        if not record('hist_temp', 'history_temperature', val):
            return None

    if 'future_temperature' in tools_required:
        val, _ = TOOL_MAP['future_temperature'](longitude=lon, latitude=lat, year=year_future, setting=setting)
        if not record('future_temp', 'future_temperature', val):
            return None

    if 'diy_aerosol' in tools_required:
        val, _ = TOOL_MAP['diy_aerosol'](
            longitude=lon, latitude=lat, year=year_future,
            setting=setting, delta_SO2=delta_so2,
            delta_BC=delta_bc, modify_points=modify_points
        )
        if not record('aerosol_temp', 'diy_aerosol', val):
            return None

    if 'diff_diy_aerosol_mean' in tools_required:
        _, val = TOOL_MAP['diff_diy_aerosol_mean'](
            year=year_future, setting=setting, delta_SO2=delta_so2,
            delta_BC=delta_bc, modify_points=modify_points
        )
        if not record('global_temp_diff_val', 'diff_diy_aerosol_mean', val):
            return None

    if 'diy_greenhouse' in tools_required:
        val, _ = TOOL_MAP['diy_greenhouse'](
            longitude=lon, latitude=lat, year=year_future,
            setting=setting, delta_CO2=delta_co2, delta_CH4=delta_ch4
        )
        if not record('greenhouse_temp', 'diy_greenhouse', val):
            return None

    if 'is_land_or_sea' in tools_required:
        _, result_val = TOOL_MAP['is_land_or_sea'](lon=lon, lat=lat)
        tool_results['land_sea_str'] = "land" if result_val == 1 else "sea"
        print(f"DEBUG: is_land_or_sea returned: {tool_results['land_sea_str']}")

    if 'location_summary' in tools_required:
        _, summary_val = TOOL_MAP['location_summary'](longitude=lon, latitude=lat)
        tool_results['location_summary_result'] = str(summary_val)
        print(f"DEBUG: location_summary returned: {tool_results['location_summary_result']}")

    return tool_results


def _placeholder_values(params, tool_results):
    """Combine inputs, tool results (under all their aliases) and derived differences."""
    final_params = params.copy()

    # Direct mappings from tool results to placeholders
    for result_key, placeholder_names in _RESULT_ALIASES:
        if result_key in tool_results:
            for name in placeholder_names:
                final_params[name] = tool_results[result_key]

    # Derived values
    for minuend, subtrahend, targets in _DERIVED_DIFFS:
        if minuend in final_params and subtrahend in final_params:
            diff = round(final_params[minuend] - final_params[subtrahend], 2)
            for name in targets:
                final_params[name] = diff

    return final_params


def generate_qa_pair(template):
    """
    Generate a single question-answer pair from a template.

    Random inputs (city, years, scenario, emission deltas) are drawn, the
    emulators named in the template's 'tools_required' are called through
    TOOL_MAP, their results and the differences derived from them are mapped
    to placeholder names, and both template strings are filled in.

    Args:
        template: Mapping with 'question_template' and 'answer_template'
            (placeholders written as {name} or {{name}}) and optionally
            'tools_required', the names of the tools to call.

    Returns:
        {"question": str, "answer": str}, or None when the pair has to be
        skipped: no cities loaded, no coordinates for the chosen city, a tool
        returned an unusable value or raised, a template could not be
        formatted, or a placeholder (any '{' or '}') is left in the output.
        The reason is printed to stdout in each case.
    """
    # Templates may use doubled braces; collapse them to single-brace fields.
    question_template = template['question_template'].replace('{{', '{').replace('}}', '}')
    answer_template = template['answer_template'].replace('{{', '{').replace('}}', '}')
    tools_required = template.get('tools_required', [])

    if not CITIES_LIST:
        print("Skipping generation: The list of available cities is empty.")
        return None

    # Generate random data for all potential input placeholders
    params = _random_params(_pick_city_name())

    try:
        # Get coordinates using the robust local function
        coords = get_coords_for_city(params['city_name'])
        if not coords:
            print(f"Skipping: Could not find coordinates for city '{params['city_name']}' in the loaded data.")
            return None

        # Convert numpy types to native Python types for JSON serialization
        params['latitude'] = float(coords['latitude'])
        params['longitude'] = float(coords['longitude'])
        print(f"DEBUG: Successfully found coordinates for '{params['city_name']}': {{'latitude': {params['latitude']}, 'longitude': {params['longitude']}}}")

        # The aerosol tools take the modified location as a "[(lon, lat)]" string.
        params['modify_points'] = f"[({params['longitude']}, {params['latitude']})]"

        tool_results = _run_tools(tools_required, params)
        if tool_results is None:
            return None

        # diff_global_local needs the unmodified future temperature even when
        # the template did not list future_temperature as a required tool.
        if 'diff_global_local' in answer_template and 'future_temp' not in tool_results:
            print("DEBUG: Force-calling future_temperature to calculate diff_global_local.")
            val, _ = TOOL_MAP['future_temperature'](
                longitude=params['longitude'], latitude=params['latitude'],
                year=params.get('year_future', params['year']), setting=params['setting']
            )
            temp = _to_float(val, 'future_temperature')
            if temp is not None:
                tool_results['future_temp'] = round(temp, 2)
                print(f"DEBUG: force-called future_temperature returned: {tool_results['future_temp']}")

        final_params = _placeholder_values(params, tool_results)

    except Exception as e:
        # Boundary around third-party tool code: report, then skip this pair.
        print(f"An unexpected error occurred during tool execution for city '{params.get('city_name')}': {e}")
        traceback.print_exc()
        return None

    # Format the final question and answer. A malformed template (positional
    # field, format spec applied to an unfilled placeholder, attribute access
    # on one, ...) must skip this pair instead of aborting the whole run.
    try:
        final_question = question_template.format_map(SafeDict(final_params))
        final_answer = answer_template.format_map(SafeDict(final_params))
    except (ValueError, KeyError, IndexError, AttributeError, TypeError) as e:
        print(f"Skipping pair: template could not be formatted: {e}")
        return None

    # Check if any placeholders were missed
    if '{' in final_question or '}' in final_question or '{' in final_answer or '}' in final_answer:
        print(f"Skipping pair due to unfilled placeholders. Q: {final_question} A: {final_answer}")
        return None

    return {"question": final_question, "answer": final_answer}

def main():
    """
    Read the templates in topics.json and write generated Q&A pairs to disk.

    For every valid topic (one that has 'question_template', 'answer_template'
    and 'tools_required') up to 5 pairs are generated, allowing at most 25
    calls to generate_qa_pair per topic. All pairs are written as a JSON list
    to generated_questions.json in the current working directory. If
    topics.json is missing or empty, an error is printed and nothing is written.
    """
    try:
        with open("topics.json", 'r', encoding='utf-8') as topics_file:
            topics = json.load(topics_file)
    except FileNotFoundError:
        print("Error: topics.json not found. Please run search_topics.py first.")
        return

    if not topics:
        print("Error: topics.json is empty.")
        return

    required_keys = ('question_template', 'answer_template', 'tools_required')
    num_questions_per_topic = 5
    max_attempts = num_questions_per_topic * 5
    all_generated_pairs = []

    for topic_number, template_to_use in enumerate(topics, start=1):
        print(f"\n--- Generating {num_questions_per_topic} Q&A pairs for topic {topic_number} ---")
        if any(key not in template_to_use for key in required_keys):
            print(f"Skipping invalid topic {topic_number}: {template_to_use}")
            continue

        print(f"Using template: {template_to_use.get('question_template')}")

        topic_pairs = []
        # Bounded retry: a failed generation consumes an attempt but adds nothing.
        for _ in range(max_attempts):
            if len(topic_pairs) >= num_questions_per_topic:
                break
            if not CITIES_LIST:
                print("Stopping generation: City list is not available.")
                break
            qa_pair = generate_qa_pair(template_to_use)
            if qa_pair:
                topic_pairs.append(qa_pair)
                print(f"Generated Q&A pair {len(topic_pairs)}/{num_questions_per_topic} for topic {topic_number}")
        all_generated_pairs.extend(topic_pairs)

    output_file = "generated_questions.json"
    with open(output_file, 'w', encoding='utf-8') as out_file:
        json.dump(all_generated_pairs, out_file, ensure_ascii=False, indent=4)

    print(f"\nSuccessfully generated and saved {len(all_generated_pairs)} Q&A pairs to {output_file}")

# Run the generator only when this file is executed as a script.
# Note that the city data is still loaded at import time (module-level call above).
if __name__ == "__main__":
    main()