import os
import time
import json
import requests
import re
from openai import OpenAI, APIError, APIConnectionError
from openai.types.chat import ChatCompletion


# NOTE(review): no function visible in this file reads this module-level
# constant; the trailing comment records an alternative model name. Confirm
# whether it is still needed.
MODEL_NAME = "gpt-4o-mini" # "deepseek-chat"


# WARNING, THIS SCRIPT DOES NOT WORK ON THE REUTERS (NEWS) PART OF THE GHOSTBUSTERS DATASET
# PLEASE LOOK AT THE SPECIFIC CONVERSION FILE FOR THAT PART OF THE DATASET

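# NOTE: the helper below is commented out, so `direct_api_request` is NOT
# defined at runtime unless this block is re-enabled.
# # Function to directly use requests to debug API responses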
# def direct_api_request(prompt, api_key, base_url="https://api.deepseek.com"):
#     headers = {
#         "Content-Type": "application/json",
#         "Authorization": f"Bearer {api_key}"
#     }
    
#     data = {
#         "model": "deepseek-chat",
#         "messages": [
#             {"role": "system", "content": "You are a helpful assistant."},
#             {"role": "user", "content": prompt}
#         ]
#     }
    
#     try:
#         response = requests.post(
#             f"{base_url}/v1/chat/completions",
#             headers=headers,
#             json=data,
#             timeout=90
#         )
        
#         # Print raw response for debugging
#         print(f"Status code: {response.status_code}")
#         print(f"Response headers: {response.headers}")
        
#         # Try to get response content
#         try:
#             response_text = response.text
#             print(f"Response text (first 200 chars): {response_text[:200]}...")
#         except Exception as e:
#             print(f"Error getting response text: {str(e)}")
        
#         # Try to parse as JSON
#         try:
#             json_data = response.json()
#             if 'choices' in json_data and len(json_data['choices']) > 0:
#                 return json_data['choices'][0]['message']['content']
#             else:
#                 print("Invalid response structure:", json_data)
#                 return None
#         except json.JSONDecodeError as e:
#             print(f"JSON decode error: {str(e)}")
#             return None
        
#     except requests.exceptions.RequestException as e:
#         print(f"Request error: {str(e)}")
#         return None

def prompt_model(prompt, api_key, model="deepseek-chat",
                 base_url="https://api.deepseek.com") -> str:
    """Send ``prompt`` to a chat-completions endpoint and return the reply text.

    The request is issued through the OpenAI client and retried up to three
    times with exponential backoff (2s, then 4s) when the client reports an
    API/connection error or a JSON decoding error.

    Args:
        prompt: User message sent to the model.
        api_key: API key passed to the OpenAI client.
        model: Model identifier sent with the request.
        base_url: Base URL of the OpenAI-compatible API.

    Returns:
        The ``content`` of the first choice's message, exactly as returned
        by the client.

    Raises:
        RuntimeError: If every attempt failed. The last client error is
            attached as ``__cause__``.
    """
    max_retries = 3
    retry_delay = 2  # seconds; doubled after every failed attempt
    last_error = None

    # The client is stateless with respect to a single request, so build it
    # once instead of once per attempt.
    client = OpenAI(api_key=api_key, base_url=base_url)

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
                timeout=30,
            )
            return response.choices[0].message.content
        except (json.JSONDecodeError, APIError, APIConnectionError) as e:
            last_error = e
            print(f"OpenAI client attempt {attempt+1} failed: {str(e)}")
            if attempt < max_retries - 1:
                print(f"Retrying with OpenAI client in {retry_delay} seconds...")
                time.sleep(retry_delay)
                retry_delay *= 2  # Exponential backoff

    # The former "direct requests" fallback called a helper that only exists
    # as commented-out code, which turned every exhausted retry into a
    # NameError. Fail explicitly and keep the real cause attached instead.
    print("All OpenAI client retry attempts failed.")
    raise RuntimeError("Failed to get a valid response from the API") from last_error

def numerical_sort_key(filename):
    """Return a sort key that orders filenames by their first embedded number.

    Names containing digits sort by the integer value of their first digit
    run (so ``2.txt`` precedes ``10.txt``). Names without any digits sort
    after all numbered names, alphabetically among themselves.

    The key is always a tuple whose first element separates the two groups,
    so an ``int`` is never compared with a ``str``; returning a bare int for
    some names and a bare string for others makes ``sorted`` raise
    ``TypeError`` as soon as both kinds appear in one directory.

    Args:
        filename: File name to derive the key from.

    Returns:
        ``(0, number)`` for names containing digits, ``(1, filename)``
        otherwise.
    """
    match = re.search(r'\d+', filename)
    if match:
        return (0, int(match.group()))
    return (1, filename)

def main():
    """Generate an AI-written text for every prompt file not yet processed.

    Reads the API key from the ``DEEPSEEK_API_KEY`` environment variable,
    iterates over the prompt files in numerical order, sends each prompt to
    ``prompt_model`` and writes the reply to a file of the same name in the
    output directory. Files already present in the output directory are
    skipped, so the script can be re-run to resume an interrupted batch.

    Returns:
        None. Problems are reported on stdout; a failure on one file does
        not stop the remaining files from being processed.
    """
    # Check for API key
    api_key = os.environ.get("DEEPSEEK_API_KEY")
    if api_key is None:
        print("Error: DEEPSEEK_API_KEY environment variable not set")
        return

    # Name of the output sub-folder. Kept as a plain local so it does not
    # shadow the module-level MODEL_NAME constant.
    output_subdir = "gpt4o_edited_prompt"
    ai_written_text_directory = f'dataset_creation/ghostbuster-data/wp/{output_subdir}'
    prompt_text_directory = 'dataset_creation/ghostbuster-data/wp/prompts'

    # Without the prompt directory there is nothing to do; report it instead
    # of letting os.listdir raise an uncaught FileNotFoundError.
    if not os.path.isdir(prompt_text_directory):
        print(f"Error: prompt directory not found: {prompt_text_directory}")
        return

    # Create output directory if it doesn't exist
    os.makedirs(ai_written_text_directory, exist_ok=True)

    # Get list of files that have already been processed
    processed_files = set(os.listdir(ai_written_text_directory))

    # Sort prompt files numerically instead of alphabetically
    all_files = sorted(os.listdir(prompt_text_directory), key=numerical_sort_key)

    for index, filename in enumerate(all_files):
        print(f"\nProcessing {index+1}: {filename}")

        # Skip if file has already been processed
        if filename in processed_files:
            print(f"Skipping {filename} - already processed")
            continue

        prompt_text_file = os.path.join(prompt_text_directory, filename)

        # Ignore sub-directories and other non-regular entries
        if not os.path.isfile(prompt_text_file):
            continue

        try:
            # Read the prompt file
            with open(prompt_text_file, 'r', encoding='utf-8') as f:
                prompt = f.read()

            # Generate AI response
            print(f"Generating response for {filename}...")
            ai_generated_text = prompt_model(prompt, api_key)

            if ai_generated_text:
                # Save the response under the same file name as the prompt
                ai_generated_text_file = os.path.join(ai_written_text_directory, filename)
                with open(ai_generated_text_file, "w", encoding='utf-8') as text_file:
                    text_file.write(ai_generated_text)

                print(f"Successfully processed {filename}")
            else:
                print(f"Failed to generate text for {filename}")

            # Add a delay to avoid overwhelming the API
            time.sleep(3)

        except Exception as e:
            # Top-level boundary: report the failure and move on to the next
            # prompt so one bad file does not abort the whole batch.
            print(f"Error processing {filename}: {str(e)}")
            continue

# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
    main()