#!/usr/bin/env python
# coding: utf-8

# In[ ]:
import openai
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import GPy
import re
import copy
import math
import concurrent.futures

def compute_RegSq(Y, R_function_class, R_predicted):
    """Return the squared-loss regret of the online predictions.

    The result is the cumulative squared error of ``R_predicted`` against
    ``Y`` minus the smallest cumulative squared error obtained by any single
    member of ``R_function_class`` against the same ``Y``.

    Args:
        Y: Observed values. Must support elementwise arithmetic with the
            other two arguments (assumes NumPy arrays -- TODO confirm).
        R_function_class: Predictions of every candidate function. Errors are
            summed along axis 1, so each row is treated as one function.
        R_predicted: Predictions produced by the online regression oracle.

    Returns:
        Online cumulative squared error minus the best per-function
        cumulative squared error.
    """
    # Total squared error accumulated by the online predictions.
    oracle_loss = np.sum((R_predicted - Y) ** 2)

    # One total per candidate function (row-wise sum of squared residuals).
    per_function_loss = np.sum((R_function_class - Y) ** 2, axis=1)

    # Regret relative to the best fixed function in hindsight.
    return oracle_loss - np.min(per_function_loss)

def compute_gamma(Y, R_predicted, K, itr, delta):
    """Return the exploration parameter ``sqrt(K * T / (RegSq + log(2 / delta)))``.

    ``T`` is ``itr + 1``. The regret term ``RegSq`` is currently fixed at 0,
    so ``Y`` and ``R_predicted`` are accepted but not used.

    Args:
        Y: Unused; kept for interface compatibility.
        R_predicted: Unused; kept for interface compatibility.
        K: Multiplier of ``T`` in the numerator (the caller passes the same
            ``K`` it uses as the number of arms -- presumably; verify).
        itr: Zero-based iteration index.
        delta: Value used in ``log(2 / delta)``.

    Returns:
        The value of gamma as a float.
    """
    horizon = itr + 1
    # Regret of the regression oracle is not estimated here; treated as zero.
    regret_sq = 0
    denominator = regret_sq + math.log(2 / (delta))
    return math.sqrt((K * horizon) / denominator)

def choose_next_arm(Y, R_predicted, K, itr, delta, regret_predicted):
    """Sample the index of the next arm by inverse gap weighting.

    Every arm other than the one with the smallest predicted regret receives
    probability ``1 / (mu + gamma * gap)``, where ``gap`` is its predicted
    regret minus the smallest predicted regret, ``mu = K`` and ``gamma`` is
    five times the value returned by ``compute_gamma``. The arm with the
    smallest predicted regret receives the remaining probability mass.

    Args:
        Y: Forwarded to ``compute_gamma``.
        R_predicted: Forwarded to ``compute_gamma``.
        K: Used as ``mu`` and forwarded to ``compute_gamma``.
        itr: Zero-based iteration index, forwarded to ``compute_gamma``.
        delta: Forwarded to ``compute_gamma``.
        regret_predicted: One predicted regret per arm (a sequence of numbers
            or of 0-d arrays).

    Returns:
        Index of the sampled arm, as returned by ``np.random.choice``.
    """
    gamma = 5 * compute_gamma(Y, R_predicted, K, itr, delta)
    mu = K

    # Work in floating point explicitly. Deriving the probability array's
    # dtype from the input (np.zeros_like) yields an integer array for integer
    # predictions, which truncates the weights and breaks the normalisation.
    predicted = np.asarray(regret_predicted, dtype=float)

    # Arm b_t with the minimum predicted regret.
    b_t_idx = np.argmin(predicted)

    # Inverse-gap weights for all arms; the entry of b_t is a placeholder that
    # is zeroed so it does not contribute to the mass assigned to the others.
    probabilities = 1.0 / (mu + gamma * (predicted - predicted[b_t_idx]))
    probabilities[b_t_idx] = 0.0
    probabilities[b_t_idx] = 1.0 - np.sum(probabilities)

    # Numerical-stability handling: clip negative mass and renormalise.
    probabilities = np.maximum(probabilities, 0)
    probabilities /= np.sum(probabilities)

    # Sample an action from the resulting distribution.
    return np.random.choice(len(predicted), p=probabilities)

# Optimization iteration function
def optimize_experiment(X, Y, K, num_iter, xx, f, noise_intensity, delta):
    """Run the LLM-guided arm-selection loop and return the true rewards.

    In each iteration the LLM is queried once per row of ``xx`` for a
    predicted regret, ``choose_next_arm`` samples an arm from those
    predictions, a noisy observation of the chosen arm is simulated, and the
    observation is appended to the local history used for later prompts.

    Args:
        X: Feature history passed to ``make_prompt_experiment``; extended by
            one row per iteration (on a local copy made by ``np.append``).
        Y: Observation history passed to ``make_prompt_experiment``; extended
            by one value per iteration (on a local copy made by ``np.append``).
        K: Forwarded to ``choose_next_arm``.
        num_iter: Number of iterations to run.
        xx: Candidate arms, indexed as ``xx[j, :]`` (presumably a 2-D array
            of candidates x features -- TODO confirm).
        f: Noise-free reward of every arm, indexed by arm index.
        noise_intensity: Variance of the zero-mean Gaussian observation noise
            (its square root is used as the standard deviation).
        delta: Forwarded to ``choose_next_arm``.

    Returns:
        List with the noise-free reward ``f[idx]`` of the arm chosen in each
        iteration.
    """

    def _predicted_regret(history_x, history_y, candidate, step):
        # Query the LLM for one candidate and parse its reply into a float.
        prompt = make_prompt_experiment(history_x, history_y, candidate)
        # Variable temperature: the helper receives the iteration index.
        response = get_chatgpt_response_variable_t(prompt, step, model=gpt_model)
        raw_text = response.choices[0].message.content
        # Keep only digits and decimal points, then drop surplus points.
        numeric_text = ''.join(ch for ch in raw_text if ch.isdigit() or ch == '.')
        numeric_text = remove_extra_dots(numeric_text)
        return np.asarray(numeric_text, dtype=float)

    true_rewards = []
    selected_predictions = []  # Predicted regret of every arm chosen so far

    for itr in np.arange(num_iter):
        # One predicted regret per candidate arm for the current history.
        regret_predicted = [
            _predicted_regret(X, Y, xx[row, :], itr)
            for row in range(xx.shape[0])
        ]

        chosen = choose_next_arm(Y, selected_predictions, K, itr, delta, regret_predicted)
        selected_predictions.append(regret_predicted[chosen])

        # Simulate a noisy observation of the chosen arm.
        chosen_features = xx[chosen, :]
        noisy_reward = f[chosen] + np.random.normal(0, np.sqrt(noise_intensity))
        noisy_reward = round(noisy_reward, 4)

        # Record the reward without noise.
        true_rewards.append(f[chosen])

        print("Iteration: ", itr)

        # Extend the history shown to the LLM in later iterations.
        X = np.append(X, np.expand_dims(chosen_features, axis=0), axis=0)
        Y = np.append(Y, noisy_reward)

    return true_rewards


def optimize_randomsearch(num_iter, xx, f, noise_intensity, K):
    """Run a uniform random-search baseline and return the true rewards.

    Each iteration draws an arm index uniformly from ``range(K)``, simulates
    a noisy observation of that arm, and records the arm's noise-free reward.

    Args:
        num_iter: Number of iterations to run.
        xx: Candidate arms, indexed as ``xx[idx, :]``.
        f: Noise-free reward of every arm, indexed by arm index.
        noise_intensity: Variance of the zero-mean Gaussian observation noise
            (its square root is used as the standard deviation).
        K: Number of arm indices to sample from.

    Returns:
        List with the noise-free reward ``f[idx]`` of the arm drawn in each
        iteration.
    """
    true_rewards = []

    for step in np.arange(num_iter):
        # Uniformly random arm index.
        arm = np.random.choice(list(range(K)))

        # The features and the noisy observation are computed but not used
        # afterwards; they are kept so indexing and the global NumPy random
        # stream behave exactly as before.
        arm_features = xx[arm, :]
        noise = np.random.normal(0, np.sqrt(noise_intensity))
        noisy_reward = round(f[arm] + noise, 4)

        # Record the reward without noise.
        true_rewards.append(f[arm])

        print("Iteration: ", step)

    return true_rewards

