#!/usr/bin/env python
# coding: utf-8

# In[ ]:
import openai
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import math
import concurrent.futures
import copy
import GPy

# Generate features and theta
def generate_xx_theta(d, K):
    """Sample a random parameter vector and min-max-normalised arm features.

    Args:
        d: Dimension of the parameter vector and of each arm's feature vector.
        K: Number of arms.

    Returns:
        A tuple ``(xx, theta)``: ``xx`` is a ``(K, d)`` array of standard-normal
        draws rescaled column-wise to ``[0, 1]``; ``theta`` is a length-``d``
        vector of standard-normal draws.
    """
    # theta is drawn before xx so the order in which the global NumPy random
    # stream is consumed stays the same as before.
    theta = np.random.randn(d)
    xx = np.random.randn(K, d)

    # Column-wise min-max normalisation.
    col_min = xx.min(axis=0)
    col_span = xx.max(axis=0) - col_min
    # A constant column (always the case when K == 1) has zero span; divide by
    # 1 there so the column maps to 0 instead of producing NaN from 0 / 0.
    safe_span = np.where(col_span > 0, col_span, 1.0)
    xx = (xx - col_min) / safe_span

    return xx, theta

# Compute reward
def compute_f(xx, x, theta, d):
    """Evaluate the normalised reward of arm ``x`` under the active reward model.

    The model is selected by the module-level global ``f_type``:
    1 = linear, 2 = squared, 3 = sine, 4 = Gaussian-process sample.

    Args:
        xx: Feature matrix of all arms; passed through to the selected model.
        x: Feature vector of the arm to evaluate.
        theta: Parameter vector (not passed to the GP model).
        d: Feature dimension (only passed to the GP model).

    Returns:
        Whatever the selected ``compute_f_*`` function returns.

    Raises:
        ValueError: If ``f_type`` is not one of 1, 2, 3 or 4.
    """
    if f_type == 1:
        return compute_f_linear(xx, x, theta)
    if f_type == 2:
        return compute_f_square(xx, x, theta)
    if f_type == 3:
        return compute_f_sin(xx, x, theta)
    if f_type == 4:
        return compute_f_GP(xx, x, d)

    # An unsupported value used to fall through and return None, which only
    # surfaced later as an unrelated arithmetic error in the caller.
    raise ValueError(f"Unsupported f_type: {f_type!r} (expected 1, 2, 3 or 4)")

# Linear function
def compute_f_linear(xx, x, theta):
    """Return the linear reward ``theta . x``, min-max normalised over all arms.

    Args:
        xx: Feature matrix with one row per arm; its rewards define the
            normalisation range.
        x: Feature vector of the arm to evaluate.
        theta: Parameter vector.

    Returns:
        ``(theta . x - min) / (max - min)``, with min/max taken over the rewards
        of the rows of ``xx``. Values numerically close to 0 or 1 are snapped to
        exactly 0 or 1. Returns 0 when every arm has the same reward.
    """
    all_rewards = np.dot(xx, theta)
    lowest = np.min(all_rewards)
    spread = np.max(all_rewards) - lowest

    # Every arm has the same reward, so the normalisation is undefined (0 / 0);
    # report 0 instead of propagating NaN/inf.
    if spread == 0:
        return 0

    f_x = (np.dot(x, theta) - lowest) / spread

    # Snap floating-point near-misses onto the exact interval end points.
    if np.isclose(f_x, 0):
        return 0
    if np.isclose(f_x, 1):
        return 1
    return f_x

# sin(2 * θ^T x)
def compute_f_sin(xx, x, theta):
    """Return the reward ``sin(2 * theta . x)``, min-max normalised over all arms.

    Args:
        xx: Feature matrix with one row per arm; its rewards define the
            normalisation range.
        x: Feature vector of the arm to evaluate.
        theta: Parameter vector.

    Returns:
        ``(sin(2 * theta . x) - min) / (max - min)``, with min/max taken over
        the rewards of the rows of ``xx``. Values numerically close to 0 or 1
        are snapped to exactly 0 or 1. Returns 0 when every arm has the same
        reward.
    """
    all_rewards = np.sin(2 * np.dot(xx, theta))
    lowest = np.min(all_rewards)
    spread = np.max(all_rewards) - lowest

    # Every arm has the same reward, so the normalisation is undefined (0 / 0);
    # report 0 instead of propagating NaN/inf.
    if spread == 0:
        return 0

    f_x = (math.sin(2 * np.dot(x, theta)) - lowest) / spread

    # Snap floating-point near-misses onto the exact interval end points.
    if np.isclose(f_x, 0):
        return 0
    if np.isclose(f_x, 1):
        return 1
    return f_x

# (θ^T x)^2
def compute_f_square(xx, x, theta):
    """Return the reward ``(theta . x) ** 2``, min-max normalised over all arms.

    Args:
        xx: Feature matrix with one row per arm; its rewards define the
            normalisation range.
        x: Feature vector of the arm to evaluate.
        theta: Parameter vector.

    Returns:
        ``((theta . x) ** 2 - min) / (max - min)``, with min/max taken over the
        rewards of the rows of ``xx``. Values numerically close to 0 or 1 are
        snapped to exactly 0 or 1. Returns 0 when every arm has the same reward.
    """
    all_rewards = np.square(np.dot(xx, theta))
    lowest = np.min(all_rewards)
    spread = np.max(all_rewards) - lowest

    # Every arm has the same reward (e.g. arms x and -x), so the normalisation
    # is undefined (0 / 0); report 0 instead of propagating NaN/inf.
    if spread == 0:
        return 0

    f_x = ((np.dot(x, theta)) ** 2 - lowest) / spread

    # Snap floating-point near-misses onto the exact interval end points.
    if np.isclose(f_x, 0):
        return 0
    if np.isclose(f_x, 1):
        return 1
    return f_x

# Gaussian Process (GP)
def compute_f_GP(xx, x, d):
    """Return a min-max normalised Gaussian-process sample reward for arm ``x``.

    One function sample is drawn over all arms from a zero-mean GP with an RBF
    kernel (lengthscale 1, variance 1); the entry belonging to ``x`` is then
    min-max normalised over that sample.

    NOTE(review): a fresh sample is drawn on every call, so two calls with the
    same ``xx`` are generally normalised against different function draws --
    confirm this is intended.

    Args:
        xx: Feature matrix with one row per arm.
        x: Feature vector of the arm to evaluate; matched against the rows of
            ``xx`` by exact equality.
        d: Input dimension handed to the RBF kernel.

    Returns:
        The normalised sample value for ``x``. Values numerically close to 0 or
        1 are snapped to exactly 0 or 1. Returns 0 when every sampled value is
        identical (e.g. a single arm).
    """
    kernel = GPy.kern.RBF(input_dim=d, lengthscale=1, variance=1)
    # Covariance between every pair of arms.
    cov = kernel.K(xx, xx)
    mean = np.zeros(cov.shape[0])
    # One joint draw over all arms, stored as a column vector.
    sample = np.random.multivariate_normal(mean, cov, 1).reshape(-1, 1)

    lowest = np.min(sample)
    spread = np.max(sample) - lowest

    # Checked only after sampling so the random stream is consumed exactly as
    # before. With zero spread the normalisation is undefined (0 / 0); report 0
    # instead of propagating NaN.
    if spread == 0:
        return 0

    # Index of the first row of xx equal to x. argmax over an all-False mask is
    # 0, so an x that is not a row of xx silently resolves to the first arm.
    arm_index = np.argmax(np.all(xx == x, axis=1))

    f_x = (sample[arm_index] - lowest) / spread
    # Convert the one-element array to a plain scalar.
    f_x = f_x.item() if isinstance(f_x, np.ndarray) else f_x

    # Snap floating-point near-misses onto the exact interval end points.
    if np.isclose(f_x, 0):
        return 0
    if np.isclose(f_x, 1):
        return 1
    return f_x

# Return duel result
def observe_duel(xx, x1, x2, theta, noise_intensity, d):
    """Simulate one noisy pairwise comparison between arms ``x1`` and ``x2``.

    The preference probability is a logistic function of ten times the reward
    difference ``f(x1) - f(x2)``. Zero-mean Gaussian noise with standard
    deviation ``noise_intensity`` is added and the result is clamped to
    ``[0, 1]`` before a single Bernoulli draw.

    Args:
        xx: Feature matrix of all arms, forwarded to ``compute_f``.
        x1: Feature vector of the first arm.
        x2: Feature vector of the second arm.
        theta: Parameter vector, forwarded to ``compute_f``.
        noise_intensity: Standard deviation of the noise added to the
            preference probability.
        d: Feature dimension, forwarded to ``compute_f``.

    Returns:
        The outcome of the Bernoulli draw (0 or 1); 1 becomes more likely as
        the reward of ``x1`` exceeds that of ``x2``.
    """
    # x1 is evaluated before x2, matching left-to-right evaluation of the
    # difference (this matters if compute_f itself draws random numbers).
    reward_first = compute_f(xx, x1, theta, d)
    reward_second = compute_f(xx, x2, theta, d)
    prefer_prob = 1 / (1 + np.exp(-10 * (reward_first - reward_second)))

    # Perturb the probability, then clamp it back into [0, 1].
    noisy_prob = prefer_prob + np.random.normal(0, noise_intensity)
    capped = min(1, noisy_prob)
    success_prob = max(0, capped)

    return np.random.binomial(n=1, p=success_prob)

