import numpy as np
import cvxpy as cp
import scipy
from scipy.stats import norm
import pickle

# Directed causal graph G. In the ordering below the first 4 nodes are the
# unobservable ones and the last 4 are the observable ones.
node_names = [
    "Alcohol intake",
    "Dietary pattern",
    "Physical activity",
    "Smoking",
    "DM",
    "HPL",
    "HPT",
    "Obesity",
]

# Intermediate matrix T: an entry of 0 means there is no edge between the two
# nodes; a positive entry is the average consensus value for that edge taken
# from the paper.
T = [
    [0, 0, 0, 0, 0.55, 0.57, 0.81, 0.82],
    [0, 0, 0, 0, 0.92, 0.92, 0.92, 0.93],
    [0, 0, 0, 0, 0.91, 0.91, 0.91, 0.91],
    [0, 0, 0, 0, 0.76, 0.67, 0.93, 0],
    [0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0],
]


def _edge_weight(consensus):
    """Rescale one entry of T into an edge weight of A.

    Values in [0.5, 1] are mapped linearly onto [0, 1]; anything that would
    map below 0 (in particular the "no edge" value 0) is clamped to 0. The
    result is rounded to 2 decimals.
    """
    return np.round(max(0, (consensus - 0.5) / 0.5), 2)


# Weighted adjacency matrix A of G, obtained entry by entry from T.
A = [list(map(_edge_weight, consensus_row)) for consensus_row in T]

# Contribution matrix C. Since G is bipartite, C = A + I.
C = np.identity(8) + np.array(A)


# function to compute beta, given h, std_info, alpha, delta:
def compute_beta(h, std_info, alpha, delta):
    """Return the fraction of the optimal effort placed on the first 4 coordinates.

    With mu = C @ h (mean of the contribution vector z) and
    Sigma = C @ cov_h @ C.T (covariance of z), where
    cov_h = std_info**2 * diag(0, 0, 0, 0, 1, 1, 1, 1), this solves

        minimize    ||e||_2
        subject to  -p * ||Sigma^(1/2) e||_2 <= mu^T e - alpha,
                    p = norm.ppf(delta)

    over e in R^8 using cvxpy with the SCS solver.

    Parameters
    ----------
    h : array-like of length 8
        Classifier weight vector; it is multiplied by the module-level
        contribution matrix ``C``.
    std_info : float
        Standard deviation used to scale the covariance of the last 4
        coordinates of h.
    alpha : float
        Constant subtracted on the right-hand side of the constraint.
    delta : float
        Probability passed to ``norm.ppf``. For delta <= 0.5 the multiplier
        ``-p`` is non-negative and the constraint is a second-order cone
        constraint; larger values make the left-hand side concave, which
        cvxpy is expected to reject.

    Returns
    -------
    float or int
        ``||e[:4]|| / ||e||`` for the solver's optimal ``e``, or ``0`` when
        the solver status is neither "optimal" nor "optimal_inaccurate".
    """
    mu = C @ h

    # Only the last 4 coordinates of h carry noise; the first 4 have zero
    # variance.
    noise_selector = np.diag([0.0] * 4 + [1.0] * 4)

    # Sigma = C @ cov_h @ C.T is singular (cov_h has four zero diagonal
    # entries), and scipy.linalg.sqrtm may warn or return an inaccurate or
    # complex result on singular input. The constraint only needs
    # ||Sigma^(1/2) e|| = sqrt(e^T Sigma e), so any factor F with
    # F.T @ F == Sigma gives the same value. F = std_info * D @ C.T is such a
    # factor because D is a 0/1 diagonal matrix (D.T @ D == D).
    sigma_factor = std_info * (noise_selector @ C.T)

    p = norm.ppf(delta)

    # Optimization problem for the optimal effort profile e.
    e = cp.Variable(8)
    objective = cp.Minimize(cp.norm(e, 2))
    constraint = [-p * cp.norm(sigma_factor @ e, 2) <= mu @ e - alpha]

    problem = cp.Problem(objective, constraint)
    problem.solve(solver=cp.SCS)

    if problem.status not in ("optimal", "optimal_inaccurate"):
        # No usable effort profile was found.
        return 0

    effort = e.value
    norm_good = np.linalg.norm(effort[:4])  # effort on the first 4 coordinates
    norm_whole = np.linalg.norm(effort)
    return norm_good / norm_whole


# Classifiers 'h' (weights normalized to 1). There are 4 of them, one per
# observable feature: each puts its entire weight on a single one of the
# last 4 coordinates and 0 everywhere else.
h_list_aux = [
    [int(position == feature) for position in range(8)]
    for feature in range(4, 8)
]
h_list = list(map(np.array, h_list_aux))

# Stand-alone call for the first classifier; its return value is not used.
compute_beta(h_list[0], 1, 10, 0.3)


# data generation
std_val = 0.1
b_std01 = []
delta_values = np.arange(0.01, 0.51, 0.01)
alpha_values = [1, 10]

# b_std01 gets one row per (classifier, alpha) pair -- classifiers in the
# outer loop, alpha values in the inner loop -- and each row holds the beta
# obtained for every delta in delta_values, in order.
for h in h_list:
    for a in alpha_values:
        b_std01.append(
            [compute_beta(h, std_val, a, delta) for delta in delta_values]
        )

# Use a context manager so the output file is flushed and closed as soon as
# the dump finishes instead of being left to the garbage collector.
with open('beta_std01.pickle', 'wb') as out_file:
    pickle.dump(b_std01, out_file)

