#!/usr/bin/env python
# coding: utf-8
"""
Synthetic data for cover problem
"""

import numpy as np

    
def cspo_genData(num_data, num_features, num_items, num_reqs=1, cost_deg=1, weight_deg=1, noise_width=0, seed=135):
    """
    Generate synthetic features, requirement matrices and costs for the cover problem.

    Features are standard normal. Each instance gets its own requirement
    matrix G, drawn from Gamma distributions whose shape parameters are shared
    across instances (G is sampled independently of the features here). Costs
    are a degree-``cost_deg`` polynomial of a random binary projection of the
    features, rescaled, perturbed multiplicatively and rounded up.

    Args:
        num_data (int): number of data points
        num_features (int): dimension of features
        num_items (int): number of items
        num_reqs (int): number of requirements
        cost_deg (int): polynomial degree of the feature-to-cost mapping;
            must be a positive Python or NumPy integer
        weight_deg (int): currently unused; kept for interface compatibility
        noise_width (float): noise level. When positive, Gaussian noise with
            standard deviation ``0.5 * noise_width`` is added to G. Every cost
            is multiplied by a factor drawn uniformly from
            ``[1 - noise_width, 1 + noise_width]``.
        seed (int): random state seed

    Returns:
        tuple: G of items (np.ndarray, shape (num_data, num_reqs, num_items)),
        data features (np.ndarray, shape (num_data, num_features)),
        costs (np.ndarray, shape (num_data, num_items), integer-valued floats)

    Raises:
        ValueError: if ``cost_deg`` is not a positive integer
    """
    # positive integer parameter (NumPy integers, e.g. from np.arange, are accepted too)
    if not isinstance(cost_deg, (int, np.integer)) or cost_deg <= 0:
        raise ValueError(f"cost_deg = {cost_deg} should be a positive integer.")
    # normalise so NumPy and Python integers yield bit-identical results
    cost_deg = int(cost_deg)

    # NOTE: the order of the random draws below is part of the contract --
    # changing it would change the data produced for a given seed.
    rnd = np.random.RandomState(seed)
    n, p, m = num_data, num_features, num_items

    # 1. Generate feature vectors: x_i ~ N(0, I_p)
    x = rnd.normal(0, 1, size=(n, p))

    # 2. Generate requirement matrices: one Gamma shape parameter per
    #    (requirement, item) pair, shared by all n instances
    P = rnd.uniform(1, 10.0, size=(num_reqs, m))
    G = rnd.gamma(shape=P[:, None, :], scale=1.0, size=(num_reqs, n, m))
    G = np.transpose(G, (1, 0, 2))  # (n, num_reqs, m)

    # Add Gaussian noise, then clip from below so every entry stays positive
    if noise_width > 0:
        G += noise_width * rnd.normal(0, .5, size=(n, num_reqs, m))
    G = np.maximum(G, 0.01)

    # 3. Generate costs from the features
    # random matrix parameter B
    B_cost = rnd.binomial(1, 0.5, (m, p))
    # loop invariants
    sqrt_p = np.sqrt(p)
    rescale = 3.5 ** cost_deg
    c = np.zeros((n, m))
    # Kept as a per-row loop (rather than one matrix product) so the floating
    # point results, and hence the ceil'd costs, match earlier versions exactly.
    for i in range(n):
        # cost without noise
        values = (np.dot(B_cost, x[i].reshape(p, 1)).T / sqrt_p + 3) ** cost_deg + 1
        # rescale
        values *= 5
        values /= rescale
        # multiplicative noise
        epsilon = rnd.uniform(1 - noise_width, 1 + noise_width, m)
        values *= epsilon
        # round up to integer-valued costs
        c[i, :] = np.ceil(values)

    return G.astype(np.float64), x.astype(np.float64), c.astype(np.float64)



def cspo_genData_test(num_data, num_features, num_items, num_reqs=1, cost_deg=1, weight_deg=1, noise_width=0, seed=136):
    """
    Generate synthetic test-mode features, requirement matrices and costs for the cover problem.

    Features and requirement matrices are produced the same way as in the
    training generator (standard normal features; Gamma-distributed G with
    shape parameters shared across instances, sampled independently of the
    features). The costs differ: a per-item offset ``10 + d`` with
    ``d ~ U(0, 1)`` is added to the polynomial term, the noise is additive
    Gaussian, and the costs are not rounded.

    Args:
        num_data (int): number of data points
        num_features (int): dimension of features
        num_items (int): number of items
        num_reqs (int): number of requirements
        cost_deg (int): polynomial degree of the feature-to-cost mapping;
            must be a positive Python or NumPy integer
        weight_deg (int): currently unused; kept for interface compatibility
        noise_width (float): noise level. When positive, Gaussian noise with
            standard deviation ``0.5 * noise_width`` is added to G. Gaussian
            noise with standard deviation ``noise_width`` is added to every cost.
        seed (int): random state seed

    Returns:
        tuple: G of items (np.ndarray, shape (num_data, num_reqs, num_items)),
        data features (np.ndarray, shape (num_data, num_features)),
        costs (np.ndarray, shape (num_data, num_items))

    Raises:
        ValueError: if ``cost_deg`` is not a positive integer
    """
    print("Test Data Generation Mode \n")
    # positive integer parameter (NumPy integers, e.g. from np.arange, are accepted too)
    if not isinstance(cost_deg, (int, np.integer)) or cost_deg <= 0:
        raise ValueError(f"cost_deg = {cost_deg} should be a positive integer.")
    # normalise so NumPy and Python integers yield bit-identical results
    cost_deg = int(cost_deg)

    # NOTE: the order of the random draws below is part of the contract --
    # changing it would change the data produced for a given seed.
    rnd = np.random.RandomState(seed)
    n, p, m = num_data, num_features, num_items

    # 1. Generate feature vectors: x_i ~ N(0, I_p)
    x = rnd.normal(0, 1, size=(n, p))

    # 2. Generate requirement matrices: one Gamma shape parameter per
    #    (requirement, item) pair, shared by all n instances
    P = rnd.uniform(1, 10.0, size=(num_reqs, m))
    G = rnd.gamma(shape=P[:, None, :], scale=1.0, size=(num_reqs, n, m))
    G = np.transpose(G, (1, 0, 2))  # (n, num_reqs, m)

    # Add Gaussian noise, then clip from below so every entry stays positive
    if noise_width > 0:
        G += noise_width * rnd.normal(0, 0.5, size=(n, num_reqs, m))
    G = np.maximum(G, 0.01)

    # 3. Generate costs from the features
    # random matrix parameter B
    B_cost = rnd.binomial(1, 0.5, (m, p))
    # per-item offset added to the polynomial term
    d_cost = rnd.uniform(0, 1, m)
    # loop invariants
    sqrt_p = np.sqrt(p)
    rescale = 3.5 ** cost_deg
    c = np.zeros((n, m))
    # Kept as a per-row loop (rather than one matrix product) so the floating
    # point results match earlier versions exactly.
    for i in range(n):
        # cost without noise
        values = (np.dot(B_cost, x[i].reshape(p, 1)).T / sqrt_p + 3) ** cost_deg + 10 + d_cost
        # rescale
        values *= 5
        values /= rescale
        # additive Gaussian noise (no rounding in test mode)
        epsilon = noise_width * rnd.normal(0, 1, m)
        values += epsilon
        c[i, :] = values

    return G.astype(np.float64), x.astype(np.float64), c.astype(np.float64)

    