#!/usr/bin/env python
# coding: utf-8
"""
Synthetic data for knapsack problem
"""

import numpy as np


def genData(num_data, num_features, num_items, dim=1, deg=1, noise_width=0, seed=135):
    """
    Generate synthetic item weights, features and item values for knapsack.

    Item values are a degree-``deg`` polynomial of a random binary linear map
    of the features, rescaled, multiplied by uniform noise and rounded up to
    whole numbers. Item weights are drawn independently of the features.

    Args:
        num_data (int): number of data points
        num_features (int): dimension of features
        num_items (int): number of items
        dim (int): dimension of multi-dimensional knapsack
        deg (int): data polynomial degree, must be a positive built-in int
        noise_width (float): half width of the multiplicative random noise,
            which is drawn uniformly from [1 - noise_width, 1 + noise_width)
        seed (int): random state seed

    Returns:
       tuple: weights of items (np.ndarray of shape (dim, num_items)),
       data features (np.ndarray of shape (num_data, num_features)),
       costs (float64 np.ndarray of shape (num_data, num_items))

    Raises:
        ValueError: if deg is not an int or is not positive
    """
    # positive integer parameter
    if type(deg) is not int:
        raise ValueError("deg = {} should be int.".format(deg))
    if deg <= 0:
        raise ValueError("deg = {} should be positive.".format(deg))
    # set seed
    rnd = np.random.RandomState(seed)
    # number of data points
    n = num_data
    # dimension of features
    p = num_features
    # dimension of problem
    d = dim
    # number of items
    m = num_items
    # weights of items: multiples of 0.01 between 3.00 and 7.99
    weights = rnd.choice(range(300, 800), size=(d, m)) / 100
    # random matrix parameter B
    B = rnd.binomial(1, 0.5, (m, p))
    # feature vectors
    x = rnd.normal(0, 1, (n, p))
    # value of items; allocated as float64 up front so the result dtype does
    # not depend on num_data and no per-row copy of the matrix is needed
    c = np.zeros((n, m), dtype=np.float64)
    # loop invariants
    sqrt_p = np.sqrt(p)
    scale = 3.5 ** deg
    for i in range(n):
        # cost without noise
        values = (np.dot(B, x[i]) / sqrt_p + 3) ** deg + 1
        # rescale
        values *= 5
        values /= scale
        # noise (one draw of size m per data point keeps the random stream
        # identical to drawing row by row)
        epsilon = rnd.uniform(1 - noise_width, 1 + noise_width, m)
        values *= epsilon
        # round up to whole numbers
        c[i, :] = np.ceil(values)
    return weights, x, c
    
def cspo_genData(num_data, num_features, num_items, dim=1, cost_deg=1, weight_deg=1, noise_width=0, seed=135):
    """
    Generate synthetic features, item values and item weights for knapsack.

    In CSPO, weight is also a random variable, which can be learned from the
    features. Both the item values and the item weights are produced the same
    way: a polynomial of a random binary linear map of the features, rescaled,
    multiplied by uniform noise and rounded up to whole numbers. Values and
    weights use independent random matrices and independent noise draws.
    For simplicity, the dimension of the problem is fixed to 1 (d=1).

    Args:
        num_data (int): number of data points
        num_features (int): dimension of features
        num_items (int): number of items
        dim (int): dimension of multi-dimensional knapsack (accepted for
            signature compatibility, not used by this function)
        cost_deg (int): polynomial degree of the item values, must be a
            positive built-in int
        weight_deg (int): polynomial degree of the item weights, must be a
            positive built-in int
        noise_width (float): half width of the multiplicative random noise,
            which is drawn uniformly from [1 - noise_width, 1 + noise_width)
        seed (int): random state seed

    Returns:
       tuple: weights of items (float64 np.ndarray of shape
       (num_data, num_items)), data features (np.ndarray of shape
       (num_data, num_features)), costs (float64 np.ndarray of shape
       (num_data, num_items))

    Raises:
        ValueError: if cost_deg or weight_deg is not an int or is not positive
    """
    # positive integer parameters; both degrees are held to the same rule
    for name, degree in (("cost_deg", cost_deg), ("weight_deg", weight_deg)):
        if type(degree) is not int:
            raise ValueError("{} = {} should be int.".format(name, degree))
        if degree <= 0:
            raise ValueError("{} = {} should be positive.".format(name, degree))
    # set seed
    rnd = np.random.RandomState(seed)
    # number of data points
    n = num_data
    # dimension of features
    p = num_features
    # number of items
    m = num_items
    # random matrix parameters B (drawn in this order: cost, then weight)
    B_cost = rnd.binomial(1, 0.5, (m, p))
    B_weight = rnd.binomial(1, 0.5, (m, p))
    # feature vectors
    x = rnd.normal(0, 1, (n, p))
    # value of items
    c = np.zeros((n, m), dtype=int)
    # weights of items
    weights = np.zeros((n, m), dtype=int)
    # loop invariant
    sqrt_p = np.sqrt(p)

    def noisy_poly(B, features, degree):
        # polynomial response without noise
        values = (np.dot(B, features) / sqrt_p + 3) ** degree + 1
        # rescale
        values *= 5
        values /= 3.5 ** degree
        # multiplicative uniform noise
        values *= rnd.uniform(1 - noise_width, 1 + noise_width, m)
        # round up to whole numbers
        return np.ceil(values)

    for i in range(n):
        # the order of these two calls fixes the order of the noise draws:
        # cost noise first, then weight noise, for every data point
        c[i, :] = noisy_poly(B_cost, x[i], cost_deg)
        weights[i, :] = noisy_poly(B_weight, x[i], weight_deg)

    # float
    c = c.astype(np.float64)
    weights = weights.astype(np.float64)
    return weights, x, c

def cspo_genData_test(num_data, num_features, num_items, dim=1, cost_deg=1, weight_deg=1, noise_width=0, seed=135):
    """
    Generate real-valued synthetic test data for knapsack with random weights.

    In CSPO, weight is also a random variable, which can be learned from the
    features. Compared with ``cspo_genData`` this variant draws the features
    uniformly from [-1, 1), adds a per-item random offset to the item values,
    uses additive Gaussian noise instead of multiplicative uniform noise, and
    does not round the results. It prints a banner to stdout when called.
    For simplicity, the dimension of the problem is fixed to 1 (d=1).

    Args:
        num_data (int): number of data points
        num_features (int): dimension of features
        num_items (int): number of items
        dim (int): dimension of multi-dimensional knapsack (accepted for
            signature compatibility, not used by this function)
        cost_deg (int): polynomial degree of the item values, must be a
            positive built-in int
        weight_deg (int): polynomial degree of the item weights, must be a
            positive built-in int
        noise_width (float): scale of the additive Gaussian noise. Item
            values get noise with standard deviation noise_width; item
            weights of data point i get noise with standard deviation
            noise_width * (p - |sum(x[i])|) / p, where p is num_features
        seed (int): random state seed

    Returns:
       tuple: weights of items (float64 np.ndarray of shape
       (num_data, num_items)), data features (np.ndarray of shape
       (num_data, num_features)), costs (float64 np.ndarray of shape
       (num_data, num_items))

    Raises:
        ValueError: if cost_deg or weight_deg is not an int or is not positive
    """
    print("Test Data Generation Mode \n")
    # positive integer parameters; both degrees are held to the same rule
    for name, degree in (("cost_deg", cost_deg), ("weight_deg", weight_deg)):
        if type(degree) is not int:
            raise ValueError("{} = {} should be int.".format(name, degree))
        if degree <= 0:
            raise ValueError("{} = {} should be positive.".format(name, degree))
    # set seed
    rnd = np.random.RandomState(seed)
    # number of data points
    n = num_data
    # dimension of features
    p = num_features
    # number of items
    m = num_items
    # random parameters, drawn in this order: cost matrix, per-item cost
    # offset, weight matrix
    B_cost = rnd.binomial(1, 0.5, (m, p))
    d_cost = rnd.uniform(0, 1, m)
    B_weight = rnd.binomial(1, 0.5, (m, p))
    # feature vectors
    x = rnd.uniform(-1, 1, (n, p))
    # value of items (np.zeros defaults to float64, no later cast needed)
    c = np.zeros((n, m))
    # weights of items
    weights = np.zeros((n, m))
    # loop invariants
    sqrt_p = np.sqrt(p)
    cost_scale = 3.5 ** cost_deg
    weight_scale = 3.5 ** weight_deg
    for i in range(n):
        # cost without noise
        values = (np.dot(B_cost, x[i]) / sqrt_p + 3) ** cost_deg + 10 + d_cost
        # rescale
        values *= 5
        values /= cost_scale
        # additive Gaussian noise
        values += noise_width * rnd.normal(0, 1, m)
        c[i, :] = values

        # weight without noise
        values = (np.dot(B_weight, x[i]) / sqrt_p + 3) ** weight_deg + 10
        # rescale
        values *= 5
        values /= weight_scale
        # noise level depends on x: it shrinks as |sum of features| grows
        noise_level = noise_width * (p - abs(np.sum(x[i]))) / p
        values += noise_level * rnd.normal(0, 1, m)
        weights[i, :] = values

    return weights, x, c