#!/usr/bin/env python
# coding: utf-8
"""
Synthetic data for packing problem
"""

import numpy as np
import os
    
def cspo_genData(num_data, num_features, num_paths, num_edges, cost_deg=1, weight_deg=1, noise_width=0, seed=135):
    """
    Generate synthetic features, path costs and edge capacities for the packing problem.

    The path/edge matrix is not sampled: it is read from the POLSKA text file and
    the same matrix is copied to every data point. Features are standard normal,
    costs are a polynomial of a random binary projection of the features with
    multiplicative uniform noise, rounded up to integers.

    Args:
        num_data (int): number of data points
        num_features (int): dimension of features
        num_paths (int): number of paths (last axis of G, length of a cost vector)
        num_edges (int): number of edges (middle axis of G, length of a capacity vector)
        cost_deg (int): polynomial degree of the feature-to-cost mapping
        weight_deg (int): unused; kept so existing callers keep working
        noise_width (float): half width of the multiplicative uniform cost noise
        seed (int): random state seed

    Returns:
        tuple: four float64 arrays
            G (num_data, num_edges, num_paths): loaded matrix repeated per data point,
            x (num_data, num_features): features,
            c (num_data, num_paths): costs,
            lhs (num_data, num_edges): edge capacities

    Raises:
        ValueError: if cost_deg is not a positive integer, or if the loaded
            matrix cannot be broadcast to (num_edges, num_paths).
        OSError: if the matrix file cannot be read.
    """
    # positive integer parameter
    if not isinstance(cost_deg, int) or cost_deg <= 0:
        raise ValueError(f"cost_deg = {cost_deg} should be a positive integer.")

    rnd = np.random.RandomState(seed)

    # Prefer the correctly spelled data directory (the one cspo_genData_test
    # reads); fall back to the historical misspelling so a checkout that
    # really has a "paking_data" directory keeps working.
    candidates = (
        './packing_data/POLSKA/POLSKA_G011.txt',
        './paking_data/POLSKA/POLSKA_G011.txt',
    )
    matrix_path = next((p for p in candidates if os.path.exists(p)), candidates[0])
    G_ind = np.loadtxt(matrix_path)

    # 1. Generate feature vectors: x_i ~ N(0, I_p)
    x = rnd.normal(0, 1, size=(num_data, num_features))

    # 2. Every data point shares the same loaded matrix
    G = np.zeros((num_data, num_edges, num_paths))
    G[:] = G_ind

    # 3. Generate edge capacity: uniform base plus Gaussian jitter
    #    (uniform is drawn before normal, which fixes the random stream order)
    lhs = rnd.uniform(1, 10.0, size=(num_data, num_edges)) + rnd.normal(0, 0.5, size=(num_data, num_edges))

    # 4. Generate path cost
    B_cost = rnd.binomial(1, 0.5, (num_paths, num_features))
    feature_scale = np.sqrt(num_features)
    rescale_div = 3.5 ** cost_deg
    c = np.zeros((num_data, num_paths))
    # The loop is kept per sample (same arithmetic and same order of random
    # draws as before) so that seeded outputs stay reproducible.
    for i in range(num_data):
        # cost without noise
        values = (np.dot(B_cost, x[i].reshape(num_features, 1)).T / feature_scale + 3) ** cost_deg + 1
        # rescale
        values *= 5
        values /= rescale_div
        # multiplicative noise in [1 - noise_width, 1 + noise_width]
        epsilon = rnd.uniform(1 - noise_width, 1 + noise_width, num_paths)
        values *= epsilon
        # round up to integer-valued costs
        c[i, :] = np.ceil(values)

    return G.astype(np.float64), x.astype(np.float64), c.astype(np.float64), lhs.astype(np.float64)

    
def cspo_genData_test(num_data, num_features, num_paths, num_edges, cost_deg=1, weight_deg=1, noise_width=0, seed=135):
    """
    Generate the test-mode synthetic data set for the packing problem.

    Features are standard normal. The path/edge matrix is read from the POLSKA
    text file and repeated for every data point. Capacities are uniform on
    [1, 10). Costs are a polynomial of a random binary projection of the
    features, scaled, multiplied by uniform noise and rounded up.

    NOTE: the random state is re-created from the same seed after the features
    are drawn, so the capacities and costs are drawn from a stream that starts
    at the same state as the one that produced the features.

    Args:
        num_data (int): number of data points
        num_features (int): dimension of features
        num_paths (int): number of paths (last axis of G, length of a cost vector)
        num_edges (int): number of edges (middle axis of G, length of a capacity vector)
        cost_deg (int): polynomial degree of the feature-to-cost mapping
        weight_deg (int): unused
        noise_width (float): half width of the multiplicative uniform cost noise
        seed (int): random state seed

    Returns:
        tuple: four float64 arrays
            G (num_data, num_edges, num_paths), x (num_data, num_features),
            c (num_data, num_paths), lhs (num_data, num_edges)

    Raises:
        ValueError: if cost_deg is not a positive integer.
    """
    # cost_deg must be a positive integer
    if not (isinstance(cost_deg, int) and cost_deg > 0):
        raise ValueError(f"cost_deg = {cost_deg} should be a positive integer.")

    # Features: x_i ~ N(0, I_p)
    feature_rng = np.random.RandomState(seed)
    x = feature_rng.normal(0, 1, size=(num_data, num_features))

    # The same loaded matrix is written into the slice of every data point
    path_matrix = np.loadtxt('./packing_data/POLSKA/POLSKA_G011.txt')
    G = np.zeros((num_data, num_edges, num_paths))
    for sample_slice in G:
        sample_slice[...] = path_matrix

    # Fresh generator with the same seed for capacities and costs
    rng = np.random.RandomState(seed)
    lhs = rng.uniform(1, 10.0, size=(num_data, num_edges))

    # Random binary projection from features to paths
    B_cost = rng.binomial(1, 0.5, (num_paths, num_features))

    c = np.zeros((num_data, num_paths))
    for row, feat in enumerate(x):
        # noiseless polynomial cost
        projected = np.dot(B_cost, feat.reshape(num_features, 1)).T / np.sqrt(num_features) + 3
        # rescale: multiply by 5, then divide by 3.5 ** cost_deg
        scaled = (projected ** cost_deg + 1) * 5 / 3.5 ** cost_deg
        # multiplicative noise, then round up to integers
        noise = rng.uniform(1 - noise_width, 1 + noise_width, num_paths)
        c[row, :] = np.ceil(scaled * noise)

    outputs = (G, x, c, lhs)
    return tuple(arr.astype(np.float64) for arr in outputs)



# def cspo_genData_test(num_data, num_features, num_paths, num_edges, cost_deg=1, weight_deg=1, noise_width=0, seed=135):
#     """
#     A function to generate synthetic data and features for packing. In CSPO, weight is also a random variable,
#     which can be learned from the features. This function is used to generate data for CSPO.

#     Args:
#         num_data (int): number of data points
#         num_features (int): dimension of features
#         num_items (int): number of items
#         dim (int): dimension of multi-dimensional packing
#         deg (int): data polynomial degree
#         noise_width (float): half width of data random noise
#         seed (int): random state seed

#     Returns:
#        tuple: G of items (np.ndarray), data features (np.ndarray), costs (np.ndarray)
#     """
#     print("Test Data Generation Mode \n")
#     # positive integer parameter
#     if not isinstance(cost_deg, int) or cost_deg <= 0:
#         raise ValueError(f"cost_deg = {cost_deg} should be a positive integer.")
    
#     rnd = np.random.RandomState(seed)


#     # 1. Generate feature vectors: x_i ~ N(0, I_p)
#     x = rnd.normal(0, 1, size =(num_data, num_features))


#     # 2. Generate concentration matrix
#     G = rnd.uniform(1, 10.0, size=(num_data, num_edges, num_paths))
#     print(f'range of G: {G.min()}, {G.max()} G shape: {G.shape}')

#     lhs = rnd.uniform(1, 10.0, size=(num_data,num_edges))
    
#     # random matrix parameter B
#     B_cost = rnd.binomial(1, 0.5, (num_paths, num_features))
#     d_cost = rnd.uniform(0,1,num_paths)
#     # value of items
#     c = np.zeros((num_data, num_paths))
#     for i in range(num_data):
#         # cost without noise
#         values = (np.dot(B_cost, x[i].reshape(num_features, 1)).T / np.sqrt(num_features) + 3) ** cost_deg + 10 + d_cost
#         # rescale
#         values *= 5
#         values /= 3.5 ** cost_deg
#         # noise
# #         noise_level = noise_width * (2 * p - 2 * abs(np.sum(x[i]))) / (2*p)
# #         epsilon = rnd.uniform(1 - noise_width, 1 + noise_width, m)
#         epsilon = noise_width * rnd.normal(0,1,num_paths)
#         values += epsilon
#         # convert into int
# #         values = np.ceil(values)
#         c[i, :] = values

#     return G.astype(np.float64), x.astype(np.float64), c.astype(np.float64), lhs.astype(np.float64) 

    