#!/usr/bin/env python
# coding: utf-8
"""
Synthetic data for packing problem
"""

import numpy as np
import networkx as nx
import os


# Number of nodes in the random directed graph built by the generators below.
# Node 0 is used as the source and node ``num_nodes - 1`` as the sink; the
# graph is requested with ``num_nodes * 2`` edges.
num_nodes = 10
 
def cspo_genData(num_data, num_features, num_paths, num_edges, cost_deg=1, weight_deg=1, noise_width=0, seed=135):
    """
    Generate synthetic features, path costs, an edge-path incidence tensor and
    noisy edge capacities on a random directed graph.

    A directed G(n, m) random graph with ``num_nodes`` nodes and
    ``num_nodes * 2`` edges is drawn (re-drawn until node 0 can reach node
    ``num_nodes - 1``). ``num_paths`` simple paths (at most 6 edges each) from
    source to sink are sampled without replacement and encoded as a 0/1
    edge-by-path matrix that is repeated for every data point.

    Args:
        num_data (int): number of data points
        num_features (int): dimension of the feature vectors
        num_paths (int): number of source-to-sink paths to sample
        num_edges (int): ignored; overwritten by the number of edges of the
            generated graph (kept for interface compatibility)
        cost_deg (int): polynomial degree of the cost model, positive integer
        weight_deg (int): unused (kept for interface compatibility)
        noise_width (float): half width of the multiplicative uniform noise
            on costs; also the scale of the Gaussian noise added to capacities
        seed (int): random state seed

    Returns:
        tuple: G (num_data, graph_edges, num_paths), features x
        (num_data, num_features), costs c (num_data, num_paths) and edge
        capacities lhs (num_data, graph_edges), all float64

    Raises:
        ValueError: if ``cost_deg`` is not a positive integer, or if the graph
            has fewer than ``num_paths`` simple source-to-sink paths
    """
    # positive integer parameter
    if not isinstance(cost_deg, int) or cost_deg <= 0:
        raise ValueError(f"cost_deg = {cost_deg} should be a positive integer.")

    rnd = np.random.RandomState(seed)

    # 1. Generate feature vectors: x_i ~ N(0, I_p)
    x = rnd.normal(0, 1, size=(num_data, num_features))

    # 2. Generate a random directed graph and make sure the sink is reachable
    G_nx = nx.gnm_random_graph(num_nodes, num_nodes * 2, directed=True, seed=seed)
    source, sink = 0, num_nodes - 1
    while not nx.has_path(G_nx, source, sink):
        G_nx = nx.gnm_random_graph(num_nodes, num_nodes * 2, directed=True, seed=rnd.randint(1000))

    edge_list = list(G_nx.edges())
    edge_to_idx = {e: i for i, e in enumerate(edge_list)}
    # The caller-supplied num_edges is replaced by the actual edge count.
    num_edges = len(edge_list)

    # Sample paths
    all_paths = list(nx.all_simple_paths(G_nx, source=source, target=sink, cutoff=6))
    if len(all_paths) < num_paths:
        raise ValueError("Not enough paths from source to sink.")
    # Sample indices rather than the paths themselves: RandomState.choice only
    # accepts a 1-D array-like or an int, and a list of variable-length node
    # lists cannot be converted into a 1-D array.
    chosen_idx = rnd.choice(len(all_paths), size=num_paths, replace=False)
    chosen_paths = [all_paths[k] for k in chosen_idx]

    # Build G matrix: G[:, e, j] = 1 when edge e lies on path j
    G = np.zeros((num_data, num_edges, num_paths), dtype=np.float32)
    for j, path in enumerate(chosen_paths):
        for u, v in zip(path[:-1], path[1:]):
            G[:, edge_to_idx[(u, v)], j] = 1.0

    # 3. Generate edge capacity: uniform base plus Gaussian noise scaled by
    #    noise_width (the uniform draw must come first to keep the stream order)
    lhs = rnd.uniform(1, 10.0, size=(num_data, num_edges))
    lhs = lhs + noise_width * rnd.normal(0, 1, size=(num_data, num_edges))

    # 4. Generate path cost
    B_cost = rnd.binomial(1, 0.5, (num_paths, num_features))
    c = np.zeros((num_data, num_paths))
    for i in range(num_data):
        # cost without noise
        values = (np.dot(B_cost, x[i].reshape(num_features, 1)).T / np.sqrt(num_features) + 3) ** cost_deg + 1
        # rescale
        values *= 5
        values /= 3.5 ** cost_deg
        # multiplicative noise
        epsilon = rnd.uniform(1 - noise_width, 1 + noise_width, num_paths)
        values *= epsilon
        # convert into int
        c[i, :] = np.ceil(values)

    return G.astype(np.float64), x.astype(np.float64), c.astype(np.float64), lhs.astype(np.float64)

    
def cspo_genData_test(num_data, num_features, num_paths, num_edges, cost_deg=1, weight_deg=1, noise_width=0, seed=135):
    """
    Generate synthetic features, path costs, an edge-path incidence tensor and
    noise-free edge capacities on a random directed graph.

    Follows the same steps as ``cspo_genData`` except that the edge capacities
    are drawn from the uniform distribution only, with no additive Gaussian
    noise. Because no Gaussian samples are consumed here, the cost-related
    draws that follow come from a different position in the random stream
    than in ``cspo_genData`` for the same seed.

    Args:
        num_data (int): number of data points
        num_features (int): dimension of the feature vectors
        num_paths (int): number of source-to-sink paths to sample
        num_edges (int): ignored; overwritten by the number of edges of the
            generated graph (kept for interface compatibility)
        cost_deg (int): polynomial degree of the cost model, positive integer
        weight_deg (int): unused (kept for interface compatibility)
        noise_width (float): half width of the multiplicative uniform noise
            on costs
        seed (int): random state seed

    Returns:
        tuple: G (num_data, graph_edges, num_paths), features x
        (num_data, num_features), costs c (num_data, num_paths) and edge
        capacities lhs (num_data, graph_edges), all float64

    Raises:
        ValueError: if ``cost_deg`` is not a positive integer, or if the graph
            has fewer than ``num_paths`` simple source-to-sink paths
    """
    # positive integer parameter
    if not isinstance(cost_deg, int) or cost_deg <= 0:
        raise ValueError(f"cost_deg = {cost_deg} should be a positive integer.")

    rnd = np.random.RandomState(seed)

    # 1. Generate feature vectors: x_i ~ N(0, I_p)
    x = rnd.normal(0, 1, size=(num_data, num_features))

    # 2. Generate a random directed graph and make sure the sink is reachable
    G_nx = nx.gnm_random_graph(num_nodes, num_nodes * 2, directed=True, seed=seed)
    source, sink = 0, num_nodes - 1
    while not nx.has_path(G_nx, source, sink):
        G_nx = nx.gnm_random_graph(num_nodes, num_nodes * 2, directed=True, seed=rnd.randint(1000))

    edge_list = list(G_nx.edges())
    edge_to_idx = {e: i for i, e in enumerate(edge_list)}
    # The caller-supplied num_edges is replaced by the actual edge count.
    num_edges = len(edge_list)

    # Sample paths
    all_paths = list(nx.all_simple_paths(G_nx, source=source, target=sink, cutoff=6))
    if len(all_paths) < num_paths:
        raise ValueError("Not enough paths from source to sink.")
    # Sample indices rather than the paths themselves: RandomState.choice only
    # accepts a 1-D array-like or an int, and a list of variable-length node
    # lists cannot be converted into a 1-D array.
    chosen_idx = rnd.choice(len(all_paths), size=num_paths, replace=False)
    chosen_paths = [all_paths[k] for k in chosen_idx]

    # Build G matrix: G[:, e, j] = 1 when edge e lies on path j
    G = np.zeros((num_data, num_edges, num_paths), dtype=np.float32)
    for j, path in enumerate(chosen_paths):
        for u, v in zip(path[:-1], path[1:]):
            G[:, edge_to_idx[(u, v)], j] = 1.0

    # 3. Generate edge capacity (no additive noise in the test generator)
    lhs = rnd.uniform(1, 10.0, size=(num_data, num_edges))

    # 4. Generate path cost
    B_cost = rnd.binomial(1, 0.5, (num_paths, num_features))
    c = np.zeros((num_data, num_paths))
    for i in range(num_data):
        # cost without noise
        values = (np.dot(B_cost, x[i].reshape(num_features, 1)).T / np.sqrt(num_features) + 3) ** cost_deg + 1
        # rescale
        values *= 5
        values /= 3.5 ** cost_deg
        # multiplicative noise
        epsilon = rnd.uniform(1 - noise_width, 1 + noise_width, num_paths)
        values *= epsilon
        # convert into int
        c[i, :] = np.ceil(values)

    return G.astype(np.float64), x.astype(np.float64), c.astype(np.float64), lhs.astype(np.float64)



# def cspo_genData_test(num_data, num_features, num_paths, num_edges, cost_deg=1, weight_deg=1, noise_width=0, seed=135):
#     """
#     A function to generate synthetic data and features for packing. In CSPO, weight is also a random variable,
#     which can be learned from the features. This function is used to generate data for CSPO.

#     Args:
#         num_data (int): number of data points
#         num_features (int): dimension of features
#         num_items (int): number of items
#         dim (int): dimension of multi-dimensional packing
#         deg (int): data polynomial degree
#         noise_width (float): half width of data random noise
#         seed (int): random state seed

#     Returns:
#        tuple: G of items (np.ndarray), data features (np.ndarray), costs (np.ndarray)
#     """
#     print("Test Data Generation Mode \n")
#     # positive integer parameter
#     if not isinstance(cost_deg, int) or cost_deg <= 0:
#         raise ValueError(f"cost_deg = {cost_deg} should be a positive integer.")
    
#     rnd = np.random.RandomState(seed)


#     # 1. Generate feature vectors: x_i ~ N(0, I_p)
#     x = rnd.normal(0, 1, size =(num_data, num_features))


#     # 2. Generate concentration matrix
#     G = rnd.uniform(1, 10.0, size=(num_data, num_edges, num_paths))
#     print(f'range of G: {G.min()}, {G.max()} G shape: {G.shape}')

#     lhs = rnd.uniform(1, 10.0, size=(num_data,num_edges))
    
#     # random matrix parameter B
#     B_cost = rnd.binomial(1, 0.5, (num_paths, num_features))
#     d_cost = rnd.uniform(0,1,num_paths)
#     # value of items
#     c = np.zeros((num_data, num_paths))
#     for i in range(num_data):
#         # cost without noise
#         values = (np.dot(B_cost, x[i].reshape(num_features, 1)).T / np.sqrt(num_features) + 3) ** cost_deg + 10 + d_cost
#         # rescale
#         values *= 5
#         values /= 3.5 ** cost_deg
#         # noise
# #         noise_level = noise_width * (2 * p - 2 * abs(np.sum(x[i]))) / (2*p)
# #         epsilon = rnd.uniform(1 - noise_width, 1 + noise_width, m)
#         epsilon = noise_width * rnd.normal(0,1,num_paths)
#         values += epsilon
#         # convert into int
# #         values = np.ceil(values)
#         c[i, :] = values

#     return G.astype(np.float64), x.astype(np.float64), c.astype(np.float64), lhs.astype(np.float64) 

    