from random import Random
from typing import Optional
import pandas as pd
import csv

def generate_random_facility_client_df(
    n_points: int,
    n_features: int,
    n_groups: int,
    facility_probability: float,
    max_capacity: int,
    seed: Optional[int] = None,
) -> pd.DataFrame:
    """
    Generate random facility/client data as a pandas DataFrame.

    Columns:
      f1, ..., f<n_features>       : feature coordinates in [0, 1)
      is_facility                 : 1 if facility, 0 if client
      capacity                    : 0 for clients, 1..max_capacity for facilities
      group1, ..., group<n_groups> : 0/1 membership; facilities have a non-empty
                                     subset of groups, clients are all zeros.

    Parameters
    ----------
    n_points : int
        Number of data points (rows). Values <= 0 yield an empty DataFrame
        that still carries the full column schema.
    n_features : int
        Number of feature columns.
    n_groups : int
        Number of group columns. With ``n_groups <= 0`` no group columns are
        produced and facilities simply carry no membership information.
    facility_probability : float
        Probability that a point is a facility (instead of a client).
        Values <= 0 produce only clients, values >= 1 only facilities.
    max_capacity : int
        Maximum capacity assigned to a facility (minimum is 1).
    seed : Optional[int]
        Random seed for reproducibility.

    Returns
    -------
    df : pd.DataFrame
        DataFrame with the schema above.

    Raises
    ------
    ValueError
        If a facility is drawn while ``max_capacity < 1`` (raised by
        ``Random.randint`` for the empty range ``1..max_capacity``).
    """
    # Random(None) seeds from a system entropy source, exactly like an
    # unseeded Random(), so no separate "seed is None" branch is needed.
    rng = Random(seed)

    feature_cols = [f"f{i + 1}" for i in range(n_features)]
    group_cols = [f"group{i + 1}" for i in range(n_groups)]
    columns = feature_cols + ["is_facility", "capacity"] + group_cols

    rows = []
    for _ in range(n_points):
        # NOTE: the order of the RNG draws below (features, facility flag,
        # capacity, group count, group sample) determines the seeded output;
        # reordering them would change the data produced for a given seed.
        row = {col: rng.random() for col in feature_cols}

        is_fac = rng.random() < facility_probability
        row["is_facility"] = 1 if is_fac else 0

        # Everyone starts with no memberships; only facilities get some.
        groups = [0] * n_groups

        if is_fac:
            row["capacity"] = rng.randint(1, max_capacity)

            # A non-empty subset can only be drawn when at least one group
            # exists; without this guard randint(1, 0) would raise.
            if n_groups > 0:
                num_groups = rng.randint(1, n_groups)
                for gi in rng.sample(range(n_groups), num_groups):
                    groups[gi] = 1
        else:
            row["capacity"] = 0

        row.update(zip(group_cols, groups))
        rows.append(row)

    # Passing columns explicitly fixes the column order and keeps the schema
    # even when no rows were generated.
    return pd.DataFrame(rows, columns=columns)


def generate_random_facility_client_csv(
    n_points: int,
    n_features: int,
    n_groups: int,
    facility_probability: float,
    max_capacity: int,
    filename: str,
    seed: Optional[int] = None,
) -> None:
    """
    Generate random CSV data with the same schema as toy_data.csv / toy_data_2.csv.

    Columns:
      f1, ..., f<n_features>      : feature coordinates in [0, 1)
      is_facility                : 1 if facility, 0 if client
      capacity                   : 0 for clients, 1..max_capacity for facilities
      group1, ..., group<n_groups>: 0/1 membership; facilities have a non-empty
                                    subset of groups, clients are all zeros.

    Parameters
    ----------
    n_points : int
        Number of data points (rows). Values <= 0 write only the header row.
    n_features : int
        Number of features (columns f1..f_k).
    n_groups : int
        Number of groups (columns group1..group_t). With ``n_groups <= 0`` no
        group columns are written and facilities carry no membership
        information.
    facility_probability : float
        Probability that a point is a facility (instead of a client).
        Values <= 0 produce only clients, values >= 1 only facilities.
    max_capacity : int
        Maximum capacity assigned to a facility (minimum is 1).
    filename : str
        Path to the CSV file to write. An existing file is overwritten.
    seed : Optional[int]
        Random seed for reproducibility (default: None).

    Raises
    ------
    ValueError
        If a facility is drawn while ``max_capacity < 1`` (raised by
        ``Random.randint`` for the empty range ``1..max_capacity``).
    OSError
        If ``filename`` cannot be opened for writing.
    """
    # Random(None) seeds from a system entropy source, exactly like an
    # unseeded Random(), so no separate "seed is None" branch is needed.
    rng = Random(seed)

    feature_cols = [f"f{i + 1}" for i in range(n_features)]
    group_cols = [f"group{i + 1}" for i in range(n_groups)]
    fieldnames = feature_cols + ["is_facility", "capacity"] + group_cols

    # newline="" lets the csv module control line endings itself; the explicit
    # encoding keeps the output independent of the platform locale (the
    # content is plain ASCII, so the bytes written are unchanged).
    with open(filename, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        for _ in range(n_points):
            # NOTE: the order of the RNG draws below (features, facility
            # flag, capacity, group count, group sample) determines the
            # seeded output; reordering them would change the data written
            # for a given seed.
            row = {col: rng.random() for col in feature_cols}

            is_fac = rng.random() < facility_probability
            row["is_facility"] = 1 if is_fac else 0

            # Everyone starts with no memberships; only facilities get some.
            groups = [0] * n_groups

            if is_fac:
                row["capacity"] = rng.randint(1, max_capacity)

                # A non-empty subset can only be drawn when at least one
                # group exists; without this guard randint(1, 0) would raise.
                if n_groups > 0:
                    num_groups = rng.randint(1, n_groups)
                    for gi in rng.sample(range(n_groups), num_groups):
                        groups[gi] = 1
            else:
                row["capacity"] = 0

            row.update(zip(group_cols, groups))
            writer.writerow(row)

# example usage
def test_stub2():
    """Demo: write a seeded 100-point sample dataset to ``random_toy_data.csv``."""
    # Fixed settings (including the seed) so repeated runs write the same file.
    settings = {
        "n_points": 100,
        "n_features": 3,
        "n_groups": 2,
        "facility_probability": 0.5,
        "max_capacity": 5,
        "filename": "random_toy_data.csv",
        "seed": 42,
    }
    generate_random_facility_client_csv(**settings)

def test_stub1():
    """Demo: build a seeded 100-point sample DataFrame and print its head."""
    # Fixed settings (including the seed) so repeated runs print the same rows.
    settings = {
        "n_points": 100,
        "n_features": 3,
        "n_groups": 2,
        "facility_probability": 0.5,
        "max_capacity": 5,
        "seed": 42,
    }
    sample = generate_random_facility_client_df(**settings)
    preview = sample.head()
    print(preview)

if __name__ == "__main__":
    # When run as a script, execute the demos in order: the DataFrame preview
    # first, then the CSV export.
    for demo in (test_stub1, test_stub2):
        demo()

