import numpy as np
from itertools import combinations

def adult_filter(data):
    """Apply the row filters used when the Adult dataset was extracted.

    The Adult documentation states that Barry Becker extracted a set of
    reasonably clean records from the 1994 Census database using
    ``((AAGE>16) && (AGI>100) && (AFNLWGT>1) && (HRSWK>0))``.

    This function applies analogous thresholds to the columns ``AGEP``,
    ``PINCP``, ``WKHP`` and ``PWGTP`` (presumably the counterparts of the
    variables quoted above -- confirm against the data dictionary).

    Note: the weight condition is implemented as ``PWGTP >= 1``, whereas the
    quoted condition uses a strict ``> 1``.

    Args:
        data: Table supporting column lookup by name and boolean-mask row
            selection (presumably a pandas DataFrame) containing the four
            columns listed above.

    Returns:
        The rows of ``data`` that pass all four conditions.
    """
    # Columns that must be strictly greater than their bound.
    strict_lower_bounds = (('AGEP', 16), ('PINCP', 100), ('WKHP', 0))

    kept = data
    for column, bound in strict_lower_bounds:
        kept = kept[kept[column] > bound]

    # The weight column uses an inclusive bound, unlike the three above.
    return kept[kept['PWGTP'] >= 1]

def generate_subsets(N, k):
    """Return every size-``k`` subset of ``{0, 1, ..., N-1}``.

    Args:
        N: Size of the ground set; the elements are ``range(N)``.
        k: Number of elements in each subset.

    Returns:
        A list of tuples, one per subset, in the order produced by
        ``itertools.combinations`` (lexicographic for this sorted input).
        The list is empty when ``k > N``.
    """
    return list(combinations(range(N), k))

def repeat_rows(df, K):
    """Return a new DataFrame in which each row of ``df`` appears ``K`` times.

    Copies of a row are adjacent (row 0 ``K`` times, then row 1 ``K`` times,
    and so on), and the result gets a fresh ``0..n-1`` index.

    Args:
        df: pandas DataFrame whose rows are to be repeated.
        K: Number of repetitions per row (a non-negative integer, or any
            repeat specification accepted by ``numpy.repeat``).

    Returns:
        A DataFrame with the repeated rows and a reset integer index.
    """
    # Select by position, not by label: a label lookup via ``.loc`` returns
    # every row sharing that label, so a non-unique index would produce
    # more than K copies of each row.
    positions = np.repeat(np.arange(len(df)), K)
    return df.iloc[positions].reset_index(drop=True)

def to_binary_array(sub_array, length=10):
    """Convert a collection of indices into a 0/1 indicator vector.

    Args:
        sub_array: Indices to mark. Any array-like is accepted (NumPy array,
            list, tuple); values are cast to integers before use.
        length: Length of the returned vector.

    Returns:
        A 1-D integer NumPy array of size ``length`` holding 1 at every
        position listed in ``sub_array`` and 0 elsewhere.

    Raises:
        IndexError: If an index falls outside the vector (NumPy indexing
            rules apply, so negative indices count from the end).
    """
    # np.asarray lets plain sequences through; ndarrays pass unchanged.
    indices = np.asarray(sub_array).astype("int")
    binary_array = np.zeros(length, dtype=int)
    binary_array[indices] = 1
    return binary_array
    
def hamming_distance(arr1, arr2):
    """Count the positions at which two sequences differ.

    Args:
        arr1: First array-like of values.
        arr2: Second array-like, compared element-wise with ``arr1``
            (NumPy broadcasting rules apply).

    Returns:
        The number of element positions where the inputs are not equal,
        as returned by ``numpy.sum``.
    """
    # Coerce first: comparing two plain lists with ``!=`` yields a single
    # bool rather than an element-wise result, which would cap the
    # distance at 1.
    return np.sum(np.asarray(arr1) != np.asarray(arr2))