import numpy as np
import pandas as pd
import os
# Shared configuration for the dataset sweeps below.

# Seed NumPy's global RNG once so that every generated dataset is reproducible.
np.random.seed(42)

# Number of independent datasets written for each sweep setting.
n_datasets = 5

# Target fraction of samples that a single rule is expected to cover.
p_rule = 0.1

# Default data shape (rows, columns) and number of feature conditions per rule.
n_samples, n_features = 5000, 20
rule_size = 4

# ---------------------------------------------------------------------------
# Sweep 1: vary the number of rules in the ground-truth rule list.
#
# For every setting, `n_datasets` datasets are written to
# syn_data/list-size/<n_rules>/ as data<n>.csv (features, target, rule_label;
# ';'-separated) together with rule<n>.txt (one human-readable rule per line).
# ---------------------------------------------------------------------------
for n_rules in [2, 4, 6, 8, 12]:
    out_dir = f"syn_data/list-size/{n_rules}"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)

    # Width of each per-feature interval. Features are i.i.d. uniform on
    # [0, 1), so a conjunction over `rule_size` distinct features covers an
    # expected fraction interval ** rule_size == p_rule of the samples.
    interval = p_rule ** (1 / rule_size)

    for n in range(n_datasets):
        X = np.random.uniform(size=(n_samples, n_features))

        rules = []
        # Column i is True for the samples matched by rule i.
        rule_coverage = np.zeros((n_samples, n_rules), dtype=bool)
        rule_classes = np.random.randint(2, size=n_rules)
        for i in range(n_rules):
            rule_features = np.random.choice(n_features, size=rule_size, replace=False)
            rule_inclusion = np.ones(n_samples, dtype=bool)

            conditions = []
            for feat in rule_features:
                # A fresh interval is drawn for every feature of the rule.
                interval_start = np.random.uniform(low=0, high=1 - interval)
                interval_end = interval_start + interval
                # Closed bounds, matching the "[a, b]" notation written to the
                # rule file (and the comparisons used by the other sweeps).
                rule_inclusion &= (X[:, feat] >= interval_start) & (X[:, feat] <= interval_end)
                conditions.append(f"X{feat} in [{interval_start:.2f}, {interval_end:.2f}]")
            rule_coverage[:, i] = rule_inclusion
            # The header must be an f-string so the predicted class is
            # interpolated rather than written as a literal placeholder.
            rules.append(f"Class {rule_classes[i]} if: " + " AND ".join(conditions))

        # Samples not matched by any rule keep a random label.
        Y = np.random.randint(2, size=n_samples)
        not_yet_covered = np.ones(n_samples, dtype=bool)
        # Index of the rule that labelled each sample; -1 means "no rule".
        rule_labels = np.full(n_samples, -1.0)
        # Rule-list semantics: the first matching rule decides the label.
        for i in range(n_rules):
            rule_scope = rule_coverage[:, i] & not_yet_covered
            Y[rule_scope] = rule_classes[i]
            rule_labels[rule_scope] = i
            not_yet_covered &= ~rule_scope

        df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(n_features)])
        df["target"] = Y
        df["rule_label"] = rule_labels

        df.to_csv(f"{out_dir}/data{n}.csv", index=False, sep=";")
        # Context manager guarantees the file is closed even if a write fails.
        with open(f"{out_dir}/rule{n}.txt", "w", encoding="utf-8") as f:
            f.writelines(rule + "\n" for rule in rules)

# ---------------------------------------------------------------------------
# Sweep 2: vary the number of samples, with a fixed list of two rules.
#
# For every setting, `n_datasets` datasets are written to
# syn_data/sample-size/<n_samples>/ as data<n>.csv (features, target,
# rule_label; ';'-separated) together with rule<n>.txt (one rule per line).
# ---------------------------------------------------------------------------
n_rules = 2

for n_samples in [100, 500, 1000, 5000, 10000]:
    out_dir = f"syn_data/sample-size/{n_samples}"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)

    # Width of each per-feature interval; interval ** rule_size == p_rule.
    interval = p_rule ** (1 / rule_size)

    for n in range(n_datasets):
        X = np.random.uniform(size=(n_samples, n_features))

        rules = []
        # Column i is True for the samples matched by rule i.
        rule_coverage = np.zeros((n_samples, n_rules), dtype=bool)
        rule_classes = np.random.randint(2, size=n_rules)
        for i in range(n_rules):
            rule_features = np.random.choice(n_features, size=rule_size, replace=False)
            rule_inclusion = np.ones(n_samples, dtype=bool)
            # NOTE(review): a single interval is drawn per rule and shared by
            # all of its features, whereas the list-size sweep draws one per
            # feature. Kept as-is so seeded output is unchanged; confirm
            # whether this difference is intended.
            interval_start = np.random.uniform(low=0, high=1 - interval)
            interval_end = interval_start + interval

            conditions = []
            for feat in rule_features:
                rule_inclusion &= (X[:, feat] >= interval_start) & (X[:, feat] <= interval_end)
                conditions.append(f"X{feat} in [{interval_start:.2f}, {interval_end:.2f}]")
            rule_coverage[:, i] = rule_inclusion
            # Emit the "Class c if:" header once per rule instead of repeating
            # it in front of every condition.
            rules.append(f"Class {rule_classes[i]} if: " + " AND ".join(conditions))

        # Samples not matched by any rule keep a random label.
        Y = np.random.randint(2, size=n_samples)
        not_yet_covered = np.ones(n_samples, dtype=bool)
        # Index of the rule that labelled each sample; -1 means "no rule".
        rule_labels = np.full(n_samples, -1.0)
        # Rule-list semantics: the first matching rule decides the label.
        for i in range(n_rules):
            rule_scope = rule_coverage[:, i] & not_yet_covered
            Y[rule_scope] = rule_classes[i]
            rule_labels[rule_scope] = i
            not_yet_covered &= ~rule_scope

        df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(n_features)])
        df["target"] = Y
        df["rule_label"] = rule_labels

        df.to_csv(f"{out_dir}/data{n}.csv", index=False, sep=";")
        # Context manager guarantees the file is closed even if a write fails.
        with open(f"{out_dir}/rule{n}.txt", "w", encoding="utf-8") as f:
            f.writelines(rule + "\n" for rule in rules)
            
# ---------------------------------------------------------------------------
# Sweep 3: vary the number of feature conditions per rule.
#
# Uses 10000 samples and 20 features; `n_rules` is whatever the preceding
# section of this script left it at. For every setting, `n_datasets` datasets
# are written to syn_data/rule-size/<rule_size>/ as data<n>.csv (features,
# target, rule_label; ';'-separated) together with rules<n>.txt.
# ---------------------------------------------------------------------------
n_samples = 10000

n_features = 20
# No initial `rule_size` assignment is needed: the loop variable below sets it.
for rule_size in [2, 4, 6, 8]:
    out_dir = f"syn_data/rule-size/{rule_size}"
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(out_dir, exist_ok=True)

    # Width of each per-feature interval; interval ** rule_size == p_rule, so
    # the expected coverage of a rule stays the same for every rule size.
    interval = p_rule ** (1 / rule_size)

    for n in range(n_datasets):
        X = np.random.uniform(size=(n_samples, n_features))

        rules = []
        # Column i is True for the samples matched by rule i.
        rule_coverage = np.zeros((n_samples, n_rules), dtype=bool)
        rule_classes = np.random.randint(2, size=n_rules)
        for i in range(n_rules):
            rule_features = np.random.choice(n_features, size=rule_size, replace=False)
            rule_inclusion = np.ones(n_samples, dtype=bool)
            # NOTE(review): a single interval is drawn per rule and shared by
            # all of its features, whereas the list-size sweep draws one per
            # feature. Kept as-is so seeded output is unchanged; confirm
            # whether this difference is intended.
            interval_start = np.random.uniform(low=0, high=1 - interval)
            interval_end = interval_start + interval

            conditions = []
            for feat in rule_features:
                rule_inclusion &= (X[:, feat] >= interval_start) & (X[:, feat] <= interval_end)
                conditions.append(f"X{feat} in [{interval_start:.2f}, {interval_end:.2f}]")
            rule_coverage[:, i] = rule_inclusion
            # Emit the "Class c if:" header once per rule instead of repeating
            # it in front of every condition.
            rules.append(f"Class {rule_classes[i]} if: " + " AND ".join(conditions))

        # Samples not matched by any rule keep a random label.
        Y = np.random.randint(2, size=n_samples)
        not_yet_covered = np.ones(n_samples, dtype=bool)
        # Index of the rule that labelled each sample; -1 means "no rule".
        rule_labels = np.full(n_samples, -1.0)
        # Rule-list semantics: the first matching rule decides the label.
        for i in range(n_rules):
            rule_scope = rule_coverage[:, i] & not_yet_covered
            Y[rule_scope] = rule_classes[i]
            rule_labels[rule_scope] = i
            not_yet_covered &= ~rule_scope

        df = pd.DataFrame(X, columns=[f"feature_{i}" for i in range(n_features)])
        df["target"] = Y
        df["rule_label"] = rule_labels

        df.to_csv(f"{out_dir}/data{n}.csv", index=False, sep=";")
        # NOTE(review): this sweep writes "rules<n>.txt" while the other sweeps
        # write "rule<n>.txt"; the name is kept because readers of these files
        # may depend on it.
        # Context manager guarantees the file is closed even if a write fails.
        with open(f"{out_dir}/rules{n}.txt", "w", encoding="utf-8") as f:
            f.writelines(rule + "\n" for rule in rules)
