'''
Synthetic Experiment Code for the paper titled
``Constrained Robust Submodular Partitioning''.
The experiments first generate random similarity matrices for facility location functions,
and then compare Min Block Greedy, Round Robin Greedy and random selection under a matroid constraint
for the constrained robust submodular partitioning problem.
'''
import numpy as np


# facility location function gain f(v|cache)
def fl_gain(v, cache, sim):
    """Return the marginal facility-location gain of adding element ``v``.

    Args:
        v: index of the candidate row of ``sim``.
        cache: per-column values already achieved by the current set.
        sim: similarity matrix indexed as ``sim[v]``.

    Returns:
        A pair ``(gain, updated_cache)``: ``updated_cache`` is the
        elementwise maximum of ``sim[v]`` and ``cache``, and ``gain`` is the
        total increase of ``updated_cache`` over ``cache``.
    """
    updated_cache = np.maximum(sim[v], cache)
    improvement = updated_cache - cache
    return np.sum(improvement), updated_cache

# facility location function evaluation f(S)
def fl_val(S, sim):
    """Evaluate the facility location function on the set ``S``.

    The value is the sum, over the columns of ``sim``, of the largest entry
    among the rows listed in ``S``.

    Args:
        S: sequence (e.g. list or integer array) of row indices of ``sim``.
        sim: 2-D similarity matrix.

    Returns:
        The function value; ``0.0`` when ``S`` is empty.
    """
    if len(S) == 0:
        # np.amax raises on a zero-size reduction, so handle the empty set
        # explicitly and give it value zero.
        return 0.0
    # Column-wise maximum over the selected rows, then sum over columns.
    return np.sum(np.amax(sim[S], axis=0))



def minBlockGreedyMatroid(sim, n, m, k, mod):
    """Partition the ground set with the Min Block Greedy rule.

    At every step the block with the smallest current value receives the
    feasible element with the largest marginal gain (ties go to the larger
    index). An element is feasible for a block if it has not been assigned to
    any block yet and the block holds fewer than ``k`` elements of the
    element's group, where element ``j`` belongs to group ``j // mod``. A
    block with no feasible element left is retired with its current value.

    Args:
        sim: similarity matrix passed to ``fl_gain``.
        n: ground set size.
        m: number of blocks.
        k: maximum number of elements a block may take from one group.
        mod: group size; the last group is smaller when ``mod`` does not
            divide ``n``.

    Returns:
        ``(final_vals, min_val)``: the array of the ``m`` block values and its
        minimum. The same information is also printed.
    """
    # Ceil division so that a trailing partial group gets its own counter
    # column; ``n // mod`` columns would make ``jj // mod`` go out of bounds.
    num_groups = -(-n // mod)

    caches = np.zeros((m, n))
    matroid_check = np.zeros((m, num_groups), dtype=np.int32)
    vals = np.zeros(m)
    final_vals = np.zeros(m)
    S_all = set()
    num_active = m

    while num_active > 0:
        # Retired blocks carry value +inf, so argmin always lands on an
        # active block while at least one remains.
        min_id = np.argmin(vals)

        best_ele = -1
        best_gain = 0
        best_cache = None
        for jj in range(n):
            if jj not in S_all and matroid_check[min_id, jj // mod] < k:
                gain, cache_new = fl_gain(jj, caches[min_id], sim)
                if gain >= best_gain:
                    best_gain = gain
                    best_cache = cache_new
                    best_ele = jj

        if best_ele == -1:
            # Nothing feasible is left for this block: freeze its value.
            final_vals[min_id] = vals[min_id]
            vals[min_id] = np.inf
            num_active -= 1
            continue

        S_all.add(best_ele)
        vals[min_id] = np.sum(best_cache)
        caches[min_id] = best_cache
        matroid_check[min_id, best_ele // mod] += 1

    print(final_vals, np.min(final_vals), len(S_all))
    return final_vals, np.min(final_vals)



def rrMeta(sim, n, m, k, mod, tau, rr_num_guesses):
    """Search over threshold guesses for ``rrGreedyMatroid``.

    Builds ``rr_num_guesses`` thresholds that start at ``tau`` and grow
    geometrically by ``m ** (1 / rr_num_guesses)``, then binary-searches them:
    a guess is accepted, and the search moves to larger guesses, when the
    minimum block value returned by ``rrGreedyMatroid`` is at least
    ``guess * (1 - 1/e) / 5``; otherwise the search moves to smaller guesses.

    Returns:
        The largest minimum block value seen over all evaluated guesses
        (``0`` if no guess was evaluated).
    """
    # Growth ratio chosen so that ratio ** rr_num_guesses == m.
    ratio = m ** (1 / rr_num_guesses)

    taus = []
    guess = tau
    for _ in range(rr_num_guesses):
        taus.append(guess)
        guess = guess * ratio

    print("guessed values", taus)

    accept_scale = 1 - 1 / np.e
    best_val = 0
    lo, hi = 0, rr_num_guesses - 1
    while lo <= hi:
        mid = lo + (hi - lo) // 2
        _, val = rrGreedyMatroid(sim, n, m, k, mod, taus[mid])
        best_val = max(val, best_val)
        threshold = taus[mid] * accept_scale / 5
        if val >= threshold:
            lo = mid + 1
        else:
            hi = mid - 1

    return best_val


def rrGreedyMatroid(sim, n, m, k, mod, tau=1e7):
    """Partition the ground set with thresholded Round Robin Greedy.

    Singleton step: every element whose value on its own (its gain over an
    all-zero cache) is at least ``tau`` gets a block to itself; these blocks
    are taken from the end of the block list. At most ``m`` such blocks are
    created.

    Round-robin step: the remaining blocks are visited cyclically (the cycle
    starts at block 1) and each visit adds the feasible element with the
    largest marginal gain (ties go to the larger index). An element is
    feasible for a block if it is still unassigned and the block holds fewer
    than ``k`` elements of the element's group, where element ``j`` belongs to
    group ``j // mod``. A block with no feasible element left is retired with
    its current value.

    Args:
        sim: similarity matrix passed to ``fl_gain``.
        n: ground set size.
        m: number of blocks.
        k: maximum number of elements a block may take from one group.
        mod: group size; the last group is smaller when ``mod`` does not
            divide ``n``.
        tau: singleton threshold.

    Returns:
        ``(final_vals, min_val)``: the array of the ``m`` block values and its
        minimum. Progress and the result are also printed.
    """
    # Ceil division so that a trailing partial group gets its own counter
    # column; ``n // mod`` columns would make ``jj // mod`` go out of bounds.
    num_groups = -(-n // mod)

    caches = np.zeros((m, n))
    matroid_check = np.zeros((m, num_groups), dtype=np.int32)
    vals = np.zeros(m)
    final_vals = np.zeros(m)
    S_all = set()

    empty_cache = np.zeros(n)
    singleton_counter = 0
    for ii in range(n):
        if singleton_counter == m:
            # Every block already holds a singleton; without this cap the
            # negative index below would run past the start of final_vals.
            break
        gain, _ = fl_gain(ii, empty_cache, sim)
        if gain >= tau:
            singleton_counter += 1
            final_vals[-singleton_counter] = gain
            S_all.add(ii)

    print("rr singleton step", singleton_counter)

    num_rr_blocks = m - singleton_counter
    finished = np.zeros(num_rr_blocks, dtype=bool)
    num_active = num_rr_blocks
    block_id = 0

    while num_active > 0:
        block_id = (block_id + 1) % num_rr_blocks
        if finished[block_id]:
            continue

        best_ele = -1
        best_gain = 0
        best_cache = None
        for jj in range(n):
            if jj not in S_all and matroid_check[block_id, jj // mod] < k:
                gain, cache_new = fl_gain(jj, caches[block_id], sim)
                if gain >= best_gain:
                    best_gain = gain
                    best_cache = cache_new
                    best_ele = jj

        if best_ele == -1:
            # Nothing feasible is left for this block: freeze its value.
            final_vals[block_id] = vals[block_id]
            finished[block_id] = True
            num_active -= 1
            continue

        S_all.add(best_ele)
        vals[block_id] = np.sum(best_cache)
        caches[block_id] = best_cache
        matroid_check[block_id, best_ele // mod] += 1

    print(final_vals, np.min(final_vals), len(S_all))
    return final_vals, np.min(final_vals)



def randomBaseline(sim, n, m, k, mod):
    """Assign elements to blocks uniformly at random under the matroid constraint.

    The ground set is processed group by group, a group being ``mod``
    consecutive elements (the last group is smaller when ``mod`` does not
    divide ``n``). Each element goes to a block drawn uniformly from the
    blocks that still hold fewer than ``k`` elements of the current group;
    once no such block is left, the rest of the group is left unassigned.

    Args:
        sim: similarity matrix passed to ``fl_val``.
        n: ground set size.
        m: number of blocks.
        k: maximum number of elements a block may take from one group.
        mod: group size.

    Returns:
        ``(vals, min_val)``: the list of the ``m`` block values (``0`` for an
        empty block) and its minimum. The same information is also printed.
    """
    Ss = [[] for _ in range(m)]

    # Stepping by ``mod`` up to ``n`` also covers a trailing partial group,
    # which ``range(n // mod)`` would silently drop.
    for group_start in range(0, n, mod):
        matroid_check = np.zeros(m, dtype=np.int32)
        valid_blocks = np.arange(m)

        for ele in range(group_start, min(group_start + mod, n)):
            rand_idx = np.random.randint(len(valid_blocks))
            target = valid_blocks[rand_idx]
            if matroid_check[target] < k:
                matroid_check[target] += 1
                Ss[target].append(ele)

                if matroid_check[target] == k:
                    # The block is full for this group; stop drawing it.
                    valid_blocks = np.delete(valid_blocks, rand_idx)

            if len(valid_blocks) == 0:
                break

    vals = [fl_val(S, sim) if S else 0 for S in Ss]

    print(vals, np.min(vals))
    return vals, np.min(vals)



def greedy(sim=None, n=None, k=None):
    """Greedily pick up to ``k`` elements maximizing the facility location value.

    Each round adds the not-yet-selected element with the largest marginal
    gain (ties go to the larger index) and prints that gain and element. The
    loop stops early once every element has been selected.

    Args:
        sim: similarity matrix passed to ``fl_gain``. When omitted, ``sim``
            and ``n`` are read from module-level variables of the same name,
            which is how the argument-less version of this function got its
            inputs.
        n: ground set size; defaults to ``len(sim)`` when ``sim`` is given.
        k: number of elements to select; read from a module-level ``k`` when
            omitted.

    Returns:
        ``(S, value)``: the set of selected indices and the sum of the final
        cache.

    Raises:
        NameError: if an omitted argument has no module-level fallback.
    """
    def _module_global(name):
        # Legacy fallback for callers that relied on module-level variables.
        try:
            return globals()[name]
        except KeyError:
            raise NameError(
                "name '{0}' is not defined; pass {0} to greedy() "
                "explicitly".format(name)
            ) from None

    legacy_call = sim is None
    if legacy_call:
        sim = _module_global("sim")
    if n is None:
        n = _module_global("n") if legacy_call else len(sim)
    if k is None:
        k = _module_global("k")

    cache = np.zeros(n)
    S = set()

    for _ in range(k):
        best_ele = -1
        best_gain = 0
        best_cache = None
        for jj in range(n):
            if jj not in S:
                gain, cache_new = fl_gain(jj, cache, sim)
                if gain >= best_gain:
                    best_gain = gain
                    best_cache = cache_new
                    best_ele = jj

        if best_ele == -1:
            # No candidate is left (k exceeds the number of elements); stop
            # instead of recording the placeholder index -1.
            break

        print(best_gain, best_ele)
        S.add(best_ele)
        cache = best_cache

    print(len(S))
    return S, np.sum(cache)


'''
Parameters of the experiment:
n: ground set size
m: number of blocks in the partition
k and mod: matroid parameters; the ground set is divided into consecutive groups of mod elements, and each block may contain at most k elements from any one group
'''
def exp(n, m, k, mod, num_runs=30, rr_num_guesses=10):
    """Compare the three partitioning methods on random instances.

    Every run draws a fresh symmetric ``n x n`` similarity matrix (the average
    of a uniform random matrix and its transpose) and records the minimum
    block value obtained by ``minBlockGreedyMatroid``, by ``rrMeta`` (whose
    first threshold guess is the Min Block Greedy result) and by
    ``randomBaseline``.

    Args:
        n: ground set size.
        m: number of blocks.
        k: maximum number of elements a block may take from one group.
        mod: group size of the matroid constraint.
        num_runs: number of random instances to average over.
        rr_num_guesses: number of threshold guesses handed to ``rrMeta``.

    Returns:
        ``(means, stds)``: two lists holding the mean and the standard
        deviation of the recorded values, each ordered as
        [Min Block Greedy, Round Robin Greedy, random baseline].

    Raises:
        ValueError: if ``num_runs`` or ``rr_num_guesses`` is smaller than 1.
    """
    if num_runs < 1:
        # The statistics of zero runs are undefined (NaN).
        raise ValueError("num_runs must be at least 1")
    if rr_num_guesses < 1:
        raise ValueError("rr_num_guesses must be at least 1")

    mb_results = []
    rr_results = []
    base_results = []

    for _ in range(num_runs):
        sim = np.random.rand(n, n)
        sim = 0.5 * (sim + sim.T)

        _, val1 = minBlockGreedyMatroid(sim, n, m, k, mod)
        val2 = rrMeta(sim, n, m, k, mod, val1, rr_num_guesses)
        _, val3 = randomBaseline(sim, n, m, k, mod)

        mb_results.append(val1)
        rr_results.append(val2)
        base_results.append(val3)

    all_results = (mb_results, rr_results, base_results)
    means = [np.mean(results) for results in all_results]
    stds = [np.std(results) for results in all_results]
    return means, stds



def main():
    """Run the experiment with n=100, m=20, k=3, mod=10 and print its result."""
    summary = exp(100, 20, 3, 10)
    print(summary)


# Run the experiment only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
