# Effect of aggregation probability on the convergence rate



from algorithms import *

# When True the expensive computation is skipped and results are only
# loaded from disk and plotted.
plot_only = True

# Per-curve plot styling handed to visualize().
linestyles = ['-', '-.', '--', ':', '-.', '-'] * 2
markers = list('o*dvP1pX')
colors = [
    'tab:' + shade
    for shade in ('blue', 'brown', 'green', 'red', 'purple', 'gray', 'olive', 'cyan')
]

# Value passed as `mu` to the objective/solver helpers, and the experiment
# tag used when building result file names.
mu = 0.001
experiment = "minibatch"

# Datasets processed, in this order.
datasets = [
    "a1a",
    "duke",
    "phishing",
    "madelon",
    "mushrooms",
    "a8a",
    "gisette_scale",
]

# Per-dataset value of T (looked up once per dataset).
T_dict = {
    "a1a": 321,
    "mushrooms": 677,
    "phishing": 1005,
    "duke": 11,
    "madelon": 500,
    "gisette_scale": 60,
    "a8a": 2837,
}

# Per-dataset omega values.
# NOTE(review): not referenced anywhere in the visible part of this script.
omega_dict = {
    "a1a": 0.1,
    "mushrooms": 0.05,
    "w2a": 0.01,
    "phishing": 0.1,
    "duke": 0.4,
    "madelon": 0.02,
    "gisette_scale": 0.2,
    "a8a": 0.1,
}

# characterization of the problem
# ---------------------------------------------------------------------------
# Settings shared by every dataset (loop-invariant, so defined once).
# ---------------------------------------------------------------------------
K = 10000              # iteration count handed to find_xstar, dist_GD and create_it
skip_it = K // 500     # forwarded as `skip_it` to dist_GD and create_it
vr = 1                 # forwarded unchanged to dist_GD
simple_spars = False   # forwarded unchanged to dist_GD

# Single source of truth for the runs of the experiment.  The legend labels
# and the x-axis scaling below are derived from these two lists so they can
# no longer drift apart.
# NOTE(review): the previous revision ran dist_GD with tau = 6 while the legend
# and the x-axis scaling both used 8; 8 is assumed to be the intended value.
taus = [1, 8, 60]
imps = [True, False]
runs = [(imp, tau) for imp in imps for tau in taus]

labels = [
    r'$\tau = %d$, %s' % (tau, 'importance' if imp else 'uniform')
    for imp, tau in runs
]

for dataset in datasets:
    print("#############################")
    print(dataset)
    print("#############################")

    T = T_dict[dataset]

    A, b = get_data(dataset)
    n, d = A.shape

    A = normalize_data(A)
    A, b = rearrange_data(A, "random", b)

    blocks = create_blocks(n, T)

    m = n // T
    f, g = make_fg_logreg(A, b, mu)

    def total_loss(x):
        """Evaluate `objective(f, x, n)` for the current dataset."""
        return objective(f, x, n)

    # Only datasets whose row count is an exact multiple of T are processed.
    if m * T != n:
        continue

    if not plot_only:
        Lmat = np.dot(A.T, A) / n / 4 + mu * np.eye(d)
        # Request only the eigenvalue with index d-1 of Lmat.
        # NOTE(review): recent SciPy releases spell this argument
        # `subset_by_index=[d - 1, d - 1]`; confirm against the installed version.
        L = eigh(Lmat, eigvals=(d - 1, d - 1))[0][0]

        xstar = find_xstar(g, mu, L, d, n, K, pre_text='', loss=total_loss)
        Fstar = total_loss(xstar)

    it = create_it(T=K, skip_it=skip_it, tau=1)

    Flist = []
    Xlist = []

    if not plot_only:
        for imp, tau in runs:
            F, x = dist_GD(total_loss, g, blocks, A, mu, vr, imp, K, skip_it, tau,
                           simple_spars, x0=None, pre_text='')
            # Store the suboptimality relative to its initial value.
            Flist.append((F - Fstar) / (F[0] - Fstar))
            Xlist.append(x)

        # Save the results twice: once with backup=True, once with backup=False.
        for backup in [True, False]:
            filename = createfilename(experiment, dataset, T, mu, backup)
            # `with` guarantees the file is closed even if pickling fails.
            with open(filename, "wb") as pickle_out:
                pickle.dump((Flist, Xlist), pickle_out)

    # Always plot from what is stored on disk.
    (Flist, Xlist) = load_pickle(experiment, dataset, T, mu)

    # One x-axis per run: the base axis multiplied by that run's tau.
    it_list = [it if tau == 1 else it * tau for _, tau in runs]

    # plot
    alphas = None
    visualize(Flist, it_list, "minibatch_bits_", dataset, labels, linestyles=linestyles,
              markers=markers, colors=colors, alphas=alphas, muT=mu / T, ppe=n / T)



