import sklearn
import sys
import pickle
import numpy as np
from sklearn import preprocessing
from sklearn.neighbors import NearestNeighbors
from numpy.linalg import norm
from sklearn.linear_model import LinearRegression

from matplotlib import pyplot as plt

# Usage: <this script> SEED
# SEED is an integer used to seed NumPy's global RNG so that the random
# subsampling performed further down is reproducible.
# Validate the argument first so a missing seed fails immediately, before
# the pickle is loaded.
if len(sys.argv) < 2:
    sys.exit("usage: {} SEED".format(sys.argv[0]))
seed_val = int(sys.argv[1])
np.random.seed(seed_val)

# NOTE(security): pickle.load can execute arbitrary code embedded in the
# file, so only point this at a pickle from a trusted source.
with open("../all_states_humanoid.pkl", "rb") as f_in:
    # presumably a 2-D collection with one state vector per row - TODO confirm
    all_state_arrs_np = pickle.load(f_in)

# Keep each distinct row once (np.unique with axis=0 also returns the rows
# in sorted order). np.asarray hands the data to NumPy directly instead of
# first building a Python list of per-row tuples.
new_array = np.unique(np.asarray(all_state_arrs_np), axis=0)

# Number of independent random subsamples drawn for every sample size.
num_reduction_runs = 5

def _twonn_fit(points, frac_sample):
    """Fit the nearest-neighbour ratio regression for one sample of points.

    The steps match the TwoNN intrinsic-dimension recipe: for every point
    take mu = r_2 / r_1 (distances to its second and first nearest
    neighbours in standardised coordinates), sort the ratios, pair
    log(mu) with -log(1 - i/N) for i = 0..N-1, and fit a straight line
    through the origin to the lowest ``frac_sample`` fraction of the pairs.

    Args:
        points: 2-D array, one sample per row.
        frac_sample: fraction (0..1) of the sorted ratios, counted from the
            smallest, that is used for the fit.

    Returns:
        Tuple ``(xs_lim, ys_lim, slope)`` where ``xs_lim`` / ``ys_lim`` are
        the column vectors that were fitted and ``slope`` is the regression
        coefficient.
    """
    X_scaled = preprocessing.StandardScaler().fit_transform(points)

    neigh = NearestNeighbors(n_neighbors=2).fit(X_scaled)
    # Query the fitted points themselves by passing no X: scikit-learn then
    # does not count a point as its own neighbour, so columns 0 and 1 are
    # the true first and second neighbours. Querying with X_scaled and
    # skipping column 0 relies on the point always being ranked first.
    neighbor_idx = neigh.kneighbors(return_distance=False)

    # Distances are computed from the coordinates for all points at once.
    # NOTE(review): a zero r_1 (coincident points) would yield inf/nan here;
    # the caller samples from rows de-duplicated with np.unique - confirm
    # that this is sufficient for the data at hand.
    r_1 = norm(X_scaled[neighbor_idx[:, 0]] - X_scaled, axis=1)
    r_2 = norm(X_scaled[neighbor_idx[:, 1]] - X_scaled, axis=1)
    mu_vals_sorted = np.sort(r_2 / r_1)

    N = len(mu_vals_sorted)
    xs = np.log(mu_vals_sorted)
    # Empirical CDF value i/N for the i-th smallest ratio; starting at 0
    # keeps 1 - i/N strictly positive so the log is always finite.
    ys = -np.log(1 - np.arange(N) / N)

    # Only the lowest fraction of ratios is fitted; the largest are dropped.
    n_keep = int(frac_sample * N)
    xs_lim = xs[:n_keep].reshape(-1, 1)
    ys_lim = ys[:n_keep].reshape(-1, 1)

    reg = LinearRegression(fit_intercept=False).fit(xs_lim, ys_lim)
    return xs_lim, ys_lim, reg.coef_[0][0]


# Fraction of the sorted ratios used for each fit.
frac_sample = 0.9

# Alternative sample-size sweep kept for reference:
#for num_samples in [500, 1000, 1500, 2000, 5000, 10000, 20000, 30000, 50000, 100000]:
dot_colors = ['red', 'blue', 'yellow', 'grey', 'green', 'black', 'white']
sample_sizes = [1000, 2500, 5000, 7500, 10000]

# zip stops at the shorter list, so the surplus colours are simply unused.
for dot_color, num_samples in zip(dot_colors, sample_sizes):
    print(num_samples)
    for rand_run in range(num_reduction_runs):
        # Draw num_samples distinct rows; np.random.choice raises ValueError
        # if fewer rows than that are available.
        random_indices = np.random.choice(new_array.shape[0], size=num_samples, replace=False)
        all_state_arrs_np_sampled = new_array[random_indices, :]

        xs_lim, ys_lim, slope = _twonn_fit(all_state_arrs_np_sampled, frac_sample)

        plt.scatter(xs_lim, ys_lim, color=dot_color, label=num_samples, alpha=0.7)
        # Overlay of the fitted line is currently disabled:
        #x = np.linspace(0, 0.5, 100)
        #plt.plot(x, slope*x, '-r', label='y=c*x')

        # CSV-style record: run index, sample size, fitted slope.
        print("{},{},{}".format(rand_run, num_samples, slope))
# Figure output is currently disabled:
#plt.legend()
#plt.savefig("walker_cumulative.png")



