import pandas as pd
import torch
import numpy as np
from onlinedatasets.datasets import SVMDataset, get_dataset, get_batches, GrowingNumpyDataSet, DataSetUnsupervised, DataSet, get_autoencoder_dataset
from onlinedatasets.models import TorchRewardsModel, AutoEncoder, TorchRewardsModelMultilayer
import pandas as pd
import random
import IPython
import os
import copy

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


from algorithms import train_autoencoder, train_simple_regression
import pickle

from autoencoder_test import get_data, regression_analysis

# Select the torch device once at import time: CUDA when torch reports it as
# available, otherwise the CPU. The choice is printed for the run log.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("DEVICE ", device)


def _load_pickled_dataset(path):
	"""Unpickle and return the object stored at ``path``, closing the file afterwards."""
	# SECURITY: pickle.load can execute arbitrary code embedded in the file.
	# Only point this at trusted, locally generated artifacts.
	with open(path, "rb") as handle:
		return pickle.load(handle)


def get_autoencoded_CMAP_datasets():
	"""Load the four pre-encoded CMAP datasets from their pickle files.

	The files are read from ``../CMAP/cmap_processed/`` relative to the current
	working directory, one per cell line (VCAP, HA1E, MCF7, A375).

	Returns:
		A 4-tuple of the unpickled objects, in the order VCAP, HA1E, MCF7, A375.

	Raises:
		OSError: if any of the pickle files cannot be opened.
	"""
	# NOTE(review): the raw frames returned by get_data() were only used to
	# build per-row mean responses that never reached the return value, so that
	# dead computation has been removed. The call itself is retained in case
	# get_data() has side effects that callers rely on -- confirm and drop it if not.
	get_data()

	pickle_paths = (
		"../CMAP/cmap_processed/encoded_trt_sh.VCAP.978genes.p",
		"../CMAP/cmap_processed/encoded_trt_sh.HA1E.978genes.p",
		"../CMAP/cmap_processed/encoded_trt_sh.MCF7.978genes.p",
		"../CMAP/cmap_processed/encoded_trt_sh.A375.978genes.p",
	)

	# Each file is opened and closed in turn, so no handles are leaked.
	return tuple(_load_pickled_dataset(path) for path in pickle_paths)

def main():
	"""Call ``regression_analysis`` once for each of the four encoded CMAP datasets.

	The datasets are processed in the order returned by
	``get_autoencoded_CMAP_datasets`` and tagged with the ``data_index``
	values "", "1", "2" and "3" respectively.
	"""
	base_set, set_one, set_two, set_three = get_autoencoded_CMAP_datasets()

	# The input width is read from the first dataset and reused for every run.
	# NOTE(review): assumes all four datasets have the same column count -- confirm.
	dim = base_set.dataset.values.shape[1]

	# Settings shared by every run.
	num_steps = 20000 #12000
	batch_size = 200
	logging_frequency = 100

	tagged_datasets = (
		("", base_set),
		("1", set_one),
		("2", set_two),
		("3", set_three),
	)
	for tag, encoded in tagged_datasets:
		# A fresh layer-size list is built for each call, as in the unrolled form.
		regression_analysis(
			dim,
			encoded,
			num_steps,
			batch_size,
			logging_frequency,
			data_index=tag,
			representation_layer_sizes=[300, 40],
		)





# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
