import scanpy as sc
import numpy as np
import argparse
import faiss
import warnings
import scanpy as sc
import time
import matplotlib.pyplot as plt
from sklearn import metrics
import seaborn as sns
import random
import numpy as np
from ...retreival_utils.faiss_retreival import similarity_search
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings('ignore')

# Not referenced anywhere in the visible part of this script.
# Presumably the embedding dimensionality -- TODO confirm before relying on it.
N_COMPONENTS = 128

# Command-line interface. Every option is required; the parsed namespace is
# later handed as a whole to similarity_search, which presumably reads
# --faiss_search and --retrieved_for_each_cell from it (verify against helper).
parser = argparse.ArgumentParser(description='Process single-cell data.')
parser.add_argument('--input_adata', type=str, required=True, help='Path to input .h5ad file')
parser.add_argument('--paired_adata', type=str, required=True, help='Path to input paired .h5ad file')
parser.add_argument('--input_embeddings', type=str, required=True, help='Path to input .npy file')
parser.add_argument('--pair_embeddings', type=str, required=True, help='Path to input paired .npy file')
parser.add_argument('--method', type=str, required=True, help='Method used for embeddings; all if you want to run all methods')
parser.add_argument('--retrieved_for_each_cell', type=int, required=True, help='Number of cells to retrieve for each cell')
parser.add_argument('--faiss_search', type=str, required=True, help='Faiss search method')
parser.add_argument(
    '--norm', type=str, required=True,
    help="Embedding normalization: 'sc1p' applies log1p, 'standard' standardizes each "
         "feature per dataset; any other value leaves the embeddings unchanged",
)
parser.add_argument(
    '--output_dir', type=str, required=True,
    help='Directory for index.csv, cell_name.csv and cell_type.csv (created if missing)',
)

args = parser.parse_args()

# Load the query dataset and its paired (target) dataset.
adata = sc.read(args.input_adata)
pair_adata = sc.read(args.paired_adata)

# One scaler per dataset so each is standardized with its own statistics.
scaler = StandardScaler()
scaler_paired = StandardScaler()

print(f"########### {args.method.capitalize()} ###########")
embeddings = np.load(args.input_embeddings)
embeddings_pair = np.load(args.pair_embeddings)

print("Embeddings shape: ", embeddings.shape)

n_query = embeddings.shape[0]
n_target = embeddings_pair.shape[0]

# Rows are labelled purely by position further down: row i of the stacked
# matrix is looked up in "adata cell names followed by pair_adata cell names".
# If the query embedding count differs from the number of query cells, every
# target offset is shifted and the written names/types are silently wrong, so
# fail early instead of after the (expensive) search.
if n_query != adata.n_obs:
    raise ValueError(
        f"--input_embeddings has {n_query} rows but --input_adata has "
        f"{adata.n_obs} cells; rows and cells must correspond one-to-one."
    )
# More target rows than target cells would yield out-of-range label lookups.
if n_target > pair_adata.n_obs:
    raise ValueError(
        f"--pair_embeddings has {n_target} rows but --paired_adata has only "
        f"{pair_adata.n_obs} cells."
    )

# Query rows occupy positions [0, n_query) of the stacked matrix and target
# rows occupy [n_query, n_query + n_target). np.arange keeps an integer dtype
# even when a dataset is empty.
query_indexes = np.arange(n_query)
target_indexes = np.arange(n_query, n_query + n_target)

if args.norm == "sc1p":
    # Plain NumPy log1p: same values, and the result does not depend on
    # scanpy's in-place/return conventions for raw (non-AnnData) arrays.
    embeddings = np.log1p(embeddings)
    embeddings_pair = np.log1p(embeddings_pair)
elif args.norm == "standard":
    # Each dataset is centered/scaled independently of the other.
    embeddings = scaler.fit_transform(embeddings)
    embeddings_pair = scaler_paired.fit_transform(embeddings_pair)
# Any other --norm value leaves the embeddings exactly as loaded.

# Stack query rows on top of target rows; the index arrays above refer to
# positions in this combined matrix.
embeddings = np.concatenate((embeddings, embeddings_pair))

# Run the retrieval over the stacked embedding matrix. similarity_search is a
# project helper not visible here; presumably it returns, per query row, the
# distances and the positions (within target_indexes) of the retrieved rows --
# TODO confirm. `distances` is not used again in the visible code.
distances, index = similarity_search(args, embeddings, query_indexes, target_indexes)

# Column layout: the query's own row index, then one column per retrieved hit.
columns = ['Query']
columns += [f'Result-{rank}' for rank in range(1, args.retrieved_for_each_cell + 1)]

# Translate the returned positions into row indexes of the stacked matrix and
# put the query index in front of each row of hits.
query_column = query_indexes.reshape(-1, 1)
retrieved_rows = target_indexes[index]
index_table = np.concatenate([query_column, retrieved_rows], axis=1)
df = pd.DataFrame(index_table, columns=columns)

# Make sure the output directory exists before writing the index table.
os.makedirs(args.output_dir, exist_ok=True)
df.to_csv(f"{args.output_dir}/index.csv")

# Write two tables shaped like the index table, with each row index replaced
# by a per-cell annotation: first the cell name (obs index), then the cell
# type. Annotations of the query dataset come first, followed by those of the
# paired dataset, matching the order in which the embeddings were stacked.
# Accessors are evaluated lazily so cell_name.csv is written before
# obs.cell_type is touched.
for file_stem, read_annotation in (
    ("cell_name", lambda dataset: dataset.obs.index),
    ("cell_type", lambda dataset: dataset.obs.cell_type),
):
    temp = list(read_annotation(adata)) + list(read_annotation(pair_adata))
    # Fancy-index the annotation array with the whole integer index table.
    target = pd.DataFrame(np.array(temp)[df.values], columns=columns)
    target.to_csv(f"{args.output_dir}/{file_stem}.csv")
