import pandas as pd
import numpy as np
import scipy.stats
import scipy.special
import h5py
import multiprocessing as mp
import argparse
import pysam
import sequence_tools 
import nb_model
from genic_driver_tools import *
import time

def genicDetect(args):
    """Run the single-process genic detection workflow and save its output.

    Reads ``tabix_file``, ``f_gp``, ``f_model``, ``cancer``, ``est_str``,
    ``mapp`` and ``fout`` from ``args``. The object returned by ``add_model``
    is written to ``args.fout`` through its ``to_hdf`` method with key
    ``'df'`` and ``mode='w'``.

    The genic annotation file and the reference FASTA are hard-coded paths.
    Nothing is returned.
    """
    genic_annot_path = '/scratch1/priebeo/neurIPS/supressor_det/genic_rdata.h5'
    ref_fasta_path = '/scratch1/maxas/ICGC_Roadmap/reference_genome/hg19.fasta'

    mutation_counts = get_all_mut_df(args.tabix_file, genic_annot_path)

    # get_gp_results_df yields the per-window table together with a ``window``
    # value that is only forwarded to the helpers below.
    windows_df, win = get_gp_results_df(args.f_gp, args.cancer)
    background_windows = non_genic_windows(genic_annot_path, windows_df.index, win)

    seq_probs = get_S_prob(args.f_model, background_windows, args.cancer)

    result_df = add_model(
        mutation_counts,
        args.est_str,
        seq_probs,
        args.f_model,
        genic_annot_path,
        ref_fasta_path,
        args.tabix_file,
        windows_df,
        args.cancer,
        args.mapp,
        win,
    )
    result_df.to_hdf(args.fout, key='df', mode='w')

    
def genicDetectParallel(args):
    """Run the genic detection workflow via ``add_model_parallel`` and save it.

    Uses the same inputs as the serial variant (``tabix_file``, ``f_gp``,
    ``f_model``, ``cancer``, ``est_str``, ``mapp``, ``fout`` on ``args``) plus
    ``args.N_procs``, which is forwarded to ``add_model_parallel``. The result
    is written to ``args.fout`` through ``to_hdf`` with key ``'df'`` and
    ``mode='w'``. Nothing is returned.
    """
    genic_annot_path = '/scratch1/priebeo/neurIPS/supressor_det/genic_rdata.h5'
    ref_fasta_path = '/scratch1/maxas/ICGC_Roadmap/reference_genome/hg19.fasta'

    mutation_counts = get_all_mut_df(args.tabix_file, genic_annot_path)

    windows_df, win = get_gp_results_df(args.f_gp, args.cancer)
    background_windows = non_genic_windows(genic_annot_path, windows_df.index, win)
    seq_probs = get_S_prob(args.f_model, background_windows, args.cancer)

    # Note the positional order here: N_procs comes before the window value.
    result_df = add_model_parallel(
        mutation_counts,
        args.est_str,
        seq_probs,
        args.f_model,
        genic_annot_path,
        ref_fasta_path,
        args.tabix_file,
        windows_df,
        args.cancer,
        args.mapp,
        args.N_procs,
        win,
    )
    result_df.to_hdf(args.fout, key='df', mode='w')
    
def noncodingDetect(args):
    """Run the single-process noncoding element workflow and save its output.

    Reads ``tabix_file``, ``f_gp``, ``f_model``, ``cancer``, ``est_str``,
    ``mapp`` and ``fout`` from ``args``. Elements come from the hard-coded
    annotation file under key ``'pcawg_elements'``. The object returned by
    ``nonc_model`` is written to ``args.fout`` through ``to_hdf`` with key
    ``'df'`` and ``mode='w'``. Nothing is returned.
    """
    elements_path = '/scratch2/priebeo/pcawg_annotations/all_elements.h5'
    ref_fasta_path = '/scratch1/maxas/ICGC_Roadmap/reference_genome/hg19.fasta'
    elements_key = 'pcawg_elements'

    mutation_counts = nonc_mut_df(args.tabix_file, elements_path, elements_key)

    windows_df, win = get_gp_results_df(args.f_gp, args.cancer)
    complement_windows = nonc_complement_windows(
        elements_path, windows_df.index, elements_key, win
    )

    # Only the second item of the returned pair is used; the literal 1 is
    # passed through unchanged as the third positional argument.
    _ignored, seq_probs = nonc_train_sequence_model(
        complement_windows, args.f_model, 1, args.cancer
    )

    result_df = nonc_model(
        mutation_counts,
        args.est_str,
        seq_probs,
        args.f_model,
        elements_path,
        ref_fasta_path,
        args.tabix_file,
        windows_df,
        args.cancer,
        args.mapp,
        win,
    )
    result_df.to_hdf(args.fout, key='df', mode='w')

def noncodingDetectParallel(args):
    """Run the noncoding element workflow via ``nonc_model_parallel``.

    Uses the same inputs as the serial variant (``tabix_file``, ``f_gp``,
    ``f_model``, ``cancer``, ``est_str``, ``mapp``, ``fout`` on ``args``) plus
    ``args.N_procs``, which is forwarded to ``nonc_model_parallel``. The
    result is written to ``args.fout`` through ``to_hdf`` with key ``'df'``
    and ``mode='w'``. Nothing is returned.
    """
    elements_path = '/scratch2/priebeo/pcawg_annotations/all_elements.h5'
    ref_fasta_path = '/scratch1/maxas/ICGC_Roadmap/reference_genome/hg19.fasta'
    elements_key = 'pcawg_elements'

    mutation_counts = nonc_mut_df(args.tabix_file, elements_path, elements_key)

    windows_df, win = get_gp_results_df(args.f_gp, args.cancer)
    complement_windows = nonc_complement_windows(
        elements_path, windows_df.index, elements_key, win
    )

    # Only the second item of the returned pair is used.
    _ignored, seq_probs = nonc_train_sequence_model(
        complement_windows, args.f_model, 1, args.cancer
    )

    # NOTE(review): here the window value precedes N_procs, the reverse of the
    # add_model_parallel call in genicDetectParallel -- confirm against the
    # nonc_model_parallel signature.
    result_df = nonc_model_parallel(
        mutation_counts,
        args.est_str,
        seq_probs,
        args.f_model,
        elements_path,
        ref_fasta_path,
        args.tabix_file,
        windows_df,
        args.cancer,
        args.mapp,
        win,
        args.N_procs,
    )
    result_df.to_hdf(args.fout, key='df', mode='w')

def _add_detect_subcommand(subparsers, name, help_text, handler, parallel=False):
    """Register one detection subcommand on ``subparsers`` and return its parser.

    Every subcommand shares the same four positionals and the ``-c``, ``-m``
    and ``-u`` options; ``parallel=True`` additionally registers the required
    ``-N`` process count. ``handler`` is stored as the ``func`` default so the
    caller can dispatch with ``args.func(args)``.
    """
    sub = subparsers.add_parser(name, help=help_text)
    sub.add_argument('tabix_file', type=str, help='path to tabix file')
    sub.add_argument('f_gp', type=str, help='base path to gp results file')
    sub.add_argument('f_model', type=str, help='path to sequence model file')
    sub.add_argument('fout', type=str, help='output h5 file name')
    sub.add_argument('-c', type=str, required=True, action='store',
                     dest='cancer', help='cancer label')
    sub.add_argument('-m', type=float, required=True, action='store',
                     dest='mapp', help='mappability')
    if parallel:
        sub.add_argument('-N', type=int, required=True, action='store',
                         dest='N_procs', help='number of processes to use')
    sub.add_argument('-u', '--submap_path', type=str, required=False,
                     action='store', dest='est_str',
                     help='base path to gp estimates of submappability windows')
    sub.set_defaults(func=handler)
    return sub


def parse_args(text = None):
    """Build the command-line parser and parse the arguments.

    Four subcommands are available: ``genicDetect``, ``genicDetectParallel``,
    ``noncDetect`` and ``noncDetectParallel``. A subcommand is mandatory;
    omitting it makes argparse report a usage error instead of returning a
    namespace that lacks ``func``.

    Args:
        text: Optional command line as a single string, split on whitespace.
            When ``None`` or empty, ``sys.argv`` is parsed instead.

    Returns:
        argparse.Namespace with the parsed values, ``func`` set to the handler
        of the chosen subcommand and ``command`` set to the subcommand name.

    Raises:
        SystemExit: raised by argparse on invalid or missing arguments.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command')
    # Python 3 argparse treats subcommands as optional by default; without
    # this, running with no subcommand produced a namespace without ``func``
    # and the dispatch in __main__ failed with AttributeError. Setting the
    # attribute (rather than the ``required=`` keyword) also works on
    # interpreters that predate that keyword.
    subparsers.required = True

    subcommands = (
        ('genicDetect', 'run genic detection algorithm',
         genicDetect, False),
        ('genicDetectParallel', 'run genic detection algorithm',
         genicDetectParallel, True),
        ('noncDetect', 'run noncoding driver element detection algorithm',
         noncodingDetect, False),
        ('noncDetectParallel', 'run noncoding driver element detection algorithm',
         noncodingDetectParallel, True),
    )
    for name, help_text, handler, parallel in subcommands:
        _add_detect_subcommand(subparsers, name, help_text, handler, parallel)

    if text:
        return parser.parse_args(text.split())
    return parser.parse_args()

if __name__ == "__main__":
    # Parse the command line, then hand the namespace to the handler that the
    # chosen subcommand registered through set_defaults(func=...).
    args = parse_args()
    handler = args.func
    handler(args)
