import argparse
import pathlib
import time
from typing import List

import numpy as np

from aligners import Foldseek


# ---------------------------------------------------------------------------
# Command-line interface
#
# The small converters below are used as argparse ``type=`` callables. They
# replace constructs that argparse cannot handle reliably:
#   * ``choices=np.arange(...)`` compares floats exactly against a computed
#     grid (3 * 0.1 == 0.30000000000000004, so ``-c 0.3`` was rejected) and
#     prints the entire grid in usage/error messages;
#   * ``type=bool`` turns every non-empty string -- including "False" -- into
#     True;
#   * ``type=List[str]`` calls ``list()`` on the token and yields characters.
# ---------------------------------------------------------------------------
def _float_in_range(lower: float, upper: float):
    """Return an argparse ``type`` callable accepting floats in [lower, upper].

    The returned callable raises ``argparse.ArgumentTypeError`` for text that
    is not a number or for values outside the closed interval (NaN included),
    which argparse reports as a regular usage error.
    """
    def _convert(text: str) -> float:
        try:
            value = float(text)
        except ValueError:
            raise argparse.ArgumentTypeError(
                f"{text!r} is not a valid number") from None
        # Written as a negated chained comparison so that NaN is rejected too.
        if not lower <= value <= upper:
            raise argparse.ArgumentTypeError(
                f"{value} is outside the allowed range [{lower}, {upper}]")
        return value
    return _convert


def _comma_separated(text: str) -> List[str]:
    """Split ``"a,b,c"`` into ``['a', 'b', 'c']``, dropping empty items."""
    fields = [item.strip() for item in text.split(',') if item.strip()]
    if not fields:
        raise argparse.ArgumentTypeError(
            "expected at least one comma-separated field name")
    return fields


def _str_to_bool(text: str) -> bool:
    """Parse a textual boolean (true/false, yes/no, on/off, 1/0)."""
    lowered = text.strip().lower()
    if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if lowered in ('0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError(f"{text!r} is not a valid boolean")


parser = argparse.ArgumentParser()

# --- Input / output locations ----------------------------------------------
# These were declared required *and* given defaults, which made the defaults
# unreachable. They are optional now so the defaults actually apply; argparse
# runs string defaults through ``type``, so both end up as pathlib.Path.
parser.add_argument('-i', '--inputpath',
                    help='Structure directory (default: %(default)s).',
                    metavar='I',
                    type=pathlib.Path,
                    required=False,
                    default="data/deepfri_ec/structures")
parser.add_argument('-o', '--outputpath',
                    help='Path to output folder (default: %(default)s).',
                    metavar='O',
                    type=pathlib.Path,
                    required=False,
                    default="data/deepfri_ec/foldseek")

# --- Alignment thresholds and modes ----------------------------------------
parser.add_argument('--tm_thresh',
                    type=_float_in_range(0, 1),
                    required=False,
                    default=0,
                    help="accept alignments with a tmscore > thr.")
parser.add_argument('--tmalign_hit_order',
                    type=int,
                    required=False,
                    default=0,
                    choices=[0, 1, 2, 3, 4],
                    help="Order hits by 0: (qTM+tTM)/2, 1: qTM, 2: tTM, 3: min(qTM,tTM) 4: max(qTM,tTM).")
parser.add_argument('--tmalign_fast',
                    type=int,
                    required=False,
                    default=1,
                    choices=[0, 1],
                    help="Turn on fast search in TM-align: 1: True.")
parser.add_argument('--lddt_threshold',
                    type=_float_in_range(0, 1),
                    required=False,
                    default=0,
                    help="accept alignments with a lddt > thr.")
parser.add_argument('--alignment_type',
                    type=int,
                    required=False,
                    default=1,
                    choices=[0, 1, 2],
                    help="0: 3Di Gotoh-Smith-Waterman (local, not recommended), "
                         "1: TMalign (global, slow), "
                         "2: 3Di+AA Gotoh-Smith-Waterman (local, Foldseek's own default). "
                         "Default here: %(default)s.")
parser.add_argument('-s',
                    type=_float_in_range(0, 10),
                    required=False,
                    default=9.5,
                    help="Adjust sensitivity to speed trade-off; lower is faster, higher more sensitive (fast: 7.5, default: 9.5)")
parser.add_argument('-c',
                    type=_float_in_range(0, 1),
                    required=False,
                    default=0.8,
                    help="List matches above this fraction of aligned (covered) residues "
                         "(see --cov_mode) (default: %(default)s); "
                         "higher coverage = more global alignment")
parser.add_argument('--cov_mode',
                    type=int,
                    required=False,
                    default=1,
                    choices=[0, 1, 2, 3, 4, 5],
                    help="0: coverage of query and target, 1: coverage of target, 2: coverage of query")
parser.add_argument('--alignment_mode',
                    type=int,
                    required=False,
                    default=3,
                    choices=[0, 1, 2, 3],
                    help="0:automatic; 3: Outputs alignment start, end, and seq_id score.")
parser.add_argument('--alignment_output_mode',
                    type=int,
                    required=False,
                    default=0,
                    # Only 0 is usable (see help), so reject anything else at
                    # parse time with a normal usage error.
                    choices=[0],
                    help="Has to be set to 0 = automatic. MMseqs does not work with any other value.")
parser.add_argument('--alignment_outputs',
                    # One comma-separated token, e.g. "query,target,evalue".
                    type=_comma_separated,
                    required=False,
                    default=['query', 'target', 'evalue', 'fident', 'nident', 'qstart', 'qend', 'qlen',
                             'tstart', 'tend', 'tlen',
                             'alnlen', 'cigar', 'qseq', 'qaln', 'taln', 'tseq', 'qcov', 'tcov',
                             'alntmscore', 'qtmscore', 'ttmscore', 'lddt', 'lddtfull', 'rmsd',
                             ],
                    help="Comma-separated metrics outputted by the alignment module.")
parser.add_argument('-e',
                    type=float,
                    default=1.000E-03,
                    help="List matches below this E-value. The lower, the more stringent the comparisons.")
parser.add_argument('--min_seq_id',
                    type=_float_in_range(0, 1),
                    required=False,
                    help="Minimum sequence identity in [0, 1]; left unset (None) when omitted.")
parser.add_argument('--seq_id_mode',
                    type=int,
                    default=0,
                    choices=[0, 1, 2],
                    help="Which sequence the seq_id score is normalized with - 0: alignment, 1: shorter seq; 2: longer seq.")
parser.add_argument('--cluster_mode',
                    type=int,
                    required=False,
                    default=1,
                    help="0: greedy, 1: connected components, 2-3: CDHIT.")

# --- Pipeline step selection -----------------------------------------------
parser.add_argument('--use_easy',
                    # Accepts a bare ``--use_easy`` as well as an explicit
                    # ``--use_easy true`` / ``--use_easy false``.
                    type=_str_to_bool,
                    nargs='?',
                    const=True,
                    default=False,
                    help="Whether to use the easy search module.")
parser.add_argument('--create_db',
                    action='store_true',
                    help="Whether to create a structure database.")
parser.add_argument('--search',
                    action='store_true',
                    help="Whether to align the structures.")
parser.add_argument('--cluster',
                    action='store_true',
                    help="Whether to cluster the sequence database.")

# --- Runtime settings --------------------------------------------------------
parser.add_argument('--threads',
                    type=int,
                    required=False,
                    default=64,
                    help="Number of CPU-cores used.")
parser.add_argument('-v',
                    type=int,
                    required=False,
                    default=3,
                    choices=[0, 1, 2, 3],
                    help="Verbosity level: 0: quiet, 1: +errors, 2: +warnings, 3: +info.")


def main(args):
    """Run the requested Foldseek steps and print the elapsed time.

    Args:
        args: Parsed command-line namespace. This function reads
            ``alignment_output_mode``, ``use_easy``, ``create_db``,
            ``search`` and ``cluster``; the whole namespace is also handed
            to ``Foldseek``.

    Raises:
        ValueError: If ``args.alignment_output_mode`` is not 0.
    """
    # Explicit check instead of ``assert`` so the validation still runs when
    # Python is started with -O (which strips assert statements).
    if args.alignment_output_mode != 0:
        raise ValueError(
            'Step mmseqs align does not work with alignment_output_mode set to any other value than 0.')

    foldseek = Foldseek(args)

    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # system clock adjustments. Timing starts after the wrapper is built.
    start = time.perf_counter()
    if args.use_easy:
        # Single-call path; the step flags below are not consulted.
        foldseek.do_easy_search()
    else:
        foldseek.make_output_dirs()
        # Each step is opt-in and runs in this fixed order.
        if args.create_db:
            foldseek.create_db()
        if args.search:
            foldseek.search()
        if args.cluster:
            foldseek.cluster()
    print(f'total time: {time.perf_counter() - start} s.')


if __name__ == "__main__":
    # Script entry point: parse the command line, echo the resolved
    # configuration so it is visible in the run output, then run the pipeline.
    cli_args = parser.parse_args()
    print(cli_args)
    main(cli_args)










