import numpy as np

import os
import sys

from typing import List

import time
import os
import pytz
from datetime import datetime  


def extract_elements(pairs, indicator):
    """
    Collects the second item of every pair whose first item equals the indicator.

    Args:
    pairs (iterable): Indexable entries; entry[0] is compared, entry[1] is collected.
    indicator: The value entry[0] must equal for the entry to be selected.

    Returns:
    list: The entry[1] values of all matching entries, in their original order.
    """

    selected = []
    for entry in pairs:
        if entry[0] == indicator:
            selected.append(entry[1])
    return selected

def create_folder(folder_path):
    """
    Creates a folder (including missing parent folders) if it does not exist yet.

    Args:
    folder_path (str): The path of the folder to create.

    Returns:
    -

    Raises:
    FileExistsError: If folder_path already exists but is not a directory.
    """

    # exist_ok=True makes the call idempotent and avoids the race between a
    # separate existence check and the creation (another process could create
    # the folder in between, which made the check-then-create variant fail).
    os.makedirs(folder_path, exist_ok=True)

def convert_to_fasta(sequences: List[str], name: str) -> List[str]:

    """
    Builds the lines of a .fasta file from a list of sequences.

    Every sequence is preceded by a header line of the form
    '>{name}_seq_{k}', where k counts the sequences starting at 1.

    Args:
    sequences (list): The list of sequences.
    name (str): The prefix used in every header line.

    Returns:
    list: Alternating header lines and sequences, in input order.

    EXAMPLE:
    INPUT:  ['ATT', 'ACC', 'AGG'], 'obs'
    OUTPUT: ['>obs_seq_1', 'ATT', '>obs_seq_2', 'ACC', '>obs_seq_3', 'AGG']
    """

    records = []
    for number, seq in enumerate(sequences, start=1):
        records.extend((f'>{name}_seq_{number}', seq))

    return records

def write_fasta(sequences_fasta, filepath):

    """
    Writes .fasta lines to a file, one list entry per line.

    The file is opened in write mode, so existing content is replaced.

    Args:
    sequences_fasta (list): The lines (headers and sequences) to write.
    filepath (str): The path to the file to write the lines to.

    Returns:
    -
    """

    with open(filepath, "w") as handle:
        # Each entry is terminated with a newline of its own.
        handle.writelines(entry + "\n" for entry in sequences_fasta)

    
def read_fasta(filepath):

    """
    Reads a .fasta file and returns its sequences as a list of strings.

    Lines starting with '>' are treated as record separators and are not
    returned. All other lines are stripped of surrounding whitespace and
    concatenated until the next '>' line (or the end of the file). Records
    whose concatenated sequence is empty are skipped.

    Args:
    filepath (str): The path to the .fasta file.

    Returns:
    list: The sequences in the .fasta file, in file order.

    EXAMPLE:
    FILE:   '>s1', 'ATT', 'GG', '>s2', 'ACC'   (one entry per line)
    OUTPUT: ['ATTGG', 'ACC']
    """

    records = []
    parts = []  # stripped lines of the record currently being read

    with open(filepath, "r") as handle:
        for raw_line in handle:
            if not raw_line.startswith('>'):
                parts.append(raw_line.strip())
                continue

            # A header closes the record collected so far.
            finished = "".join(parts)
            if finished:
                records.append(finished)
            parts = []

    # The last record has no header after it, so it is closed here.
    tail = "".join(parts)
    if tail:
        records.append(tail)

    return records


def create_fasta_file(sequences, name, file_path):
    """
    Creates a .fasta file from a list of sequences.

    The sequences are first turned into .fasta lines with convert_to_fasta
    (which uses name as the header prefix) and then written to file_path
    with write_fasta.

    Args:
    sequences (list): The sequences to store in the file.
    name (str): The prefix passed to convert_to_fasta for the header lines.
    file_path (str): The path including the filename of the .fasta file to write.

    Returns:
    -
    """
    write_fasta(
        sequences_fasta=convert_to_fasta(sequences=sequences, name=name),
        filepath=file_path,
    )


def remove_dash_tokens(lst: List[str]) -> List[str]:

    """
    Strips every '-' character out of each string in a list.

    Args:
    lst (List[str]): The strings to clean.

    Returns:
    List[str]: A new list with the same strings, each without any '-' characters.
    """

    cleaned = []
    for text in lst:
        cleaned.append(text.replace('-', ''))
    return cleaned

def filter_string(s: str) -> str:

    """
    Keeps only the characters 'A', 'C', 'T', and 'G' of a string.

    The check is case-sensitive; all other characters are dropped and the
    order of the remaining characters is preserved.

    Args:
    s (str): The string to filter.

    Returns:
    str: The filtered string.
    """

    allowed = 'ACTG'
    kept = [symbol for symbol in s if symbol in allowed]
    return ''.join(kept)

def append_fasta_seqs(sequences: List[str], index: int, data_path: str) -> None:

    """
    Appends sequences in .fasta format to the end of a file.

    The header lines are generated by convert_to_fasta with the name
    'index_{index}'. A newline is written before the new lines.

    Args:
    sequences (list): The list of sequences to append to the .fasta file.
    index (int): The index used in the header name of the appended sequences.
    data_path (str): The path to the .fasta file to append the sequences to.

    Returns:
    -
    """

    fasta_lines = convert_to_fasta(sequences = sequences, name = f'index_{index}')
    with open(data_path, 'a') as handle:
        # A newline goes first, then one line per header / sequence.
        handle.write('\n')
        handle.writelines(entry + '\n' for entry in fasta_lines)


def get_now_str():
    """
    Returns the current time in the 'Europe/Berlin' timezone as a string.

    Returns:
    str: The timestamp formatted as '%Y%m%d_%H%M%S' (e.g. '20240131_154500').
    """
    berlin_now = datetime.now(pytz.timezone('Europe/Berlin'))
    return berlin_now.strftime('%Y%m%d_%H%M%S')

def get_repo_path(script_dir, n):
    """
    Returns the directory that lies n levels above script_dir.

    Args:
    script_dir (str): The path to start from.
    n (int): How many times os.path.dirname is applied to the path.

    Returns:
    str: The resulting path; script_dir itself if n is 0 or negative.
    """
    ancestor = script_dir
    levels_left = range(n)
    for _step in levels_left:
        ancestor = os.path.dirname(ancestor)
    return ancestor
        

if __name__ == "__main__":
    # Runs only when this file is executed directly as a script (not on
    # import); it just prints a short notice.
    
    print("Running helper_functions.py")


    