# export OPENAI_API_KEY=key

from openai import OpenAI
import re
import time
import Levenshtein

import sys
import random
import textwrap

from utils.print_functions import print_list

def extract_estimate(completion):
    """Return the text enclosed by the first ``***...***`` pair in *completion*.

    Args:
        completion: Raw reply text. ``None`` or an empty string is treated
            as a reply without an estimate.

    Returns:
        The delimited text with surrounding whitespace removed, or ``""``
        when no ``***...***`` span is found. Because ``.`` does not match
        a newline, both markers must be on the same line.
    """
    # A missing reply would make re.search raise TypeError; report it the
    # same way as "no estimate found".
    if not completion:
        return ""

    match = re.search(r'\*\*\*(.*?)\*\*\*', completion)
    if match is None:
        return ""

    # Replies such as "*** ACGT ***" pad the sequence with blanks; strip them
    # so they do not count as edit errors downstream.
    return match.group(1).strip()

def transform_cpred_example(cpred_string):
    """Render a ``read|read|...:ground_truth`` record as a numbered example.

    The part before the colon is split on ``|`` and listed as ``1. ...``,
    ``2. ...``; the part after the colon follows under a
    ``Correct output:`` heading.

    Raises:
        ValueError: if *cpred_string* does not contain exactly one ``:``
            (the two-way unpacking below fails).
    """
    reads_part, ground_truth = cpred_string.split(':')

    numbered_reads = [
        f"{position}. {read}"
        for position, read in enumerate(reads_part.split('|'), start=1)
    ]

    return "\n".join(numbered_reads) + f"\nCorrect output:\n{ground_truth}"

def concatenate_examples(list_of_examples):
    """Join already-formatted examples under a singular or plural heading.

    The heading reads "Here is an example:" for exactly one example and
    "Here are some examples:" otherwise (including an empty list); the
    examples follow, separated by newlines.
    """
    heading = (
        '\nHere is an example:\n'
        if len(list_of_examples) == 1
        else '\nHere are some examples:\n'
    )
    return heading + "\n".join(list_of_examples)


class GPT4oMini:
    """Reconstruct a DNA sequence from noisy reads by prompting ``gpt-4o-mini``.

    Attributes:
        client: OpenAI client used for the chat-completion request.
        n_shots: Number of few-shot examples sampled into each prompt
            (``0`` disables the examples section).
        examples: Pool of ``read|read|...:ground_truth`` strings to sample
            from. It starts empty and must be filled by the caller before
            ``inference`` is used with ``n_shots > 0``.
    """

    def __init__(self, config):
        """Create the API client and read ``config['n_shots']``."""
        self.client = OpenAI()

        self.n_shots = config['n_shots']
        self.examples = []

    def _build_examples_prompt(self):
        """Return the few-shot section of the prompt ('' when n_shots is 0)."""
        if self.n_shots == 0:
            return ''

        # random.sample would raise ValueError here as well; fail with a
        # message that says what is actually wrong (pool not populated).
        if not 0 < self.n_shots <= len(self.examples):
            raise ValueError(
                f"n_shots={self.n_shots} but only {len(self.examples)} "
                "example(s) are available; populate `examples` first"
            )

        sampled_examples = random.sample(self.examples, self.n_shots)
        formatted_examples = [
            f"\nExample #{index}\n"
            + "Input DNA sequences:\n"
            + transform_cpred_example(example)
            for index, example in enumerate(sampled_examples, start=1)
        ]
        return concatenate_examples(formatted_examples) + "\n"

    def _build_prompt(self, noisy_reads, ground_truth_length):
        """Assemble the full user prompt for one cluster of noisy reads."""
        cluster_size = len(noisy_reads)
        noisy_reads_prompt = "\n".join(
            f"{index}. {read}" for index, read in enumerate(noisy_reads, start=1)
        )
        sampled_examples_prompt = self._build_examples_prompt()

        # The prompt text (including the trailing blanks before some line
        # breaks) is kept exactly as it was; only the layout of the literal
        # changed so that those blanks are visible in the source.
        return textwrap.dedent(
            "We consider a reconstruction problem of DNA sequences. "
            f"We want to reconstruct a DNA sequence consisting of {ground_truth_length} characters "
            f"(either A,C,T or G) from {cluster_size} noisy DNA sequences.\n"
            "These noisy DNA sequences were generated by introducing random errors "
            "(insertion, deletion, and substitution of single characters).\n"
            "The task is to provide an estimate of the ground truth DNA sequence. \n"
            f"{sampled_examples_prompt}\n"
            "Task:\n"
            "Reconstruct the DNA sequence from the following noisy input sequences. \n"
            "Input DNA sequences:\n"
            f"{noisy_reads_prompt}\n"
            f"Provide an estimate of the ground truth DNA sequence consisting of {ground_truth_length} characters "
            "in the format ***estimated DNA sequence*** - use three * on each side of the estimated DNA sequence.\n"
        )

    def inference(self, test_example):
        """Query the model for one test cluster and return its estimate.

        Args:
            test_example: ``read|read|...:ground_truth`` string. Only the
                length of the ground truth is placed in the prompt, not the
                sequence itself.

        Returns:
            dict with ``'candidate_sequence'`` (text extracted from the
            ``***...***`` markers of the reply, ``""`` if none) and
            ``'time_taken'`` (seconds spent in the API call).

        Raises:
            IndexError: if *test_example* contains no ``:``.
            ValueError: if ``n_shots`` is negative or larger than the
                number of available ``examples``.
        """
        fields = test_example.split(":")
        ground_truth_length = len(fields[1])
        noisy_reads = fields[0].split("|")

        prompt = self._build_prompt(noisy_reads, ground_truth_length)

        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        completion = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        )
        time_taken = time.perf_counter() - start_time

        # The reply content can be missing (None); treat that as an empty reply.
        output_string = completion.choices[0].message.content or ""
        candidate_sequence = extract_estimate(output_string)

        return {
            'candidate_sequence': candidate_sequence,
            'time_taken': time_taken
        }


if __name__ == "__main__":
    # Manual smoke test: reconstruct one cluster and print the Levenshtein
    # distance between the model's estimate and the ground truth.

    # True: go through the GPT4oMini wrapper (few-shot prompt).
    # False: send a single hand-written one-shot prompt directly.
    use_class = True

    # Test cluster: noisy reads separated by '|', then ':' and the ground truth.
    input_string = "AGGTTCCCTAGAAGTGATGATGGACAGCTGGAATCGCGGGCATATAATTTGTTGCCTTGGTTGCT|GGTCCACTAGAAGTGTCATGGAATGCTGTTCGGGGGCATTCATGTTGTGCTTGGTTGCACT|AGGTTCCTCGAAGGTGATATGGATGCTGTTGCGGGCATGCTACATGTTTGCACATTGAGTTGCA|AGGTCCCTAAGAAGTGTATATGGAGCTCGTTCTGCGGGATCCTAATTGGTTGTGTCCTTCAGGTTAGT|GGTCCCTAGAAGGATTGGATGCTGTTCGCGGGTATCTAATGTTGTGCCTTGGTGCAT|AGGTCGCCCAGAAGTGATATGGTCGCTGTGTCGCGGCATCTAATGTTTGTGACATCTTGATGCT|AGGTTCACCCTAGATAGTGATTGTAGTGATGCATGTTCGCGGGCATCTAATGTTGTGCCTTGGTTGCT|AGGTCCCTAGTAAGTGTATATGGCATGCGGTCGCGGGCTCTAATGTTGTGCCTTGAGTTGCT|AGCGTCCGCTAAGAAGGAATGGATGCTGATTCGCCGGGCATCTAGATGTTGTGCCTTCGGTTGCT|AGGGTCCCCCTACAGAAGTGATATGGATGACTCGCGGGCATCTAAATGGTTGTGGCCTTGTTTGCT:AGGTCCCTAGAAGTGATATGGATGCTGTTCGCGGGCATCTAATGTTGTGCCTTGGTTGCT"

    # Derive the prompt parameters from the test cluster instead of
    # hard-coding them (this cluster: 60 characters, 10 reads).
    noisy_part, ground_truth_sequence = input_string.split(":")
    ground_truth_length = len(ground_truth_sequence)
    # Used by the direct-call prompt below; it was previously undefined there.
    cluster_size = len(noisy_part.split("|"))
    n_shots = 5

    # Pool of few-shot examples, same format as input_string.
    examples = [
        'TGTTCGGGATGGGAGACACCAGAAACCTCGGAAGTAATTCCGCGCATCTGGCCTCCGGG|TGTTTGGGATGGGAGACACCAGGACAACCGCCGGAATAATTTCCGCGCATCTGGCTCCGGG|TGTTTTGGATGGCACACCACCAAGACAACCGCGCGGAGTAATTCCGCGCATCTGGCTGCGGA|TGTTGTGGGATGGGAGACACCTAGACAACCGCGCGGAGTAATCTCCGCGCATCTGGCTCCGGG|TGTTTGGGATTGGAGACCCCGACACCGCGCGGAGTAATTCCGCGCATCTGGCTCCGGG|TGTTTGGGATGGGAGACACCAGACAACCGCGGGGAGTAATTCGCGCAGCTGGCTCCGGG|TGATGTGGGATTGGAGTCACCAGACAACCGGCGGAGTAGGTCCGCCGTGTGGCTCCTG|TGTTTGGGATGGGAGACAACAACAACCGCGCGGAGTAACTCCGCGCATCTGGCCCCGGG|TGTTTGGGCTAGAGACACCAGACAACCGCGCGGAGTAATTCCGGGCATCTGGCTCCGGG|GGTTTGGGATGGGAGACACCAGACAACCGTGCGGAGAAATCCGCGCATCTAGCTCGGG:TGTTTGGGATGGGAGACACCAGACAACCGCGCGGAGTAATTCCGCGCATCTGGCTCCGGG',
        'GGTGATGCCCGCTCGCTTGGAATGCTATACGTACTCCGTGTGGTCGTGACGGGAAGGGATG|GGTGAACCCCGCTCGCTCGTAATGGCTAGACGCAATCCGTAGGTCGTGGCGAGAAGGGATG|GGTGAGCCAGCTGCGCTCCGGAATGGCTATCGCAATCCGTAGGTCGTGACGGGAGAGCGATG|GGTGAGCCCGCTCGGCTTGGAAGTGGCATATACGCATCCGTAGGTCGTGAGCGGGTAAGGGATG|GGTGAGCTCGCTCGAGCTTTTATTCGTTATACGCTATTCGTATGTCGTGTCGGGAAGGGATG|GGCTGAGCCCGCTCGCTTGGAATCGCTCTATACACATTCCGTAGGTCGTGAGCGGGAAGGGAG|GTGTTGCTCGCTCGCTTGGAATGGCTATACGCAATCCGTAGGTCGTGACGGCAAGGGATC|GGTGAGCCTGCTCGCTTGGGATGGCTATACGCAATCAGTATGTCGTTGACGGAATGGGATG|GGTGATCTCGCTCCACTGGAATGGCTATGCGCAAACCCGTAGCTCGTGATGGGAAGGGATG|GGTGAGCTCCGCTTCGTTTGCGAATCCGGCTATAGGCAATCCGTAGCTCGTTGCCGGGACAGGGCTC:GGTGAGCCCGCTCGCTTGGAATGGCTATACGCAATCCGTAGGTCGTGACGGGAAGGGATG',
        'CCACGACATGGTACAAGAAAGGGCCATGAAACCCTCACTCCTTCGGGCTTGCAAATTGC|CCCAGGAAAATGGCAAGAAGGCCATGAAACCCTCACTCTTCCGGGCGTCCGGCAATTGC|CTCCACGGAACATGGTGCAAGAAAGTGCCATGAAACCCTCACTCCTCCGGGCGTGGCAGAATTGC|CACCAGAACATGGGTCAAGAAAGGCCATGCAGAACCTCACTCCTCCGGCGTGGCATACATTGC|CCCACGAACATGGTCCAAAGCAGGCGATGAAACCACTCACTCCTCCGGGCGTGGCAAATTGC|CCACGAACATGGTCAAGAAGGCCATAAACCTCAACTCCTCCGGGCGGTGGCATAATCTGC|CCACGGACATGGTCGAGAAAGCCATGAAACCCTCACTCCTCCGGGCGTGGAAATGC|CCCCAGAACATGGGTCAAGAAAGGCCATGAACCCCACTCCTCCGGGCGTGTCAAATGC|CCCACGAACATGGTCAGAACAGGCCATGAAACCCTCGACTCTCCGGGCGTGGCAAATTC|GCCACATACATGCGTCAATGAAAGGCCTGAAACCTCACTCCTCCGGGCGTGGGCAAATTGA:CCCACGAACATGGTCAAGAAAGGCCATGAAACCCTCACTCCTCCGGGCGTGGCAAATTGC',
        'CGCGCTTGAGTCGAGCGCCTCAACTCCAAGTCTGCTCGATTGCTCGAATTACACTGCCTC|CGGCGTATGAGTCGATGGCCTCCACTCACAAGTCTCTCATATGCTGCCATTACACTGCCTC|TCGTTAGGTCGATGGCGCTCACTCACAAGTCTGGCTCATATGCTGAATGTAACACCCCTC|CGCGTTGAGTCGAGGCCTAATACAGTCTGCTCATTTTGCTGTAATGTAACACTGCCTC|CGCGTTGAGTTCGATGGCCCACTCACAAGTCTGCTCATTAAGCTGAATAGTAACACTGCCTC|CGCGTTGAGTCGATGGCTCCACTCACAAGTCTGCTCATATGCTGAATGTAACACTGCCTC|CGAGGTGAGTCGATGGCCTCAGCTCAAAATCGCTAACATATGCTGAATGAAAACACTGCCATAC|CGCGTTAGTCTGTATGGCCTCACTCACTAAGATCTGTCATATGCTGAACGTAACAGCTGCCTC|ACGCGTTGAGTCGGTAGGCCTCACTCACAAGATTGCTCATATGCTGGATGTAACACTGCCTC|CGCGTTGAGTCGATGGCCTACTCACAAGTCTGCTCATATGCTTAATGTAACCATGCCCT:CGCGTTGAGTCGATGGCCTCACTCACAAGTCTGCTCATATGCTGAATGTAACACTGCCTC',
        'AACTCGAAGGAGAGCGCCATCCATATGGCGGAGCGTTACGTGTCACTCAAGGTTCTT|AAATCGAACGCCGAGAGCTCCATAATAGGGCAGTAGCGGAAGTATCTGTCATCTTAAGGTGCTT|GACGTTGACCATGGATGAGCGCATATCCATCGGGTTTGACGGCGTATCTCCCATTTTAAGTCTT|ACCAGTCGAACTGACGAGCGCGAATCCATAGGGCCTGACGACGTAGTTGTCTATCTTAAGGTTCTT|AAGCCGAACCGGAGAGAGCCATCCCAGGGCCTGGCGCCGATCTGCTCATCGGGTTAAGGTGTCCTT|AGGTCGAACAGAGAGCTCCATCCTTGGGGCCTGACGTGTTTCTCGGTTCATCGTCTTAGGTCATT|ATAGTCGAAGGAGCGCGCCATCCACCAGGGCGTGCGCGTATCCTGTCATCTATAAGGTTCTT|AAGTCCGAAGGGCAGTGCCACCATAAGGGCGTGACGTGTATGTGTCATCTTAAGGTGGTT|ATGTTCGAACGGAGAGCGCATCCGATAGGGCCGTGACGCGTATCTGTCATCTTAAGCGTTATT|AAGATCGGACCGGAGAGCGCCGTTCCCCAGGGCTGTGACCGTAGGATTCTGTCGTGCTTAAGGTTCAT:AAGTCGAACGGAGAGCGCCATCCATAGGGCGTGACGCGTATCTGTCATCTTAAGGTTCTT',
        'AAGCTTGAGTGATGGCCAACGGGTTAAGGCCTATATCAAAACTAGAGGCTAATAAGTC|ACTAGCTGCGGTGATGGCCAACGGCTTAAGGCCTATTCAAACCCTAGGAGCTCATAACTC|AAAGCTGCAGTGATGGCCAACGGCTTAAGGCCTATATCCAACTAGGAGCTCAATAACTC|AAAGGCGCGAGGATGGCCAACGGCGTAAGGCCTATATGAAACTAGGGCTCAATATCTC|AAAGCTTGGGGGGCCAAGGCTTAAGGCCCATATCAAACGTAGGAGCTCAATAACGC|AAAGTTGAGTGATGGCTAACGGCTTATGGCTATATCAAACCAGAGCTCAATAACTA|AAAGCCTGAGTCAGGCCAACGGCTTAAGGCCTAGATCAAACCCTAGGAGCTCAATTAATC|AAAGCTGAGTGGGCCAACGGCTTAAGGCCTATATCAAACCTATGGAGCTCATAACCC|AAAGCGTGAGTGGTCGCCAACGGCTTAAGGCATTCAAACCTAGGAGCTCAATAACTC|AAAGCTTGAGGTGGCCAACGGCTTAAGGCCTTTATCAAACCTAAGGAGCTCAATAACTC:AAAGCTTGAGTGATGGCCAACGGCTTAAGGCCTATATCAAACCTAGGAGCTCAATAACTC',
        'ACAATCGTGGGCATGAGTCCTCAGTATCCTCAGGAATAGGACAAGTTCCACGTGAACTATGGCG|ACATCGGGCAGCGCTCTTTCATACCGTAGAATATTGGCAAGTTTGCCTGTGAACTATGCG|ACATCTGTGGGCAGAGGTCCTCAGACCCTCAGGAATTGGACAAGTTTCCACGTGAAACTAGGCG|CATCGTGGCAGAGTCCTCAGCACCCTCAGGAATTGGACAAGTTTCCAGTGATTCTATGCG|ACTCAGTGGCAGGGTCCTAGACCTCAGGAGATTGGCACAACGATTCCACGTGAACTATGC|ACATCGTGGCAGAATCCTCAGACCCTCAGGAAAGTTGGACAAGTTTCCAGGTGAATCTATGCG|ACATGTGAGCAGAGTCCTCGAACCCTCGGGCCGATTGGAATAAGTTTCCAAAGTGACTATGGG|ACATCGTGGCCTAGGAGTACCTCATGACCCTCATGATTGGACAAGATTCCACGTGAATGTAGG|ACATACCGTGGCAAGTCGTCTTTCATACCCTCAGGGATTGGACATGTGTCACACCGTGATACTGTGCG|ACAGCGTGGCAGAGTCCTCAGCCCCTCAGGAATTGGACAGGTTCCACGGCCAACTATGCG:ACATCGTGGCAGAGTCCTCAGACCCTCAGGAATTGGACAAGTTTCCACGTGAACTATGCG',
        'ATGAACTACCCTCAATCATCTCAACTGCGGAAGTGGAGCCCAGCCCGACACGGGAAAG|ATGTAACATTACCCAATCTCTAGTCACTGCGGAAGTGGAGCCCAGCCCTGATACGGGAACG|ACGTAACTAACCCCAATCGTCGTCAACTGCGGAGTGAGCGCCAGCCACGTACACGCGGAACG|ATGTAACTAACCCAATCATCTTCAACTCGGAAGTGGAGCCCACCCGATGCGGGAAACG|TTGTAACTAACCACAATCAGTCTTCACCTGCGGAGCAGGGAGCCCAGCCCGACACGGGAAACG|ATGTAACTGAAGCCAATCATCTCAACTGCGGAAGTGGAGGCCCACCGACACGGGAAACAG|ATGTAACTAACACAACCATCTCAACTGGGAAGTGGAGCCCAGCCCGACACGGCACAG|ATGTTAACAAACCCAATCATCTTCAACTGGCGGAAGTTGGAGCCCAGTCCAGACACGGGAAACG|ATGTAACCTCCCCAATCGAACTTCAACTGCGGAAGTGGAGCCCAGGCCCCGAACGGGCGAAACG|ATGTACACTACACCCAATCATCTCAACATGCTGAAGTGGATGCCCAGTCCCGAGACGGGAAACG:ATGTAACTAACCCAATCATCTTCAACTGCGGAAGTGGAGCCCAGCCCGACACGGGAAACG',
        'ATGCAACGAATGCTGGCCGGATACATCAAACGATTTCAAGTTATATCCCGTTT|GCCCGACGAATGATGTCCGGTCAGCTACACGTCGTCAAGTATACCGTTAT|ATGCCCGATAATATATGGCGGACTCCACTCTACACGTCGTCAAGTTATATCCCGTTAG|TGCCCCGACGATATGCCGGCGGATACACTCTCACGATCGTCAAGTATATCCGTTAA|ATGCCCGACGCTTCTGGCCGGATACACTCAACAATCGTCACCGTTTATCCGATAA|ATGCCCGACGAATGCTGGCCGGATACACTTACACGATGTCAATGATATCCGAGTG|CGCCCACATATTTGCCGGATACACTTAACATAGTCAAGTAATCGCGTTAT|ATGCCCGAGATATGTGGCCGGCTAGACTTACACGATCTCAGTTAATCCCGTTAT|ATGCCCACGAGTATGCTGCCGGATCCTCACAAATCGTCAAGTTATATCCCGATAT|GCCAGACATAAGCTGGCCGATAAACTGTCACAAACGTCAGTTATCCCGGT:ATGCCCGACGATATGCTGGCCGGATACACTCTACACGATCGTCAAGTTATATCCCGTTAT',
        'CTACCAGGTCGAGGTAATGTGTTCGCATATCCTGACACAGGGCTGTCATGGTTGAACAA|CTACTAGGTCGAGGGAATCAGTTCGCCTGATCGTAACAGATGGGCCGTCATGGTTGAACAT|CTACTAGCCGAGGTAATATGTTCGCTTGATGCCTAACACAGAGCCGTATGGTCGAAAA|CTACTGGGTCGAGGCACTAAGTTCGCTTGAACCTAAGCACAGGGCCGTCAAGGTTGCACAA|CTACTAGGTCGAGGTAATAAGTTCGCTTGATCCTAACACAGGGCCGGCATGGTAGAACAA|CAACTAGGTCGGGACATAATTTCGCTTGATCCTAACACAGGGCCGTCATGGTTGACA|CAACTAGGTCGGTAATAAGTTCGCTGGATCCTAACACAGGGCCGTCATGACTGAACAA|ATACTAGTCGAGGAATAAGATTGTTGATCCCAACACAGGGCTCCGTGATGGTTAAACTTCA|CTACTAGGTCGAGGTAATAAGTTCGCTTGATCCTAACCCAGGGCCGACATGCGTTGAACAA|CTACTACAGGGGGGAAATAAGTTCGCTTGATCCTACAAAGGGGCGTCTGGTTGGACAT:CTACTAGGTCGAGGTAATAAGTTCGCTTGATCCTAACACAGGGCCGTCATGGTTGAACAA',
        'CACTTCGACCGTGTTACGCCGAGGGTTCGTTTCATAAAAGTAGCAAGCGTGATTATCTCATAGC|CACATATCGAACCGGTTACGCCGAGGGTTCGTGAAATAATAGTAGGAGCGTGTTAATTCAAACTGC|CACTTTCGACCGAGTTACGCCGAGGGTTCATTCAATAATTGTAGGAGCGTGTTAATTCACCTGC|CACCTTCGACCGGTTCGTCCGAGGGTATGGATCATTAATAGTAGGAGCGTGTTGATCCACTGC|CGACTTCGACCGGTTACGCCTGCAGCGTTCGTACCTAGTAGTAGGAAGCGTCTTAATTCACTGC|CAAATTCGATCGGTTACGCCGAGGGTCCGTTCTTAATAGTTAGGAAGCGTGTTGATTCACTGC|CACTTCGACCGGTTACTCACGAGGGTTGGTTCTTAATAAGAGGAGCGTGTTACATTCACTGCC|CGACCTTCGACTGGTTACGACCTGAGGGTTCGTTCACATTTAGTAGGAGCGGGTTAATTCACTGC|CACTTCGACCGGTTACGCTCAGAGGGTTCGTTCATAATCCGGTAGGAGCTTCGTTTAAGATCTCATGC|CACTCCGACCTGGTTACGCGAGGGGTCAGTTCATACATAGTAGGAGCCTGTTAATTCACTGGC:CACTTCGACCGGTTACGCCGAGGGTTCGTTCATAATAGTAGGAGCGTGTTAATTCACTGC',
        'AAACCCTTACGGGTCGAATACATCTTATCCGAGCGCCTCAAGGAGTAGCGATTCCTAC|AAACCCATAGGGTCCAAAAATATTTACCGTGCACTCCGAAAGGGAGTATCGTTGATA|AAACACTTGGGGTCGAAAAAATACTATCCGTGTACCCCAGAGGTGTAGTGTCTCATAC|AACCCTGAGGGTCGAATACTGTTTGATCCGTGCACCTCCATGAGGGTGTCGCGGTTCATGC|AAACCTTAGGGCTCGAATACATATTTACCGTGCACCTCCAGAGGAGTAGCGTTTCAA|GAATCCTTAGGGTCGACCACATATTATCCGAGCACCTCCAGAGGAGTAGGTTTCATGAC|TCACCCTTGGCGGTCGAAGCCAATTTATACGTGCAGCTGCAGAGGTCACCGTTTCATAC|ACACCCTTAGGGTCGAAAACATATTTACCGTGCACCTCCAGAGGTGTATCGTTTCATAC|AAACCCCTAGGGTCGAATACATATTTATCCGTGCACCTCCAGAGGAGTTCTTTTATAC|AAACCCGTCGGGTCGAATCCTATTTATCCGTGCACGACCAGAGGAGAATCGTTTCCGAC:AAACCCTTAGGGTCGAATACATATTTATCCGTGCACCTCCAGAGGAGTAGCGTTTCATAC',
        'GTACTTGGCGTGAATACTGCTACAGGGTCGCAGCCCCTGCTTCGTGCGCTCATTGCCATAGGAGCATATG|GTTACTGGCTGTCTTGTAAGGTACCGCAGACCCTGCTTTCGCCTTACACTATTAGGCACATG|GTAGCTTTTGCTGACTGCTAACTCGCAGCCCCGTTCTTGTCTCTCATACATAGGGAGAATG|CTATGGCTGACTGCTAAGGTCGCAGCCCTAAGCTTCCGGGCCTCATGAATCTGGTCTGAATG|TTAACTGGCTGACTAGGCTAAGGTCGCAGCCCCGCTTCGTCCTCTAACCATGGCAAGAAG|GGTACGGGCTGCACAGCTAAGGTGTCAGCCCACGTGCTTCGTGCCTAAACCTCTAGTCAGAAGTG|ATACTGGTGACTGCAAGGTCCAGGCAGCCCCCTGCTTCGTGCCTCATGATCCATAGGCATAATG|GACTGGCGGCTGCTAAGGTCGCAGGCCCCTGCTTCGTGTCCGCATACACATAGGCATGAAAGC|GTACGGCTCTGCCAGAGTCGCCAGCCCGCTGCTTCGTGCCTCATAGAATAGCAGAATG|GTAGGGTGCTGCCAAGGTCGCCGCCCCTGCTTCGTCCCCTAGTACCATAGGCAGAAT:GTACTGGCTGACTGCTAAGGTCGCAGCCCCTGCTTCGTGCCTCATACCATAGGCAGAATG',
        'GATATAGATGTTGCTCGAGAGAATACTGCACAAAGTGTACAGAAGAGATGCTGTAGGAG|GAGAAAGGATTGTACTGAGTGGTACTGTACAAGAGTAAGAAGAGATGCAAAGGTAG|ATAAAGGATCGTTAGCTCGAGTGGATACTGACAAAGAGTCAGAAGAGACGCTAATGGTAG|GAGTAAGATTGTGTGCTCGAGTGAATACCTGTACAAGAGCAAAGAGATGCTAACGGTAG|GTGAAAGGTTGTGCTCGAATTGGAATCATGTGACAAAAATTTCAGAAGAGATGGAAGGTACG|GATACGGATTGTGCTCGAGTGGATACTGGTATAGAGAAGAGAGTAATGCTAAGGTAG|ATATAGGACTGTTCCTCGAAGTGGATACTGTACAAAAATCAGAAGCGAGTAAGGTAG|GATCAGGATTGTACTCGAGTGCTACTGTACAAAGCGTCAGAGGTGCCATAGGTACG|GATAAAGGGACGTTGCCCGAGTGATACTGTCAAAGCGTAAAAGAGATGCTAGGTG|GGATCAAAGGATTGCTTGCTCGAGTGTGATACTGTACAATGATCAGAAGAGATCTAATAG:GATAAAGGATTGTTGCTCGAGTGGATACTGTACAAAGAGTCAGAAGAGATGCTAAGGTAG',
        'TGCTCGCCTCTTTGTTCCTCTCTGTGCAGCTCAACTTTTTAACAACGCTCTATAT|TGCCGCACTCTTGTTCGCTTTAGGGACGGCTCACCTTTTGGAACATAACGCGTCTAATAT|TGCTGCGCCTCTTGTTCTCTTTACGGACGTCTCAACTTTTGTAACATACGCGTGCATAT|TTGCTCGCCTCTTGTCCTTCTTTTAAGACGTCTCAACTCGTGAACATACGCGTGCTATAT|TCTCGCCTCTTGTCCTCTTTACGGCACTCAAACTTTTGGAACAACGCGTGTTATTT|GCCGCCTATGATCCTCTTGAACGACGTCACAATTTGGAAGCCATACGCGTGCTATAT|TCTTGGCACTCTGCTCTCTTTACGTGCCATTCACTTTTGGGTACATAGCCGTGCTATGA|TGCACGCCTCTTGTTCCTTTTTCGGACGGCGCAACTTTTGAAACATCTCTGGCCTTAT|TGCCTCTGTTGGTCCCTAGTTTACGACATTCAAGCTGTTGGAACATACGCGTGATATAT|TGCTCGCCCTCATTGTTCCCCTTTCAGGCGTCTCACCTTTATTGGACTATAACGCGTGGCTATAT:TGCTCGCCTCTTGTTCCTCTTTACGGACGTCTCAACTTTTGGAACATACGCGTGCTATAT'
    ]

    if use_class:

        inference_params = {
            'n_shots': n_shots,
        }
        gpt4o_mini = GPT4oMini(inference_params)
        gpt4o_mini.examples = examples
        return_dict = gpt4o_mini.inference(input_string)

        candidate_sequence = return_dict['candidate_sequence']

        print('gt:')
        print(ground_truth_sequence)

        print('candidate:')
        print(candidate_sequence)

        levenshtein_distance = Levenshtein.distance(ground_truth_sequence, candidate_sequence)
        print(f"Levenshtein distance: {levenshtein_distance}")

    else:
        # Direct API call with a hand-written one-shot prompt. The prompt
        # text is kept verbatim, including its leading indentation.
        client = OpenAI()
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {
                    "role": "user",
                    "content": f"""We consider a trace reconstruction of DNA sequences. We want to reconstruct a DNA sequence of length {ground_truth_length} from {cluster_size} noisy DNA sequences.
                                The noisy DNA sequences are obtained by introducing insertion, deletion, and substitution errors in the ground truth sequence. The error probabilities fall within the following ranges:
                                - Insertion error probability: [0.01,0.1]
                                - Deletion error probability: [0.01,0.1]
                                - Substitution error probability: [0.01,0.1]

                                The reconstructed sequence should be as similar as possible to the ground truth sequence.

                                Here is an example: 
                                Input DNA sequences: 
                                1. AGGTTCCCTAGAAGTGATGATGGACAGCTGGAATCGCGGGCATATAATTTGTTGCCTTGGTTGCT
                                2. GGTCCACTAGAAGTGTCATGGAATGCTGTTCGGGGGCATTCATGTTGTGCTTGGTTGCACT
                                3. AGGTTCCTCGAAGGTGATATGGATGCTGTTGCGGGCATGCTACATGTTTGCACATTGAGTTGCA
                                4. AGGTCCCTAAGAAGTGTATATGGAGCTCGTTCTGCGGGATCCTAATTGGTTGTGTCCTTCAGGTTAGT
                                5. GGTCCCTAGAAGGATTGGATGCTGTTCGCGGGTATCTAATGTTGTGCCTTGGTGCAT
                                6. AGGTCGCCCAGAAGTGATATGGTCGCTGTGTCGCGGCATCTAATGTTTGTGACATCTTGATGCT
                                7. AGGTTCACCCTAGATAGTGATTGTAGTGATGCATGTTCGCGGGCATCTAATGTTGTGCCTTGGTTGCT
                                8. AGGTCCCTAGTAAGTGTATATGGCATGCGGTCGCGGGCTCTAATGTTGTGCCTTGAGTTGCT
                                9. AGCGTCCGCTAAGAAGGAATGGATGCTGATTCGCCGGGCATCTAGATGTTGTGCCTTCGGTTGCT
                                10 .AGGGTCCCCCTACAGAAGTGATATGGATGACTCGCGGGCATCTAAATGGTTGTGGCCTTGTTTGCT
                                Correct output:
                                AGGTCCCTAGAAGTGATATGGATGCTGTTCGCGGGCATCTAATGTTGTGCCTTGGTTGCT
                                
                                Task:
                                Reconstruct the DNA sequence from the following noisy input sequences. 
                                Input DNA sequences:
                                1. AGGTTCCCTAGAAGTGATGATGGACAGCTGGAATCGCGGGCATATAATTTGTTGCCTTGGTTGCT
                                2. GGTCCACTAGAAGTGTCATGGAATGCTGTTCGGGGGCATTCATGTTGTGCTTGGTTGCACT
                                3. AGGTTCCTCGAAGGTGATATGGATGCTGTTGCGGGCATGCTACATGTTTGCACATTGAGTTGCA
                                4. AGGTCCCTAAGAAGTGTATATGGAGCTCGTTCTGCGGGATCCTAATTGGTTGTGTCCTTCAGGTTAGT
                                5. GGTCCCTAGAAGGATTGGATGCTGTTCGCGGGTATCTAATGTTGTGCCTTGGTGCAT
                                6. AGGTCGCCCAGAAGTGATATGGTCGCTGTGTCGCGGCATCTAATGTTTGTGACATCTTGATGCT
                                7. AGGTTCACCCTAGATAGTGATTGTAGTGATGCATGTTCGCGGGCATCTAATGTTGTGCCTTGGTTGCT
                                8. AGGTCCCTAGTAAGTGTATATGGCATGCGGTCGCGGGCTCTAATGTTGTGCCTTGAGTTGCT
                                9. AGCGTCCGCTAAGAAGGAATGGATGCTGATTCGCCGGGCATCTAGATGTTGTGCCTTCGGTTGCT
                                10 .AGGGTCCCCCTACAGAAGTGATATGGATGACTCGCGGGCATCTAAATGGTTGTGGCCTTGTTTGCT
                                Provide an estimate of the true DNA sequence of length {ground_truth_length} in the format ***estimated DNA sequence*** - use three * on each side of the estimated DNA sequence."""
                }
            ]
        )

        # The reply content can be missing (None); treat that as an empty reply.
        output_string = completion.choices[0].message.content or ""
        print(output_string)

        estimate = extract_estimate(output_string)

        # extract_estimate returns "" (not None) when no ***...*** span is
        # found, so test for emptiness.
        if not estimate:
            raise Exception("Could not extract estimate")

        print(Levenshtein.distance(estimate, ground_truth_sequence))