from copy import deepcopy
from utilities import extract_variable_name
from utilities import locate_dimension_in_connector


class prompter() :
    # Person-neutral tail of the introduction: the five pieces of information
    # supplied for the machine learning task. It is prefixed with "I " or
    # "The user " just below to obtain the two grammatical variants.
    common_basic_prompt = (
        "will provide the following information for the machine learning task:\n"
        "1. A description of the input data. The input data contains the features that are "
        "the characteristics or attributes of the dataset. These features provide "
        "information that the model uses to make predictions.\n"
        "2. A description of the output data, also known as 'labels' or 'targets', "
        "represents the desired prediction or outcome associated with each input example.\n"
        "3. The task objective, which describes the goal of the machine learning task.\n"
        "4. The evaluation metrics, which is a numerical measure of how well the model's "
        "predictions match the desired outcome.\n"
        "5. A description of the available files.\n"
    )

    common_basic_prompt_first_person = f"I {common_basic_prompt}"
    common_basic_prompt_third_person = f"The user {common_basic_prompt}"

    # The name is deliberately rebound: from here on ``common_basic_prompt`` is
    # the full opening message (assistant framing + third-person variant).
    common_basic_prompt = (
        "The user is trying to create a Python program for a supervised machine learning task.\n"
        "Your goal is to assist the user in creating the program.\n"
        + common_basic_prompt_third_person
    )

    # Coarse task family; interpolated into the analysis and connector prompts
    # as the allowed values for the first classification.
    basic_task_types_1 = [
        "traditional algorithms",
        "natural language processing",
        "computer vision",
    ]

    # Learning-problem type; allowed values for the second classification.
    basic_task_types_2 = [
        "binary classification",
        "multi-class classification",
        "multi-label classification",
        "multi-output regression",
        "single-output regression",
        "sequence-to-sequence",
        "sequence generation",
        "image generation",
        "other",
    ]

    # Allowed formats of the ground-truth output (third classification).
    output_types = [
        "probability representation of class labels",
        "integer representation of class labels",
        "regression output",
    ]

    # Example specific task types quoted for computer-vision tasks.
    example_task_types_CV = [
        "image classification",
        "object detection",
        "image segmentation",
        "facial recognition",
        "optical character recognition",
        "edge detection",
        "motion analysis and object tracking",
        "depth estimation",
        "image generation",
        "pose estimation",
        "image super-resolution",
    ]

    # Example specific task types quoted for natural-language-processing tasks.
    example_task_types_NLP = [
        "text generation",
        "text classification",
        "machine translation",
        "question answering",
        "sentiment analysis",
        "topic modeling",
        "text summarization",
        "named entity recognition",
    ]

    # Sample classification result for a deep learning task.
    sample_task_types_DL = [
        "computer vision",
        "multiclass classification",
        "probabilistic output",
        "image classification",
    ]

    # Example data-format dictionary shown to the model for an image
    # classification task: one input tensor and one output tensor. Every name
    # listed in "shape" has a sibling entry describing that dimension. Key order
    # matters because the dictionary is rendered into the prompt via str().
    sample_input_and_output_format_DL_1 = {
        "input": [
            {
                "name": "images",
                "shape": ["batch_size", "channel", "height", "width"],
                "batch_size": {
                    "meaning": "the size of the batch",
                    "fixed_or_variable": "variable",
                },
                "channel": {
                    "meaning": "the number of channels of the image",
                    "fixed_or_variable": "fixed",
                },
                "height": {
                    "meaning": "the height of the image",
                    "fixed_or_variable": "variable",
                },
                "width": {
                    "meaning": "the width of the image",
                    "fixed_or_variable": "variable",
                },
            },
        ],
        "output": [
            {
                "name": "predictions",
                "shape": ["batch_size"],
                "batch_size": {
                    "meaning": "the size of the batch",
                    "fixed_or_variable": "variable",
                },
            },
        ],
    }

    # Example data-format dictionary shown to the model for an image generation
    # task: two input tensors and two output tensors. Every name listed in
    # "shape" has a sibling entry describing that dimension. Key order matters
    # because the dictionary is rendered into the prompt via str().
    sample_input_and_output_format_DL_2 = {
        "input": [
            {
                "name": "latent_space",
                "shape": ["batch_size", "latent_dim"],
                "batch_size": {
                    "meaning": "the size of the batch",
                    "fixed_or_variable": "variable",
                },
                "latent_dim": {
                    "meaning": "the dimensionality of the latent space",
                    "fixed_or_variable": "fixed",
                },
            },
            {
                "name": "input_images",
                "shape": ["batch_size", "channel", "height", "width"],
                "batch_size": {
                    "meaning": "the size of the batch",
                    "fixed_or_variable": "variable",
                },
                "channel": {
                    "meaning": "the number of channels of the input image",
                    "fixed_or_variable": "fixed",
                },
                "height": {
                    "meaning": "the height of the input image",
                    "fixed_or_variable": "variable",
                },
                "width": {
                    "meaning": "the width of the input image",
                    "fixed_or_variable": "variable",
                },
            },
        ],
        "output": [
            {
                "name": "generated_images",
                "shape": ["batch_size", "channel", "height", "width"],
                "batch_size": {
                    "meaning": "the size of the batch",
                    "fixed_or_variable": "variable",
                },
                "channel": {
                    "meaning": "the number of channels of the generated image",
                    "fixed_or_variable": "fixed",
                },
                "height": {
                    "meaning": "the height of the generated image",
                    "fixed_or_variable": "variable",
                },
                "width": {
                    "meaning": "the width of the generated image",
                    "fixed_or_variable": "variable",
                },
            },
            {
                "name": "predictions",
                "shape": ["batch_size", "1"],
                "batch_size": {
                    "meaning": "the size of the batch",
                    "fixed_or_variable": "variable",
                },
                "1": {
                    "meaning": "the output prediction (real or fake)",
                    "fixed_or_variable": "fixed",
                },
            },
        ],
    }

    # Sample three-element answer for a task solved with traditional algorithms;
    # quoted as the example in the connector-plan prompt.
    sample_task_types_tabular = [
        "traditional algorithms",
        "binary classification",
        "direct output",
    ]

    def __init__(self , plan_configurations=None , generated_connector_plan=None) :
        self.prompt_message = None
        self.task_description = []
        self.task_description_converted = []
        self.plan_configurations = plan_configurations
        self.connector_plan = ""
        self.generated_connector_plan = generated_connector_plan

    def prompt_formatting_GPT(self, prompt):
        """Convert a list of prompt strings into system-role message dicts.

        The list is rewritten in place: each string is replaced by
        ``{"role": "system", "content": ...}``. Every entry except the first
        gets a leading newline prepended to its content.

        Args:
            prompt: Mutable sequence of strings.

        Returns:
            The same ``prompt`` object, now holding message dictionaries.
        """
        for position, text in enumerate(prompt):
            # Only entries after the first are separated by a newline.
            content = text if position == 0 else "\n" + text
            prompt[position] = {"role": "system", "content": content}
        return prompt

    def task_description_conversion(self , task_description) :
        file_list = task_description[4]
        file_list_keys = list(file_list.keys())
        file_description = ""
        for idx , n in enumerate(file_list_keys) :
            file_description += f"({idx + 1}). File location: {n}. A description about the file is {file_list[n]}"

        self.task_description_prompt = \
            f"The description of the input data is: {task_description[0]}\nThe description of the output data is: " \
            f"{task_description[1]}\nThe objective of the machine learning task is: {task_description[2]}\nThe " \
            f"evaluation metrics is: {task_description[3]}\nThe following files are available: {file_description}"

        self.task_description_prompt_formatted = self.prompt_formatting_GPT([self.task_description_prompt])

    def reset(self) :
        self.prompt_message = self.prompt_formatting_GPT([prompter.common_basic_prompt])
        self.feed_backs_prompt = ""
        self.feed_backs_prompt_formatted = []

    def connector_plans_analysis(self):
        """Build the prompt asking the model to analyse the machine learning task.

        The prompt asks for a five-part answer: task family, problem type,
        ground-truth output format, specific task type, and (for deep learning
        tasks) the tensor format of the input and output data.

        Reads ``self.task_description_prompt``, so
        ``task_description_conversion`` must have been called first.

        Side effects:
            Replaces ``self.prompt_message`` with the formatted message list.
        """
        prompt = [
            "Your goal is to analyze a machine learning task by classifying the machine learning task and "
            "determining a suitable format of input and output data in "
            "the machine learning task. Both the input and output data refer to the data after all data processing "
            "steps. The model takes the input data and the model generates the predicted output data.",
            prompter.common_basic_prompt_first_person,
            self.task_description_prompt,
        ]

        cv_examples = prompter.list_to_and_form(prompter.example_task_types_CV)
        nlp_examples = prompter.list_to_and_form(prompter.example_task_types_NLP)

        # NOTE: adjacent literals are concatenated verbatim by Python, so every
        # piece that is followed by more text must end with its own space.
        answer_parts = [
            "Your answer must consist of the following parts:\n",
            f"Part 1: a classification of the machine learning task among one of {prompter.basic_task_types_1}. The "
            "classification should be based on which types of machine learning models are suitable for task. "
            "Traditional algorithms refer to ML tasks that are processed with traditional machine learning models ("
            "instead of deep learning models). Natural language processing refers to deep learning tasks involving "
            "text processing. Computer vision refers to deep learning tasks involving image "
            "processing\n",
            f"Part 2: a classification of the machine learning task among one of {prompter.basic_task_types_2}.\n",
            "Part 3: a classification of the format of the ground truth output of the machine learning task among "
            f"one of {prompter.output_types}.\n",
            "Part 4: a classification of the specific type of machine learning task. If the machine learning task is "
            "about 'computer vision', then examples include "
            f"{cv_examples}. If the machine learning task is about "
            "'natural language processing', then examples include "
            f"{nlp_examples}."
            " If the task is about 'traditional algorithms', then you do not need to choose and simply say 'N/A' for "
            "Part 4.\n",
            "Part 5: determine a suitable format of input and output data in the machine learning task. If the "
            "machine learning task is about traditional algorithms (in part 1), your answer for part 5 is simply 'N/A', your "
            "answer ends here and you should not answer Part 5-1, 5-2, 5-3, 5-4, or 5-5. If the "
            "machine learning task is about deep learning (in part 1), your answer for part 5 should consider the "
            "following fact: Both the input and output data refer to the data after all data processing steps. The "
            "model takes the input data and the model generates the predicted output data. Both the input and output "
            "data are torch.Tensor objects. The user uses a torch.utils.data.dataloader.DataLoader object to manage "
            "the input and output data.\n",
            "And, if the machine learning task is about deep learning (in part 1), your answer for part 5 should be "
            "in the following five parts:\n",
            "Part 5-1: determine a suitable number of types of tensors in input data. If the machine learning task "
            "is a natural language processing task (in part 1), the suitable number of types of tensors in input "
            "data is always 2 or more than 2. Besides input_ids and other types of tensors, the types of tensors "
            "must always include a type of tensor about attention_mask for each type of sequence. If the task is a computer vision "
            "task (in part 1), you should ignore attention masks. Part 5-2: "
            "determine a suitable number of types of tensors in output data. If the task is a multi-output regression or "
            "classification task, combine all outputs in one type of tensor and the suitable number of types "
            "of tensor is 1. Part 5-3: For each type of tensor, "
            "determine a suitable shape for the type of tensor. Name each dimension. If there is more than one type "
            "of tensor in input or output data, you are forbidden to treat all input tensors as one type of tensor "
            "or all output tensors as one type of tensor. Part 5-4: For each dimension of each type of tensor, "
            "explain the meaning of the dimension. Part 5-5: For each dimension of each type of tensor, "
            "determine whether the size of the dimension changes over the settings and parameters of the data "
            "preparation process.",
        ]
        prompt.append("".join(answer_parts))

        prompt.append(
            "You should give an exact answer for each part without ifs, or, at least, at most, more than, less than, "
            "or situational dependence. Let's think step by step:"
        )

        self.prompt_message = self.prompt_formatting_GPT(prompt)

    def connector_plans_prompt(self , analysis) :

        prompt = [
            f"There is an analysis of a machine learning task. Your goal is to convert the analysis into a Python " \
            f"list of five elements for deep learning task or a Python list of three elements for task " \
            f"about traditional algorithms."]

        prompt += ["Below is the analysis:"]
        prompt += [analysis]
        explanation_string = \
            f"The first element of the list is one of {prompter.basic_task_types_1}.\nThe second element of the list " \
            f"is one of {prompter.basic_task_types_2}.\nThe first and second element of the list are both strings " \
            f"representing the types of the machine learning task.\nThe third element of the list is one of " \
            f"{prompter.output_types}, which is a string representing the format of the ground truth output in the " \
            f"machine learning task.\nThe fourth element of the list is a string that represents the specific type of " \
            f"the machine learning task. The fourth element is only applicable for deep learning tasks and not " \
            f"applicable for task about traditional algorithms.\n"

        explanation_string += \
            f"The fifth element of the list is a dictionary about the data format in the machine learning task. " \
            f"The fifth element is only applicable for deep learning tasks and not applicable for task about traditional algorithms. The " \
            f"dictionary has two keys 'input' and 'output', each corresponding to the tensors in input data and " \
            f"output data. Each value of the dictionary is a list. Each list contains one or multiple dictionaries, " \
            f"with each dictionary representing a type of tensor. The number of dictionaries in the list " \
            f"corresponding to the key 'input' must always be equal to the number of types of tensors suggested in Part 5-1 " \
            f"of the analysis. The number of dictionaries in the list corresponding to the key 'output' must always be equal " \
            f"to the number of types of tensors suggested in Part 5-2 of the analysis. And the output tensors refer to" \
            f"the ground truth output tensors as described in Part 3."
        prompt += [explanation_string]

        prompt += [
            f"Below is an " \
            f"example Python dictionary for an image " \
            f"classification task:\n{prompter.sample_input_and_output_format_DL_1}\nBelow is an example Python dictionary " \
            f"for task about traditional algorithms:\n{prompter.sample_task_types_tabular}" \
            f"\nBelow is an example Python " \
            f"dictionary for an image generation task:\n{prompter.sample_input_and_output_format_DL_2}\n" \
            f"The fixed_or_variable key for each dimensions represents whether the size of the dimension" \
            f"changes over the settings and parameters of the data preparation process. And you must choose it " \
            f"carefully for each dimension by considering the analysis and the provided information" \
            f"for the machine learning task from the user."]

        prompt += ["Here is the Python list:"]

        self.prompt_message = self.prompt_formatting_GPT(prompt)

    def modeling_plans_prompt(self) :
        format_modeling_plans_single_model_entry = "{name_of_model: explanation_of_the_suitability_of_model}"

        format_modeling_plans_multi_models_entry = "{'name_of_model_1_in_the_combination" \
                                                   "+name_of_model_2_in_the_combination+...': {'reason:'," \
                                                   "'explanation_of_the_suitability_of_the_combination'}," \
                                                   "{'name_of_model_1_in_the_combination': " \
                                                   "'role_of_model_1_in_the_combination', " \
                                                   "'name_of_model_2_in_the_combination': " \
                                                   "'role_of_model_2_in_the_combination', ...}} "

        prompt = [self.task_description_prompt]
        prompt += [self.basic_task_types_1_and_2_prompt]
        prompt += [self.output_types_prompt]

        if self.connector_plan[0] == "traditional algorithms":

            prompt += [
                "Your task is to suggest some models for the machine learning task. The models must be from the "
                "following libraries: Scikit-learn, XGBoost, CatBoost, and LightGBM. "]

            prompt += [
                "Your answer should be a python list and only a python list. Each entry of the list is a dictionary "
                "that describes a choice of a single scikit-learn  model. "
                f"The dictionary should be in the format of {format_modeling_plans_single_model_entry}. "
                f"You should suggest {self.plan_configurations['maximum_models']} choices and the "
                f"python list should have {self.plan_configurations['maximum_models']} keys."]

        if self.connector_plan[0] == "natural language processing" or self.connector_plan[0] == "computer vision" :
            prompt += [self.specific_task_types_prompt]
            prompt += [self.data_format_prompt]

            prompt += [
                "Your task is to suggest some pretrained models for the machine learning task. Some machine learning "
                "tasks only need one model, some tasks need multiple models (e.g., one for backbone and one for head, "
                "one for backbone and one for the framework , one for encoder and one for decoder, or one for generator"
                " and one for discriminator), and some tasks can be solved by either one model or multiple models. "]

            prompt += [
                "Your answer should be a python list and only a python list. Each entry of the list is a dictionary "
                "that describes a choice of a single model or a combination of models. "
                f"For a choice of a single model, the dictionary of a single key with the format of {format_modeling_plans_single_model_entry}. "
                f"For a choice of a combination of models, the dictionary should be a dictionary of multiple keys with the format of {format_modeling_plans_multi_models_entry}. "
                f"Examples of role_of_model_N_in_the_combination include 'backbone', 'head', 'encoder', 'decoder', 'generator', 'discriminator', and 'framework'. "
                f"The list can consist of purely choices of single model, purely choices of combinations of models, "
                f"or choices of both single model and combinations of models. "
                f"You should suggest {self.plan_configurations['maximum_models']} choices and the "
                f"python list should have {self.plan_configurations['maximum_models']} keys. "]

        prompt += ["Here is the Python list:"]

        self.prompt_message += self.prompt_formatting_GPT(prompt)


    def load_generated_data_preparation_plans(self , generated_plans) :
        self.generated_data_preparation_plans = f"The user already has a list of data preparation approaches: {generated_plans}. " \
                                                f"For any approach (dictionary) in the list, at least one step of your suggestion (" \
                                                f"one value of the dictionary in your answer) must be different from the approach (dictionary)."

    def data_preparation_plans_prompt(self) :

        prompt=[self.task_description_prompt]
        prompt += [self.basic_task_types_1_and_2_prompt]
        prompt += [self.output_types_prompt]

        if self.connector_plan[0] == "traditional algorithms" :

            prompt +=["Your task is to suggest some data preparation steps for the machine learning task."]

            prompt +=[
                "First, suggest a method to load the data. Then, suggest a method for data cleaning. Then, suggest a "
                "method for encoding. Then, suggest a method for data preprocessing. Then, suggest a method for "
                "feature engineering. For each step, the method can be from the following libraries: Scikit-learn, "
                "Pandas, NumPy, or SciPy"]

            data_preparation_steps_tabular = ["data loading" , "data cleaning" , "encoding" , "data preprocessing" ,
                                              "feature engineering"]
            sample_data_preparation_plans_tabular = {
                "data loading" : "pandas.read_csv" ,
                "data cleaning" : "pandas.DataFrame.dropna" ,
                "encoding" : "none" ,
                "data preprocessing" : "sklearn.preprocessing.StandardScaler" ,
                "feature engineering" : "sklearn.decomposition.KernelPCA" ,
                }

            prompt +=[f"Your answer should be a python dictionary. The keys in the dictionary are {data_preparation_steps_tabular}. "
                      f"The value of each key is a string that describes the specific method for the key. "
                      f"The value of a key can be 'none' if your suggestion is to skip the step. "
                      f"An example dictionary can be : {sample_data_preparation_plans_tabular}."
                      f"{self.generated_data_preparation_plans}"]

        if self.connector_plan[0] == "natural language processing" or self.connector_plan[0] == "computer vision" :

            prompt += [self.specific_task_types_prompt]
            prompt += [self.data_format_prompt]

            prompt += ["Your task is to suggest some data preparation steps for the machine learning task."]

            data_preparation_steps_NLP = ["data loading" , "text cleaning" , "data augmentation" ,
                                          "task-specific feature engineering"]
            data_preparation_steps_CV = ["resizing" , "data augmentation" , "normalization"]

            sample_data_preparation_plans_NLP = {
                "data loading" : "load the raw data using Pandas" ,
                "text cleaning" : "remove unnecessary characters, such as HTML tags, URLs and special characters" ,
                "data augmentation" : "use back translation to generate more data" ,
                "task-specific feature engineering" : "none" ,
                }
            sample_data_preparation_plans_CV = {
                "resizing" : "torchvision.transforms.Resize" ,
                "data augmentation" : "torchvision.transforms.RandomHorizontalFlip" ,
                "normalization" : "none" ,
                }

            if self.connector_plan[0] == "computer vision" :

                prompt +=["First, suggest a method for resizing. Then, suggest a method for data augmentation. Then, suggest a method for normalization. "
                            "For each step, the method can be from the following libraries: torch, torchvision, OpenCV, Pillow, NumPy, Pandas, Scipy, Scikit-image, Scikit-learn, or Albumentations."]

                prompt +=[f"Your answer should be a python dictionary. The keys in the dictionary are {data_preparation_steps_CV}. "
                            f"The value of each key is a string that describes the specific method for the key. "
                            f"The value of a key can be 'none' if your suggestion is to skip the step. "
                            f"An example dictionary can be : {sample_data_preparation_plans_CV}."
                            f"{self.generated_data_preparation_plans}"]

            if self.connector_plan[0] == "natural language processing" :

                prompt +=["First, suggest a method to load the data. Then, suggest a method for text cleaning. Then, "
                          "suggest a method for data augmentation. Then, suggest a method for task-specific feature "
                          "engineering. For each step, the method can be from the following libraries: transformers, "
                          "nltk, sentencepiece, PyTorch, Pandas, Numpy, scikit-learn, or SciPy."]

                prompt += ["Your answer should not include anything about tokenization, handling special tokens,"
                            "padding and truncation, attention masks, lower casing/lower case/case conversion, segment IDs, or handling"
                            "out-of-vocabulary words."]

                prompt += [f"Your answer should be a python dictionary. The keys in the dictionary are {data_preparation_steps_NLP}. "
                            f"The value of each key is a string that describes the specific method for the key. "
                            f"The value of a key can be 'none' if your suggestion is to skip the step. "
                            f"An example dictionary can be : {sample_data_preparation_plans_NLP}."
                            f"{self.generated_data_preparation_plans}"]

        prompt += ["Here is the Python dictionary:"]

        self.prompt_message += self.prompt_formatting_GPT(prompt)

    def connector_isomorphic_dimensions_prompt(self, connector_choice):
        """Append the messages asking which variable dimensions must share a size.

        Selects ``self.generated_connector_plan[connector_choice]["connector"]``
        as the active connector plan, lists every variable-size dimension of
        its tensors (the first dimension of each tensor is skipped), and asks
        the model to group dimensions that should have equal sizes.

        Args:
            connector_choice: Key or index into ``self.generated_connector_plan``.

        Side effects:
            Sets ``self.connector_plan``, calls
            ``self.connector_plans_answers_prompt(fixed_variable_info=True)``
            and extends ``self.prompt_message``.
        """
        self.connector_plan = self.generated_connector_plan[connector_choice]["connector"]
        # NOTE(review): presumably this populates the *_prompt attributes read
        # below (and possibly the "variable_name" entries) -- confirm against
        # connector_plans_answers_prompt.
        self.connector_plans_answers_prompt(fixed_variable_info=True)

        data_format = self.connector_plan[4]

        prompt = [
            self.task_description_prompt,
            self.basic_task_types_1_and_2_prompt,
            self.output_types_prompt,
            self.specific_task_types_prompt,
            "In this machine learning task, the user uses the following format of input and output data:"
            f"\n{data_format}\n\nHere is an explanation of the format:",
            self.data_format_prompt,
        ]

        # Map (input/output, 1-based tensor position, 1-based dimension
        # position) -> variable name, for every variable-size dimension.
        variable_dimensions = {}
        for I_O, tensors in data_format.items():
            for tensor_position, tensor in enumerate(tensors, start=1):
                # The first dimension is skipped, so the remaining ones are
                # numbered from 2.
                for dimension_position, dimension in enumerate(tensor["shape"][1:], start=2):
                    if tensor[dimension]["fixed_or_variable"] == "variable":
                        variable_dimensions[I_O, tensor_position, dimension_position] = tensor[dimension][
                            "variable_name"]

        variable_dimensions_string = "".join(
            f"The {prompter.int_to_ordinal(dimension_position)} dimension of the "
            f"{prompter.int_to_ordinal(tensor_position)} tensor in the {I_O} data : {variable_name}. "
            for (I_O, tensor_position, dimension_position), variable_name in variable_dimensions.items()
        )

        prompt.append(
            "Your goal is to identify any groups of dimensions that have variable sizes and should have equal size "
            "over the settings and parameters of data preparation process, among the following dimensions: "
            f"{variable_dimensions_string}"
        )

        prompt.append(
            "Your answer should be in the format of a Python list. Each element of the Python list is a dictionary "
            "that represents a group of dimensions that should have equal sizes. The key of each dictionary is a "
            "common name (as the new name for each dimensions in the group, in singular form, without spaces) for the group of "
            "dimensions. The value of each dictionary is a list of strings. Each string is the name of a dimension in "
            "the group. The dimensions in each list can be from the same type of tensor or different types of tensor. "
            "Each list has two or more elements. If there is no group of dimensions that have variable sizes and "
            "should have equal sizes, your answer is simply [{}]. If there is only one such group of dimensions, "
            "the Python list in your answer only has 1 dictionary.  If there are multiple such groups of dimension, "
            "the Python list in your answer has multiple dictionaries. Your answer should not simply put all "
            "dimensions in one group. You must analyze whether there are some dimensions that should have equal size."
        )

        prompt.append("Here is the Python list:")

        self.prompt_message += self.prompt_formatting_GPT(prompt)

    def connector_hyperparameter_extraction(self, connector_choice):
        """Append the messages asking for suitable sizes of every tensor dimension.

        Selects ``self.generated_connector_plan[connector_choice]["connector"]``
        as the active connector plan and asks the model for a value range
        (variable dimensions) or a single value (fixed dimensions) per
        dimension. When the output format is an integer representation of
        class labels, the model is also asked for ``num_classes``.

        Args:
            connector_choice: Key or index into ``self.generated_connector_plan``.

        Side effects:
            Sets ``self.connector_plan``, calls
            ``self.connector_plans_answers_prompt(fixed_variable_info=True)``
            and extends ``self.prompt_message``.
        """
        self.connector_plan = self.generated_connector_plan[connector_choice]["connector"]
        # NOTE(review): presumably this populates the *_prompt attributes read
        # below -- confirm against connector_plans_answers_prompt.
        self.connector_plans_answers_prompt(fixed_variable_info=True)

        prompt = [
            self.task_description_prompt,
            self.basic_task_types_1_and_2_prompt,
            self.output_types_prompt,
            self.specific_task_types_prompt,
        ]

        # marker: the data format prompt is not included here; delete for good?
        # marker: test this code for tasks with integer representation of class labels

        prompt.append(
            "Your task is to decide the suitable values of the size of each dimension of each tensor in the input and output data for hyperparameter optimization.\n"
            "In the hyperparameter optimization, for the dimensions with variable sizes, the values of the sizes are the decision variables while the "
            "objective is the prediction performance of the machine learning program, you should determine the minimum and maximum suitable values for the sizes of the dimensions."
            "\nFor dimensions with fixed sizes, the value for the size of the dimension remains a constant during the hyperparameter optimization, you should decide a single suitable "
            "value for the size of the dimension."
        )

        # The answer-format description is produced by a helper on this class.
        prompt.append("Your answer must be in the following format:\n" + self.hyperparameter_explanation_base_prompt())

        if self.connector_plan[2] == "integer representation of class labels":
            # Adjacent literals are joined verbatim, so each piece carries its
            # own trailing space.
            prompt.append(
                "Since the task is a classification task, the dictionary should have a key named 'num_classes'. "
                "The value of the key 'num_classes' is a Python integer which represents the number of classes in the machine learning task. "
                "You must carefully decide this number by reading the provided description of the task from the user."
            )

        prompt.append("Here is the Python dictionary:")
        self.prompt_message += self.prompt_formatting_GPT(prompt)


    def simulated_data_analysis_prompt(self , connector_choice) :
        self.connector_plan = self.generated_connector_plan[connector_choice]["connector"]
        self.connector_plans_answers_prompt(per_tensor_explanation=True)

        dtype_list = ["torch.float32" , "torch.float64" , "torch.float16" , "torch.complex32" , "torch.complex64" ,
                      "torch.complex128" , "torch.uint8" , "torch.int8" , "torch.int16" , "torch.int32" ,
                      "torch.int64" ,
                      "torch.bool"]

        data_preparation_step = {
            "natural language processing" : "tokenization" ,
            "computer vision" : "image preprocessing"
            }

        if self.connector_plan[2] == "integer representation of class labels" :

            prompt=["There is a task is to generate simulated input data, simulated ground truth output data, "
                    "and simulated predicted output data for a machine learning task. "]

            prompt += [self.basic_task_types_1_and_2_prompt]
            prompt += [self.output_types_prompt]
            prompt += [self.specific_task_types_prompt]
            prompt += [self.data_format_prompt]


            self.integer_classification_output_prompt = \
                "Since the output type of the machine learning task is integer representation of class labels, the predicted "\
                 "output data has a different format from the ground truth output data. "\
                 "The predicted output data are logits representing the probability of each class. The ground "\
                 "truth output data are integers representing the class. Therefore, the tensor for labels in the "\
                 "predicted output data should include an additional dimension num_classes representing the number of "\
                 f"classes. The number of classes in this task is {self.connector_plan[6]['num_classes']}."


            prompt += [self.integer_classification_output_prompt]

            prompt +=[
                f"The simulated input data and ground truth output data need to be compatible with the "
                f"torch.utils.data.dataloader.DataLoader object. "
                f"Analyze the requirements of input and output data after data processing "
                f"in the machine learning task.\n"
                f"You answer should include:\n1. For each type of tensor in input data, predicted output data, and ground truth output data, "
                f"determine the data type (dtype) of the type of tensor after {data_preparation_step[self.connector_plan[0]]} out of {dtype_list}\n2. For each "
                f"of tensor, determine any types of unary constraint for the values of any "
                f"element in the tensors after {data_preparation_step[self.connector_plan[0]]}\n3. For each type of tensor, determine any "
                f"types of higher-order constraints (relational constraints among the "
                f"values of two or more elements) among the values of any set of elements "
                f"in the tensors after {data_preparation_step[self.connector_plan[0]]} "
                f"(including any set of elements in a single type of tensor and any set of elements across different types of tensors)."
                f" Your answer should only include the analysis and should "
                f"not include any code."]

        else :

            prompt = ["There is a task is to generate simulated input and output data for a machine learning task."]

            prompt += [self.basic_task_types_1_and_2_prompt]
            prompt += [self.output_types_prompt]
            prompt += [self.specific_task_types_prompt]
            prompt += [self.data_format_prompt]

            prompt += [
                f"The input and output data need to be compatible with the "
                f"torch.utils.data.dataloader.DataLoader object. "
                f"Analyze the requirements of input and output data after data processing "
                f"in the machine learning task.\n"
                f"You answer should include:\n1. For each type of tensor in input data or output data, "
                f"determine the data type (dtype) of the type of tensor after {data_preparation_step[self.connector_plan[0]]} out of {dtype_list}\n2. For each "
                f"of tensor, determine any types of unary constraint for the values of any "
                f"element in the tensors after {data_preparation_step[self.connector_plan[0]]}\n3. For each type of tensor, determine any "
                f"types of higher-order constraints (relational constraints among the "
                f"values of two or more elements) among the values of any set of elements "
                f"in the tensors after {data_preparation_step[self.connector_plan[0]]} "
                f"(including any set of elements in a single type of tensor and any set of elements across different types of tensors)."
                f" Your answer should only include the analysis and should"
                f"not include any code."]

        prompt += ["Here is the answer:"]
        self.prompt_message += self.prompt_formatting_GPT(prompt)

    def simulated_data_module_prompt(self , analysis_answer , connector_choice , fixed_variable_detail) :


        self.connector_plans_answers_prompt(fixed_variable_info=True , fixed_variable_detail=fixed_variable_detail)

        if self.connector_plan[2] == "integer representation of class labels" :

            prompt =["There is a task is to generate simulated input data, simulated ground truth output data, "
                    "and simulated predicted output data for a machine learning task."]

        else :
            prompt = ["There is a task is to generate simulated input and output data for a machine learning task."]

        prompt += [self.basic_task_types_1_and_2_prompt]
        prompt += [self.output_types_prompt]
        prompt += [self.specific_task_types_prompt]

        total_count = len(self.connector_plan[4]['input']) + len(self.connector_plan[4]['output'])

        if self.connector_plan[2] == "integer representation of class labels" :

            prompt += ["The input and ground truth output data need to be compatible with the "
                          "torch.utils.data.dataloader.DataLoader object. Generate the Python code "
                          "for generating the simulated input data, simulated ground truth output data, "
                          "and simulated predicted output data. The code should have the "
                          "the following features: (1): there is a function named 'generate_data' in "
                          f"the code. (2): The function 'generate_data' should return {total_count + 1} objects. Each object is a "
                          f"tensor in the input data, ground truth output data or predicted output data. The format of each tensor is: "
                          ]

        else :

            prompt += ["The input and ground truth output data need to be compatible with the "
                            "torch.utils.data.dataloader.DataLoader object. Generate the Python code "
                            "for generating the simulated input and output data. The code should have the "
                            "the following features: (1): there is a function named 'generate_data' in "
                            f"the code. (2): The function 'generate_data' should return {total_count} objects. Each object is a "
                            f"tensor in the input data or output data. The format of each tensor is: "
                            ]


        prompt +=[self.data_format_prompt]


        if self.connector_plan[2] == "integer representation of class labels" :

            prompt += [self.integer_classification_output_prompt]

        self.connector_specific_arguments_formation(connector_choice)

        if self.connector_plan[2] == "integer representation of class labels" :

            prompt += [f"(3): The arguments of the function 'generate_data' should include the following lists of "
                            f"arguments: {self.connector_specific_arguments_formation_string} (4): the function 'generate_data' should "
                            f"consider the shapes of the tensors in the input data, ground truth output data, and predicted output data, "
                            f"especially the dimensions with fixed sizes: "
                            ]

        else :

            prompt += [f"(3): The arguments of the function 'generate_data' should include the following lists of "
                            f"arguments: {self.connector_specific_arguments_formation_string} (4): the function 'generate_data' should "
                            f"consider the shapes of the tensors in the input and output data, especially the dimensions with fixed sizes: "
                            ]

        prompt += [self.data_format_prompt]



        if self.connector_plan[2] == "integer representation of class labels" :

            prompt += [self.integer_classification_output_prompt]


        prompt += ["In addition, the code should consider the dtype unary constraints,"
                    "and higher-order constraints (relational constraints among the values of two or more elements) in "
                    f"the following description:\n\n{analysis_answer}"]

        prompt += ["Here is the Python code:"]
        self.prompt_message = self.prompt_formatting_GPT(prompt)


    def post_processing_module_prompt(self , connector_choice , fixed_variable_detail=None) :

        self.connector_plan = self.generated_connector_plan[connector_choice]["connector"]

        task_type = self.connector_plan[0]

        if task_type == "traditional algorithms" :

            self.connector_plans_answers_prompt()


            prompt = ["There is a machine learning task, your goal is to generate the code for the evaluation metrics "
                       f"and for processing the output data for the evaluation metrics.\n\n{prompter.common_basic_prompt_third_person}"]


            prompt += [self.task_description_prompt]

            prompt += ["You need to pay special attention to the evaluation metrics in the above information"]

            prompt += [self.basic_task_types_1_and_2_prompt]
            prompt += [self.output_types_prompt]

            prompt += ["You need to pay special attention to the type of the machine learning task and the ground truth output type of the above information"]

            prompt += ["The raw output consists of the predicted output and the ground truth output. For either "
                        "predicted output or ground truth output, there is 1 NumPy array. "
                        "More specifically, for either predicted output or ground truth output: "
                        f"{self.data_format_output_prompt}"]


            prompt += ["You should pay special attention to the shape of the arrays the output data. "
                        "The generated Python code should meet the following requirements: (1): there is a function named "
                        "'generate_evaluation' in the code. (2) The function 'generate_evaluation' should always include and only "
                        f"include the following arguments: ground_truth_output and predicted_output. The ground_truth_output must"
                        f"be the first argument and the predicted_output must be the second argument. (3): the function 'generate_evaluation' "
                        f"should convert the raw output (in NumPy array format) into the format acceptable by the evaluation "
                        f"metrics. (4): the function 'generate_evaluation' should compare the predicted output(s) and ground truth "
                        f"output(s) based on the specified evaluation metrics in this machine learning task. (5): the function "
                        f"'generate_evaluation' should return one object, which is the numerical score based on the evaluation "
                        f"metrics in the format of Python built-in float. (6): the generated code should not contain any example usage. The generated "
                        f"code should only contain code about the function 'generate_evaluation'."]

        if task_type == "natural language processing" or task_type == "computer vision" :

            self.connector_plans_answers_prompt(fixed_variable_info=True , fixed_variable_detail=fixed_variable_detail ,
                                                per_tensor_explanation=True)

            prompt = ["There is a machine learning task, your goal is to generate the code for the evaluation metrics "
                          f"and for processing the output data for the evaluation metrics.\n\n{prompter.common_basic_prompt_third_person}"]

            prompt += [self.task_description_prompt]

            prompt += ["You need to pay special attention to the evaluation metrics in the above information"]

            prompt += [self.basic_task_types_1_and_2_prompt]
            prompt += [self.output_types_prompt]

            prompt += ["You need to pay special attention to the type of the machine learning task and the output type of the above information"]


            prompt += [self.specific_task_types_prompt]


            if self.connector_plan[2] == "integer representation of class labels" :

                self.integer_classification_output_prompt = \
                    "Since the output type of the machine learning task is integer representation of class labels, the predicted " \
                    "output data has a different format from the ground truth output data. " \
                    "The predicted output data are logits representing the probability of each class. The ground " \
                    "truth output data are integers representing the class. Therefore, the tensor for labels in the " \
                    "predicted output data should include an additional dimension num_classes representing the number of " \
                    f"classes. The number of classes in this task is {self.connector_plan[6]['num_classes']}."

                prompt +=["Since the output type of the machine learning task is integer representation of class labels, the predicted "\
                            "output data has a different format from the ground truth output data. "\
                            "The predicted output data are logits representing the probability of each class. The ground "\
                            "truth output data are integers representing the class. Therefore, the tensor for labels in the "\
                            "predicted output data should include an additional dimension num_classes representing the number of "\
                            f"classes. The number of classes in this task is {self.connector_plan[6]['num_classes']}."]


                prompt +=["The raw output consists of the predicted output and the ground truth output. For either "
                          "predicted output or ground truth output, there is 1 NumPy array. "
                          "More specifically, for either predicted output or ground truth output: "
                          f"{self.data_format_output_prompt}"]

                prompt +=[self.integer_classification_output_prompt]

            else :

                prompt += ["The raw output consists of the predicted output and the ground truth output. For either "
                            "predicted output or ground truth output, there are 1 or more NumPy arrays, each array represents an output for a "
                            "batch of datapoints. More specifically, for either predicted output or ground truth output: "
                            f"{self.data_format_output_prompt}"]


            outputs_argument_string = self.output_arguments_formation()

            prompt += ["You should pay special attention to the shape of each type of tensor in the output data. "
                        "The generated Python code should meet the following requirements: (1): there is a function named "
                        "'generate_evaluation' in the code. (2) The function 'generate_evaluation' should always include and only "
                        f"include the following arguments: {outputs_argument_string}(3): the function 'generate_evaluation' "
                        "should convert the raw output (in NumPy array format) into the format acceptable by the evaluation "
                        "metrics. (4): the function 'generate_evaluation' should compare the predicted output(s) and actual "
                        "output(s) based on the specified evaluation metrics in this machine learning task. (5): the function "
                        "'generate_evaluation' should return one object, which is the numerical score based on the evaluation "
                        "metrics in the format of Python built-in float. (6): the generated code should not contain any example usage. The generated "
                        "code should only contain code about the function 'generate_evaluation'."]

        prompt += ["Here is the Python code:"]

        self.prompt_message = self.prompt_formatting_GPT(prompt)

    def modeling_module_prompt(self , connector_choice , version_choice,fixed_variable_detail=None) :
        modeling_plan = self.generated_connector_plan[connector_choice]["modeling"][version_choice]
        self.connector_plan = self.generated_connector_plan[connector_choice]["connector"]

        task_type = self.connector_plan[0]

        if task_type == "traditional algorithms" :
            self.connector_plans_answers_prompt()

            prompt = [self.task_description_prompt]
            prompt += [self.basic_task_types_1_and_2_prompt]
            prompt += [self.output_types_prompt]

            modeling_choice = list(modeling_plan.keys())[0]
            self.modeling_string = f"The chosen model for the machine learning task is {modeling_choice}. "

            prompt+=["Your task is to generate the Python code for the modeling part of the machine learning task. "
                     f"{self.modeling_string}"]


            prompt+=["The generated python code should meet the following requirements:\n(1): "
                     "There is a function named 'generate_model' in the code.\n(2): The function 'generate_model' must utilize"
                     "an model to process data.\n(3): The model should be from the following libraries: "
                     " Scikit-learn, XGBoost, CatBoost, and LightGBM.\n"
                     "(4): The arguments of the function 'generate_model' should include the following arguments: "
                     "an argument named 'X_train', which represents the training input data, "
                     "an argument named 'y_train', which represents the training output data, and "
                     "an argument named 'X_test', which represents the testing input data.\n"
                     "(5): the returned values of the function must be the predicted testing output data.\n"
                     "(6): the for both the training data and testing data, the format of the data is as follows: "]


            prompt += [self.data_format_prompt]


            prompt += ["(7): The generated code is forbidden to include any example usage. Just generate the function for "
                       "the modeling step.\n(8): Besides X_train, y_train, and X_test, all other arguments of the function "
                       "'generate_model' should always be keyword (default) arguments.\n(9): The generated code should only be about creating the "
                       "function 'generate_model'. The generated code should not include any code about dataset creation, "
                       " or evaluation."]

        elif task_type == "natural language processing" or task_type == "computer vision":

            self.connector_plans_answers_prompt(fixed_variable_info=True , fixed_variable_detail=fixed_variable_detail ,
                                                per_tensor_explanation=True)

            prompt = [self.task_description_prompt]
            prompt += [self.basic_task_types_1_and_2_prompt]
            prompt += [self.output_types_prompt]
            prompt += [self.specific_task_types_prompt]

            modeling_choice = list(modeling_plan.keys())[0]
            model_entries = len(list(modeling_plan.values()))

            if model_entries == 1 :
                self.modeling_string = f"The chosen model for the machine learning task is {modeling_choice}. "
            if model_entries > 1 :
                model_explanations = list(list(modeling_plan.values())[0].values())
                self.modeling_string = f"The chosen models for the machine learning task are {modeling_choice}. "
                sub_models = list(modeling_plan[modeling_choice].keys())
                for idx , n in enumerate(sub_models[1 :]) :
                    self.modeling_string += f"The role of the model {n} is {model_explanations[idx + 1]}. "
                self.modeling_string += f"The reason for choosing the combination of models is {model_explanations[0]} "

            prompt += ["Your task is to generate the Python code for the modeling part of the machine learning task. "
                          f"{self.modeling_string} You should always use pretrained model(s). "]

            prompt += ["The generated python code should meet the following requirements:\n(1): "
                        "There is a function named 'generate_model' in the code.\n(2): The returned values of the function must "
                        "be a torch.nn.Module object.\n(3): The torch.nn.Module object should be a have "
                        "a forward method that takes all types of tensor in input data as arguments. The value of each argument is "
                        "a single-type tensor for a batch of data points. The forward method"
                        "should not have any arguments that do not have a corresponding type of tensor in the input data. "
                        "The forward method should return all types of tensors in output data. The value of each "
                        "returned object is a single-type tensor for a batch of data points. The returned objects of the "
                        "forward method should not include any objects that do not have a corresponding type of tensor in the output data.\n"
                        "(4): The input data and (ground-truth) output data are from"
                        " a torch.utils.data.dataloader.DataLoader object. The input and output data"
                        " from the torch.utils.data.dataloader.DataLoader object are already processed by "
                        "data preparation steps (including tokenization for natural language processing tasks)."
                        " The content of the processed input and output data is as follows:"]



            if self.connector_plan[2] == "integer representation of class labels" :
                prompt += [self.data_format_prompt]
                prompt += [self.integer_classification_output_prompt]
            else :
                prompt += [self.data_format_prompt]

            self.connector_specific_arguments_formation(connector_choice)

            prompt += ["(5): The __init__ method might need to include additional layers to transform the input tensors or output tensors, "
                        "or the forward method might need additional code that accommodates the shapes of processed input data or "
                        "output data.\n(6): To take the shapes of the tensors in processed input or output data into account, "
                        "always include the following arguments of the function 'generate_model (even if the argument is not used in the generated code):"
                        f" {self.connector_specific_arguments_formation_string}"]

            prompt += ["(7): The generated code is forbidden to include any example usage. Just generate the function for "
                        "the modeling step.\n(8): All arguments of the function 'generate_model' should always and "
                        "absolutely be keyword (default) arguments.\n(9): The function 'generate_model' is forbidden to "
                        "include any positional (non-default) arguments.\n(10): The generated code should only be about creating the "
                        "function 'generate_model'. The generated code should not include any code about dataset creation, "
                        "optimizer, loss function, training, validation, or testing."]


        prompt +=[self.feed_backs_prompt]


        if self.feed_backs_prompt == "":
            prompt += ["Here is the Python code:"]

        else :
            prompt +=["Here is the modified Python code:"]

        self.prompt_message += self.prompt_formatting_GPT(prompt)


    def data_preparation_module_prompt(self , connector_choice , version_choice , fixed_variable_detail=None ,
                                       train_val_ratio=None) :

        data_preparation_plan = self.generated_connector_plan[connector_choice]["data preparation"][
            version_choice]
        self.connector_plan = self.generated_connector_plan[connector_choice]["connector"]

        task_type = self.connector_plan[0]

        self.data_preparation_string = f"data loading: load the raw data from the files. data splitting: split the " \
                                       f"data into training data and validation data by allocating {train_val_ratio * 100}% " \
                                       f"of datapoints as validation data (before augmentation for deep learning tasks). "

        data_preparation_choice_list = list(data_preparation_plan.keys())
        data_preparation_entry_list = list(data_preparation_plan.values())
        for idx , n in enumerate(data_preparation_entry_list) :
            if n != "none" :
                self.data_preparation_string += f"{data_preparation_choice_list[idx]}: {n}. "

        if task_type == "traditional algorithms" :

            self.connector_plans_answers_prompt()

            prompt = [self.task_description_prompt]
            prompt += [self.basic_task_types_1_and_2_prompt]
            prompt += [self.output_types_prompt]

            prompt +=["Your task is to generate the Python code for the data preparation module of the machine learning task. "]


            prompt += ["The generated python code should meet the following requirements:\n(1): There is a function named "
                        "'process_data' in the code.\n(2): The data preparation module should contain the following "
                        f"steps in a suitable order: {self.data_preparation_string}\n(3): The returned objects of the function 'process_data' must be ("
                        f"X_train,  X_test, y_train, y_test). X_train is an numpy array that represents the training input data, "
                        f"X_test is an numpy array that represents the testing input data, "
                        f"y_train is an numpy array that represents the training output data, and "
                        f"y_test is an numpy array that represents the testing output data.\n"
                        f"(4): For both the training data and testing data, the format of the data is as follows:"]

            prompt +=[self.data_format_prompt]

            prompt += ["(5): The generated code is forbidden to include any example usage. Just generate the function for "
                        "data preparation.\n(6): All arguments of the function 'process_data' should always and "
                        "absolutely be keyword (default) arguments.\n(7): The function 'process_data' is forbidden to "
                        "include any positional (non-default) arguments. The generated code is forbidden to leave the task of "
                        "specifying any arguments to the user.\n(8): The generated code should only be about creating the "
                        "function 'process_data'. The generated code should not include any code about modeling, "
                        "training, validation, or testing.\n(9): You are forbidden to include any"
                        "positional (non-default) arguments about the path of the files. You must specify the path in the "
                        "generated code.\n(10): You are forbidden to leave any part of the code, such as any function, "
                        "unimplemented."]

        elif task_type == "natural language processing" or task_type == "computer vision" :

            self.connector_plans_answers_prompt(fixed_variable_info=True , fixed_variable_detail=fixed_variable_detail ,
                                                per_tensor_explanation=True)

            prompt=[self.task_description_prompt]
            prompt += [self.basic_task_types_1_and_2_prompt]
            prompt += [self.output_types_prompt]
            prompt += [self.specific_task_types_prompt]

            if self.connector_plan[0] == "natural language processing" :

                if self.connector_plan[0] == "natural language processing" :
                    self.data_preparation_string += "tokenization: tokenizer = transformers.AutoTokenizer.from_pretrained('bert-base-uncased') "


            prompt += ["Your task is to generate the Python code for the data preparation module of the machine learning task. "]

            prompt += ["The generated python code should meet the following requirements:\n(1): There is a function named "
                        "'generate_dataloader' in the code.\n(2): The data preparation module should load the files. All "
                        "available files are listed in the user's description of the available files. You "
                        "are forbidden to assume the availability of any files not listed in the user's description.\n"
                        "(3): The data preparation module should contain the following "
                        f"steps in a suitable order: {self.data_preparation_string}\n(4): The returned objects of the function must be ("
                        f"train_loader,val_loader). train_loader is a torch.utils.data.DataLoader object that generates the "
                        f"training data and val_loader is a torch.utils.data.DataLoader object that generates the validation "
                        f"data. Both torch.utils.data.DataLoader objects should be an iterable that generates batch of data "
                        f"in the format of a tuple of tensors. Each tensor in the tuple represents one type of tensor."
                        f"\n(5): The input and output data from the "
                        f"torch.utils.data.DataLoader objects must satisfy the following requirements:"]

            prompt += [self.data_format_prompt]

            self.connector_specific_arguments_formation(connector_choice)

            prompt += ["(6): To take the shapes of the tensors in processed input or output data into account, "
                        "always include the following arguments of the function 'generate_dataloader' (even if the argument "
                        "is not used in the generated code): "
                        f"{self.connector_specific_arguments_formation_string}"]

            prompt += ["(7): The generated code is forbidden to include any example usage. Just generate the function for "
                        "data preparation.\n(8): All arguments of the function 'generate_dataloader' should always and "
                        "absolutely be keyword (default) arguments.\n(9): The function 'generate_dataloader' is forbidden to "
                        "include any positional (non-default) arguments. The generated code is forbidden to leave the task of "
                        "specifying any arguments to the user.\n(10): The generated code should only be about creating the "
                        "function 'generate_dataloader'. The generated code should not include any code about modeling, "
                        "optimizer, loss function, training, validation, or testing.\n(11): You are forbidden to include any"
                        "positional (non-default) arguments about the path of the files. You must specify the path in the "
                        "generated code.\n(12): You are forbidden to leave any part of the code, such as any function, "
                        "unimplemented."]

        prompt+=[self.feed_backs_prompt]
        if self.feed_backs_prompt == "":
            prompt += ["Here is the Python code:"]
        else :
            prompt += ["Here is the modified Python code:"]

        self.prompt_message += self.prompt_formatting_GPT(prompt)

    def explain_prompt(self) :

        prompt=["The user is trying to write the code for a machine learning task. "
                "The code was not working. Your task is to tell the user how to modify the code."
                " Below are the previously generated code:\n\n"
                f"{self.feed_backs_string}You should analyze the previous codes, error messages, and tracebacks "
                f"and then offer an suggestion about how to change the code.\n\n"
                f"The user cannot make any changes to the files/folders/directories, create and files/folders/directories, or delete any files/folders/directories. "
                f"Your answer should not be changes to the files/folders/directories."
                f"If the code cannot work without changes to the files/folders/directories,"
                f"you should suggest the user to use a different approach or library for the code."]

        prompt+=["The user should change the code by:"]

        self.prompt_message=self.prompt_formatting_GPT(prompt)

    @staticmethod
    def list_to_and_form(input_list) :
        if not input_list :
            return ""
        elif len(input_list) == 1 :
            return input_list[0]
        elif len(input_list) == 2 :
            return f"{input_list[0]} and {input_list[1]}"
        else :
            last_element = input_list[-1]
            other_elements = ", ".join(input_list[:-1])
            return f"{other_elements}, and {last_element}"

    @staticmethod
    def int_to_ordinal(num) :
        """
        Converts an integer to its ordinal string representation.

        Args:
          num: The integer to convert.

        Returns:
          The ordinal string representation of the number.
        """
        suffixes = ["th" , "st" , "nd" , "rd" , "th" , "th" , "th" , "th" , "th" , "th"]
        if 11 <= num <= 13 :
            return f"{num}{suffixes[0]}"
        else :
            return f"{num}{suffixes[num % 10]}"

    @staticmethod
    def tensor_description_extraction(tensor , fixed_variable_info , fixed_variable_detail) :

        if fixed_variable_info :
            data_format_description = "\nThe meaning of the dimension batch_size is the size of the batch. The size of the dimension batch_size is a variable."
            for n in tensor['shape'][1 :] :
                if tensor[n]['fixed_or_variable'] == "fixed" :
                    if fixed_variable_detail != None :
                        data_format_description += f"\nThe meaning of the dimension {n} is {tensor[n]['meaning']}. The size of the dimension {n} is fixed as {fixed_variable_detail[n]}. "
                    else :
                        data_format_description += f"\nThe meaning of the dimension {n} is {tensor[n]['meaning']}. The size of the dimension {n} is fixed. "
                elif tensor[n]['fixed_or_variable'] == "variable" :
                    data_format_description += f"\nThe meaning of the dimension {n} is {tensor[n]['meaning']}. The size of the dimension {n} is a variable. "
        else :
            data_format_description = "\nThe meaning of the dimension batch_size is the size of the batch. "
            for n in tensor['shape'][1 :] :
                data_format_description += f"\nThe meaning of the dimension {n} is {tensor[n]['meaning']}. "
        return data_format_description

    @staticmethod
    def multi_tensor_description_extraction(tensor_list , fixed_variable_info , fixed_variable_detail ,
                                            per_tensor_explanation) :
        data_format_description = ""
        for idx , n in enumerate(tensor_list) :

            if per_tensor_explanation :
                data_format_description += f"For the {prompter.int_to_ordinal(idx + 1)} type of tensor, " \
                                           f"this type of tensor represents {n['name']}. " \
                                           f"The shape of the tensor is {n['shape']}. {prompter.tensor_description_extraction(n , fixed_variable_info , fixed_variable_detail)} "
            else :
                data_format_description += f"For the {prompter.int_to_ordinal(idx + 1)} type of tensor, " \
                                           f"the shape of the tensor is {n['shape']}. {prompter.tensor_description_extraction(n , fixed_variable_info , fixed_variable_detail)} "

        return data_format_description


    @staticmethod
    def truncate_string(list_of_strings , max_length=7000) :
        # Check if the total length of all segments is less than or equal to max_length
        total_length = sum(len(segment) for segment in list_of_strings)
        if total_length <= max_length :
            return ''.join(list_of_strings)
        else :
            print(
                f"Warning: the total length of all messages is {total_length}, which is greater than max_length, message will be truncated")
        # Initialize an empty string to store the concatenated segments
        result = ""

        # Iterate over the segments in reverse order
        for segment in reversed(list_of_strings) :
            if len(result) + len(segment) <= max_length :
                result = segment + result
            else :
                break

        # Return the resultant string
        return result

    def connector_plans_answers_prompt(self , fixed_variable_info=False , fixed_variable_detail=None ,
                                       per_tensor_explanation=False) :

        self.basic_task_types_1_and_2_prompt = \
            f"The machine learning task is about {self.connector_plan[0]} and {self.connector_plan[1]}."
        self.output_types_prompt = \
            f"The ground truth output type of the machine learning task is {self.connector_plan[2]}."

        if self.connector_plan[0] == "traditional algorithms" :

            self.data_format_input_prompt = \
                "The input data is a 2D NumPy array. The shape of the input data is (n_samples, n_features)."

            if self.connector_plan[1] == "single-output regression" :

                self.data_format_output_prompt = \
                    "The output data is a 1D NumPy array of floats. The shape of the output data is (n_samples,)."

            elif self.connector_plan[1] == "multi-output regression" :

                self.data_format_output_prompt = \
                    "The output data is a 2D NumPy array of floats. The shape of the output data is (n_samples, n_outputs)."

            elif self.connector_plan[1] == "binary classification" :
                if self.connector_plan[2] == "integer representation of class labels" :

                    self.data_format_output_prompt = \
                        "The output data is a 1D NumPy array of integer. The shape of the output data is (n_samples,)."

                elif self.connector_plan[2] == "probability representation of class labels" :

                    self.data_format_output_prompt = \
                        "The output data is a 2D NumPy array of floats. The shape of the output data is (n_samples, 2)."

            elif self.connector_plan[1] == "multi-class classification" :
                if self.connector_plan[2] == "integer representation of class labels" :

                    self.data_format_output_prompt = \
                    "The output data is a 1D NumPy array of integer. The shape of the output data is (n_samples,)."

                elif self.connector_plan[2] == "probability representation of class labels" :

                    self.data_format_output_prompt = \
                        "The output data is a 2D NumPy array of floats. The shape of the output data is (n_samples, n_classes)."

            elif self.connector_plan[1] == "multi-label classification" :
                if self.connector_plan[2] == "integer representation of class labels" :


                    self.data_format_output_prompt = \
                        "The output data is a 2D NumPy array of integers. The shape of the output data is (n_samples, n_classes)."

                elif self.connector_plan[2] == "probability representation of class labels" :

                    self.data_format_output_prompt = \
                        "The output data is a 2D NumPy array of floats. The shape of the output data is (n_samples, n_classes)."


        if self.connector_plan[0] == "computer vision" or self.connector_plan[0] == "natural language processing" :

            self.specific_task_types_prompt = "The machine learning task is about " + self.connector_plan[3] + "."

            if self.connector_plan[0] == "natural language processing" or self.connector_plan[0] == "computer vision":

                input_tensor_list = self.connector_plan[4]['input']
                output_tensor_list = self.connector_plan[4]['output']

                if len(input_tensor_list) == 1 :
                    tensor = input_tensor_list[0]

                    if per_tensor_explanation :

                        self.data_format_input_prompt = \
                            "There is only 1 type of tensor in the input data. This type of tensor represents " \
                            f"{tensor['name']}. The shape of the input tensor is {tensor['shape']}. " \
                            f"{prompter.tensor_description_extraction(tensor , fixed_variable_info , fixed_variable_detail)} "

                    else :

                        self.data_format_input_prompt = \
                            "There is only 1 type of tensor in the input data. The shape of the input tensor is " \
                            f"{tensor['shape']}. {prompter.tensor_description_extraction(tensor , fixed_variable_info , fixed_variable_detail)}"


                if len(output_tensor_list) == 1 :
                    tensor = output_tensor_list[0]

                    if per_tensor_explanation :

                        self.data_format_output_prompt = \
                            "There is only 1 type of tensor in the output data. This type of tensor represents " \
                            f"{tensor['name']}. The shape of the output tensor is {tensor['shape']}. " \
                            f"{prompter.tensor_description_extraction(tensor , fixed_variable_info , fixed_variable_detail)} "

                    else :

                        self.data_format_output_prompt = \
                            "There is only 1 type of tensor in the output data. The shape of the output tensor is " \
                            f"{tensor['shape']}. {prompter.tensor_description_extraction(tensor , fixed_variable_info , fixed_variable_detail)}"

                if len(input_tensor_list) >= 2 :

                    self.data_format_input_prompt =\
                        f"There are {len(input_tensor_list)} types of tensor in the input data. " \
                        f"{prompter.multi_tensor_description_extraction(input_tensor_list , fixed_variable_info , fixed_variable_detail , per_tensor_explanation)}"

                if len(output_tensor_list) >= 2 :

                    self.data_format_output_prompt = \
                        f"There are {len(output_tensor_list)} types of tensor in the output data. " \
                        f"{prompter.multi_tensor_description_extraction(output_tensor_list , fixed_variable_info , fixed_variable_detail , per_tensor_explanation)}"

        self.data_format_prompt = self.data_format_input_prompt+"\n"+self.data_format_output_prompt


    def hyperparameter_explanation_base_prompt(self) :
        tensor_list = self.connector_plan[4]
        list_input = "["
        list_output = "["
        for I_O in tensor_list :
            for tensor in tensor_list[I_O] :
                tensor_name = tensor["name"]
                dimensions = tensor['shape']
                dimension_prompt = ""
                dimension_prompt += "{"
                for dimension in dimensions :
                    fixed_or_variable = tensor[dimension]["fixed_or_variable"]
                    if fixed_or_variable == "fixed" :
                        dimension_prompt += f"'{dimension}': [an integer or float in the list that represents a single suitable value for the size of the dimension {dimension} of the tensor {tensor_name}],"
                    if fixed_or_variable == "variable" :
                        dimension_prompt += f"'{dimension}': [an integer or float in the list that represents the minimum suitable value of the size of the dimension {dimension} of the tensor {tensor_name}, an integer or float in the list that represents the maximum suitable value of the size of the dimension {dimension} of the tensor {tensor_name}],"
                dimension_prompt += "},"
                if I_O == "input" :
                    list_input += dimension_prompt
                if I_O == "output" :
                    list_output += dimension_prompt
        list_input += "]"
        list_output += "]"
        base_prompt = f"{{'input':{list_input},'output':{list_output}}}"

        if self.connector_plan[2] == "integer representation of class labels" :
            base_prompt += "{'num_classes':an integer that represents the number of classes in the machine learning task}"

        return base_prompt

    def connector_specific_arguments_formation(self , connector_choice) :
        """
        Describe the size arguments implied by the chosen connector's data format.

        Reads entry 4 (the data format) and entry 5 (the isomorphic-dimension
        groups: a list of dicts mapping a unified name to the dimension names it
        replaces) of ``self.generated_connector_plan[connector_choice]["connector"]``
        and stores the resulting sentences in
        ``self.connector_specific_arguments_formation_string``.

        The string always starts with the batch_size argument. Every other variable
        dimension returned by ``extract_variable_name`` contributes either its own
        argument, or, when it belongs to an isomorphic group, one argument under
        the group's unified name that lists every dimension of that group.

        Args:
          connector_choice: Key/index selecting the entry of
            ``self.generated_connector_plan`` to describe.
        """
        connector = self.generated_connector_plan[connector_choice]["connector"]
        data_format = connector[4]
        variable_list = extract_variable_name(data_format)
        self.connector_specific_arguments_formation_string = f"An argument named batch_size, the argument batch_size " \
                                                             f"is a Python integer which represents the size of the batch. "

        isomorphic_dimensions = connector[5]
        conversion_values = []
        for dictionary in isomorphic_dimensions :
            for key in dictionary :
                conversion_values.extend(dictionary[key])

        # Dimensions already covered by a unified argument; used only to avoid
        # emitting the same group twice.
        shared_dimensions = []
        for variable_dimension in variable_list :
            I_O , tensor_idx , dimension_idx = locate_dimension_in_connector(data_format , variable_dimension)
            if variable_dimension == "batch_size" :
                continue

            if variable_dimension not in conversion_values :
                self.connector_specific_arguments_formation_string += f"An argument named {variable_dimension}, the argument {variable_dimension} " \
                                                                      f"is a Python integer which represents the size of the {prompter.int_to_ordinal(dimension_idx + 1)} " \
                                                                      f"dimension of the {prompter.int_to_ordinal(tensor_idx + 1)} tensor in {I_O} data. "
                continue

            if variable_dimension in shared_dimensions :
                continue

            # Collect only the group(s) containing this dimension. Listing the whole
            # running record instead would attribute the dimensions of earlier
            # groups to this unified argument as well.
            group_dimensions = []
            for dictionary in isomorphic_dimensions :
                for key in dictionary :
                    if variable_dimension in dictionary[key] :
                        unified_name = key
                        group_dimensions.extend(dictionary[key])
            shared_dimensions.extend(group_dimensions)

            self.connector_specific_arguments_formation_string += f"An argument named {unified_name}, the argument {unified_name} " \
                                                                  f"is a Python integer which represents the size of "

            shared_dimensions_string_list = []
            for shared_dimension in group_dimensions :
                shared_dimension_I_O , shared_dimension_tensor_idx , shared_dimension_dimension_idx = locate_dimension_in_connector(
                    data_format , shared_dimension)

                shared_dimensions_string_list.append(
                    f"the {prompter.int_to_ordinal(shared_dimension_dimension_idx + 1)} " \
                    f"dimension of the {prompter.int_to_ordinal(shared_dimension_tensor_idx + 1)} tensor in {shared_dimension_I_O} data")

            self.connector_specific_arguments_formation_string += prompter.list_to_and_form(
                shared_dimensions_string_list)
            self.connector_specific_arguments_formation_string += ". "

    def output_arguments_formation(self) :
        outputs_argument_string = ""
        for tensor in self.connector_plan[4]["output"] :
            outputs_argument_string += f"An argument named 'predicted_{tensor['name']}'. The value of the argument is the torch.Tensor of predicted output about {tensor['name']}. "
        for tensor in self.connector_plan[4]["output"] :
            outputs_argument_string += f"An argument named 'actual_{tensor['name']}'. The value of the argument is the torch.Tensor of actual (ground truth) output about {tensor['name']}. "

        return outputs_argument_string

    def add_feedbacks(self , target_code_history , target_error_history , target_traceback_history ,
                      explanation_history=None) :
        """
        Assemble the feedback text about earlier failed code attempts.

        One section is built per entry of target_code_history, containing the code,
        its error message and, when available, its traceback and explanation. The
        sections are combined with prompter.truncate_string into
        ``self.feed_backs_string``, and ``self.feed_backs_prompt`` wraps that string
        in instructions for the model. When the combined string is empty, a warning
        is printed, ``self.feed_backs_prompt`` is reset to "" and
        ``self.feed_backs_prompt_formatted`` is set to [].

        Args:
          target_code_history: Sequence of previously generated code strings.
          target_error_history: Error messages, indexed in parallel with the code.
          target_traceback_history: Tracebacks, indexed in parallel with the code,
            or None to omit the traceback part of every section.
          explanation_history: Optional explanations; may be shorter than the code
            history, in which case later attempts get no explanation part.
        """

        feed_backs_string_list = []
        for idx , code in enumerate(target_code_history) :
            feed_backs_string = ""
            feed_backs_string += f"The piece of code is:\n\n"
            feed_backs_string += f"{code}\n\n"
            feed_backs_string += f"The error message for the code is:\n\n"
            feed_backs_string += f"{target_error_history[idx]}\n\n"
            if target_traceback_history is not None :
                feed_backs_string += f"The traceback for the code is:\n\n"
                feed_backs_string += f"{target_traceback_history[idx]}\n\n"
            # Only attach an explanation when one exists for this attempt index.
            if (explanation_history is not None) and (explanation_history != []) and (
                    idx <= len(explanation_history) - 1) :
                feed_backs_string += f"The explanation for the error in the code is:\n\n"
                feed_backs_string += f"{explanation_history[idx]}\n\n"
            feed_backs_string_list.append(feed_backs_string)

        # truncate_string keeps whole sections from the end of the list, so the
        # result can be "" when the last section alone exceeds the length limit.
        self.feed_backs_string = prompter.truncate_string(feed_backs_string_list)

        # The explanations are mentioned whenever a history object was passed,
        # even if that history is an empty list.
        explanation_attention = ""
        if explanation_history is not None :
            explanation_attention = ", the explanations"

        self.feed_backs_prompt= f"In your previous responses, you generated the following incorrect codes:\n\n{self.feed_backs_string}You " \
                                f"should pay special attention to: the error messages, the tracebacks{explanation_attention}. The Python code in your answer" \
                                f" should avoid the errors in your previous response and you should provide a piece of Python code different " \
                                f"than previous ones."


        # With no feedback text left there is nothing to refer to, so the
        # feedback prompt is cleared instead of sent with an empty body.
        if self.feed_backs_string == "" :
            print(
                "Warning: the length of the feedback string is 0 (probably due to truncation), which means that this query is sent without feedbacks. ")
            self.feed_backs_prompt=""
            self.feed_backs_prompt_formatted = []