import pandas as pd

from copy import deepcopy


def generate_prompt_normal(dataset, x1, x2, x1p, x2p):
    """Build the plain pairwise effort-comparison prompt for one dataset.

    The prompt introduces two people, "Alex" and "Jaden", shows each one's
    datapoint next to a mutated version of it, and asks which mutation takes
    more effort.  The answer is requested inside ``<answer>...</answer>``
    tags as 1 (first mutation), 2 (second mutation) or 0 (no difference).

    Args:
        dataset: One of ``'heloc'``, ``'adult'`` or ``'german_credit'``.
        x1: Datapoint shown for Alex.
        x2: Datapoint shown for Jaden.
        x1p: Mutation of ``x1``.
        x2p: Mutation of ``x2``.

    Every datapoint is inserted into the text through ``str()``.

    Returns:
        The complete prompt string.

    Raises:
        TypeError: If ``dataset`` is not one of the three supported names.
    """
    # Each template is a single literal so the whitespace sent to the model is
    # exactly what is written here.  Placeholders are filled by str.format, so
    # any literal brace added to a template later must be doubled.
    if dataset == 'heloc':
        template = """
        You are a helpful assistant to a data scientist to help them label data.
        You will be shown a datapoint representing a person Alex, and a mutation of it, 
        You will also be shown a datapoint representing a person Jaden, and a mutation of it, 
        your task is to label which of the two mutations would take more effort to achieve.

        The data will be the HELOC Dataset which uses these features: 

        MSinceMostRecentInqexcl7days: Number of months passed since the last credit inquiry on the individual.
        NumRevolvingTradesWBalance: The number of the individual's current credit accounts (e.g. credit cards) that have balances on them.
        NumTradesOpeninLast12M: The number of new credit accounts opened in the last 12 months.
        NumInqLast6M: The number of credit inquiries carried out on the individual in the last 6 months.

        The data is represented in array form like ['MSinceMostRecentInqexcl7days', 'NumRevolvingTradesWBalance', 'NumTradesOpeninLast12M', 'NumInqLast6M']


        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Now consider the following individual Jaden: {jaden}
        Now consider this mutation of Jaden: {jaden_mutated}

        Which of these two mutations would take more effort? You must provide an answer.
    
        Outline your reasoning process step by step, before giving your answer as 1, 2, or 0 in the tags <answer>...</answer>, where 1 means you think the first mutation requires more effort, 2 means you think the second mutation requires more effort, and 0 means you think there is no difference.        
        """

    elif dataset == 'adult':
        template = """
        You are a helpful assistant to a data scientist to help them label data.
        You will be shown a datapoint representing a person Alex applying for a loan, and a mutations of it, 
        You will also be shown a datapoint representing a person Jaden applying for a loan, and a mutations of it, 
        your task is to label which of the two mutations would take more effort to achieve.

        The data will be the Adult Census Dataset which uses these features: 

        isMale: If the person is male, or female, represented as 1 or 0, respectively.
        age: The person's age, represented as a floating point number.
        native-country-United-States: If the person's birthplace is the United States, or not, represented as 1 or 0, respectively.
        marital-status-Married: If the person is married, or not,  represented as 1 or 0, respectively.
        education-num: The person's level of eduction, represented by a positive integer, where higher numbers are higher levels of education. The 16 possible integers correspond to:
            1: Preschool
            2: 1st-4th
            3: 5th-6th
            4: 7th-8th
            5: 9th
            6: 10th
            7: 11th
            8: 12th
            9: HS-grad
            10: Some-college
            11: Assoc-acdm
            12: Assoc-voc
            13: Bachelors
            14: Masters
            15: Prof-school
            16: Doctorate
        hours-per-week: The number of hours the person works per week, represented by a positive integer.
        workclass-Private: If the person works for a private company, or is self-employed, represented as 1 or 0, respectively.
        isWhite: Is the person white or not, represented as 1 or 0, respectively.

        The data is represented in array form like ['isMale', 'age', 'native-country-United-States',
           'marital-status-Married', 'education-num', 'hours-per-week', 'workclass-Private', 'isWhite']

        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Now consider the following individual Jaden: {jaden}
        Now consider this mutation of Jaden: {jaden_mutated}
        
        Which of these two mutations would take more effort? You must provide an answer.
    
        Outline your reasoning process step by step, before giving your answer as 1, 2, or 0 in the tags <answer>...</answer>, where 1 means you think the first mutation requires more effort, 2 means you think the second mutation requires more effort, and 0 means you think there is no difference.        
        """

    elif dataset == 'german_credit':
        template = """
        You are a helpful assistant to a data scientist to help them label data.
        You will be shown a datapoint representing a person Alex, and a mutation of it, 
        You will also be shown a datapoint representing a person Jaden, and a mutation of it, 
        your task is to label which of the two mutations would take more effort to achieve.

        The data will be the German Credit Dataset which uses these features: 

        status: Status of existing checking account ['>= 200 DM / salary for at least 1 year', 'no checking account',
           '< 0 DM', '0 <= ... <= 200 DM']
        duration: The proposed duration of the loan in months, expressed as an integer.
        credit_history: The person's credit history with the options ['No credits taken/all credits paid back duly',
           'All credits at this bank paid back duly',
           'Existing credits paid back duly till now',
           'Critical account/other credits elsewhere',
           'Delay in paying off in the past']
        purpose: The purpose of the loan, with the options ['Furniture/equipment', 'Others', 'Car (Used)', 'Car (new)',
           'Retraining', 'Repairs', 'Domestic Applicances', 'Business',
           'Radio/television', 'Vacation']
        amount: The size of the loan asked for, represented as a positive integer.

        The data is represented in array form like ['status', 'duration', 'credit_history', 'purpose', 'amount']


        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Now consider the following individual Jaden: {jaden}
        Now consider this mutation of Jaden: {jaden_mutated}

        
        Which of these two mutations would take more effort? You must provide an answer.

        Outline your reasoning process step by step, before giving your answer as 1, 2, or 0 in the tags <answer>...</answer>, where 1 means you think the first mutation requires more effort, 2 means you think the second mutation requires more effort, and 0 means you think there is no difference.        
        """

    else:
        raise TypeError('Incorrect dataset name entered')

    # Alex's pair is rendered first, then Jaden's, matching the order in
    # which the templates mention them.
    return template.format(
        alex=str(x1),
        alex_mutated=str(x1p),
        jaden=str(x2),
        jaden_mutated=str(x2p),
    )

    
    
def generate_prompt_custom(dataset, x1, x2, x1p, x2p):
    """Build the pairwise effort-comparison prompt with dataset-specific guidance.

    Like the plain comparison prompt, this introduces two people, "Alex" and
    "Jaden", shows each one's datapoint next to a mutated version of it, and
    asks which mutation takes more effort.  In addition, each dataset's text
    ends with a numbered list of rules the model is told to use (an ordering
    of features from hardest to easiest to change, plus dataset-specific
    interaction rules).  The answer is requested inside
    ``<answer>...</answer>`` tags as 1, 2 or 0.

    Args:
        dataset: One of ``'heloc'``, ``'adult'`` or ``'german_credit'``.
        x1: Datapoint shown for Alex.
        x2: Datapoint shown for Jaden.
        x1p: Mutation of ``x1``.
        x2p: Mutation of ``x2``.

    Every datapoint is inserted into the text through ``str()``.

    Returns:
        The complete prompt string.

    Raises:
        TypeError: If ``dataset`` is not one of the three supported names.
    """
    # Each template is a single literal so the whitespace sent to the model is
    # exactly what is written here.  Placeholders are filled by str.format, so
    # any literal brace added to a template later must be doubled.
    if dataset == 'heloc':
        template = """
        You are a helpful assistant to a data scientist to help them label data.
        You will be shown a datapoint representing a person Alex, and a mutation of it, 
        You will also be shown a datapoint representing a person Jaden, and a mutation of it, 
        your task is to label which of the two mutations would take more effort to achieve.

        The data will be the HELOC Dataset which uses these features: 

        MSinceMostRecentInqexcl7days: Number of months passed since the last credit inquiry on the individual.
        NumRevolvingTradesWBalance: The number of the individual's current credit accounts (e.g. credit cards) that have balances on them.
        NumTradesOpeninLast12M: The number of new credit accounts opened in the last 12 months.
        NumInqLast6M: The number of credit inquiries carried out on the individual in the last 6 months.

        The data is represented in array form like ['MSinceMostRecentInqexcl7days', 'NumRevolvingTradesWBalance', 'NumTradesOpeninLast12M', 'NumInqLast6M']


        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Now consider the following individual Jaden: {jaden}
        Now consider this mutation of Jaden: {jaden_mutated}

        Which of these two mutations would take more effort? 
    
        Remeber the following information and use it in your labelling:
        
        1. The hardest features to change, in order from the hardest to easiest are 
        [MSinceMostRecentInqexcl7days, NumRevolvingTradesWBalance, NumTradesOpeninLast12M, NumInqLast6M]
        2. For the numerical features, they are all harder to increase the higher they get.
        3. If NumInqLast6M is greater than zero, then increasing 'NumTradesOpeninLast12M' becomes more difficult.
        
        Outline your reasoning process step by step, before giving your answer as 1, 2, or 0 in the tags <answer>...</answer>, where 1 means you think the first mutation requires more effort, 2 means you think the second mutation requires more effort, and 0 means you think there is no difference.        
        """

    elif dataset == 'adult':
        # The education feature is introduced as 'education-num' so that its
        # description carries the same name as the array schema and the
        # guidance rules later in this same prompt (it previously read
        # 'eduction-num', a name that appears nowhere else in the prompt).
        template = """
        You are a helpful assistant to a data scientist to help them label data.
        You will be shown a datapoint representing a person Alex applying for a loan, and a mutations of it, 
        You will also be shown a datapoint representing a person Jaden applying for a loan, and a mutations of it, 
        your task is to label which of the two mutations would take more effort to achieve.

        The data will be the Adult Census Dataset which uses these features: 

        isMale: If the person is male, or female, represented as 1 or 0, respectively.
        age: The person's age, represented as a floating point number.
        native-country-United-States: If the person's birthplace is the United States, or not, represented as 1 or 0, respectively.
        marital-status-Married: If the person is married, or not,  represented as 1 or 0, respectively.
        education-num: The person's level of eduction, represented by a positive integer, where higher numbers are higher levels of eduction. The 16 possible integers correspond to:
            1: Preschool
            2: 1st-4th
            3: 5th-6th
            4: 7th-8th
            5: 9th
            6: 10th
            7: 11th
            8: 12th
            9: HS-grad
            10: Some-college
            11: Assoc-acdm
            12: Assoc-voc
            13: Bachelors
            14: Masters
            15: Prof-school
            16: Doctorate
        hours-per-week: The number of hours the person works per week, represented by a positive integer.
        workclass-Private: If the person works for a private company, or is self-employed, represented as 1 or 0, respectively.
        isWhite: Is the person white or not, represented as 1 or 0, respectively.

        The data is represented in array form like ['isMale', 'age', 'native-country-United-States',
           'marital-status-Married', 'education-num', 'hours-per-week', 'workclass-Private', 'isWhite']

        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Now consider the following individual Jaden: {jaden}
        Now consider this mutation of Jaden: {jaden_mutated}
        
        Which of these two mutations would take more effort? 
    
    
        Remeber the following information and use it in your labelling:
        
        1. The hardest features to change, in order from the hardest to easiest are 
        [native-country-United-States, isWhite, isMale, age, marital-status-Married, education-num, workclass-Private, hours-per-week]
        2. For age, education-num, and hours-per-week, they are all harder to increase the higher they get.
        3. Increasing hours-per-week is more effort if the person works for a private company.
        4. Never use demographic information (i.e., isMale, age, isWhite) when calculating the effort of other feature changes.

        Outline your reasoning process step by step, before giving your answer as 1, 2, or 0 in the tags <answer>...</answer>, where 1 means you think the first mutation requires more effort, 2 means you think the second mutation requires more effort, and 0 means you think there is no difference.        
        """

    elif dataset == 'german_credit':
        template = """
        You are a helpful assistant to a data scientist to help them label data.
        You will be shown a datapoint representing a person Alex, and a mutation of it, 
        You will also be shown a datapoint representing a person Jaden, and a mutation of it, 
        your task is to label which of the two mutations would take more effort to achieve.

        The data will be the German Credit Dataset which uses these features: 

        status: Status of existing checking account ['>= 200 DM / salary for at least 1 year', 'no checking account',
           '< 0 DM', '0 <= ... <= 200 DM']
        duration: The proposed duration of the loan in months, expressed as an integer.
        credit_history: The person's credit history with the options ['No credits taken/all credits paid back duly',
           'All credits at this bank paid back duly',
           'Existing credits paid back duly till now',
           'Critical account/other credits elsewhere',
           'Delay in paying off in the past']
        purpose: The purpose of the loan, with the options ['Furniture/equipment', 'Others', 'Car (Used)', 'Car (new)',
           'Retraining', 'Repairs', 'Domestic Applicances', 'Business',
           'Radio/television', 'Vacation']
        amount: The size of the loan asked for, represented as a positive integer.

        The data is represented in array form like ['status', 'duration', 'credit_history', 'purpose', 'amount']


        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Now consider the following individual Jaden: {jaden}
        Now consider this mutation of Jaden: {jaden_mutated}

        
        Which of these two mutations would take more effort? 
    

        Remeber the following information and use it in your labelling:

        1. The hardest features to change, in order from the hardest to easiest are 
        [credit_history, status, purpose, duration, amount]
        2. For the numerical features, they are all harder to increase the higher they get.
        3. Having bad credit_history or bad status makes it harder to increase amount.
        
        Outline your reasoning process step by step, before giving your answer as 1, 2, or 0 in the tags <answer>...</answer>, where 1 means you think the first mutation requires more effort, 2 means you think the second mutation requires more effort, and 0 means you think there is no difference.        
        """

    else:
        raise TypeError('Incorrect dataset name entered')

    # Alex's pair is rendered first, then Jaden's, matching the order in
    # which the templates mention them.
    return template.format(
        alex=str(x1),
        alex_mutated=str(x1p),
        jaden=str(x2),
        jaden_mutated=str(x2p),
    )
    
    
def generate_prompt_llm_float(dataset, x1, x1p):
    """Build the single-datapoint prompt that asks for a numeric effort score.

    The prompt shows one person, "Alex", next to a mutated version of the
    same datapoint and asks how much effort the mutation took, as a floating
    point number between 0 (no effort) and 1 (the most possible effort),
    given inside ``<answer>...</answer>`` tags.

    Args:
        dataset: One of ``'heloc'``, ``'adult'`` or ``'german_credit'``.
        x1: Datapoint shown for Alex.
        x1p: Mutation of ``x1``.

    Both datapoints are inserted into the text through ``str()``.

    Returns:
        The complete prompt string.

    Raises:
        TypeError: If ``dataset`` is not one of the three supported names.
    """
    # Each template is a single literal so the whitespace sent to the model is
    # exactly what is written here.  Placeholders are filled by str.format, so
    # any literal brace added to a template later must be doubled.
    if dataset == 'heloc':
        template = """
        You are a helpful assistant to a data scientist that helps them label data.
        You will be shown a datapoint representing a person Alex, and a mutation of it.
        your task is to label how much effort this mutation was to achieve using a number between 0 and 1, where 0 is no effort, and 1 is the most possible effort.

        The data will be the HELOC Dataset which uses these features: 

        MSinceMostRecentInqexcl7days: Number of months passed since the last credit inquiry on the individual.
        NumRevolvingTradesWBalance: The number of the individual's current credit accounts (e.g. credit cards) that have balances on them.
        NumTradesOpeninLast12M: The number of new credit accounts opened in the last 12 months.
        NumInqLast6M: The number of credit inquiries carried out on the individual in the last 6 months.

        The data is represented in array form like ['MSinceMostRecentInqexcl7days', 'NumRevolvingTradesWBalance', 'NumTradesOpeninLast12M', 'NumInqLast6M']


        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Using a floating point number between 0 and 1, how much effort was this to achieve? You must provide an answer.
    
        Outline your reasoning process step by step before giving your answer in the tags <answer>...</answer>
        """

    elif dataset == 'adult':
        # The education feature is introduced as 'education-num' so that its
        # description carries the same name as the array schema shown later
        # in this same prompt (it previously read 'eduction-num', a name that
        # appears nowhere else in the prompt).
        template = """
        You are a helpful assistant to a data scientist that helps them label data.
        You will be shown a datapoint representing a person Alex, and a mutation of it.
        your task is to label how much effort this mutation was to achieve using a number between 0 and 1, where 0 is no effort, and 1 is the most possible effort.

        The data will be the Adult Census Dataset which uses these features: 

        isMale: whether or not the person is male, represented by [0, 1].
        age: The person's age, represented as a floating point number.
        native-country-United-States: If the person's birthplace is the United States, represented by [0, 1].
        marital-status-Married: If the person is married, represented by [0, 1].
        education-num: The person's level of eduction, represented by a positive integer, where higher numbers are higher levels of eduction. The 16 possible integers correspond to:
            1: Preschool
            2: 1st-4th
            3: 5th-6th
            4: 7th-8th
            5: 9th
            6: 10th
            7: 11th
            8: 12th
            9: HS-grad
            10: Some-college
            11: Assoc-acdm
            12: Assoc-voc
            13: Bachelors
            14: Masters
            15: Prof-school
            16: Doctorate
        hours-per-week: The number of hours the person works per week, represented by a positive integer.
        workclass-Private: If the person works for a private company, or is self-employed, represented as [1, 0], respectively.

        The data is represented in array form like ['isMale', 'age', 'native-country-United-States',
           'marital-status-Married', 'education-num', 'hours-per-week', 'workclass-Private']

        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Using a floating point number between 0 and 1, how much effort was this to achieve? You must provide an answer.
    
        Outline your reasoning process step by step before giving your answer in the tags <answer>...</answer>
        """

    elif dataset == 'german_credit':
        template = """
        You are a helpful assistant to a data scientist that helps them label data.
        You will be shown a datapoint representing a person Alex, and a mutation of it.
        your task is to label how much effort this mutation was to achieve using a number between 0 and 1, where 0 is no effort, and 1 is the most possible effort.

        The data will be the German Credit Dataset which uses these features: 

        status: Status of existing checking account ['>= 200 DM / salary for at least 1 year', 'no checking account',
           '< 0 DM', '0 <= ... <= 200 DM']
        duration: The proposed duration of the loan in months, expressed as an integer.
        credit_history: The person's credit history with the options ['No credits taken/all credits paid back duly',
           'All credits at this bank paid back duly',
           'Existing credits paid back duly till now',
           'Critical account/other credits elsewhere',
           'Delay in paying off in the past']
        purpose: The purpose of the loan, with the options ['Furniture/equipment', 'Others', 'Car (Used)', 'Car (new)',
           'Retraining', 'Repairs', 'Domestic Applicances', 'Business',
           'Radio/television', 'Vacation']
        amount: The size of the loan asked for, represented as a positive integer.

        The data is represented in array form like ['status', 'duration', 'credit_history', 'purpose', 'amount']


        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Using a floating point number between 0 and 1, how much effort was this to achieve? You must provide an answer.
    
        Outline your reasoning process step by step before giving your answer in the tags <answer>...</answer>
        """

    else:
        raise TypeError('Incorrect dataset name entered')

    return template.format(alex=str(x1), alex_mutated=str(x1p))


def generate_prompt_normal_user_study(dataset, x1, x2, x1p, x2p):
    """Build the plain pairwise effort-comparison prompt (user-study variant).

    The prompt introduces two people, "Alex" and "Jaden", shows each one's
    datapoint next to a mutated version of it, and asks which mutation takes
    more effort.  The answer is requested inside ``<answer>...</answer>``
    tags as 1 (first mutation), 2 (second mutation) or 0 (no difference).

    NOTE(review): the wording here appears to be the same as in
    ``generate_prompt_normal``; the function is kept self-contained rather
    than delegating, so the two can diverge independently.

    Args:
        dataset: One of ``'heloc'``, ``'adult'`` or ``'german_credit'``.
        x1: Datapoint shown for Alex.
        x2: Datapoint shown for Jaden.
        x1p: Mutation of ``x1``.
        x2p: Mutation of ``x2``.

    Every datapoint is inserted into the text through ``str()``.

    Returns:
        The complete prompt string.

    Raises:
        TypeError: If ``dataset`` is not one of the three supported names.
    """
    # Each template is a single literal so the whitespace sent to the model is
    # exactly what is written here.  Placeholders are filled by str.format, so
    # any literal brace added to a template later must be doubled.
    if dataset == 'heloc':
        template = """
        You are a helpful assistant to a data scientist to help them label data.
        You will be shown a datapoint representing a person Alex, and a mutation of it, 
        You will also be shown a datapoint representing a person Jaden, and a mutation of it, 
        your task is to label which of the two mutations would take more effort to achieve.

        The data will be the HELOC Dataset which uses these features: 

        MSinceMostRecentInqexcl7days: Number of months passed since the last credit inquiry on the individual.
        NumRevolvingTradesWBalance: The number of the individual's current credit accounts (e.g. credit cards) that have balances on them.
        NumTradesOpeninLast12M: The number of new credit accounts opened in the last 12 months.
        NumInqLast6M: The number of credit inquiries carried out on the individual in the last 6 months.

        The data is represented in array form like ['MSinceMostRecentInqexcl7days', 'NumRevolvingTradesWBalance', 'NumTradesOpeninLast12M', 'NumInqLast6M']


        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Now consider the following individual Jaden: {jaden}
        Now consider this mutation of Jaden: {jaden_mutated}

        Which of these two mutations would take more effort? You must provide an answer.
    
        Outline your reasoning process step by step, before giving your answer as 1, 2, or 0 in the tags <answer>...</answer>, where 1 means you think the first mutation requires more effort, 2 means you think the second mutation requires more effort, and 0 means you think there is no difference.        
        """

    elif dataset == 'adult':
        template = """
        You are a helpful assistant to a data scientist to help them label data.
        You will be shown a datapoint representing a person Alex applying for a loan, and a mutations of it, 
        You will also be shown a datapoint representing a person Jaden applying for a loan, and a mutations of it, 
        your task is to label which of the two mutations would take more effort to achieve.

        The data will be the Adult Census Dataset which uses these features: 

        isMale: If the person is male, or female, represented as 1 or 0, respectively.
        age: The person's age, represented as a floating point number.
        native-country-United-States: If the person's birthplace is the United States, or not, represented as 1 or 0, respectively.
        marital-status-Married: If the person is married, or not,  represented as 1 or 0, respectively.
        education-num: The person's level of eduction, represented by a positive integer, where higher numbers are higher levels of education. The 16 possible integers correspond to:
            1: Preschool
            2: 1st-4th
            3: 5th-6th
            4: 7th-8th
            5: 9th
            6: 10th
            7: 11th
            8: 12th
            9: HS-grad
            10: Some-college
            11: Assoc-acdm
            12: Assoc-voc
            13: Bachelors
            14: Masters
            15: Prof-school
            16: Doctorate
        hours-per-week: The number of hours the person works per week, represented by a positive integer.
        workclass-Private: If the person works for a private company, or is self-employed, represented as 1 or 0, respectively.
        isWhite: Is the person white or not, represented as 1 or 0, respectively.

        The data is represented in array form like ['isMale', 'age', 'native-country-United-States',
           'marital-status-Married', 'education-num', 'hours-per-week', 'workclass-Private', 'isWhite']

        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Now consider the following individual Jaden: {jaden}
        Now consider this mutation of Jaden: {jaden_mutated}
        
        Which of these two mutations would take more effort? You must provide an answer.
    
        Outline your reasoning process step by step, before giving your answer as 1, 2, or 0 in the tags <answer>...</answer>, where 1 means you think the first mutation requires more effort, 2 means you think the second mutation requires more effort, and 0 means you think there is no difference.        
        """

    elif dataset == 'german_credit':
        template = """
        You are a helpful assistant to a data scientist to help them label data.
        You will be shown a datapoint representing a person Alex, and a mutation of it, 
        You will also be shown a datapoint representing a person Jaden, and a mutation of it, 
        your task is to label which of the two mutations would take more effort to achieve.

        The data will be the German Credit Dataset which uses these features: 

        status: Status of existing checking account ['>= 200 DM / salary for at least 1 year', 'no checking account',
           '< 0 DM', '0 <= ... <= 200 DM']
        duration: The proposed duration of the loan in months, expressed as an integer.
        credit_history: The person's credit history with the options ['No credits taken/all credits paid back duly',
           'All credits at this bank paid back duly',
           'Existing credits paid back duly till now',
           'Critical account/other credits elsewhere',
           'Delay in paying off in the past']
        purpose: The purpose of the loan, with the options ['Furniture/equipment', 'Others', 'Car (Used)', 'Car (new)',
           'Retraining', 'Repairs', 'Domestic Applicances', 'Business',
           'Radio/television', 'Vacation']
        amount: The size of the loan asked for, represented as a positive integer.

        The data is represented in array form like ['status', 'duration', 'credit_history', 'purpose', 'amount']


        Now consider the following individual Alex: {alex}
        Now consider this mutation of Alex: {alex_mutated}

        Now consider the following individual Jaden: {jaden}
        Now consider this mutation of Jaden: {jaden_mutated}

        
        Which of these two mutations would take more effort? You must provide an answer.

        Outline your reasoning process step by step, before giving your answer as 1, 2, or 0 in the tags <answer>...</answer>, where 1 means you think the first mutation requires more effort, 2 means you think the second mutation requires more effort, and 0 means you think there is no difference.        
        """

    else:
        raise TypeError('Incorrect dataset name entered')

    # Alex's pair is rendered first, then Jaden's, matching the order in
    # which the templates mention them.
    return template.format(
        alex=str(x1),
        alex_mutated=str(x1p),
        jaden=str(x2),
        jaden_mutated=str(x2p),
    )







    
