import pandas as pd
import numpy as np

# Test split, stored as JSON Lines (one record per line); the code below
# reads its 'question' and 'label' columns.
df_test = pd.read_json(
    "../../INC-Math/ft_data/llama3.1-70b/test/data_lvl_54321_greedy.jsonl",
    lines=True,
)

# Number of test rows; used below as the denominator of the printed accuracies.
total_count_data = df_test.shape[0]

meta_descrip = '''<description>\nTo analyze the possibility and potential to solve the given math question, we can consider the different approaches mentioned: COT, PAL, CodeNL, and NLCode. Let\'s explore each approach in detail:\n\n### COT (Chain of Thought in Natural Language)\n\n1. **Understand the Problem**: We have two lines in 3D space. The first line is given by a point and a direction vector, and the second line is given similarly. The lines are coplanar if there exists a plane that contains both lines.\n\n2. **Condition for Coplanarity**: Two lines are coplanar if the vector connecting any point on the first line to any point on the second line is perpendicular to the cross product of the direction vectors of the two lines.\n\n3. **Mathematical Formulation**:\n   - Let \\(\\mathbf{a} = \\begin{pmatrix} 2 \\\\ 3 \\\\ 4 \\end{pmatrix}\\), \\(\\mathbf{b} = \\begin{pmatrix} 1 \\\\ 1 \\\\ -k \\end{pmatrix}\\).\n   - Let \\(\\mathbf{c} = \\begin{pmatrix} 1 \\\\ 4 \\\\ 5 \\end{pmatrix}\\), \\(\\mathbf{d} = \\begin{pmatrix} k \\\\ 2 \\\\ 1 \\end{pmatrix}\\).\n   - The vector connecting a point on the first line to a point on the second line is \\(\\mathbf{c} + u\\mathbf{d} - (\\mathbf{a} + t\\mathbf{b})\\).\n   - The cross product of the direction vectors is \\(\\mathbf{b} \\times \\mathbf{d}\\).\n   - The coplanarity condition is \\((\\mathbf{c} - \\mathbf{a}) \\cdot (\\mathbf{b} \\times \\mathbf{d}) = 0\\).\n\n4. **Solve for k**: Calculate the cross product and the dot product, set the equation to zero, and solve for \\(k\\).\n\n### PAL (Program-Aided Language)\n\n1. 
**Write a Python Program**: Use Python to perform vector operations and solve the equation for \\(k\\).\n\n```python\nimport numpy as np\n\n# Define vectors\na = np.array([2, 3, 4])\nb = np.array([1, 1, -1])  # Placeholder for k\nc = np.array([1, 4, 5])\nd = np.array([1, 2, 1])   # Placeholder for k\n\n# Define a function to calculate k\ndef find_k():\n    # Cross product of b and d\n    def cross_product(k):\n        b_k = np.array([1, 1, -k])\n        d_k = np.array([k, 2, 1])\n        return np.cross(b_k, d_k)\n\n    # Dot product of (c - a) and cross product\n    def dot_product(k):\n        cross_prod = cross_product(k)\n        return np.dot(c - a, cross_prod)\n\n    # Solve for k such that dot_product(k) = 0\n    for k in range(-10, 11):  # Example range, adjust as needed\n        if dot_product(k) == 0:\n            print(f"Possible value of k: {k}")\n\nfind_k()\n```\n\n### CodeNL (Code First, Natural Language Explanation)\n\n1. **Write the Code**: Implement the solution in Python as shown above.\n2. **Analyze the Code**: \n   - The code calculates the cross product of the direction vectors for each \\(k\\).\n   - It then computes the dot product of this cross product with the vector connecting the two points.\n   - It checks for which values of \\(k\\) this dot product is zero, indicating coplanarity.\n\n3. **Obtain the Final Answer**: Run the code to find all possible values of \\(k\\).\n\n### NLCode (Natural Language to Code)\n\n1. **Explain the Solution**: \n   - Explain the condition for coplanarity and how it translates into a mathematical equation involving \\(k\\).\n   - Describe how to calculate the cross and dot products.\n\n2. **Translate to Code**: Implement the explanation in Python code.\n\n3. **Execute and Verify**: Run the code to find the values of \\(k\\) that satisfy the condition.\n\n### Conclusion\n\nEach approach has its strengths. 
COT is useful for a deep understanding and manual solving, PAL leverages programming for efficient computation, CodeNL combines both for clarity, and NLCode ensures a thorough understanding before coding. For this problem, using PAL or CodeNL can efficiently find the possible values of \\(k\\) by leveraging Python\'s computational capabilities.\n</description>'''

# Instruction prefix shared by every example: the worked description followed
# by the four-way method-selection request.
prompt = meta_descrip + ' Please choose the correct method to solve the problem, you have four methods to choose from: cot, pal, codenl, nlcode. '

np.random.seed(2024)

# One instruction/input/output record per test row that has at least one
# label; rows whose label is empty are left out.
train_list = [
    {
        'instruction': prompt + f"Here is the question: {row['question']} Your choice: ",
        'input': '',
        'output': row['label'],
    }
    for _, row in df_test.iterrows()
    if len(row['label']) != 0
]

# Model outputs for the four-way choice (cot / pal / codenl / nlcode).
# NOTE(review): the file has a .pkl extension but is parsed as JSON; confirm
# the on-disk format.
df = pd.read_json("./save_out/4classchoicenew_llama8b.pkl") #4 classoutput

input_data = [item['instruction'] for item in train_list]
labels = [item['output'] for item in train_list]

generated_text = df[0].values

method_names = ('cot', 'pal', 'codenl', 'nlcode')

# Calculate accuracy
correct_count = 0
total_count = len(labels)
predicted_class = []

for i, accepted in enumerate(labels):
    # Lower-case once per row; matching is by substring.
    answer = generated_text[i].lower()
    if any(label in answer for label in accepted):
        correct_count += 1
    elif not any(name in answer for name in method_names) and 'cot' in accepted:
        # No method is named in the output at all: treat it as a 'cot' choice.
        correct_count += 1

# Accuracy over the rows that have a label.
accuracy = correct_count / total_count

# The printed figure uses the full test-set size, so rows without a label
# count against the score.
print(correct_count / total_count_data)



# NOTE(review): the file has a .pkl extension but is parsed as JSON; confirm
# the on-disk format.
df = pd.read_json("./save_out/2class4description_llama8b.pkl") #2 classoutput (cot,pal,both,none)

# Score the outputs that were just loaded (column 0 holds the generated text,
# as in the other sections), not the array left over from an earlier run.
generated_text = df[0].values

# Calculate accuracy
correct_count = 0
# NOTE(review): `labels` excludes rows with an empty label while the loop
# below indexes the unfiltered df_test; confirm the two are aligned.
total_count = len(labels)
predicted_class = []

test_labels = df_test['label'].values

for i in range(total_count):
    label_list = test_labels[i]
    text = generated_text[i]

    # Map the free-text answer to a decision; anything unrecognised falls
    # back to 'cot'.
    if 'both' in text or 'Both' in text:
        decision = 'both'
    elif 'COT' in text:
        decision = 'cot'
    elif 'PAL' in text:
        decision = 'pal'
    else:
        decision = 'cot'

    has_cot = 'cot' in label_list
    has_pal = 'pal' in label_list

    if decision == 'both':
        # Parenthesised on purpose: without grouping, `and` binds tighter
        # than `or` and every row labelled 'pal' was counted as correct.
        is_correct = has_cot or has_pal
    elif decision == 'cot':
        is_correct = has_cot and not has_pal
    else:
        is_correct = has_pal and not has_cot

    if is_correct:
        correct_count += 1

accuracy = correct_count / total_count_data

print(accuracy)





# Outputs of the two-way (cot / pal) run.
df = pd.read_json(
    "./save_out/2classcotpal_llama8b.pkl",
)  # 2 classoutput (cot,pal)

# Test split, read again from the JSON Lines file.
df_test = pd.read_json(
    "../../INC-Math/ft_data/llama3.1-70b/test/data_lvl_54321_greedy.jsonl",
    lines=True,
)

meta_descrip = '''<description>\nTo analyze the possibility and potential to solve the given math question, we can consider the different approaches mentioned: COT, PAL, CodeNL, and NLCode. Let\'s explore each approach in detail:\n\n### COT (Chain of Thought in Natural Language)\n\n1. **Understand the Problem**: We have two lines in 3D space. The first line is given by a point and a direction vector, and the second line is given similarly. The lines are coplanar if there exists a plane that contains both lines.\n\n2. **Condition for Coplanarity**: Two lines are coplanar if the vector connecting any point on the first line to any point on the second line is perpendicular to the cross product of the direction vectors of the two lines.\n\n3. **Mathematical Formulation**:\n   - Let \\(\\mathbf{a} = \\begin{pmatrix} 2 \\\\ 3 \\\\ 4 \\end{pmatrix}\\), \\(\\mathbf{b} = \\begin{pmatrix} 1 \\\\ 1 \\\\ -k \\end{pmatrix}\\).\n   - Let \\(\\mathbf{c} = \\begin{pmatrix} 1 \\\\ 4 \\\\ 5 \\end{pmatrix}\\), \\(\\mathbf{d} = \\begin{pmatrix} k \\\\ 2 \\\\ 1 \\end{pmatrix}\\).\n   - The vector connecting a point on the first line to a point on the second line is \\(\\mathbf{c} + u\\mathbf{d} - (\\mathbf{a} + t\\mathbf{b})\\).\n   - The cross product of the direction vectors is \\(\\mathbf{b} \\times \\mathbf{d}\\).\n   - The coplanarity condition is \\((\\mathbf{c} - \\mathbf{a}) \\cdot (\\mathbf{b} \\times \\mathbf{d}) = 0\\).\n\n4. **Solve for k**: Calculate the cross product and the dot product, set the equation to zero, and solve for \\(k\\).\n\n### PAL (Program-Aided Language)\n\n1. 
**Write a Python Program**: Use Python to perform vector operations and solve the equation for \\(k\\).\n\n```python\nimport numpy as np\n\n# Define vectors\na = np.array([2, 3, 4])\nb = np.array([1, 1, -1])  # Placeholder for k\nc = np.array([1, 4, 5])\nd = np.array([1, 2, 1])   # Placeholder for k\n\n# Define a function to calculate k\ndef find_k():\n    # Cross product of b and d\n    def cross_product(k):\n        b_k = np.array([1, 1, -k])\n        d_k = np.array([k, 2, 1])\n        return np.cross(b_k, d_k)\n\n    # Dot product of (c - a) and cross product\n    def dot_product(k):\n        cross_prod = cross_product(k)\n        return np.dot(c - a, cross_prod)\n\n    # Solve for k such that dot_product(k) = 0\n    for k in range(-10, 11):  # Example range, adjust as needed\n        if dot_product(k) == 0:\n            print(f"Possible value of k: {k}")\n\nfind_k()\n```\n\n### CodeNL (Code First, Natural Language Explanation)\n\n1. **Write the Code**: Implement the solution in Python as shown above.\n2. **Analyze the Code**: \n   - The code calculates the cross product of the direction vectors for each \\(k\\).\n   - It then computes the dot product of this cross product with the vector connecting the two points.\n   - It checks for which values of \\(k\\) this dot product is zero, indicating coplanarity.\n\n3. **Obtain the Final Answer**: Run the code to find all possible values of \\(k\\).\n\n### NLCode (Natural Language to Code)\n\n1. **Explain the Solution**: \n   - Explain the condition for coplanarity and how it translates into a mathematical equation involving \\(k\\).\n   - Describe how to calculate the cross and dot products.\n\n2. **Translate to Code**: Implement the explanation in Python code.\n\n3. **Execute and Verify**: Run the code to find the values of \\(k\\) that satisfy the condition.\n\n### Conclusion\n\nEach approach has its strengths. 
COT is useful for a deep understanding and manual solving, PAL leverages programming for efficient computation, CodeNL combines both for clarity, and NLCode ensures a thorough understanding before coding. For this problem, using PAL or CodeNL can efficiently find the possible values of \\(k\\) by leveraging Python\'s computational capabilities.\n</description>'''

# Instruction prefix: the worked description followed by the method-selection
# request.
prompt = meta_descrip + ' Please choose the correct method to solve the problem, you have four methods to choose from: cot, pal, codenl, nlcode. '

np.random.seed(2024)

# One instruction/input/output record per test row (no filtering here).
# A plain loop (not a comprehension) so `row` stays bound at module level
# afterwards.
train_list = []
for _, row in df_test.iterrows():
    train_list.append({
        'instruction': prompt + f"Here is the question: {row['question']} Your choice: ",
        'input': '',
        'output': row['label'],
    })

df_test_update = pd.DataFrame(train_list)

# Inputs that were fed to the two-way run; column 0 of `df` is attached to
# them positionally as the 'predict' column.
df_test_input = pd.read_json("../../INC-Math/ft_data/llama3.1-70b/test/data_lvl_54321_greedy_2classcotpal.json")

df_test_input['predict'] = df[0].values

# Keep the first row per distinct instruction. The explicit copy makes the
# result an independent frame, so columns can be added to it later without
# pandas' chained-assignment warning.
df_update_drop = df_test_input.drop_duplicates(subset='instruction').copy()

noquestion1 = '''<description>\nTo analyze the possibility and potential to solve the given math question, we can consider the different approaches mentioned: COT and PAL. Let\'s explore each approach in detail:\n\n### COT (Chain of Thought in Natural Language)\n\n1. **Understand the Problem**: We have two lines in 3D space. The first line is given by a point and a direction vector, and the second line is given similarly. The lines are coplanar if there exists a plane that contains both lines.\n\n2. **Condition for Coplanarity**: Two lines are coplanar if the vector connecting any point on the first line to any point on the second line is perpendicular to the cross product of the direction vectors of the two lines.\n\n3. **Mathematical Formulation**:\n   - Let \\(\\mathbf{a} = \\begin{pmatrix} 2 \\\\ 3 \\\\ 4 \\end{pmatrix}\\), \\(\\mathbf{b} = \\begin{pmatrix} 1 \\\\ 1 \\\\ -k \\end{pmatrix}\\).\n   - Let \\(\\mathbf{c} = \\begin{pmatrix} 1 \\\\ 4 \\\\ 5 \\end{pmatrix}\\), \\(\\mathbf{d} = \\begin{pmatrix} k \\\\ 2 \\\\ 1 \\end{pmatrix}\\).\n   - The vector connecting a point on the first line to a point on the second line is \\(\\mathbf{c} + u\\mathbf{d} - (\\mathbf{a} + t\\mathbf{b})\\).\n   - The cross product of the direction vectors is \\(\\mathbf{b} \\times \\mathbf{d}\\).\n   - The coplanarity condition is \\((\\mathbf{c} - \\mathbf{a}) \\cdot (\\mathbf{b} \\times \\mathbf{d}) = 0\\).\n\n4. **Solve for k**: Calculate the cross product and the dot product, set the equation to zero, and solve for \\(k\\).\n\n### PAL (Program-Aided Language)\n\n1. 
**Write a Python Program**: Use Python to perform vector operations and solve the equation for \\(k\\).\n\n```python\nimport numpy as np\n\n# Define vectors\na = np.array([2, 3, 4])\nb = np.array([1, 1, -1])  # Placeholder for k\nc = np.array([1, 4, 5])\nd = np.array([1, 2, 1])   # Placeholder for k\n\n# Define a function to calculate k\ndef find_k():\n    # Cross product of b and d\n    def cross_product(k):\n        b_k = np.array([1, 1, -k])\n        d_k = np.array([k, 2, 1])\n        return np.cross(b_k, d_k)\n\n    # Dot product of (c - a) and cross product\n    def dot_product(k):\n        cross_prod = cross_product(k)\n        return np.dot(c - a, cross_prod)\n\n    # Solve for k such that dot_product(k) = 0\n    for k in range(-10, 11):  # Example range, adjust as needed\n        if dot_product(k) == 0:\n            print(f"Possible value of k: {k}")\n\nfind_k()\n```\n\n\n</description> Please choose the correct method to solve the problem, you have two methods to choose from: cot, pal. Here is the question: '''
noquestion2 = ''' Your choice: '''

# Recover the bare question text by stripping the fixed prompt text that
# surrounds it in each instruction.
df_update_drop = df_update_drop.assign(
    question=[
        text.replace(noquestion1, '').replace(noquestion2, '')
        for text in df_update_drop['instruction'].values
    ]
)

# First label list recorded for each question text. Built once so each lookup
# below is a dictionary access rather than a scan of df_test.
label_by_question = {}
for question, label in zip(df_test['question'].values, df_test['label'].values):
    label_by_question.setdefault(question, label)

correct_count = 0
unmatched = 0
for predicted, question in zip(df_update_drop['predict'].values,
                               df_update_drop['question'].values):
    ground_truth = label_by_question.get(question)
    if ground_truth is None:
        # The stripped instruction matches no test question; it cannot be
        # scored, so it counts as incorrect.
        unmatched += 1
        continue

    # The decision is the second space-separated token, up to the first '.'.
    # Anything missing or unrecognised falls back to 'cot'.
    tokens = predicted.split(' ')
    decision = tokens[1].split('.')[0] if len(tokens) > 1 else 'cot'
    if decision not in ('cot', 'pal'):
        decision = 'cot'

    if decision in ground_truth:
        correct_count += 1

if unmatched:
    print(f"warning: {unmatched} prediction(s) had no matching test question")

print(correct_count / total_count_data)



# NOTE(review): the file has a .pkl extension but is parsed as JSON; confirm
# the on-disk format.
df = pd.read_json("./save_out/llama8b_70bdata_binaryout.pkl") #2 classoutput (yes no)

generated_text = df[0].values

import random

# Seeded so the random fallback below is reproducible.
np.random.seed(2024)

# (cot verdict, pal verdict) -> predicted set of methods; any other
# combination predicts nothing.
methods_by_verdict = {
    ('Yes', 'Yes'): ['cot', 'pal'],
    ('Yes', 'No'): ['cot'],
    ('No', 'Yes'): ['pal'],
}

# Outputs come in consecutive pairs: the first answers the cot query, the
# second the pal query.
total_count = len(df)
predicted_class = []
for i in range(0, total_count, 2):
    try:
        # The verdict is the second space-separated token, up to the first '.'.
        decision1 = generated_text[i].split(" ")[1].split('.')[0] #cot
        decision2 = generated_text[i+1].split(" ")[1].split('.')[0] #pal
    except (IndexError, AttributeError):
        # Missing token, missing second element of the pair, or a non-string
        # output: fall back to a random single-method guess.
        predicted_class.append([np.random.choice(['cot','pal'])])
        continue
    predicted_class.append(list(methods_by_verdict.get((decision1, decision2), [])))

# Calculate accuracy
correct_count = 0
for true, pred in zip(df_test['label'].values, predicted_class):
    if true == []:
        continue
    overlap = len(set(true).intersection(pred))
    # A two-method prediction must match both labels; otherwise exactly one
    # overlapping method is required.
    required = 2 if len(pred) == 2 else 1
    if overlap == required:
        correct_count += 1

print(correct_count / total_count_data)

