import re
from enum import Enum
import numpy as np
import formulas


# Formula parsing and evaluation are delegated to the third-party `formulas` package:
# https://github.com/vinci1it2000/formulas


class FormulaOperator(Enum):
    """
    Enumeration of the formula operators and function names used by this module.

    Each member's value is the text of the operator as it is written in a
    formula: a symbol (e.g. ``'+'``, ``'<>'``) or a function name
    (e.g. ``'SUM'``).

    Members, in definition order:
      - Math: PLUS, MINUS, MULTIPLY, DIVIDE, SUM, AVERAGE, COUNT, MAX, MIN
      - Comparison: EQUAL, NOT_EQUAL, GREATER_THAN, LESS_THAN, GREATER_THAN_OR_EQUAL, LESS_THAN_OR_EQUAL
      - Logical: AND, OR, NOT, IF, TRUE, FALSE
      - Lookup: INDEX, MATCH
    """
    # Math operators: infix symbols followed by aggregate function names
    PLUS = '+'
    MINUS = '-'
    MULTIPLY = '*'
    DIVIDE = '/'
    SUM = 'SUM'
    AVERAGE = 'AVERAGE'
    COUNT = 'COUNT'
    MAX = 'MAX'
    MIN = 'MIN'

    # Comparison operators (infix symbols)
    EQUAL = '='
    NOT_EQUAL = '<>'
    GREATER_THAN = '>'
    LESS_THAN = '<'
    GREATER_THAN_OR_EQUAL = '>='
    LESS_THAN_OR_EQUAL = '<='

    # Logical operators and boolean constants (function-style names)
    AND = 'AND'
    OR = 'OR'
    NOT = 'NOT'
    IF = 'IF'
    TRUE = 'TRUE'
    FALSE = 'FALSE'

    # Lookup operators (function-style names)
    INDEX = 'INDEX'
    MATCH = 'MATCH'



# Human-readable reference for every supported operator. Each entry is a dict
# with the keys 'name', 'symbol', 'description' and 'example' (in that order),
# built from the compact (name, symbol, description, example) rows below.
formula_operator_instruction = [
    {
        'name': op_name,
        'symbol': op_symbol,
        'description': op_description,
        'example': op_example,
    }
    for op_name, op_symbol, op_description, op_example in (
        # Math
        ('PLUS', '+', 'Adds two numbers together', '=A1 + A2'),
        ('MINUS', '-', 'Subtracts one number from another', '=A1 - A2'),
        ('MULTIPLY', '*', 'Multiplies two numbers together', '=A1 * A2'),
        ('DIVIDE', '/', 'Divides one number by another', '=A1 / A2'),
        ('SUM', 'SUM', 'Sums a range of numbers', '=SUM(A1:A10)'),
        ('AVERAGE', 'AVERAGE', 'Calculates the average of a range of numbers', '=AVERAGE(A1:A10)'),
        ('COUNT', 'COUNT', 'Counts the number of numbers in a range', '=COUNT(A1:A10)'),
        ('MAX', 'MAX', 'Finds the maximum number in a range', '=MAX(A1:A10)'),
        ('MIN', 'MIN', 'Finds the minimum number in a range', '=MIN(A1:A10)'),
        # Comparison
        ('EQUAL', '=', 'Returns TRUE if the two values are equal', '=A1 = A2'),
        ('NOT_EQUAL', '<>', 'Returns TRUE if the two values are not equal', '=A1 <> A2'),
        ('GREATER_THAN', '>', 'Returns TRUE if the first value is greater than the second value', '=A1 > A2'),
        ('LESS_THAN', '<', 'Returns TRUE if the first value is less than the second value', '=A1 < A2'),
        ('GREATER_THAN_OR_EQUAL', '>=', 'Returns TRUE if the first value is greater than or equal to the second value', '=A1 >= A2'),
        ('LESS_THAN_OR_EQUAL', '<=', 'Returns TRUE if the first value is less than or equal to the second value', '=A1 <= A2'),
        # Logical
        ('AND', 'AND', 'Returns TRUE if all arguments are TRUE', '=AND(A1, A2)'),
        ('OR', 'OR', 'Returns TRUE if any argument is TRUE', '=OR(A1, A2)'),
        ('NOT', 'NOT', 'Returns TRUE if the argument is FALSE', '=NOT(A1)'),
        ('IF', 'IF', 'Returns one value if a condition is TRUE and another value if it is FALSE', '=IF(A1 > 10, "Yes", "No")'),
        ('TRUE', 'TRUE', 'Returns TRUE', '=TRUE'),
        ('FALSE', 'FALSE', 'Returns FALSE', '=FALSE'),
        # Lookup
        ('INDEX', 'INDEX', 'Returns the value of a cell at a specific row and column', '=INDEX(A1:A10, 1)'),
        ('MATCH', 'MATCH', 'Returns the relative position of an item in an array that matches a specified value. Syntax: =MATCH(lookup_value, lookup_array, [match_type])', '=MATCH("value", A1:A10, 0)'),
    )
]



class FormulaParser:
    """
    Compiles an Excel-style formula with the ``formulas`` package and evaluates
    it against a table supplied as a 2D list (a list of rows).

    Cell references in the formula are resolved positionally: ``A1`` is
    ``table[0][0]``, ``B1`` is ``table[0][1]``, ``A2`` is ``table[1][0]``.
    """

    def __init__(self, formula: str):
        """
        Parse and compile ``formula`` (e.g. ``"=SUM(A1:A5)"``).

        The ``INDEX`` entry of the function mapping returned by
        ``formulas.get_functions()`` is replaced with :meth:`_index` before
        compiling; this is the workaround the module uses for problems with
        the library's own INDEX implementation.

        The compiled callable is stored in ``self.func``. Range references are
        not rewritten here; they are resolved against the table in
        :meth:`execute`.
        """
        self.formula = formula

        # Override INDEX in the library's function registry (re-assigning the
        # same function on every construction is harmless).
        formulas.get_functions()['INDEX'] = FormulaParser._index

        # parse the formula
        self.func = formulas.Parser().ast(self.formula)[1].compile()

    # ----------------- INDEX replacement -----------------
    @staticmethod
    def _index(array, row_num, col_num=1, area_num=1):
        """
        Replacement for the spreadsheet ``INDEX`` function.

        Args:
            array: 2D table of values (nested list or array).
            row_num: 1-based row position inside ``array``.
            col_num: 1-based column position inside ``array`` (default 1).
            area_num: must be 1; multiple areas are not supported.

        Returns:
            The selected element as a plain Python value.

        Raises:
            ValueError: if ``area_num`` is not 1, if the positions cannot be
                converted to integers, or if they fall outside ``array``.
        """
        # Explicit raise instead of ``assert`` so the check survives ``python -O``.
        if area_num != 1:
            raise ValueError("[new INDEX Error] Only one area is supported")

        # dtype=object keeps mixed tables intact; a plain np.array() would turn
        # every number into a string as soon as one cell holds text.
        table = np.array(array, dtype=object)
        try:
            row = int(row_num)
            col = int(col_num)
        except Exception as e:
            raise ValueError(f"[new INDEX Error] {e}") from e

        # Positions are 1-based. Reject 0 and negatives explicitly: after the
        # -1 shift they would otherwise wrap around to the end of the table.
        in_range = (
            table.ndim == 2
            and 1 <= row <= table.shape[0]
            and 1 <= col <= table.shape[1]
        )
        if not in_range:
            raise ValueError(
                f"[new INDEX Error] {row}, {col} is out of table range {table.shape}"
            )

        value = table[row - 1, col - 1]
        # Elements of an object array are usually plain Python objects, which
        # have no .tolist(); only NumPy scalars/arrays need the conversion.
        return value.tolist() if hasattr(value, "tolist") else value

    # ----------------- Utility Functions -----------------
    @staticmethod
    def cell_to_indices(cell: str):
        """
        Converts a cell reference (e.g., "A1") into (column, row) indices (0-indexed).

        Raises:
            ValueError: if ``cell`` is not upper-case column letters followed
                by a row number of at least 1.
        """
        m = re.fullmatch(r"([A-Z]+)(\d+)", cell)
        if not m:
            raise ValueError(f"Invalid cell reference: {cell}")
        col_letters, row_str = m.groups()
        row = int(row_str)
        if row < 1:
            # Row 0 does not exist; index -1 would silently address the last row.
            raise ValueError(f"Invalid cell reference: {cell}")
        # Column letters are a bijective base-26 number (A=1 ... Z=26, AA=27).
        col = 0
        for char in col_letters:
            col = col * 26 + (ord(char) - ord('A') + 1)
        return col - 1, row - 1

    @staticmethod
    def indices_to_cell(col: int, row: int):
        """
        Converts 0-indexed (col, row) into a cell reference (e.g., "A1").

        Raises:
            ValueError: if ``col`` or ``row`` is negative.
        """
        if col < 0 or row < 0:
            raise ValueError(f"Invalid cell indices: col={col}, row={row}")
        col_str = ""
        # Inverse of the bijective base-26 encoding used by cell_to_indices.
        while col >= 0:
            col, remainder = divmod(col, 26)
            col_str = chr(ord('A') + remainder) + col_str
            col -= 1
        return f"{col_str}{row + 1}"

    @staticmethod
    def expand_range(start: str, end: str):
        """
        Expands a range given by two corner cells into a 2D list of cell
        references, one inner list per row.

        For example, expand_range("A1", "B2") returns
        [["A1", "B1"], ["A2", "B2"]]. The corners may be given in any order.
        """
        start_col, start_row = FormulaParser.cell_to_indices(start)
        end_col, end_row = FormulaParser.cell_to_indices(end)
        first_col, last_col = sorted((start_col, end_col))
        first_row, last_row = sorted((start_row, end_row))
        return [
            [
                FormulaParser.indices_to_cell(col, row)
                for col in range(first_col, last_col + 1)
            ]
            for row in range(first_row, last_row + 1)
        ]

    @staticmethod
    def try_normalize_number(s: str):
        """
        Tries to read ``s`` as a number.

        Surrounding whitespace, ``$``, ``,`` and ``%`` are removed before
        parsing; a value ending in ``%`` is divided by 100.

        Returns:
            A float when parsing succeeds, otherwise the stripped string.
            Values that are not strings are returned unchanged.
        """
        if not isinstance(s, str):
            # Already-typed cells (numbers, None, ...) have no .strip().
            return s
        s = s.strip()
        is_percent = s.endswith('%')
        s_clean = s.replace('%', '').replace('$', '').replace(',', '').strip()
        try:
            value = float(s_clean)
        except ValueError:
            return s
        if is_percent:
            value /= 100.0
        return value

    @staticmethod
    def _normalize_cell(cell):
        """Normalizes one cell: whole numbers become int, other numbers float."""
        value = FormulaParser.try_normalize_number(cell)
        if isinstance(value, (str, bool)):
            # Text stays text; booleans are kept as booleans, not turned into 0/1.
            return value
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return value
        # is_integer() is False for inf/nan, so those stay floats instead of
        # blowing up in int().
        return int(number) if number.is_integer() else number

    @staticmethod
    def normalize_table_data(table_data):
        """
        Normalizes the table data (a 2D list) into a new 2D list.

        Cells that can be read as numbers are converted (int for whole values,
        float otherwise); text cells are kept as stripped strings; any other
        cell value is kept unchanged. The input table is not modified.
        """
        return [
            [FormulaParser._normalize_cell(cell) for cell in row]
            for row in table_data
        ]

    @staticmethod
    def _cell_value(table_data, cell: str):
        """Returns the table value addressed by ``cell``; ValueError if outside the table."""
        col, row = FormulaParser.cell_to_indices(cell)
        try:
            return table_data[row][col]
        except IndexError as e:
            raise ValueError(f"Cell {cell} is out of table range") from e

    # ----------------- Execute Function -----------------
    def execute(self, table_data: list):
        """
        Executes the compiled formula using the provided table data (a 2D list).

        The table is normalized first (see normalize_table_data). Then, for
        each reference the compiled function expects (the keys of
        ``self.func.inputs``, in order):
          - a range reference (contains a colon, such as "B2:B6") becomes a
            2D list of values, one inner list per row of the range;
          - any other reference becomes the single cell value.
        The collected arguments are passed positionally to the compiled function.

        Returns:
            Whatever the compiled function returns.

        Raises:
            ValueError: if a reference is malformed or lies outside the table.
        """
        table_data = self.normalize_table_data(table_data)
        arguments = []
        for ref in self.func.inputs:
            if ":" in ref:
                start, end = ref.split(":")
                arguments.append([
                    [FormulaParser._cell_value(table_data, cell) for cell in row_refs]
                    for row_refs in FormulaParser.expand_range(start, end)
                ])
            else:
                arguments.append(FormulaParser._cell_value(table_data, ref))

        return self.func(*arguments)



if __name__ == '__main__':
    # Manual smoke test: runs only when this file is executed as a script.
    #
    # The commented-out section below is a set of disabled examples that share
    # one table. Its first row is a header, so numeric data starts at row 2.
    # example_table = [
    #     ['proximal determinants - physical environments and resources', 'at least one chronic condition', 'fair or poor self-rated general health', 'fair or poor self-rated mental health'],
    #     ['less than high school education', 0.66, 0.26, 0.15],
    #     ['high school education or higher', 0.61, 0.21, 0.13],
    #     ['unemployed', 0.61, 0.25, 0.17],
    #     ['employed', 0.60, 0.15, 0.10],
    #     ['annual household income in lowest tercile', 0.64, 0.26, 0.16],
    #     ['annual household income above lowest tercile', 0.62, 0.20, 0.12],
    #     ['food insecure', 0.71, 0.28, 0.19],
    #     ['food secure', 0.61, 0.20, 0.12]
    # ]

    # # Test case 1: plain reference to a single cell
    # formula1 = "=B1"
    # parser1 = FormulaParser(formula1)
    # result1 = parser1.execute(example_table)
    # print(f"Test 1 - cell reference result: {result1}")

    # # Test case 2: SUM function to sum a range
    # formula2 = "=SUM(B2:C4)"
    # parser2 = FormulaParser(formula2)
    # result2 = parser2.execute(example_table)
    # print(f"Test 2 - SUM result: {result2}")

    # # Test case 3: MAX function to find the maximum value in a range
    # formula3 = "=MAX(C2:C5)"
    # parser3 = FormulaParser(formula3)
    # result3 = parser3.execute(example_table)
    # print(f"Test 3 - MAX result: {result3}")

    # # Test case 4: AND function (logical operation)
    # formula4 = "=AND(B2 > 0.5, B3 > 0.5)"
    # parser4 = FormulaParser(formula4)
    # result4 = parser4.execute(example_table)
    # print(f"Test 4 - AND result: {result4}")

    # # Test case 5: OR function (logical operation)
    # formula5 = "=OR(B2 > 0.7, B3 > 0.5)"
    # parser5 = FormulaParser(formula5)
    # result5 = parser5.execute(example_table)
    # print(f"Test 5 - OR result: {result5}")

    # # Test case 6: NOT function (logical operation)
    # formula6 = "=NOT(B2 > 0.7)"
    # parser6 = FormulaParser(formula6)
    # result6 = parser6.execute(example_table)
    # print(f"Test 6 - NOT result: {result6}")

    # # Test case 7: IF function (True condition)
    # formula7 = "=IF(B2 > 0.5, TRUE, FALSE)"
    # parser7 = FormulaParser(formula7)
    # result7 = parser7.execute(example_table)
    # print(f"Test 7 - IF result (True condition): {result7}")

    # # Test case 8: AVERAGE function
    # formula8 = "=AVERAGE(B2:B5)"
    # parser8 = FormulaParser(formula8)
    # result8 = parser8.execute(example_table)
    # print(f"Test 8 - AVERAGE result: {result8}")

    # # Test case 9: COUNT function to count numeric values
    # formula9 = "=COUNT(B2:B5)"
    # parser9 = FormulaParser(formula9)
    # result9 = parser9.execute(example_table)
    # print(f"Test 9 - COUNT result: {result9}")

    # # Test case 10: MAX function over individually listed cells
    # formula10 = "=MAX(B2, B3, B4)"
    # parser10 = FormulaParser(formula10)
    # result10 = parser10.execute(example_table)
    # print(f"Test 10 - MAX over listed cells result: {result10}")

    # # Test case 11: INDEX function to retrieve a value from a range
    # formula_index = "=INDEX(B2:C4, 2)"
    # parser_index = FormulaParser(formula_index)
    # result_index = parser_index.execute(example_table)
    # print(f"Test 11 - INDEX result: {result_index}")

    # # Test case 12: MATCH function to find the position of a value in a range
    # # Looks up 0.66 in B2:B3 with exact matching; 0.66 is the value of B2,
    # # i.e. the first cell of the range.
    # formula_match = "=MATCH(0.66, (B2:B3), 0)"
    # parser_match = FormulaParser(formula_match)
    # result_match = parser_match.execute(example_table)
    # print(f"Test 12 - MATCH result: {result_match}")

    # print("All test cases executed!")

    # Active example: a table whose cells are all strings, with two header
    # rows (rows 1-2) followed by one row per club/season and a totals row.
    example_table_2 = [
        ['club', 'season', 'league', '', '', 'cup', '', 'continental', '', 'other', '', 'total', ''],
        ['', '', 'division', 'apps', 'goals', 'apps', 'goals', 'apps', 'goals', 'apps', 'goals', 'apps', 'goals'],
        ['shkumbini peqin', '2013-14', 'first division', '8', '0', '0', '0', '-', '', '0', '0', '8', '0'],
        ['espinho', '2014-15', 'campeonato nacional', '5', '0', '1', '0', '-', '', '0', '0', '6', '0'],
        ['atletico camioneros', '2015', 'torneo federal b', '4', '0', '0', '0', '-', '', '0', '0', '4', '0'],
        ['ferro carril oeste', '2016', 'primera b nacional', '3', '0', '0', '0', '-', '', '0', '0', '3', '0'],
        ['estudiantes', '2016-17', 'primera b metropolitana', '22', '1', '0', '0', '-', '', '2', '0', '24', '1'],
        ['uai urquiza', '2017-18', '', '23', '3', '0', '0', '-', '', '5', '2', '28', '5'],
        ['temperley', '2018-19', 'primera b nacional', '10', '1', '4', '2', '-', '', '0', '0', '14', '3'],
        ['career total', '', '', '75', '5', '5', '2', '-', '', '7', '2', '87', '9']
    ]


    # Combined lookup: MATCH searches the season column (B3:B9) for "2015" and
    # INDEX uses the resulting position to pick from the club column (A3:A9).
    formula_match = '=INDEX(A3:A9, MATCH("2015", B3:B9, 0))'
    parser_match = FormulaParser(formula_match)
    result_match = parser_match.execute(example_table_2)
    print(f"Test 12 - MATCH result: {result_match}")
