import pandas as pd
import numpy as np
# Path to the .xlsx workbook that the script below reads.
# Empty placeholder: set it to a real file before running.
file_name = ''

def read_xlsx_file(file_path, sheet_name=None):
    """Read an Excel workbook with pandas, reporting failures instead of raising.

    Args:
        file_path: Location of the workbook; passed to ``pd.read_excel`` as ``io``.
        sheet_name: Forwarded unchanged to ``pd.read_excel``. With the default
            ``None`` pandas loads every sheet.

    Returns:
        Whatever ``pd.read_excel`` returns for the given ``sheet_name`` (for
        ``None``, a dict mapping sheet names to DataFrames), or ``None`` when
        the file is missing or any other error occurs while reading. Callers
        must check for ``None`` before using the result.
    """
    try:
        return pd.read_excel(
            io=file_path,
            sheet_name=sheet_name,
            engine='openpyxl',
        )
    except FileNotFoundError:
        print(f"File not found: '{file_path}'")
    except Exception as e:
        # Deliberate best-effort: any other read failure is reported on stdout
        # and signalled to the caller through the None return value.
        print(f"{e}")
    return None

def _mape(actual, predicted):
    """Return the mean absolute percentage error as a fraction (not multiplied by 100).

    Entries where ``actual`` is 0 are not filtered out, so they contribute
    inf/NaN terms to the mean.
    """
    return np.mean(np.abs(actual - predicted) / actual)


def _mae(actual, predicted):
    """Return the mean absolute error between ``actual`` and ``predicted``."""
    return np.mean(np.abs(actual - predicted))


def _print_metric(banner, value):
    """Print a banner line followed by the metric value on its own line."""
    print(banner)
    print(value)


# Ask for the one sheet the metrics need. read_xlsx_file returns None on any
# read failure, so stop with a clear message rather than failing later on a
# None subscript.
df = read_xlsx_file(file_path=file_name, sheet_name='Sheet1')
if df is None:
    raise SystemExit(
        f"Could not load sheet 'Sheet1' from {file_name!r}; check file_name."
    )

real_selectivity = df['Selectivity'].to_numpy()
prediction_selectivity = df['Selectivity(prediction)'].to_numpy()

# NOTE(review): real_recall is kept as a pandas Series, as before. np.mean on
# the recall errors therefore goes through Series.mean, which skips NaN by
# default, whereas the selectivity arrays propagate NaN. Confirm which is wanted.
real_recall = df['recall(true)']
prediction_recall_10000_full_data = df['recall(predicted 10000 samples with full dataset)'].to_numpy()
prediction_recall_100_full_data = df['recall(predicted 100 samples with full dataset)'].to_numpy()
prediction_recall_100_500000_data = df['recall(predicted 100 samples with 500000 dataset)'].to_numpy()
prediction_recall_100_100000_data = df['recall(predicted 100 samples with 100000 dataset)'].to_numpy()

# MAPE values are stored as fractions and scaled to percent only when printed.
mape_selectivity = _mape(real_selectivity, prediction_selectivity)
mae_selectivity = _mae(real_selectivity, prediction_selectivity)

_print_metric("================mape_selectivity===============", mape_selectivity * 100)
_print_metric("=================mae_selectivity=================", mae_selectivity)

mape_recall_10000_full_data = _mape(real_recall, prediction_recall_10000_full_data)
mae_recall_10000_full_data = _mae(real_recall, prediction_recall_10000_full_data)

_print_metric("===================mape_recall_10000_full_data==================", mape_recall_10000_full_data * 100)
_print_metric("===================mae_recall_10000_full_data==================", mae_recall_10000_full_data)

mape_recall_100_full_data = _mape(real_recall, prediction_recall_100_full_data)
mae_recall_100_full_data = _mae(real_recall, prediction_recall_100_full_data)

_print_metric("===================mape_recall_100_full_data==================", mape_recall_100_full_data * 100)
_print_metric("===================mae_recall_100_full_data==================", mae_recall_100_full_data)

mape_recall_100_500000_data = _mape(real_recall, prediction_recall_100_500000_data)
mae_recall_100_500000_data = _mae(real_recall, prediction_recall_100_500000_data)

_print_metric("===================mape_recall_100_500000_data==================", mape_recall_100_500000_data * 100)
_print_metric("===================mae_recall_100_500000_data==================", mae_recall_100_500000_data)

mape_recall_100_100000_data = _mape(real_recall, prediction_recall_100_100000_data)
mae_recall_100_100000_data = _mae(real_recall, prediction_recall_100_100000_data)

_print_metric("===================mape_recall_100_100000_data==================", mape_recall_100_100000_data * 100)
_print_metric("===================mae_recall_100_100000_data==================", mae_recall_100_100000_data)