'''
Modified from https://github.com/viviensiu/bankruptcy_prediction/
'''

#%% Import
import pandas as pd
import chardet
file = 'TEJ - Normalized_2013_0430_Taiwan_data.csv'
# Sniff the encoding from the first raw line of the file only.
# A context manager guarantees the handle is closed even if the read fails.
with open(file, 'rb') as f:
    rawdata = f.readline()
# Bare expression: the detection result is not stored or used later in this
# file; it is presumably meant to be inspected when the cell is run
# interactively.
chardet.detect(rawdata)

#%% 
# Load the raw dataset, decoding the bytes as Big5.
df = pd.read_csv(filepath_or_buffer=file, encoding="Big5")
# Print the column dtypes and non-null counts.
df.info()

# English column names (96 entries). They are assigned to the DataFrame
# positionally, so the order of this list must match the column order of the
# source CSV.
translated_columns = [
    "Flag",
    "ROA(C) before tax and interest before depreciation",
    "ROA(A) after tax and before interest%",
    "ROA(B) after tax and before interest and depreciation",
    "Operating gross profit margin",
    "Realized gross profit margin of sales",
    "operating profit rate",
    "net profit margin before tax",
    "net profit margin after tax",
    "non-industry revenue and expenditure/revenue",
    "continuous profit rate (after tax)",
    "Operating expense ratio",
    "Research and development expense ratio",
    "Cash flow ratio",
    "Interest-bearing debt interest rate",
    "Tax rate (A)",
    "Net value per share (B)",
    "Net value per share (A)",
    "Net value per share (C)",
    "Persistent EPS in the last four seasons",
    "Cash flow per share",
    "Return per share (yuan)",
    "Operating profit per share ( Yuan)",
    "Net profit per share before tax (yuan)",
    "Realized gross profit growth rate of sales",
    "Operating profit growth rate",
    "After-tax net profit growth rate",
    "Regular net profit growth rate",
    "Continuous net profit growth rate",
    "Total Asset Growth Rate",
    "Net Worth Growth Rate",
    "Return on Total Assets Growth Rate",
    "Cash Reinvestment %",
    "Current Ratio",
    "Quick Ratio",
    "Interest Expenditure Rate",
    "Total liabilities/total net worth",
    "debt ratio%",
    "net worth/assets",
    "long-term fund suitability ratio (A)",
    "borrowing dependency",
    "contingent liabilities/net worth",
    "Operating profit/paid-in capital",
    "pre-tax net profit/paid-up capital",
    "inventory and accounts receivable/net value",
    "total asset turnover times",
    "accounts receivable turnover times",
    "Average collection days",
    "Inventory turnover rate (times)",
    "Fixed asset turnover times",
    "Net worth turnover rate (times)",
    "Revenue per person",
    "Operating profit per person",
    "Equipment rate per person",
    "working capital to total assets",
    "Quick asset/Total asset",
    "current assets/total assets",
    "cash / total assets",
    "Quick asset/current liabilities",
    "cash / current liability",
    "current liability to assets",
    "operating funds to liability",
    "Inventory/working capital",
    "Inventory/current liability",
    "current liability / liability",
    "working capital/equity",
    "current liability/equity",
    "long-term liability to current assets",
    "Retained Earnings/Total assets",
    "total income / total expense",
    "total expense /assets",
    "Liquid asset turnover rate",
    "Quick asset turnover rate",
    "working capital turnover rate",
    "Cash turnover rate",
    "Cash flow to Sales",
    "fix assets to assets",
    "current liability to liability",
    "current liability to equity",
    "equity to long-term liability",
    "Cash flow to total assets",
    "cash flow to liability",
    "CFO to ASSETS",
    "cash flow to equity",
    "current liabilities to current assets",
    "one if total liabilities exceeds total assets  zero otherwise",
    "net income to total assets",
    "total assets to GNP price",
    "No-credit interval",
    "Gross profit to Sales",
    "Net income to stockholder's Equity",
    "liability to equity",
    "Degree of financial leverage (DFL)",
    "Interest coverage ratio( Interest expense to EBIT )",
    "one if net income was negative for the last two year  zero otherwise",
    "equity to liability",
]

# Give every column name the same casing via str.title().
# Note: str.title() capitalizes the first letter of each run of letters and
# lower-cases the rest, so acronyms and apostrophes are affected too
# (e.g. "ROA" -> "Roa", "stockholder's" -> "Stockholder'S").
translated_columns = list(map(str.title, translated_columns))

# Rename positionally; pandas raises if the list length does not match the
# number of columns in df.
df.columns = translated_columns
df.columns

#%% 
# Persist the renamed data without writing the row index as a column.
df.to_csv(path_or_buf='Taiwan_data_ENG.csv', index=False)

# Read the file back and look at the first rows.
test = pd.read_csv(filepath_or_buffer='Taiwan_data_ENG.csv')
test.head(n=5)

#%% 
# Lift the limit on how many columns pandas shows when displaying a frame.
pd.options.display.max_columns = None
df = pd.read_csv(filepath_or_buffer='Taiwan_data_ENG.csv', index_col=False)
df

# Summary statistics of the columns
df.describe()

# Remove this one column before saving the final file.
df = df.drop(
    columns=['One If Net Income Was Negative For The Last Two Year  Zero Otherwise']
)

df.to_csv(path_or_buf='Taiwan_data_ENG_95.csv', index=False)


# %%
