from yahoofinancials import YahooFinancials
import pandas as pd 
from tqdm import tqdm
import time
import os 

# Date range passed to the Yahoo Finance dividend query later in this script,
# given as "YYYY-MM-DD" strings.
StartDate = "2019-03-01"
EndDate = "2024-03-01"

# Read every HTML table on the Wikipedia S&P 500 page; the first one is taken
# as the constituents table and reduced to the ticker and sector columns.
payload = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
constituents = payload[0]
df = constituents[['Symbol', 'GICS Sector']].sort_values(by=["GICS Sector", "Symbol"])

# Persist the symbol -> sector mapping, ordered by sector and then by symbol.
df.to_csv("stock_categories.csv", index=False)

# Ticker list (in the same sector/symbol order) driving the download loop.
all_stocks = df['Symbol'].tolist()
print(len(all_stocks))

# Download the dividend history of every listed stock and write it to
# "<symbol>_dividend.csv" in the working directory, one file per stock.
# Only dividends are fetched here; the historical price download is disabled
# (see the commented-out call inside the loop and the commented block below).
for stock in tqdm(all_stocks):
    # Wikipedia writes share classes with a dot (e.g. "BRK.B") while Yahoo
    # Finance uses a dash ("BRK-B"). Query with the Yahoo spelling, but keep
    # the original symbol for file names and messages so the output still
    # lines up with the symbols in stock_categories.csv.
    yahoo_symbol = stock.replace(".", "-")
    yahoo_financials_stock = YahooFinancials(yahoo_symbol)
    # stock_price_data = yahoo_financials_stock.get_historical_price_data(StartDate, EndDate, 'daily')
    dividend_data = yahoo_financials_stock.get_daily_dividend_data(StartDate, EndDate)

    # The result is looked up by the queried ticker. A missing key and a None
    # value are both treated as "no data" so that every stock ends up with a
    # CSV file and a console message instead of being skipped silently.
    dividends = dividend_data.get(yahoo_symbol)
    if dividends is None:
        # Header-only CSV so downstream readers still find the usual columns.
        dividend_df = pd.DataFrame({'date': [], 'formatted_date': [], 'amount': []})
        print(f"{stock} not found")
    else:
        dividend_df = pd.DataFrame(dividends)

    # A dedicated name is used (rather than `df`) so the categories frame
    # built earlier in the script is not overwritten by the loop.
    dividend_df.to_csv(f"{stock}_dividend.csv", index=False)

    # if stock in stock_price_data.keys():
    #     if not stock_price_data[stock] is None:
    #         if "prices" in stock_price_data[stock].keys():
    #             df = pd.DataFrame(stock_price_data[stock]["prices"])
    #             df.to_csv("{}.csv".format(stock), index=False)
    #             df = pd.DataFrame(dividend_data[stock])
    #             df.to_csv("{}_dividend.csv".format(stock), index=False)
    #         else:
    #             print("{} not found".format(stock))
    #     else:
    #         print("{} not found".format(stock))


# if __name__ == '__main__':
#     files = []
#     for r, filename_data in enumerate(os.listdir(".")):
#         if filename_data.split(".")[-1] == "csv" and filename_data.split("_")[0] != "X" and filename_data.split("_")[1] != "dividend":
#             index = filename_data.split("_")[-1].split(".")[0]
#             files.append([pd.read_csv("{}".format(filename_data)), index])
#             print(index, filename_data)

#     for argument in ["Open", "Close", "High", "Low"]:
#         X = files[0][0][["Date", argument]]
#         X_index = files[0][1]
#         X = X.rename(columns={"Date" : "Date", argument : argument + "_{}".format(X_index)})
#         print(X.head())
#         X[argument + "_{}".format(X_index)] = X[argument + "_{}".format(X_index)].apply(lambda x: float(x.replace(",","")))
#         for i in range(len(files) - 1):
#             Y = files[i + 1][0][["Date", argument]]
#             Y_index = files[i + 1][1]
#             Y = Y.rename(columns={"Date" : "Date", argument : argument + "_{}".format(Y_index)})
#             print(Y.head())
#             Y[argument + "_{}".format(Y_index)] = Y[argument + "_{}".format(Y_index)].apply(lambda x: float(x.replace(",","")))
#             X = pd.merge(X, Y, on="Date", how="inner") #considers the intersection of keys

#         X = X.iloc[::-1] # reverse the order of rows so the oldest stock value is first
#         X.to_csv("X_{}.csv".format(argument), index=False)

#         if (argument == "Open"):
#             dfOpen = X
#         elif (argument == "Close"):
#             dfClose = X

#     df = pd.merge(dfOpen, dfClose, on="Date", how="inner")
#     df.to_csv("X_OpenClose.csv", index=False)