import numpy as np
import pandas as pd

def is_significant(value, threshold_upper, threshold_lower):
    """Tell whether ``value`` lies strictly outside the threshold band.

    Args:
        value: The number to classify.
        threshold_upper: Upper edge of the band; a value equal to it is not
            significant.
        threshold_lower: Lower edge of the band; a value equal to it is not
            significant.

    Returns:
        A truthy result when ``value`` is above ``threshold_upper`` or below
        ``threshold_lower``, otherwise a falsy one.
    """
    exceeds_upper = value > threshold_upper
    falls_below_lower = value < threshold_lower
    return exceeds_upper or falls_below_lower


def sliding_window_avg(df, window_size=2):
    """Compute log price ratios over a sliding window of consecutive rows.

    For every row ``i`` that has one row before it and ``window_size - 1``
    rows after it, the window covers rows ``i - 1`` to ``i + window_size - 1``
    (``window_size + 1`` rows in total).  The value recorded for that window
    is ``log(first price in window / last price in window)`` and it is
    labelled with the "TradeDay" of row ``i``.  With the default
    ``window_size=2`` this is ``log(Price[i - 1] / Price[i + 1])``.

    Args:
        df: DataFrame with "TradeDay" and "Price" columns.  Rows are used in
            their existing order; no sorting is done here.
        window_size: Number of rows from row ``i`` to the end of the window,
            inclusive of row ``i``.  Must be at least 1.

    Returns:
        A tuple ``(result_df, price_std_dev, price_mean)`` where ``result_df``
        has the columns "TradeDay" and "LogRatio" (one row per full window)
        and the other two items are the standard deviation and the mean of
        the log ratios.  When ``df`` is too short to hold a single full
        window, ``result_df`` is empty and both statistics are NaN.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    prices = df["Price"].to_numpy()
    # One ratio per full window; none when there are fewer than
    # window_size + 1 rows.
    n_windows = max(len(prices) - window_size, 0)

    if n_windows:
        # Window k starts at row k and ends at row k + window_size.
        log_ratios = np.log(prices[:n_windows] / prices[window_size:])
    else:
        log_ratios = np.array([], dtype=float)

    result_df = pd.DataFrame(
        {
            # Window k is labelled with the trade day of row k + 1, which
            # keeps labels and ratios the same length for any window_size.
            "TradeDay": df["TradeDay"]
            .iloc[1 : 1 + n_windows]
            .reset_index(drop=True),
            "LogRatio": log_ratios,
        }
    )

    if n_windows == 0:
        # Statistics of an empty sample are undefined.
        return result_df, np.nan, np.nan

    return result_df, np.std(log_ratios), np.mean(log_ratios)


def sliding_window_avg_for_announcements(
    df, threshold_upper, threshold_lower, window_size=2, date_list=None
):
    """Return the announcement dates whose surrounding price move is significant.

    For each date in ``date_list`` that appears in ``df["TradeDay"]``, the
    window runs from the row just before the first matching row to the row
    ``window_size - 1`` positions after it (``window_size + 1`` rows).  The
    date is kept when ``log(first price in window / last price in window)``
    is judged significant by ``is_significant`` against the two thresholds.
    Dates that are absent from ``df`` or whose window would run past either
    end of ``df`` are skipped.

    Args:
        df: DataFrame with "TradeDay" and "Price" columns.  Rows are used in
            their existing order; any index is accepted because rows are
            addressed by position.
        threshold_upper: Upper significance bound for the log ratio.
        threshold_lower: Lower significance bound for the log ratio.
        window_size: Number of rows from the announcement row to the end of
            the window, inclusive of the announcement row.  Must be at
            least 1.
        date_list: Iterable of announcement dates; ``None`` is treated as an
            empty list.

    Returns:
        List of the dates from ``date_list`` with a significant log ratio, in
        the order they were given.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    if date_list is None:
        return []

    trade_days = df["TradeDay"]
    prices = df["Price"].to_numpy()
    n_rows = len(prices)

    significant_announcement = []
    for announcement_date in date_list:
        # Work with row positions, not index labels, so a non-default index
        # on df cannot shift the window.
        positions = np.flatnonzero((trade_days == announcement_date).to_numpy())
        if positions.size == 0:
            continue

        first_pos = positions[0] - 1
        last_pos = positions[0] + window_size - 1
        # Skip incomplete windows explicitly; a negative start would
        # otherwise wrap around to the end of the frame.
        if first_pos < 0 or last_pos >= n_rows:
            continue

        log_ratio = np.log(prices[first_pos] / prices[last_pos])
        if is_significant(log_ratio, threshold_upper, threshold_lower):
            significant_announcement.append(announcement_date)

    return significant_announcement


def get_investors_and_last_last_day_before_announcement_day(
    transaction_df, grouped_trans_df, announcement_day, lookback_days=4
):
    """Find who traded in the rows leading up to an announcement day.

    The last ``lookback_days`` rows of ``grouped_trans_df`` (ordered by
    "TradeDay") that fall strictly before ``announcement_day`` define the
    look-back window.  Every transaction in ``transaction_df`` whose
    "TradeDay" is one of those days is considered.

    Args:
        transaction_df: DataFrame with "TradeDay" and "InvestorID_EC" columns,
            one row per transaction.
        grouped_trans_df: DataFrame with a "TradeDay" column listing the
            candidate trade days.  Presumably one row per day; verify against
            the caller, since duplicate days would each take up a slot.
        announcement_day: The announcement day; anything accepted by
            ``pd.to_datetime``.
        lookback_days: How many rows before the announcement day make up the
            window.  Must not be negative.

    Returns:
        A tuple ``(unique_investor, last_transaction_day)``: the investor ids
        that traded in the window, in ascending order, and a dict mapping each
        of those ids to its latest "TradeDay" inside the window.

    Raises:
        ValueError: If ``lookback_days`` is negative.
    """
    if lookback_days < 0:
        raise ValueError("lookback_days must not be negative")

    announcement_day = pd.to_datetime(announcement_day)

    trade_days = grouped_trans_df["TradeDay"].sort_values()
    previous_days = trade_days[trade_days < announcement_day].tail(lookback_days)

    window_transactions = transaction_df[
        transaction_df["TradeDay"].isin(previous_days)
    ]

    # The latest trade day per investor is a grouped maximum; there is no need
    # to sort every investor's transactions.
    last_transaction_day = (
        window_transactions.groupby("InvestorID_EC")["TradeDay"].max().to_dict()
    )
    unique_investor = list(last_transaction_day)

    return unique_investor, last_transaction_day


def get_seeds_and_edges(company_name):
    """Load a company's investors, the network links and the seed investors.

    Reads ``Learning_transmission_probs/Data/transaction_data.csv``
    (comma separated) and
    ``Learning_transmission_probs/Data/insiders_network_links.csv``
    (semicolon separated), both relative to the working directory.

    Args:
        company_name: Value matched against the "CompanyName" column of the
            transaction file.

    Returns:
        A tuple ``(unique_investors, edges_df, seeds)``:
        ``unique_investors`` lists the "InvestorID_EC" values of the company's
        transactions in order of first appearance after sorting all
        transactions by "TradeDay"; ``edges_df`` is the links file as a
        DataFrame; ``seeds`` lists the investors of the company whose
        "Distance2Seed" is 0 and who also occur in the "source_id" column of
        the links file.
    """
    trades = pd.read_csv(
        "Learning_transmission_probs/Data/transaction_data.csv", sep=","
    )
    # Unparseable trade days become NaT rather than raising.
    trades = trades.assign(
        InvestorID_EC=trades["InvestorID_EC"].astype(int),
        TradeDay=pd.to_datetime(
            trades["TradeDay"], format="%Y%m%d", errors="coerce"
        ),
    ).sort_values(by="TradeDay", ascending=True)

    company_rows = trades.loc[trades["CompanyName"] == company_name]
    unique_investors = company_rows["InvestorID_EC"].unique().tolist()

    # Non-numeric distances become NaN and therefore never equal 0.
    distance_to_seed = pd.to_numeric(company_rows["Distance2Seed"], errors="coerce")
    seed_investors = (
        company_rows.loc[distance_to_seed == 0, "InvestorID_EC"].unique().tolist()
    )

    edges_df = pd.read_csv(
        "Learning_transmission_probs/Data/insiders_network_links.csv", sep=";"
    )
    source_investors = edges_df["source_id"].unique().tolist()

    # Only seed investors that also act as a link source count as seeds.
    seeds = list(set(source_investors) & set(seed_investors))

    return unique_investors, edges_df, seeds


def get_company_specific_transaction_df(
    unique_investors_from_company, unique_investors_from_graph, company_name="Nokia"
):
    """Load one company's transactions for investors present in both id lists.

    Reads ``Learning_transmission_probs/Data/transaction_data.csv`` (comma
    separated, relative to the working directory).

    Args:
        unique_investors_from_company: Iterable of investor ids.
        unique_investors_from_graph: Iterable of investor ids; only ids that
            occur in both iterables are kept.
        company_name: Value matched against the "CompanyName" column.

    Returns:
        DataFrame with the matching transactions, "TradeDay" parsed from the
        ``%Y%m%d`` format, sorted by "TradeDay" and re-indexed from 0.
    """
    transaction_df = pd.read_csv(
        "Learning_transmission_probs/Data/transaction_data.csv", sep=","
    )
    transaction_df["InvestorID_EC"] = transaction_df["InvestorID_EC"].astype(int)

    matched_investors = set(unique_investors_from_company).intersection(
        unique_investors_from_graph
    )

    keep = (transaction_df["CompanyName"] == company_name) & transaction_df[
        "InvestorID_EC"
    ].isin(list(matched_investors))
    # Copy the selection so that the column assignment below writes to an
    # independent frame instead of a slice of transaction_df.
    company_data = transaction_df.loc[keep].copy()

    company_data["TradeDay"] = pd.to_datetime(company_data["TradeDay"], format="%Y%m%d")
    return company_data.sort_values(by="TradeDay").reset_index(drop=True)


def get_significant_announcement_day_with_investors_traded_on_window(
    companyName, consider_all_announcements=False
):
    """List a company's announcement days with the investors who traded before them.

    Reads ``transaction_data.csv``, ``announcements.csv`` and
    ``company_names.csv`` from ``Learning_transmission_probs/Data`` (relative
    to the working directory).  Only companies listed in the "company" column
    of ``company_names.csv`` are considered, so a ``companyName`` outside that
    list produces an empty result.

    The company's transactions are averaged into one mean "Price" per
    "TradeDay".  Announcement days outside the company's first and last trade
    day are dropped.  Unless ``consider_all_announcements`` is set, only the
    days selected by ``sliding_window_avg_for_announcements`` are kept, using
    the mean of the log ratios from ``sliding_window_avg`` plus and minus one
    standard deviation as the thresholds.

    Args:
        companyName: Value matched against the "CompanyName" column of the
            transaction and announcement files.
        consider_all_announcements: When true, every announcement day inside
            the company's trading range is used and no significance filter
            is applied.

    Returns:
        DataFrame with one row per announcement day and the columns
        "AnnouncementDay", "InvestorIDs" (list of investor ids) and
        "LastTransaction" (dict of investor id to last trade day), the latter
        two as returned by
        ``get_investors_and_last_last_day_before_announcement_day``.
    """
    transaction_df = pd.read_csv(
        "Learning_transmission_probs/Data/transaction_data.csv", sep=","
    )
    transaction_df["InvestorID_EC"] = transaction_df["InvestorID_EC"].astype(int)

    announcement_df = pd.read_csv("Learning_transmission_probs/Data/announcements.csv")
    company_names_df = pd.read_csv("Learning_transmission_probs/Data/company_names.csv")
    listed_companies = company_names_df["company"].tolist()

    # --- transactions of the requested company --------------------------
    company_transaction_df = transaction_df[
        transaction_df["CompanyName"].isin(listed_companies)
        & (transaction_df["CompanyName"] == companyName)
    ].copy()
    # Unparseable trade days become NaT rather than raising.
    company_transaction_df["TradeDay"] = pd.to_datetime(
        company_transaction_df["TradeDay"], format="%Y%m%d", errors="coerce"
    )
    company_transaction_df = company_transaction_df.sort_values(
        by="TradeDay", ascending=True
    )

    # One row per trade day holding that day's mean price.
    daily_price_df = company_transaction_df.groupby("TradeDay", as_index=False).agg(
        {"Price": "mean"}
    )
    first_trade_day = daily_price_df["TradeDay"].min()
    last_trade_day = daily_price_df["TradeDay"].max()

    # --- announcement days inside the company's trading range -----------
    company_announcement_df = announcement_df[
        announcement_df["CompanyName"].isin(listed_companies)
        & (announcement_df["CompanyName"] == companyName)
    ].copy()
    announcement_dates = pd.to_datetime(
        company_announcement_df["AnnouncementDate"], format="%Y%m%d", errors="coerce"
    ).sort_values(ascending=True)
    announcement_dates = announcement_dates[
        (announcement_dates >= first_trade_day)
        & (announcement_dates <= last_trade_day)
    ]
    # Series.tolist() yields Timestamp objects on every pandas version, whereas
    # unique().tolist() can yield integer nanoseconds when unique() returns a
    # NumPy datetime64 array, which would break the trade-day lookups below.
    company_announcement_days = announcement_dates.drop_duplicates().tolist()

    # --- choose which announcement days to report ------------------------
    if consider_all_announcements:
        selected_days = company_announcement_days
    else:
        _, price_std_dev, price_mean = sliding_window_avg(
            daily_price_df, window_size=2
        )
        selected_days = sliding_window_avg_for_announcements(
            daily_price_df,
            price_mean + 1 * price_std_dev,
            price_mean - 1 * price_std_dev,
            window_size=2,
            date_list=company_announcement_days,
        )

    # --- investors trading just before each selected day -----------------
    rows = []
    for announcement_day in selected_days:
        investors, last_transactions = (
            get_investors_and_last_last_day_before_announcement_day(
                company_transaction_df, daily_price_df, announcement_day
            )
        )
        rows.append([announcement_day, investors, last_transactions])

    return pd.DataFrame(
        rows, columns=["AnnouncementDay", "InvestorIDs", "LastTransaction"]
    )
