
#pragma once

#include "template/domain_independent/player_terminal_reach.hpp"
#include "template/domain_independent/strategy.hpp"
#include "template/algorithm/iteration/iteration_util.hpp"

#include <forward_list>

template<typename>
class BestResponse;

/**
 * Computes a best response for one player against fixed opponent reach
 * probabilities by walking the sequence tree round by round, from the last
 * betting round back to the first.
 *
 * The object owns several raw heap tables (allocated in the constructor and
 * lazily inside the per-round passes) which are released by clear_() and the
 * destructor.
 */
template<typename Poker>
class BestResponse{
public:
    using Poker_t = Poker;
    BestResponse();
    ~BestResponse();

    // The destructor frees the raw tables below, so a member-wise copy would
    // free them twice. Copying (and with it implicit moving) is disabled.
    BestResponse(const BestResponse&) = delete;
    BestResponse& operator=(const BestResponse&) = delete;

    // void compute_brs(const Strategy<Poker_t>* strategies[Poker_t::num_players], int player, Strategy<Poker_t>& strat);

    /**
     * Runs the full backward pass for `player` and writes the chosen actions into `strat`.
     * @param player_terminal_reaches per-player terminal reach tables; the entry for `player` itself is not read.
     * @param player                  index of the player whose best response is computed.
     * @param strat                   strategy object that receives the best-response action tuples.
     * @return the value stored in seqs_awards[Game<Poker_t>::num_total][0] after the pass.
     */
    double compute_br(const PlayerTerminalReach<Poker_t>* const player_terminal_reaches[Poker_t::num_players], int player, Strategy<Poker>& strat);

protected:
    // Frees every per-sequence table and empties the per-round work lists.
    void clear_();
    // Resets the per-round work lists and successor counters for a new pass.
    void init_();
    // Callback invoked per dealt hand: accumulates terminal payoffs for the current round.
    void handle_terminal_(const PlayerTerminalReach<Poker_t>* const player_terminal_reaches[Poker_t::num_players], int player, type::card_t holes[Poker_t::num_players][Poker_t::hole_len[Poker_t::num_rounds-1]], type::card_t board[Poker_t::board_len[Poker_t::num_rounds-1]]);
    // Folds every ready sequence of the current round into its same-round parent.
    void iterate_current_round_seqs_(Strategy<Poker_t>& strat, int player);
    // Advances the ready set; returns false (and decrements current_round) once the round is exhausted.
    bool update_and_still_in_current_round_();//Strategy<Poker_t>& strat, int player
    // Carries the values of the sequences that open round current_round+1 up to their parents.
    void pass_up_chance_(Strategy<Poker_t>& strat, int player);


protected:
    typedef double award_t;
    award_t** seqs_awards;//[sequence][isomorphism]; slot Game<Poker_t>::num_total holds the overall value
    int **br_strategy_equivalent;//[internal_sequence][isomorphism]: number of actions currently tied for best
    int *unhandled_successor;//[Game<Poker_t>::num_total]: successors not yet folded into the sequence
    std::forward_list<int> unhandled_seqs_round[Poker_t::num_rounds]; // per round: sequences still waiting on successors
    std::vector<int> tohandle_seqs_round[Poker_t::num_rounds];        // per round: sequences ready to be processed
    int current_round; // round being processed; set by init_() and decremented as rounds complete

protected:
    const std::array<std::forward_list<int>, Poker_t::num_rounds> seqs_result_from_last_round/*[Poker_t::num_rounds]*/ = iteration::get_seqs_result_from_last_round<Poker_t>();
};

// Allocates the three top-level tables. The two pointer tables start out
// all-null; the per-sequence rows are allocated lazily during a pass.
template<typename Poker>
BestResponse<Poker>::BestResponse(){
    // One counter per sequence; contents are set by clear_() / init_().
    unhandled_successor = new int[Game<Poker_t>::num_total];

    // One extra slot: index Game<Poker_t>::num_total holds the weighted sum of
    // all expected values (a single isomorphism entry).
    seqs_awards = new award_t*[Game<Poker_t>::num_total+1]();

    // Only internal sequences carry a tie counter row.
    br_strategy_equivalent = new int*[Game<Poker_t>::num_internal]();
}

// Releases every per-sequence row first (via clear_()), then the top-level tables.
template<typename Poker>
BestResponse<Poker>::~BestResponse(){
    clear_();

    delete[] unhandled_successor;
    delete[] br_strategy_equivalent;
    delete[] seqs_awards;
}

// Returns the object to its freshly-constructed state: zeroed successor
// counters, no per-sequence rows, empty per-round work lists.
template<typename Poker>
void BestResponse<Poker>::clear_(){

    for(int seq = 0; seq < Game<Poker_t>::num_total; ++seq){
        unhandled_successor[seq] = 0;
    }

    // Award rows, including the extra slot at index num_total.
    // delete[] on a null pointer is a no-op, so no check is needed.
    for(int seq = 0; seq <= Game<Poker_t>::num_total; ++seq){
        delete[] seqs_awards[seq];
        seqs_awards[seq] = nullptr;
    }

    // Tie-counter rows exist for internal sequences only.
    for(int seq = 0; seq < Game<Poker_t>::num_internal; ++seq){
        delete[] br_strategy_equivalent[seq];
        br_strategy_equivalent[seq] = nullptr;
    }

    for(int round = 0; round < Poker_t::num_rounds; ++round){
        tohandle_seqs_round[round].clear();
        unhandled_seqs_round[round].clear();
    }
}

// Prepares a new backward pass: starts at the last round, sets each
// sequence's outstanding-successor count, and splits the sequences of every
// round into "ready" (terminal) and "waiting" (internal).
template<typename Poker>
void BestResponse<Poker>::init_(){
    current_round = Poker_t::num_rounds-1;

    // Internal sequences wait on one successor per action; terminals wait on none.
    for(int seq = 0; seq < Game<Poker_t>::num_total; ++seq){
        unhandled_successor[seq] = seq < Game<Poker_t>::num_internal ? Game<Poker_t>::num_actions[seq] : 0;
    }

    // Internal sequences (index < num_internal) go to the waiting list of their
    // round. Pushing to the front in ascending order leaves each list in
    // descending index order.
    for(int seq = 0; seq < Game<Poker_t>::num_internal; ++seq){
        const int round = Game<Poker_t>::round[seq];
        unhandled_seqs_round[round].push_front(seq);
    }

    // Terminal sequences (index >= num_internal) are ready immediately. They are
    // appended in descending index order, matching the order of the waiting lists.
    for(int seq = Game<Poker_t>::num_total - 1; seq >= Game<Poker_t>::num_internal; --seq){
        const int round = Game<Poker_t>::round[seq];
        tohandle_seqs_round[round].push_back(seq);
    }
}

// Entry point: resets all state, then processes the rounds from last to first.
// Returns the value left in the extra award slot (index num_total, entry 0).
template<typename Poker>
double BestResponse<Poker>::compute_br(const PlayerTerminalReach<Poker_t>* const player_terminal_reaches[Poker_t::num_players], int player, Strategy<Poker>& strat) {
    clear_();
    init_();
    strat.register_player(player);

    // current_round is decremented inside update_and_still_in_current_round_().
    for(; current_round >= 0; ) {
        // Terminal award rows for this round are allocated lazily by the callback.

        // Evaluate the terminals of this round for every dealt hand.
        iteration::dealing_upto_current_round_hands<Poker_t>(
            std::bind(&BestResponse<Poker>::handle_terminal_, this,
                      player_terminal_reaches, player,
                      std::placeholders::_1, std::placeholders::_2),
            current_round);

        // Roll values backwards inside the round until no sequence becomes ready.
        for(;;) {
            iterate_current_round_seqs_(strat, player);
            if(!update_and_still_in_current_round_())
                break;
        }

        // Hand the values of this round's opening sequences to the previous round.
        pass_up_chance_(strat, player);
    }

    const award_t* overall = seqs_awards[Game<Poker_t>::num_total];
    return overall[0];
}

// Per-deal callback: for one concrete assignment of hole cards and board,
// adds the target player's payoff (weighted by the product of the other
// players' terminal reach) to every ready terminal sequence of the current round.
template<typename Poker>
void BestResponse<Poker>::handle_terminal_(const PlayerTerminalReach<Poker_t>* const player_terminal_reaches[Poker_t::num_players], int player, type::card_t holes[Poker_t::num_players][Poker_t::hole_len[Poker_t::num_rounds-1]], type::card_t board[Poker_t::board_len[Poker_t::num_rounds-1]]) {
    // Hand-isomorphism index of every player's hand in the current round.
    uint64_t bucket_of[Poker_t::num_players];
    Hand<Poker_t> hand(holes[0], board, current_round);
    for(int p = 0; p < Poker_t::num_players; ++p){
        if(p > 0)
            hand.change_hole(holes[p]);
        bucket_of[p] = hand.get_hand_isomorphism(0);
    }

    const uint64_t iso_count = Hand<Poker_t>::get_isomorphism_size(current_round, 0);
    int ranks[Poker_t::num_players];
    Evaluator<Poker_t>::evaluate_ranks(holes, board, ranks);

    double payoff[Poker_t::num_players];
    for(const int u : tohandle_seqs_round[current_round]){
        Sequence<Poker_t> seq(u);
        assert(seq.is_terminal());
        assert(Game<Poker_t>::round[u] == current_round);

        if(!seq.is_fold()){
            seq.deal_showdown(ranks, payoff);
        }
        else {
            seq.deal_fold(payoff);
        }

        // First visit of this terminal: create a zeroed award row.
        if(seqs_awards[u] == nullptr){
            seqs_awards[u] = new double[iso_count]();
        }

        // Product of the reach of everyone except the target player.
        double others_reach = 1.;
        for(int p = 0; p < Poker_t::num_players; ++p){
            if(p != player)
                others_reach *= player_terminal_reaches[p]->get_reach(u, bucket_of[p]);
        }

        seqs_awards[u][bucket_of[player]] += others_reach * payoff[player];
    }
}

// Folds every ready sequence of the current round into its parent, provided
// the parent lies in the same round (cross-round parents are handled by
// pass_up_chance_()).
//
//  - Parent acted on by `player`: per isomorphism, keep the best child value,
//    count how many children tie for it, and store in `strat` a tuple that
//    gives each tied action one unit of weight (set_strategy normalises it).
//  - Parent acted on by anyone else: child values are summed, exactly as in an
//    expected-value computation.
//
// The child's own tables are released once its value has been folded.
template<typename Poker>
void BestResponse<Poker>::iterate_current_round_seqs_(Strategy<Poker_t>& strat, int player) {

    const uint64_t isomorphism_size = Hand<Poker_t>::get_isomorphism_size(current_round, 0);

    // Scratch action tuple, reused across all parents. Owning it through a
    // vector means nothing leaks if a Strategy call throws.
    std::vector<double> scratch_tuple;

    for(const int u : tohandle_seqs_round[current_round]){
        const int parent = Game<Poker_t>::parent[u];

        if(parent == -1 || Game<Poker_t>::round[parent] != current_round)
            continue;

        if (player == Game<Poker_t>::whose_turn[parent]) {
            const int action_size = Game<Poker_t>::num_actions[parent];
            // Index of the action that leads from parent to u.
            const int action_idx = Game<Poker_t>::action_result_from_idx[u];

            if(!seqs_awards[parent]) {
                seqs_awards[parent] = new double[isomorphism_size];
                std::fill(seqs_awards[parent], seqs_awards[parent] + isomorphism_size, std::numeric_limits<double>::lowest());
            }

            if(!br_strategy_equivalent[parent]) {
                br_strategy_equivalent[parent] = new int[isomorphism_size];
                std::fill(br_strategy_equivalent[parent], br_strategy_equivalent[parent] + isomorphism_size, 0);
            }

            for(uint64_t i = 0; i<isomorphism_size; ++i){
                const double child_award = seqs_awards[u][i];

                if(seqs_awards[parent][i] > child_award)
                    // This action is worse for the target player: ignore it.
                    continue;

                // Strictly better value: forget the actions that were tied so far.
                if(seqs_awards[parent][i] < child_award){
                    br_strategy_equivalent[parent][i] = 0;
                }

                // Start from the tuple currently stored for the parent (zeros if none).
                const double* original_tuple = strat.get_strategy(Sequence<Poker_t>(parent), i);
                if(original_tuple){
                    scratch_tuple.assign(original_tuple, original_tuple + action_size);
                }
                else{
                    scratch_tuple.assign(action_size, 0.);
                }

                // Undo the normalisation so every tied action weighs 1 again
                // (or wipe the tuple when the tie count was just reset to 0).
                for(double& weight : scratch_tuple)
                    weight *= br_strategy_equivalent[parent][i];

                // The new action joins the tie with weight 1.
                scratch_tuple[action_idx] += 1.;
                strat.set_strategy(Sequence<Poker_t>(parent), i, scratch_tuple.data()); // normalises internally

                ++br_strategy_equivalent[parent][i];

                // The child is either strictly better than or exactly equal to the
                // stored value, so the new best value is the child's value. Assigning
                // it directly (rather than averaging the tied values) avoids rounding
                // drift that would break the exact tie comparison for later siblings.
                seqs_awards[parent][i] = child_award;
            }
        }
        else /* not the target player: accumulate as for an expected value */{

            if(!seqs_awards[parent]) {
                seqs_awards[parent] = new double[isomorphism_size];
                std::fill(seqs_awards[parent], seqs_awards[parent] + isomorphism_size, 0.);
            }

            for(uint64_t i = 0; i<isomorphism_size; ++i){
                seqs_awards[parent][i] += seqs_awards[u][i];
            }
        }

        // u has been folded into its parent, so its tables are no longer needed.
        delete[] seqs_awards[u];
        seqs_awards[u] = nullptr;

        // Tie counters exist for internal sequences only. Release them whoever owns
        // the parent, so they do not linger until the next clear_().
        if(u < Game<Poker_t>::num_internal) {
            delete[] br_strategy_equivalent[u];
            br_strategy_equivalent[u] = nullptr;
        }
    }
}

// Bookkeeping after one iterate_current_round_seqs_() sweep: credits the
// parents of the sequences just handled, then promotes every waiting sequence
// of this round whose successors are all done. Returns true while there is
// still something ready in this round; otherwise steps current_round back by
// one and returns false.
template<typename Poker>
bool BestResponse<Poker>::update_and_still_in_current_round_() {

    auto& ready = tohandle_seqs_round[current_round];
    auto& waiting = unhandled_seqs_round[current_round];

    // Each handled sequence is one successor fewer for its parent.
    for(const int handled : ready) {
        const int parent = Game<Poker_t>::parent[handled];
        if(parent == -1)
            continue;
        --unhandled_successor[parent];
        assert(unhandled_successor[parent]>=0);
    }

    // Waiting sequences with no outstanding successor form the next ready set.
    ready.clear();
    auto prev = waiting.before_begin();
    for(auto cur = waiting.begin(); cur != waiting.end(); ) {
        if(unhandled_successor[*cur] == 0){
            ready.push_back(*cur);
            cur = waiting.erase_after(prev); // prev stays valid; cur moves to the next node
        }
        else{
            prev = cur;
            ++cur;
        }
    }

    if(!ready.empty())
        return true;

    --current_round;
    return false;
}

// Carries values across a round boundary. Called after current_round has been
// decremented, so "next round" (current_round + 1) is the round that was just
// finished and "current round" is the one about to be processed; when
// current_round is -1 the single receiving slot is index num_total with one
// isomorphism entry.
//
// For every sequence that opens the next round:
//   1. make sure the parent has an award row (and a tie-counter row if the
//      parent is acted on by `player`);
//   2. map each next-round isomorphism to its current-round isomorphism and
//      sum the child's values per current-round isomorphism;
//   3. if the parent belongs to `player`, treat the compressed child value as
//      one more candidate action (same max / tie logic as within a round);
//   4. divide the parent's row by Poker_t::deal_combine_num_round[next_round]
//      and release the child's tables.
//
// NOTE(review): step 4 divides once per child; this presumably relies on each
// parent having at most one child that opens the next round - confirm.
template<typename Poker>
void BestResponse<Poker>::pass_up_chance_(Strategy<Poker_t>& strat, int player){

    const uint64_t current_iso_size = current_round < 0? 1: Hand<Poker_t>::get_isomorphism_size(current_round, 0);

    const int next_round = current_round + 1;
    const uint64_t next_iso_size = Hand<Poker_t>::get_isomorphism_size(next_round, 0);

    // One record per child sequence, in list order. Replaces a hash map keyed by
    // child index (looked up inside the hot loops) and its manually freed buffers.
    struct Link {
        int child;                       // sequence that opens next_round
        int parent;                      // receiving slot (num_total when the child has no parent)
        bool player_chooses;             // true when `player` acts at the parent
        std::vector<double> compressed;  // child values summed per current-round isomorphism
    };
    std::vector<Link> links;

    // Step 1: allocate the receiving rows.
    for(const int nxt : seqs_result_from_last_round[next_round]) {
        links.emplace_back();
        Link& link = links.back();
        link.child = nxt;
        link.parent = Game<Poker_t>::parent[nxt] < 0 ? Game<Poker_t>::num_total : Game<Poker_t>::parent[nxt];
        link.player_chooses = link.parent != Game<Poker_t>::num_total && player == Game<Poker_t>::whose_turn[link.parent];

        if (link.player_chooses) {
            if(!seqs_awards[link.parent]) {
                seqs_awards[link.parent] = new double[current_iso_size];
                std::fill(seqs_awards[link.parent], seqs_awards[link.parent] + current_iso_size, std::numeric_limits<double>::lowest());
            }

            if(!br_strategy_equivalent[link.parent]) {
                br_strategy_equivalent[link.parent] = new int[current_iso_size];
                std::fill(br_strategy_equivalent[link.parent], br_strategy_equivalent[link.parent] + current_iso_size, 0);
            }

            link.compressed.assign(current_iso_size, 0.);
        }
        else {
            // Not the target player: the parent simply accumulates value.
            if(!seqs_awards[link.parent]) {
                seqs_awards[link.parent] = new double[current_iso_size];
                std::fill(seqs_awards[link.parent], seqs_awards[link.parent] + current_iso_size, 0.);
            }
        }
    }

    // Step 2: transfer values, compressing next-round isomorphisms.
    type::card_t next_hand_c[Poker_t::hand_len[Poker_t::num_rounds-1]];
    for (uint64_t next_isomorphism = 0; next_isomorphism < next_iso_size; ++next_isomorphism){

        // Isomorphism index of the same hand as seen in the current round.
        uint64_t current_isomorphism = 0;
        if(current_round >= 0) {
            Hand<Poker_t>::hand_unisomorphism(next_isomorphism, next_round, 0, next_hand_c);
            Hand<Poker_t> current_hand(next_hand_c, next_hand_c + Poker_t::hole_len[next_round], current_round);
            current_isomorphism = current_hand.get_hand_isomorphism(0);
        }

        for(Link& link : links) {
            const double passed = seqs_awards[link.child][next_isomorphism];
            if (link.player_chooses) {
                // Target player's parent: compress first, choose afterwards.
                link.compressed[current_isomorphism] += passed;
            }
            else {
                // Any other parent: accumulate directly.
                seqs_awards[link.parent][current_isomorphism] += passed;
            }
        }
    }

    // Step 3: selection, which only concerns parents acted on by the target player.
    std::vector<double> scratch_tuple; // reused; no per-iteration allocation, nothing to leak
    for (uint64_t current_isomorphism = 0; current_isomorphism < current_iso_size; ++current_isomorphism){

        for(const Link& link : links) {
            if (!link.player_chooses)
                continue;

            const int parent = link.parent;
            const double candidate = link.compressed[current_isomorphism];

            if(seqs_awards[parent][current_isomorphism] > candidate)
                // This action is worse for the target player: ignore it.
                continue;

            // Strictly better value: forget the actions that were tied so far.
            if(seqs_awards[parent][current_isomorphism] < candidate){
                br_strategy_equivalent[parent][current_isomorphism] = 0;
            }

            const int action_size = Game<Poker_t>::num_actions[parent];

            // Start from the tuple currently stored for the parent (zeros if none).
            const double* original_tuple = strat.get_strategy(Sequence<Poker_t>(parent), current_isomorphism);
            if(original_tuple){
                scratch_tuple.assign(original_tuple, original_tuple + action_size);
            }
            else{
                scratch_tuple.assign(action_size, 0.);
            }

            // Undo the normalisation so every tied action weighs 1 again.
            for(double& weight : scratch_tuple)
                weight *= br_strategy_equivalent[parent][current_isomorphism];

            // The new action joins the tie with weight 1.
            const int action_idx = Game<Poker_t>::action_result_from_idx[link.child];
            scratch_tuple[action_idx] += 1.;

            ++br_strategy_equivalent[parent][current_isomorphism];

            // The candidate is strictly better than or exactly equal to the stored
            // value, so it is the new best value. Direct assignment avoids the
            // rounding drift of averaging tied values.
            seqs_awards[parent][current_isomorphism] = candidate;

            strat.set_strategy(Sequence<Poker_t>(parent), current_isomorphism, scratch_tuple.data()); // normalises internally
        }
    }

    // Step 4: apply the dealing weight and release the next round's tables.
    for(const Link& link : links) {
        for(uint64_t current_isomorphism = 0; current_isomorphism < current_iso_size; ++current_isomorphism){
            seqs_awards[link.parent][current_isomorphism] /= Poker_t::deal_combine_num_round[next_round];
        }

        // Tie counters exist for internal sequences only; guard the index the same
        // way iterate_current_round_seqs_() does.
        if (link.child < Game<Poker_t>::num_internal) {
            delete[] br_strategy_equivalent[link.child];
            br_strategy_equivalent[link.child] = nullptr;
        }

        delete[] seqs_awards[link.child];
        seqs_awards[link.child] = nullptr;
    }
}