#include <algorithm>
#include <vector>

// template<typename T>
// std::tuple<T, int> max_and_count(const T* begin, const T* end){
//     int cnt = 0;
//     T max_value = std::numeric_limits<T>::min();
//     for (T* it = begin; it < end; ++it){
//         if (max_value < *it){
//             max_value = *it;
//             cnt = 0;
//         }
//         ++cnt;
//     }
//     return {max_value, cnt};
// }

/**
 * Static-only helper that performs one recursive counterfactual-regret update
 * over the betting tree described by `Sequence<Poker_t>`.
 *
 * The class cannot be instantiated (its constructor is deleted); all
 * functionality is exposed through the static `update_regret`.
 *
 * NOTE(review): the name suggests chance-sampled MCCFR, i.e. the caller is
 * expected to sample `buckets`/`ranks` once per iteration -- confirm at the
 * call site.
 */
template<typename Poker_t>
class CS_MCCFR{
    CS_MCCFR() = delete;
public:
    /**
     * Recursively walks the subtree rooted at `seq`, accumulating average
     * strategy and regrets into `strat`, and writes the node's values to `ev`.
     *
     * @param seq     Current node of the betting tree.
     * @param buckets Per-player, per-round bucket index used to address the
     *                strategy tables.
     * @param ranks   Per-player hand ranks, forwarded to `deal_showdown`.
     * @param reach   Per-player reach probabilities. The acting player's entry
     *                is temporarily scaled while recursing and restored before
     *                this call returns normally.
     * @param chance  Factor multiplied into every terminal value.
     * @param ev      Output, one entry per player. At a terminal node
     *                ev[i] = award[i] * prod_{j != i} reach[j] * chance.
     *                At an inner node it is the combination of the children's
     *                values (see the loop below). Zero when the node is pruned.
     * @param cfr     Per-player accumulator; the acting player's entry grows by
     *                the positive part of every regret increment.
     * @param strat   Per-player strategy/regret storage that is updated in place.
     */
    static void update_regret( const Sequence<Poker_t>& seq
                             , const uint64_t buckets[Poker_t::num_players][Poker_t::num_rounds]
                             , const type::rank_t ranks[Poker_t::num_players]
                             , double reach[Poker_t::num_players]
                             , double chance
                             , double ev[Poker_t::num_players]
                             , double cfr[Poker_t::num_players]
                             , RegretStrategy<Poker_t> strat[Poker_t::num_players]
                             )
    {
        /* ---- terminal node: pay out and weight by the opponents' reach ---- */
        if (seq.is_terminal()) {
            double award[Poker_t::num_players];
            if (seq.is_fold()) {
                seq.deal_fold(award);
            }
            else {
                seq.deal_showdown(ranks, award);
            }

            for (int i = 0; i < Poker_t::num_players; ++i) {
                /* product of the reach of everybody except player i */
                double others_reach = 1;
                for (int j = 0; j < Poker_t::num_players; ++j) {
                    if (j != i) {
                        others_reach *= reach[j];
                    }
                }
                ev[i] = award[i] * others_reach * chance;
            }
            return;
        }

        /* Every non-terminal path starts from a zeroed value vector. */
        std::fill_n(ev, Poker_t::num_players, 0.0);

        /* ---- prune: nobody reaches this node with noticeable probability ---- */
        const double max_reach = *std::max_element(reach, reach + Poker_t::num_players);
        if (max_reach < overall_define::epsilon) {
            return;
        }

        /* ---- inner node ---- */
        const int u           = seq.get_id();
        const int player      = Game<Poker_t>::whose_turn[u];
        const int round       = Game<Poker_t>::round[u];
        const int action_size = Game<Poker_t>::num_actions[u];

        /* Current strategy of the acting player at this information set.
           Held in a vector so the buffer is released even if a callee throws. */
        std::vector<double> probability(action_size);
        strat[player].get_strategy(seq, buckets[player][round], probability.data());

        /* Accumulate the average strategy, weighted by the player's own reach. */
        for (int a = 0; a < action_size; ++a) {
            strat[player].accumulate_probability(seq, buckets[player][round], a, reach[player]*probability[a]);
        }

        /* Evaluate every action by recursing with the acting player's reach
           scaled by the probability of that action. */
        const double old_reach = reach[player];
        std::vector<double> delta_regret(action_size);
        double ret_ev[Poker_t::num_players];
        for (int a = 0; a < action_size; ++a) {
            reach[player] = old_reach*probability[a];
            update_regret(seq.do_action(a), buckets, ranks, reach, chance, ret_ev, cfr, strat);

            /* Value of taking action a; the node expectation is subtracted below. */
            delta_regret[a] = ret_ev[player];
            for (int j = 0; j < Poker_t::num_players; ++j) {
                if (j == player) {
                    /* The acting player's own reach is excluded from its value,
                       so sigma_i(I,a) still has to be multiplied in here. */
                    ev[j] += ret_ev[j]*probability[a];
                }
                else {
                    /* For the other players the child value is already weighted
                       by reach[player], which contains sigma_i(I,a):
                       ev = \pi_{-j}(h) \sum_z \pi(z|h) u(z). */
                    ev[j] += ret_ev[j];
                }
            }
        }

        /* restore reachability value */
        reach[player] = old_reach;

        /* Subtract the node expectation to obtain the regret increments,
           store them, and add their positive part to the cfr accumulator. */
        for (int a = 0; a < action_size; ++a) {
            delta_regret[a] -= ev[player];
            strat[player].accumulate_regret(seq, buckets[player][round], a, delta_regret[a]);
            cfr[player]     += std::max(0., delta_regret[a]);
        }
    }
};