use tokio;
use tokio::net::{TcpListener, TcpSocket, TcpStream};
use tokio::time::{sleep, Duration};
use std::net::{IpAddr, SocketAddr};
use futures::stream::{FuturesUnordered, StreamExt};
use tokio::io::{AsyncWriteExt, AsyncReadExt};
use serde_json::{json, from_str};
use flate2::read::GzDecoder;
use flate2::write::GzEncoder;
use flate2::Compression;

use crate::game::component::{self, *};
use rand::distributions::{WeightedIndex, Distribution, weighted};
use rocksdb::{ColumnFamilyDescriptor, DBCommon, IteratorMode, Options, SingleThreaded, DB};
use rust_optimal_transport::exact::EarthMovers;
use std::{borrow::BorrowMut, collections::{HashSet, LinkedList}};
use std::marker::PhantomData;
use num::traits::cast::ToPrimitive;
use rayon::iter::{IntoParallelRefIterator, IntoParallelIterator, IndexedParallelIterator, ParallelIterator, IntoParallelRefMutIterator};
use std::sync::{Arc, atomic::{AtomicUsize, Ordering}, Mutex};
use ndarray::prelude::*;
use rust_optimal_transport as ot;
use ot::prelude::*;
use std::io::Read;
use super::*;

/// Per-machine handle for the distributed clustering run.
///
/// Built by `KrwEmdDistributer::new`, which binds the listeners and performs the
/// registration handshake between the master (machine 0) and the workers.
pub struct KrwEmdDistributer {
    /// Addresses of all machines, indexed by machine id; `ips[0]` is the master.
    pub ips: Vec<IpAddr>,
    /// Index of this machine in `ips`; `0` selects the master code paths.
    pub machine_id: usize,
    /// Broadcast tree produced by `construct_broadcast`: `.0[i]` lists the machines that
    /// machine `i` forwards to, `.1[i]` is the machine that forwards to `i`
    /// (`usize::MAX` for machine 0, which has no parent).
    pub broadcast: (Vec<LinkedList<usize>>, Vec<usize>),
    /// Listeners bound in `new` on this machine's address, one per port in
    /// `BASE_PORT ..= BASE_PORT + ips.len()`; each is wrapped in an async mutex.
    pub listeners: Vec<tokio::sync::Mutex<TcpListener>>,
}
/// Result of a clustering run, returned by `KrwEmdDistributer::do_cluster` on the master.
///
/// NOTE(review): the constructor and accessors are defined outside this part of the
/// file; the field notes below are based only on how `do_cluster` fills them.
pub struct KrwEmdClusterPP<T> {
    // One entry per street: the bucket assigned to each index of that street.
    kriso2bucket_street: Vec<Vec<usize>>,
    // One entry per street: the number of buckets used on that street.
    bucket_size_street: Vec<usize>,
    // Copy of the per-street algorithm configuration that produced the assignment.
    abstr_configs: Vec<AbstractAlgorithmStreet>,
    // Ties the result to the game type `T` without storing a value of it.
    _marker: PhantomData<T>
}
/// Input data for the distributed weighted k-means++ step.
///
/// The struct is serialized with bincode and shipped to every machine, so the field
/// order and types are part of the wire format.
#[derive(serde::Serialize, serde::Deserialize)]
struct KrwEmdDistributedWeightedKmeanspp {
    /// One entry per point; `points[p][stage]` is an index into `nrid2wdistnorms[stage]`.
    pub points : Vec<Vec<usize>>,
    /// Weight of each point, indexed like `points`.
    pub weights: Vec<usize>,
    /// Per stage, a table of distributions looked up by the indices stored in `points`.
    /// The distance helpers read components 0, 1 and 2 of each entry.
    pub nrid2wdistnorms: Vec<Vec<Array1<f64>>>,
    /// Per-stage factor applied to that stage's distance before the stages are summed.
    pub street_weights: Vec<f64>,
}

impl KrwEmdDistributer {
    /// First TCP port of the range used by the cluster; `new` binds listeners on
    /// `BASE_PORT + offset` for every offset in `0..=machine_size`.
    pub const BASE_PORT: u16 = 52520;
    /// Size in bytes of the scratch buffer used to read short handshake messages.
    pub const buf_size: usize = 1024;
    /// Maximum payload size in bytes of a single frame written by `send_large_data` (1 MiB).
    pub const bigbuf_size: usize = 1024*1024;

    /// Binds this machine's listeners and runs the cluster registration handshake.
    ///
    /// One listener is bound on `ips[machine_id]` for every port in
    /// `BASE_PORT ..= BASE_PORT + machine_size`. The last one (offset `machine_size`)
    /// is the port used for registration.
    ///
    /// * Master (`machine_id == 0`): accepts connections on the registration port until
    ///   every address in `ips[1..]` has sent `Register`. Each accepted worker is answered
    ///   with `Ok(Register)`; unknown, duplicate or malformed requests are answered with
    ///   `Err(Register)` and dropped.
    /// * Worker: connects to the master's registration port, retrying every 5 seconds
    ///   while the connection fails, sends `Register` and expects `Ok(Register)`.
    ///
    /// # Panics
    /// Panics if a socket cannot be created, bound or listened on, if any read or write
    /// during the handshake fails, or (on a worker) if the master rejects the registration.
    pub async fn new(ips: Vec<IpAddr>, machine_id: usize) -> Arc<Self> {
        let machine_size = ips.len();
        let broadcast = Self::construct_broadcast(machine_size);
        let listeners = (0..=machine_size as u16)
            .map(|port_offset| {
                let port = Self::BASE_PORT + port_offset;
                let addr: SocketAddr = SocketAddr::new(ips[machine_id], port);
                let socket = TcpSocket::new_v4().unwrap();
                socket.set_reuseaddr(true).unwrap();
                socket.bind(addr).expect(format!("绑定端口失败:{}", ips[machine_id]).as_str());
                tokio::sync::Mutex::new(socket.listen(128).unwrap())
            })
            .collect::<Vec<_>>();

        if machine_id == 0 {
            // Worker addresses that have not registered yet.
            let mut ipflags: HashSet<IpAddr> = ips[1..]
                .iter()
                .cloned()
                .collect();
            let listener = &listeners[machine_size];

            // Loop until all expected IPs have connected
            while !ipflags.is_empty() {
                let (mut stream, addr) = listener.lock().await.accept().await.unwrap();
                let mut buf = vec![0; Self::buf_size];
                let n = stream.read(&mut buf).await.unwrap();
                if &buf[..n] != b"Register"{
                    // The payload comes from an arbitrary peer and may not be valid UTF-8;
                    // decode it lossily so a garbage message cannot panic the master.
                    println!("Unexpected message: {}, from: {}", String::from_utf8_lossy(&buf[..n]), addr);
                    stream.write_all(b"Err(Register)").await.unwrap();
                    drop(stream);
                    continue;
                }
                if ipflags.contains(&addr.ip()) {
                    stream.write_all(b"Ok(Register)").await.unwrap();
                    println!("Port {} connected from Worker {}", Self::BASE_PORT+machine_size as u16, addr);
                    ipflags.remove(&addr.ip()); // Mark this IP as connected
                } else {
                    println!("Unexpected or duplicate connection from {}", addr.ip());
                    stream.write_all(b"Err(Register)").await.unwrap();
                    // Optionally drop the stream immediately
                    drop(stream);
                }
            }

            println!("All workers have reported in.");
        } else {
            // Worker code to report to master
            let master_addr: SocketAddr = SocketAddr::new(ips[0], Self::BASE_PORT+machine_size as u16);
            loop {
                match TcpStream::connect(master_addr).await {
                    Ok(mut stream) => {
                        stream.write_all(b"Register").await.unwrap();
                        let mut buf = vec![0; Self::buf_size];
                        let n = stream.read(&mut buf).await.unwrap();
                        if &buf[..n] == b"Ok(Register)" {
                            println!("Connected to master at {}", master_addr);
                        }
                        else {
                            panic!("master at {} rejected the registration", master_addr);
                        }
                        break;
                    },
                    Err(e) =>{
                        eprintln!("Failed to connect: {}. Retrying in 5 seconds...", e);
                        sleep(Duration::from_secs(5)).await;
                    }
                }
            }
        }

        Arc::new(
            Self {
                ips,
                machine_id,
                broadcast,
                listeners,
            }
        )
    }

    /// Builds the broadcast tree used to fan data out from machine 0.
    ///
    /// The tree grows in rounds: in each round every machine that already holds the
    /// data (ids `0..covered`) is paired with machine `id + covered`, as long as that
    /// id exists, so the number of covered machines roughly doubles per round.
    ///
    /// # Parameters
    /// - `machine_size`: total number of machines, master and workers included.
    ///
    /// # Returns
    /// `(broadcast_route, broadcast_parent)` where `broadcast_route[i]` lists, in sending
    /// order, the machines that machine `i` forwards to, and `broadcast_parent[i]` is the
    /// machine that forwards to `i` (`usize::MAX` for machine 0).
    ///
    /// # Example
    ///
    /// ```text
    ///     machine_size = 9;
    ///     broadcast_route: [[1, 2, 4, 8], [3, 5], [6], [7], [], [], [], [], []];
    ///     broadcast_parent: [18446744073709551615/*usize::MAX */, 0, 0, 1, 0, 1, 2, 3, 0];
    /// ```
    fn construct_broadcast(machine_size: usize) -> (Vec<LinkedList<usize>>, Vec<usize>) {
        let mut routes: Vec<LinkedList<usize>> =
            (0..machine_size).map(|_| LinkedList::new()).collect();
        let mut parents: Vec<usize> = vec![usize::MAX; machine_size];

        // Machine 0 is the only one holding the data before the first round.
        let mut covered = 1;
        while covered < machine_size {
            let senders = covered;
            // Sender `s` serves machine `s + senders`; the zip stops at whichever runs
            // out first: the senders of this round or the machines still uncovered.
            for (sender, receiver) in (0..senders).zip(senders..machine_size) {
                routes[sender].push_back(receiver);
                parents[receiver] = sender;
                covered += 1;
            }
        }

        (routes, parents)
    }

    /// Runs the clustering for every street, from the last street down to the first.
    ///
    /// `alg_configs` must hold exactly `T::NUM_STREET` entries, one per street.
    ///
    /// * On the master (`machine_id == 0`) every street yields an assignment and a bucket
    ///   count: `KrwEmd` streets are clustered with `do_street_cluster_master`, while the
    ///   `Isomorphism`, `Krwi` and `Kroi` streets are loaded through `load_isomorphism_abstr`
    ///   / `load_krxi_abstr`. The results are returned in street order as
    ///   `Some(KrwEmdClusterPP)`.
    /// * On a worker only the `KrwEmd` streets are processed (via
    ///   `do_street_cluster_worker`); every other street is skipped and `None` is returned.
    ///
    /// # Panics
    /// Panics if the configuration length does not match `T::NUM_STREET`, if a street's
    /// `recall_from` exceeds the street index, if the `KrwEmd` weight count is not
    /// `street - recall_from + 1`, or (master only) on an unhandled configuration variant.
    pub async fn do_cluster<T>(self: Arc<Self>, alg_configs: &[AbstractAlgorithmStreet]) -> Option<KrwEmdClusterPP<T>> 
        where T: Singleton + Hand + WaughTrait + ShowdownRanker + 'static
    {
        assert_eq!(alg_configs.len(), T::NUM_STREET as usize);

        if self.machine_id != 0 {
            // Worker: take part in the distributed streets only.
            for street in (0..alg_configs.len()).rev() {
                if let AbstractAlgorithmStreet::KrwEmd { recall_from, st_weights, centroid_size, train_iteration } = &alg_configs[street] {
                    let (recall_from, centroid_size, train_iteration) = (*recall_from, *centroid_size, *train_iteration);
                    assert!(recall_from <= street);
                    assert_eq!(st_weights.len(), street-recall_from+1);
                    self.clone().do_street_cluster_worker::<T>(street, recall_from, st_weights.clone(), centroid_size, train_iteration).await;
                }
            }
            return None;
        }

        // Master: results are collected last-street-first and flipped at the end.
        let mut kriso2bucket_street = Vec::with_capacity(alg_configs.len());
        let mut bucket_size_street = Vec::with_capacity(alg_configs.len());
        for street in (0..alg_configs.len()).rev() {
            let (kriso_assign, bucket_size) = match &alg_configs[street] {
                AbstractAlgorithmStreet::KrwEmd { recall_from, st_weights, centroid_size, train_iteration } => {
                    let (recall_from, centroid_size, train_iteration) = (*recall_from, *centroid_size, *train_iteration);
                    assert!(recall_from <= street);
                    assert_eq!(st_weights.len(), street-recall_from+1);
                    let kriso_assign = self.clone().do_street_cluster_master::<T>(street, recall_from, st_weights.clone(), centroid_size, train_iteration).await;
                    (kriso_assign, centroid_size)
                }
                AbstractAlgorithmStreet::Isomorphism { recall_from } => {
                    let recall_from = *recall_from;
                    assert!(recall_from <= street);
                    load_isomorphism_abstr::<T>(street, recall_from)
                }
                AbstractAlgorithmStreet::Krwi { recall_from } => {
                    let recall_from = *recall_from;
                    assert!(recall_from <= street);
                    load_krxi_abstr::<T>(street, recall_from, true)
                }
                AbstractAlgorithmStreet::Kroi { recall_from } => {
                    let recall_from = *recall_from;
                    assert!(recall_from <= street);
                    load_krxi_abstr::<T>(street, recall_from, false)
                }
                // Kept as a catch-all so any other variant still panics at runtime.
                #[allow(unreachable_patterns)]
                _ => panic!("不能到这里"),
            };
            kriso2bucket_street.push(kriso_assign);
            bucket_size_street.push(bucket_size);
        }
        kriso2bucket_street.reverse();
        bucket_size_street.reverse();

        Some(KrwEmdClusterPP::<T>::new(
            kriso2bucket_street,
            bucket_size_street,
            alg_configs.to_vec(),
        ))
    }

    /// Writes `large_data` to `stream` using the framing understood by `receive_large_data`.
    ///
    /// Wire layout, all lengths as big-endian `u64`:
    /// 1. total payload length,
    /// 2. zero or more frames, each a frame length followed by that many bytes
    ///    (at most `bigbuf_size` bytes per frame),
    /// 3. a terminating frame length of `0`.
    ///
    /// # Panics
    /// Panics if any write to the stream fails.
    async fn send_large_data(stream: &mut TcpStream, large_data: &Vec<u8>) {
        let total_len = large_data.len() as u64;
        stream.write_all(&total_len.to_be_bytes()).await.unwrap();

        let mut pending: &[u8] = large_data.as_slice();
        while !pending.is_empty() {
            let frame_len = pending.len().min(Self::bigbuf_size);
            let (frame, rest) = pending.split_at(frame_len);
            stream.write_all(&(frame_len as u64).to_be_bytes()).await.unwrap();
            stream.write_all(frame).await.unwrap();
            pending = rest;
        }

        // A zero-length frame marks the end of the transfer.
        stream.write_all(&0u64.to_be_bytes()).await.unwrap();
    }

    /// Announces a task to the peer and waits for its acknowledgement.
    ///
    /// Sends `do_what` verbatim, then reads a single reply (up to `buf_size` bytes) and
    /// requires it to be exactly `Ok(<do_what>)`.
    ///
    /// # Panics
    /// Panics if the write or read fails, if the peer address cannot be obtained, or if
    /// the reply is anything other than the expected acknowledgement.
    async fn stream_shake_hand(stream: &mut TcpStream, do_what: &str) {
        stream.write_all(do_what.as_bytes()).await.unwrap();

        let mut reply = vec![0u8; Self::buf_size];
        let reply_len = stream.read(&mut reply).await.unwrap();
        let expected_ack = format!("Ok({})", do_what);

        if &reply[..reply_len] != expected_ack.as_bytes() {
            eprintln!("Failed to {} to {}.", do_what,  stream.peer_addr().unwrap());
            panic!();
        }
        println!("{} to {}", do_what,  stream.peer_addr().unwrap());
    }

    /// Accepts the next authorised connection on a listener and answers its handshake.
    ///
    /// The listener `listener_id` stays locked for the whole call. Connections from
    /// addresses that are not in `self.ips` are told they are unauthorised, shut down,
    /// and the wait continues; accept errors are logged and retried.
    ///
    /// The first message of the accepted connection must equal `expect_what` and the
    /// peer must be `expect_addr`. On success the peer is answered with
    /// `Ok(<expect_what>)` and the stream is returned together with the peer's IP.
    ///
    /// # Panics
    /// Panics on read/write failures, and when the task name or the peer address does not
    /// match (after answering `Err(<expect_what>)`). The mismatch log line decodes the
    /// received bytes as UTF-8 and panics if they are not valid UTF-8.
    async fn listener_shake_hand(self: Arc<Self>, listener_id: usize, expect_what: &str, expect_addr: IpAddr) -> (TcpStream, IpAddr) {
        let listener = self.listeners[listener_id].lock().await;

        let (mut stream, peer_addr) = loop {
            let (mut candidate, remote) = match listener.accept().await {
                Ok(accepted) => accepted,
                Err(e) => {
                    eprintln!("Failed to accept connection: {}", e);
                    continue;
                }
            };

            // Only machines listed in `ips` may talk to this listener.
            let remote_ip = remote.ip();
            if self.ips.contains(&remote_ip) {
                break (candidate, remote_ip);
            }

            eprintln!("Unauthorized connection from {}", remote_ip);
            candidate.write_all(b"who r u? u are unauthorized").await.unwrap();
            candidate.shutdown().await.unwrap(); // close the connection right away
        };

        let mut request = vec![0u8; Self::buf_size];
        let request_len = stream.read(&mut request).await.unwrap();

        let task_matches = &request[..request_len] == expect_what.as_bytes();
        if task_matches && expect_addr == peer_addr {
            stream.write_all(format!("Ok({})", expect_what).as_bytes()).await.unwrap();
        } else {
            eprintln!("I wait task [{}] from {}, but come task [{}] from {}", expect_what, expect_addr, std::str::from_utf8(&request[..request_len]).unwrap(), stream.peer_addr().unwrap());
            stream.write_all(format!("Err({})", expect_what).as_bytes()).await.unwrap();
            panic!();
        }

        (stream, peer_addr)
    }

    // async fn listener_shake_hand(stream: &mut TcpStream, expect_what: &str, expect_addr: IpAddr) {
    //     let mut buf = vec![0; Self::buf_size];
    //     let n = stream.read(&mut buf).await.unwrap();
    //     if &buf[..n] != expect_what.as_bytes() {
    //         eprintln!("I wait task [{}] from {}, but come task [{}] from {}", expect_what, expect_addr, std::str::from_utf8(&buf[..n]).unwrap(), stream.peer_addr().unwrap());
    //         stream.write_all(format!("Err({})", expect_what).as_bytes()).await.unwrap();
    //         panic!();
    //     }
    //     else {
    //         stream.write_all(format!("Ok({})", expect_what).as_bytes()).await.unwrap();
    //     }
    // }

    async fn receive_large_data(stream: &mut TcpStream) -> Vec<u8>{
        let chunk_size = Self::bigbuf_size;

        let mut data_len_buf = [0u8; 8];
        stream.read_exact(&mut data_len_buf).await.unwrap();
        let data_len = u64::from_be_bytes(data_len_buf) as usize;
        
        let mut large_data = Vec::with_capacity(data_len); // 初始化用于存储完整数据的向量
        
        loop {
            let mut len_buf = [0u8; 8];
            stream.read_exact(&mut len_buf).await.unwrap();
            let len = u64::from_be_bytes(len_buf) as usize;
    
            if len == 0 {
                break;  // 如果读取的长度为0，表示数据传输完成
            }
    
            let mut chunk = vec![0u8; len];
            stream.read_exact(&mut chunk).await.unwrap();
    
            // 追加数据块到完整向量
            large_data.extend(chunk);
        }

        large_data
    }

    /// Master-side clustering of one street; must run on machine 0.
    ///
    /// Steps, in the order the workers expect them:
    /// 1. Load `kriso2distid`, `krid2wtdist` and `nrid2wdistnorms` for the street.
    /// 2. Send every worker its slice of `kriso2distid` (bincode + gzip) under the
    ///    handshake `Broad kriso2distid`, tally the weights of the local slice, and
    ///    combine the partial tallies with `reduce_krid2weight`.
    /// 3. Build the k-means input, serialize and gzip it, and send it to this machine's
    ///    children in the broadcast tree under the handshake
    ///    `Send Kmeans[street:.., recall_from:..]`.
    /// 4. Run `krwemd_kmeanspp_process_master`.
    /// 5. Translate the per-distribution assignment into a per-`kriso` assignment.
    ///
    /// Returns the bucket of every `kriso` index of the street.
    ///
    /// # Panics
    /// Panics if `st_weight` does not have `street - recall_from + 1` entries, if this is
    /// not machine 0, or on any serialization, compression or network failure.
    async fn do_street_cluster_master<T>(self: Arc<Self>, street: usize, recall_from: usize, st_weight: Vec<f64>, centroids_size: usize, max_iter: usize) -> Vec<usize> 
        where T: Singleton + Hand + WaughTrait + ShowdownRanker + 'static
    {
        assert_eq!(street-recall_from+1, st_weight.len());
        assert_eq!(self.machine_id, 0);
        // Reference instant for the elapsed-time log lines below.
        let start = Arc::new(std::time::Instant::now());

        // 1. Load data
        // kriso2distid, krid2wtdist, nrid2wdistnorms
        println!("开始读数据 {:?}", start.elapsed());
        let (kriso2distid, distsize) = Self::load_wtdistid_with_kriso::<T>(street, recall_from);
        println!("完成load_wtdistid_with_kriso {:?}", start.elapsed());
        let krid2wtdist = Self::load_wtdist_with_id::<T>(street, recall_from);
        println!("完成load_wtdist_with_id {:?}", start.elapsed());
        let nrid2wdistnorms = Self::load_wdists_with_ids::<T>(street, recall_from);
        println!("完成load_wdists_with_ids {:?}", start.elapsed());

        // Earlier single-machine version of step 2, kept for reference.
        // // 2. Compute weights
        // let temp = {
        //     let krid2weight: Arc<Vec<AtomicUsize>> = Arc::new((0..krid2wtdist.len()).into_par_iter().map(|_| AtomicUsize::new(0)).collect());
        //     println!("完成初始化krid2weight {:?}", start.elapsed());
        //     kriso2distid
        //         .par_iter()
        //         .enumerate()
        //         .for_each( |(kriso, &distid)| {
        //             let adder = T::instance().hand_index_volumn(kriso, street, recall_from);
        //             krid2weight[distid].fetch_add(adder, Ordering::Relaxed);
        //             if kriso % 1000003 == 0 { // prime
        //                 println!("完成初始化kriso2distid 部分{},  {:?}", kriso / 1000003, start.elapsed());
        //             }
        //         });
        //     println!("完成krid2weight 打点{:?}", start.elapsed());
        //     let krid2weight: Vec<usize> = krid2weight
        //         .par_iter()
        //         .map(|x| x.load(Ordering::Relaxed))
        //         .collect();
        //     println!("完成krid2weight{:?}", start.elapsed());
        //     let checksum = krid2weight.par_iter().sum::<usize>();
        //     println!("checksum1: {}", checksum);
        //     krid2weight
        // };

        // 2. Compute weights
        let (kriso2distid, krid2weight) = {
            // Send each worker its slice of kriso2distid together with distsize
            let kriso2distid = Arc::new(kriso2distid);
            let mut broad_tasks = Vec::new();
            let machine_size = self.ips.len();
            for machine_id in 1..machine_size{
                let (partial_kriso_begin, partial_kriso_len) = Self::local_split(kriso2distid.len(), machine_size, machine_id);
                let kriso2distid = kriso2distid.clone();
                // Workers accept this transfer on the port at offset `machine_size`.
                let broad_addr: SocketAddr = SocketAddr::new(self.ips[machine_id], Self::BASE_PORT + machine_size as u16);
                let do_what = format!("Broad kriso2distid");
                let bin_parameters = bincode::serialize(&(partial_kriso_begin, partial_kriso_len, distsize)).unwrap();
                let handle = tokio::spawn(async move{
                    let partial_kriso2distid = &kriso2distid[partial_kriso_begin..{partial_kriso_begin + partial_kriso_len}];
                    let bin_partial_kriso2distid = bincode::serialize(partial_kriso2distid).unwrap();
                    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
                    encoder.write_all(&bin_partial_kriso2distid).unwrap();
                    let compressed_bin_partial_kriso2distid = encoder.finish().unwrap();

                    let mut stream = TcpStream::connect(broad_addr).await.unwrap();
                    Self::stream_shake_hand(&mut stream, do_what.as_str()).await;
                    Self::send_large_data(&mut stream, &bin_parameters).await;
                    Self::send_large_data(&mut stream, &compressed_bin_partial_kriso2distid).await;
                });
                broad_tasks.push(handle);
            }
            // NOTE(review): the join results are not inspected, so a task that panicked
            // goes unnoticed here.
            futures::future::join_all(broad_tasks).await;
            println!("完成广播partial_kriso2distid {:?}", start.elapsed());

            // Tally the weights for the master's own slice
            let (partial_kriso_begin, partial_kriso_len) = Self::local_split(kriso2distid.len(), machine_size, self.machine_id);
            let partial_kriso2distid = &kriso2distid[partial_kriso_begin..{partial_kriso_begin+partial_kriso_len}];
            let krid2weight: Arc<Vec<AtomicUsize>> = Arc::new((0..distsize).into_par_iter().map(|_| AtomicUsize::new(0)).collect());
            println!("完成初始化krid2weight {:?}", start.elapsed());
            partial_kriso2distid
                .par_iter()
                .enumerate()
                .for_each( |(partial_kriso, &distid)| {
                    // Convert the slice-local index back into the global kriso index.
                    let kriso = partial_kriso + partial_kriso_begin;
                    let adder = T::instance().hand_index_volumn(kriso, street, recall_from);
                    krid2weight[distid].fetch_add(adder, Ordering::Relaxed);
                    if kriso % 1000003 == 0 { // prime stride for the progress log
                        println!("完成kriso2distid打点 部分{},  {:?}", kriso / 1000003, start.elapsed());
                    }
                });
            println!("完成krid2weight 打点{:?}", start.elapsed());
            let krid2weight: Vec<usize> = krid2weight
                .par_iter()
                .map(|x| x.load(Ordering::Relaxed))
                .collect();
            println!("完成krid2weight{:?}", start.elapsed());
            
            // reduce
            let krid2weight = self.clone().reduce_krid2weight(krid2weight).await;
            println!("完成reduce_krid2weight{:?}", start.elapsed());

            // Every spawned task has finished and dropped its clone, so this is the last Arc.
            let kriso2distid = Arc::try_unwrap(kriso2distid).unwrap();
            (kriso2distid, krid2weight)
        };
        let checksum = krid2weight.par_iter().sum::<usize>();
        println!("checksum2: {}", checksum);
        // assert_eq!(temp, krid2weight);

        // 3. Build the k-means input and forward it
        let kmeans = KrwEmdDistributedWeightedKmeanspp {
            points: krid2wtdist,
            weights: krid2weight,
            nrid2wdistnorms,
            street_weights: st_weight.clone()
        };
        println!("完成kmeans 构造{:?}", start.elapsed());
        {
            let bin_kmeans = bincode::serialize(&kmeans).unwrap();
            println!("bin_kmeans size original: {}", bin_kmeans.len());
            let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
            encoder.write_all(&bin_kmeans).unwrap();
            let compressed_data = encoder.finish().unwrap();
            println!("bin_kmeans size compressed: {}", compressed_data.len());
            
            let mut connections = FuturesUnordered::new();
            let machine_size = self.clone().ips.len();
            // Connect to this machine's children in the broadcast tree, staggering the
            // connection attempts by 10 ms per child.
            self.broadcast.0[self.machine_id]
                .iter()
                .enumerate()
                .for_each(|(list_idx, &child_idx)|{
                    // NOTE(review): `child_ip` is never read.
                    let child_ip = &self.ips[child_idx];
                    let child_addr: SocketAddr = SocketAddr::new(self.ips[child_idx], Self::BASE_PORT + machine_size as u16);
                    let delay = tokio::time::Duration::from_millis(list_idx as u64 * 10);
                    connections.push(async move{
                        tokio::time::sleep(delay).await;
                        TcpStream::connect(child_addr).await
                    });
                });

            let do_what = format!("Send Kmeans[street:{}, recall_from:{}]", street, recall_from);
            // Children are served one at a time, in the order their connections complete.
            while let Some(mut stream) =  connections.next().await {
                let mut stream = stream.unwrap();
                Self::stream_shake_hand(&mut stream, do_what.as_str()).await;
                Self::send_large_data(&mut stream, &compressed_data).await;
            }
        }
        println!("完成kmeans广播{:?}", start.elapsed());

        // 4. kmeanspp
        let dist_assign = self.krwemd_kmeanspp_process_master(kmeans, centroids_size, max_iter).await;
    
        // 5. dist_assign -> kriso_assign
        let kriso_size = T::instance().hand_isomorphism_size_street(street, recall_from);
        let kriso_assign = (0..kriso_size)
            .into_par_iter()
            .map(|kriso| {
                let krid = kriso2distid[kriso];
                let bucket = dist_assign[krid];
                bucket
            })
            .collect::<Vec<_>>();

        kriso_assign
    }

    /// Worker-side counterpart of `do_street_cluster_master`; must not run on machine 0.
    ///
    /// Steps:
    /// 0. Accept the `Broad kriso2distid` transfer from the master, tally the weights of
    ///    the received slice and take part in `reduce_krid2weight`. The reduced weights
    ///    are only used for the checksum log line here.
    /// 1. Accept the gzip-compressed k-means input from this machine's parent in the
    ///    broadcast tree, forward the compressed bytes to its own children, then
    ///    decompress and deserialize it.
    /// 2. Run `krwemd_kmeanspp_process_worker`.
    ///
    /// # Panics
    /// Panics if `st_weight` does not have `street - recall_from + 1` entries, if this is
    /// machine 0, or on any handshake, network, decompression or deserialization failure.
    async fn do_street_cluster_worker<T>(self: Arc<Self>, street:usize, recall_from:usize, st_weight: Vec<f64>, centroids_size:usize, max_iter:usize) 
        where T: Singleton + Hand + WaughTrait + ShowdownRanker + 'static
    {
        assert_eq!(street-recall_from+1, st_weight.len());
        assert!(self.machine_id != 0);
        // Reference instant for the elapsed-time log lines below.
        let start = Arc::new(std::time::Instant::now());

        // 0. Compute weights
        let krid2weight = {
            // Receive this worker's slice of kriso2distid together with distsize
            let expect_addr: IpAddr = self.ips[0];
            let machine_size = self.ips.len();
            let do_what = format!("Broad kriso2distid");
            let (mut stream, parent_addr) = self.clone().listener_shake_hand(machine_size, do_what.as_str(), expect_addr).await;
            println!("完成建立连接{:?}", start.elapsed());
            let bin_parameters = Self::receive_large_data(&mut stream).await;
            let (partial_kriso_begin, partial_kriso_len, distsize): (usize, usize, usize) = bincode::deserialize(&bin_parameters).unwrap();
            let compressed_bin_partial_kriso2distid = Self::receive_large_data(&mut stream).await;
            println!("完成接收compressed_bin_partial_kriso2distid{:?}", start.elapsed());
            let mut decoder = GzDecoder::new(&compressed_bin_partial_kriso2distid[..]);
            let mut bin_partial_kriso2distid = Vec::<u8>::new();
            decoder.read_to_end(&mut bin_partial_kriso2distid).unwrap();
            println!("完成解压compressed_bin_partial_kriso2distid{:?}", start.elapsed());
            let partial_kriso2distid: Vec<usize> = bincode::deserialize(&bin_partial_kriso2distid).unwrap();
            println!("完成反序列化partial_kriso2distid{:?}", start.elapsed());

            // Tally the weights for the received slice
            let krid2weight: Arc<Vec<AtomicUsize>> = Arc::new((0..distsize).into_par_iter().map(|_| AtomicUsize::new(0)).collect());
            println!("完成初始化krid2weight {:?}", start.elapsed());
            partial_kriso2distid
                .par_iter()
                .enumerate()
                .for_each( |(partial_kriso, &distid)| {
                    // Convert the slice-local index back into the global kriso index.
                    let kriso = partial_kriso + partial_kriso_begin;
                    let adder = T::instance().hand_index_volumn(kriso, street, recall_from);
                    krid2weight[distid].fetch_add(adder, Ordering::Relaxed);
                    if kriso % 1000003 == 0 { // prime stride for the progress log
                        println!("完成初始化kriso2distid 部分{},  {:?}", kriso / 1000003, start.elapsed());
                    }
                });
            println!("完成krid2weight 打点{:?}", start.elapsed());
            let krid2weight: Vec<usize> = krid2weight
                .par_iter()
                .map(|x| x.load(Ordering::Relaxed))
                .collect();
            println!("完成krid2weight{:?}", start.elapsed());

            // reduce
            self.clone().reduce_krid2weight(krid2weight).await         
        };
        println!("完成reduce_krid2weight{:?}", start.elapsed());
        let checksum = krid2weight.par_iter().sum::<usize>();
        println!("checksum: {}", checksum);

        // 1. Receive kmeans from the parent node and broadcast it downwards
        // let listener = &self.listeners[0];
        let machine_size = self.ips.len();
        let parent_id = self.broadcast.1[self.machine_id];
        let kmeans: KrwEmdDistributedWeightedKmeanspp = {
            let do_what = format!("Send Kmeans[street:{}, recall_from:{}]", street, recall_from);
            // Accept the parent's connection
            let expect_addr: IpAddr = self.ips[parent_id];
            let (mut stream, parent_addr) = self.clone().listener_shake_hand(machine_size, do_what.as_str(), expect_addr).await;
            
            // Receive the compressed kmeans and pass it on unchanged
            let machine_size = self.ips.len();
            let compressed_data = Self::receive_large_data(&mut stream).await;
            println!("完成kmeans 序列化数据的接收{:?}", start.elapsed());
            let mut connections = FuturesUnordered::new();
            // Connect to this machine's children in the broadcast tree, staggering the
            // connection attempts by 10 ms per child.
            self.broadcast.0[self.machine_id]
                .iter()
                .enumerate()
                .for_each(|(list_idx, &child_idx)|{
                    let child_addr: SocketAddr = SocketAddr::new(self.ips[child_idx], Self::BASE_PORT + machine_size as u16);
                    let delay = tokio::time::Duration::from_millis(list_idx as u64 * 10);
                    connections.push(async move{
                        tokio::time::sleep(delay).await;
                        TcpStream::connect(child_addr).await
                    });
                });

            // Children are served one at a time, in the order their connections complete.
            while let Some(mut stream) =  connections.next().await {
                let mut stream = stream.unwrap();
                Self::stream_shake_hand(&mut stream, do_what.as_str()).await;
                Self::send_large_data(&mut stream, &compressed_data).await;
            }
            println!("完成kmeans 序列化数据的继续广播{:?}", start.elapsed());

            // Restore the kmeans struct from the compressed bytes
            let mut decoder = GzDecoder::new(&compressed_data[..]);
            let mut bin_kmeans:Vec<u8>= Vec::new();
            decoder.read_to_end(&mut bin_kmeans).unwrap();
            println!("完成kmeans 序列化数据的解压缩广播{:?}", start.elapsed());
            bincode::deserialize(&bin_kmeans).expect("反序列化kmeans数据失败")
        };
        println!("完成kmeans 反序列化数据{:?}", start.elapsed());

        // 2. kmeanspp
        self.krwemd_kmeanspp_process_worker(kmeans, centroids_size, max_iter).await;

    }

    /// Splits the index range `0..total_size` into `split_size` consecutive parts and
    /// returns `(begin, len)` of part `split_id`.
    ///
    /// Parts are laid out back-to-front: every part except the first is
    /// `ceil(total_size / split_size)` long and the first part takes what is left, so
    /// part 0 is never larger than the others.
    ///
    /// When `total_size` is too small for that layout (the trailing parts alone would
    /// need more than `total_size` elements), the part boundaries are clamped at 0:
    /// leading parts become empty or shorter instead of the subtraction underflowing.
    /// The parts always stay disjoint and cover `0..total_size` exactly.
    ///
    /// # Panics
    /// Panics if `split_size` is 0 or `split_id >= split_size`.
    fn local_split(total_size: usize, split_size: usize, split_id: usize) -> (usize, usize){
        assert!(split_size > 0, "local_split: split_size must be positive");
        assert!(
            split_id < split_size,
            "local_split: split_id {} is out of range for {} parts",
            split_id,
            split_size
        );

        let nonfirst_size = (total_size + split_size - 1) / split_size;
        // Start of part `part`, measured back from the end and clamped at 0.
        // boundary(0) is always 0 and boundary(split_size) is always total_size.
        let boundary = |part: usize| {
            total_size.saturating_sub(nonfirst_size.saturating_mul(split_size - part))
        };

        let idx_begin = boundary(split_id);
        let idx_end = boundary(split_id + 1);
        (idx_begin, idx_end - idx_begin)
    }

    // fn local_split(total_size: usize, split_size: usize, split_id: usize) -> (usize, usize){
    //     let local_size = (total_size + split_size -1) / split_size;
    //     let idx_begin = split_id*local_size;
    //     let real_local_size = if idx_begin + local_size > total_size {
    //         total_size-idx_begin
    //     } else {
    //         local_size
    //     };
    //     (idx_begin, real_local_size)
    // }

    /// Computes the weighted average of the selected points, one distribution per stage.
    ///
    /// For every stage the result is the sum over `pntidcs` of the point's distribution
    /// for that stage (looked up in `nrid2wdistnorms`), scaled by the point's share of
    /// the total weight of the selection. The accumulator starts from 3-component zero
    /// vectors.
    ///
    /// # Panics
    /// Panics if there are no points at all, if an index is out of range, or if any
    /// resulting distribution does not sum to 1 within `1e-10` (which includes the case
    /// of an empty selection).
    fn average_points_to_centroid(krwemd_kmeanspp: &KrwEmdDistributedWeightedKmeanspp, pntidcs: &[usize]) -> Vec<Array1<f64>>{
        let points = &krwemd_kmeanspp.points;
        let weights = &krwemd_kmeanspp.weights;
        let norm_tables = &krwemd_kmeanspp.nrid2wdistnorms;
        let stage_size = points[0].len();

        let total_weight = pntidcs
            .par_iter()
            .map(|&idx| weights[idx])
            .sum::<usize>()
            .to_f64()
            .unwrap();

        let centroid = pntidcs
            .par_iter()
            .map(|&idx| {
                // Share of the selection's weight carried by this point.
                let share = weights[idx].to_f64().unwrap() / total_weight;
                points[idx]
                    .iter()
                    .enumerate()
                    .map(|(stage, &nrid)| &(norm_tables[stage][nrid]) * share)
                    .collect::<Vec<Array1<f64>>>()
            })
            .reduce(
                || vec![Array1::<f64>::zeros(3); stage_size],
                |mut acc, contribution| {
                    for (acc_dist, add_dist) in acc.iter_mut().zip(contribution.iter()) {
                        *acc_dist += add_dist;
                    }
                    acc
                },
            );

        // A weighted average of normalised distributions must itself be normalised.
        for dist in centroid.iter() {
            let deviation = (dist.sum() - 1_f64).abs();
            assert!(deviation < 1e-10, "(ndarr.sum() - 1_f64).abs() = {}", deviation);
        }

        centroid
    }

    /// Computes, for every point index in `pntidcs`, a weighted-Euclidean distance to `ctrd`.
    ///
    /// For each street `s`, a lookup table is built that maps every entry of
    /// `nrid2wdistnorms[s]` to `sqrt(2*d0^2 + d1^2 + 2*d2^2)`, where `d` is the element-wise
    /// difference between that entry and `ctrd[s]`. A point's distance is then the sum over
    /// its streets of `table[s][points[p][s]] * street_weights[s]`.
    ///
    /// * `krwemd_kmeanspp` - clustering data (points, per-street distributions, street weights).
    /// * `pntidcs` - indices into `krwemd_kmeanspp.points` to evaluate.
    /// * `ctrd` - one array per street; only components 0, 1 and 2 are read.
    ///
    /// Returns one distance per entry of `pntidcs`, in the same order.
    ///
    /// # Panics
    /// Panics on out-of-range indices (`ctrd[s]`, `points[p]`, table lookups) or if an array
    /// has fewer than three components.
    fn heuristic_distance_batch(krwemd_kmeanspp: &KrwEmdDistributedWeightedKmeanspp, pntidcs: &[usize], ctrd: &Vec<Array1<f64>>) -> Vec<f64> {
        let points: &[Vec<usize>] = &krwemd_kmeanspp.points;
        let street_weights = &krwemd_kmeanspp.street_weights;
        let per_street_norms: &[Vec<Array1<f64>>] = &krwemd_kmeanspp.nrid2wdistnorms;

        // One table per street: distance from every stored distribution to the centroid.
        let mut dis_tables: Vec<Vec<f64>> = Vec::with_capacity(per_street_norms.len());
        for (stidx, street_norms) in per_street_norms.iter().enumerate() {
            let street_ctrd: &Array1<f64> = &ctrd[stidx];
            let table: Vec<f64> = street_norms
                .par_iter()
                .map(|norm| {
                    let delta = norm - street_ctrd;
                    // Components 0 and 2 are weighted twice as heavily as component 1.
                    let weighted_sq = 2.0 * delta[0].powf(2.) + delta[1].powf(2.) + 2.0 * delta[2].powf(2.);
                    f64::sqrt(weighted_sq)
                })
                .collect();
            dis_tables.push(table);
        }

        // Each point is described by one table row id per street; sum the weighted lookups.
        pntidcs
            .par_iter()
            .map(|&pntidx| {
                points[pntidx]
                    .iter()
                    .enumerate()
                    .map(|(stidx, &st_nrid)| dis_tables[stidx][st_nrid] * street_weights[stidx])
                    .sum::<f64>()
            })
            .collect::<Vec<f64>>()
    }

    /// Computes, for every point index in `pntidcs`, the street-weighted earth mover's distance
    /// to `ctrd`.
    ///
    /// For each street `s`, every entry of `nrid2wdistnorms[s]` is solved against `ctrd[s]`
    /// with `EarthMovers` under the fixed 3x3 ground cost `|i - j|`; the table value is the sum
    /// of `transport * cost`. A point's distance is the sum over its streets of
    /// `table[s][points[p][s]] * street_weights[s]`.
    ///
    /// * `krwemd_kmeanspp` - clustering data (points, per-street distributions, street weights).
    /// * `pntidcs` - indices into `krwemd_kmeanspp.points` to evaluate.
    /// * `ctrd` - one array per street.
    ///
    /// Returns one distance per entry of `pntidcs`, in the same order.
    ///
    /// # Panics
    /// Panics if the solver returns an error or on out-of-range indices.
    fn distance_batch(krwemd_kmeanspp: &KrwEmdDistributedWeightedKmeanspp, pntidcs: &[usize], ctrd: &Vec<Array1<f64>>) -> Vec<f64> {
        let points: &[Vec<usize>] = &krwemd_kmeanspp.points;
        let street_weights = &krwemd_kmeanspp.street_weights;
        let per_street_norms: &[Vec<Array1<f64>>] = &krwemd_kmeanspp.nrid2wdistnorms;
        let ldw_cost: Array2<f64> = ndarray::array![[0., 1., 2.],[1., 0., 1.],[2., 1., 0.]];

        // One table per street: EMD from every stored distribution to the centroid.
        let mut dis_tables: Vec<Vec<f64>> = Vec::with_capacity(per_street_norms.len());
        for (stidx, street_norms) in per_street_norms.iter().enumerate() {
            let table: Vec<f64> = street_norms
                .par_iter()
                .map(|norm| {
                    // The solver takes its inputs by `&mut`, so every solve works on private copies.
                    let mut source = norm.clone();
                    let mut target = ctrd[stidx].clone();
                    let mut scratch_cost = ldw_cost.clone();
                    let transport = EarthMovers::new(&mut source, &mut target, &mut scratch_cost).solve().unwrap();
                    // The cost is evaluated against the pristine matrix, not the scratch copy.
                    (&transport * &ldw_cost).sum()
                })
                .collect();
            dis_tables.push(table);
        }

        // Each point is described by one table row id per street; sum the weighted lookups.
        pntidcs
            .par_iter()
            .map(|&pntidx| {
                points[pntidx]
                    .iter()
                    .enumerate()
                    .map(|(stidx, &st_nrid)| dis_tables[stidx][st_nrid] * street_weights[stidx])
                    .sum::<f64>()
            })
            .collect::<Vec<f64>>()
    }

    async fn krwemd_kmeanspp_process_master(self: Arc<Self>, krwemd_kmeanspp: KrwEmdDistributedWeightedKmeanspp, centroids_size: usize, max_iter: usize) -> Vec<usize>{
        // # assist
        let machine_size = self.ips.len();
        let (local_ctrdidx_begin, local_centroids_size) = Self::local_split(centroids_size, machine_size, self.machine_id);
        let points_len = krwemd_kmeanspp.points.len();
        let total_pntidcs: Vec<usize> = (0..points_len).collect();

        // # Kmeanspp的pp，与单中心计算不同的是，每个分部会抽出相应的初始centroid
        let centroids = {
            let (partial_pntidx_begin, partial_points_len) = Self::local_split(points_len, machine_size, self.machine_id);
            let partial_pntidcs: Vec<usize> = (partial_pntidx_begin..{partial_pntidx_begin+partial_points_len}).collect();
            let mut partial_assign: Vec<usize> = (0..partial_points_len).into_par_iter().map(|_| centroids_size).collect();
            let mut partial_expmindis: Vec<f64> = (0..partial_points_len).into_par_iter().map(|_| f64::MAX / (partial_points_len+1) as f64).collect();
            let mut partial_weighted_distribution = WeightedIndex::new(&partial_expmindis).expect("概率抽样设置失败");
            let mut rng = rand::thread_rng();
            let mut local_centroid_pntidcs:Vec<Arc<Mutex<Vec<usize>>>> = (0..local_centroids_size).into_par_iter().map(|_| Arc::new(Mutex::new(vec![]))).collect();
            // 需要传的字段
            let mut local_centroids: Vec<Vec<Array1<f64>>> = vec![];

            for local_ctrdidx in 0..local_centroids_size {
                let ctrdidx = local_ctrdidx + local_ctrdidx_begin;
                
                // 找点
                let new_local_centroid_partial_pntidx = partial_weighted_distribution.sample(&mut rng);
                let new_local_centroid_pntidx = new_local_centroid_partial_pntidx + partial_pntidx_begin;
                partial_assign[new_local_centroid_partial_pntidx] = ctrdidx;
                partial_expmindis[new_local_centroid_partial_pntidx] = 0.0; // 以免distance(a, a) != 0的情况
                local_centroid_pntidcs[local_ctrdidx].lock().unwrap().push(new_local_centroid_pntidx);

                // 构建centroid
                let new_centorid = Self::average_points_to_centroid(&krwemd_kmeanspp, local_centroid_pntidcs[local_ctrdidx].lock().unwrap().as_ref());
                local_centroids.push(new_centorid);

                // 更新最小指数距离
                Self::distance_batch(&krwemd_kmeanspp, &partial_pntidcs, local_centroids.last().unwrap())
                    .into_par_iter()
                    .zip(partial_expmindis.par_iter_mut())
                    .for_each(|(new_dis, expdis)| {
                        let new_expdis =  f64::exp(new_dis) - 1.0;
                        *expdis = expdis.min(new_expdis);
                    });
                
                // 更新抽样
                partial_weighted_distribution = WeightedIndex::new(&partial_expmindis).expect("概率抽样设置失败");
            }

            self.clone().reduce_pp(local_centroids).await
        };

        // cluster
        let mut is_conv = false;
        let assign = {
            let mut assign: Vec<usize> = (0..points_len).into_par_iter().map(|_| centroids_size).collect();
            let mut local_centroids: Vec<Vec<Array1<f64>>> = centroids.iter()
                .skip(local_ctrdidx_begin).take(local_centroids_size)
                .cloned().collect();
            let mut local_centroid_pntidcs:Vec<Arc<Mutex<Vec<usize>>>> = (0..local_centroids_size).into_par_iter().map(|_| Arc::new(Mutex::new(vec![]))).collect();
            for itercnt in 0..max_iter {
                let mut new_assign: Vec<usize> = (0..points_len).into_par_iter().map(|_| centroids_size).collect();
                let mut mindis: Vec<f64> = (0..points_len).into_par_iter().map(|_| f64::MAX).collect();

                // point new_assign to centroid
                local_centroids
                    .iter()
                    .enumerate()
                    .for_each(|(local_ctrdidx, centroid)| {
                        Self::distance_batch(&krwemd_kmeanspp, total_pntidcs.as_ref(), centroid)
                            .into_par_iter()
                            .zip(mindis.par_iter_mut())
                            .zip(new_assign.par_iter_mut())
                            .for_each(|((newdis, dis), point2centroid)| {
                                if newdis < *dis {
                                    *dis = newdis;
                                    *point2centroid = local_ctrdidx + local_ctrdidx_begin;
                                }
                            });
                        println!("finish centroid {} batch distance computing in iter {}", local_ctrdidx, itercnt);    
                    });
                let mut new_assign = self.clone().reduce_cluster(new_assign, mindis).await;

                // update centroid
                local_centroid_pntidcs
                    .par_iter_mut()
                    .for_each(|v| v.lock().unwrap().clear());
                new_assign
                    .par_iter()
                    .enumerate()
                    .for_each(|(pntidx, &ctrdidx)| {
                        if (ctrdidx >= local_ctrdidx_begin) && (ctrdidx < local_ctrdidx_begin + local_centroids_size) {
                            let local_ctrdidx = ctrdidx - local_ctrdidx_begin;
                            local_centroid_pntidcs[local_ctrdidx].lock().unwrap().push(pntidx);
                        }
                    });
                local_centroids
                    .par_iter_mut()
                    .enumerate()
                    .for_each(|(local_ctrdidx, centroid)| {
                        *centroid = Self::average_points_to_centroid(&krwemd_kmeanspp, local_centroid_pntidcs[local_ctrdidx].lock().unwrap().as_ref());
                    });

                // is_conv
                is_conv = new_assign.par_iter().zip(assign.par_iter()).all(|(a,b)| a == b);
                if is_conv {
                    println!("update iter:{:?}--is_conv", itercnt);
                    break;
                }
                else if itercnt%1 == 0 {
                // else {
                    println!("have excute {} iteration", itercnt);
                }
                assign.swap_with_slice(&mut new_assign);
            }
            assign
        };

        if !is_conv {
            println!("is not converge until {} iteration", max_iter);
        }

        assign
    }

    async fn krwemd_kmeanspp_process_worker(self: Arc<Self>, krwemd_kmeanspp: KrwEmdDistributedWeightedKmeanspp, centroids_size: usize, max_iter: usize){
        // # assist
        let machine_size = self.ips.len();
        let (local_ctrdidx_begin, local_centroids_size) = Self::local_split(centroids_size, machine_size, self.machine_id);
        let points_len = krwemd_kmeanspp.points.len();
        let total_pntidcs: Vec<usize> = (0..points_len).collect();

        // # Kmeanspp的pp，与单中心计算不同的是，每个分部会抽出相应的初始centroid
        let centroids = {
            let (partial_pntidx_begin, partial_points_len) = Self::local_split(points_len, machine_size, self.machine_id);
            let partial_pntidcs: Vec<usize> = (partial_pntidx_begin..{partial_pntidx_begin+partial_points_len}).collect();
            let mut partial_assign: Vec<usize> = (0..partial_points_len).into_par_iter().map(|_| centroids_size).collect();
            let mut partial_expmindis: Vec<f64> = (0..partial_points_len).into_par_iter().map(|_| f64::MAX/ (partial_points_len+1) as f64).collect();
            let mut partial_weighted_distribution = WeightedIndex::new(&partial_expmindis).expect("概率抽样设置失败");
            let mut rng = rand::thread_rng();
            let mut local_centroid_pntidcs:Vec<Arc<Mutex<Vec<usize>>>> = (0..local_centroids_size).into_par_iter().map(|_| Arc::new(Mutex::new(vec![]))).collect();
            // 需要传的字段
            let mut local_centroids: Vec<Vec<Array1<f64>>> = vec![];

            for local_ctrdidx in 0..local_centroids_size {
                let ctrdidx = local_ctrdidx + local_ctrdidx_begin;
                
                // 找点
                let new_local_centroid_partial_pntidx = partial_weighted_distribution.sample(&mut rng);
                let new_local_centroid_pntidx = new_local_centroid_partial_pntidx + partial_pntidx_begin;
                partial_assign[new_local_centroid_partial_pntidx] = ctrdidx;
                partial_expmindis[new_local_centroid_partial_pntidx] = 0.0; // 以免distance(a, a) != 0的情况
                local_centroid_pntidcs[local_ctrdidx].lock().unwrap().push(new_local_centroid_pntidx);

                // 构建centroid
                let new_centorid = Self::average_points_to_centroid(&krwemd_kmeanspp, local_centroid_pntidcs[local_ctrdidx].lock().unwrap().as_ref());
                local_centroids.push(new_centorid);

                // 更新最小指数距离
                Self::distance_batch(&krwemd_kmeanspp, &partial_pntidcs, local_centroids.last().unwrap())
                    .into_par_iter()
                    .zip(partial_expmindis.par_iter_mut())
                    .for_each(|(new_dis, expdis)| {
                        let new_expdis =  f64::exp(new_dis) - 1.0;
                        *expdis = expdis.min(new_expdis);
                    });

                // 更新抽样
                partial_weighted_distribution = WeightedIndex::new(&partial_expmindis).expect("概率抽样设置失败");
            }

            self.clone().reduce_pp(local_centroids).await
        };

        // cluster
        let mut is_conv = false;
        {
            let mut assign: Vec<usize> = (0..points_len).into_par_iter().map(|_| centroids_size).collect();
            let mut local_centroids: Vec<Vec<Array1<f64>>> = centroids.iter()
                .skip(local_ctrdidx_begin).take(local_centroids_size)
                .cloned().collect();
            let mut local_centroid_pntidcs:Vec<Arc<Mutex<Vec<usize>>>> = (0..local_centroids_size).into_par_iter().map(|_| Arc::new(Mutex::new(vec![]))).collect();
            for itercnt in 0..max_iter {
                let mut new_assign: Vec<usize> = (0..points_len).into_par_iter().map(|_| centroids_size).collect();
                let mut mindis: Vec<f64> = (0..points_len).into_par_iter().map(|_| f64::MAX).collect();

                // point new_assign to centroid
                local_centroids
                    .iter()
                    .enumerate()
                    .for_each(|(local_ctrdidx, centroid)| {
                        Self::distance_batch(&krwemd_kmeanspp, total_pntidcs.as_ref(), centroid)
                            .into_par_iter()
                            .zip(mindis.par_iter_mut())
                            .zip(new_assign.par_iter_mut())
                            .for_each(|((newdis, dis), point2centroid)| {
                                if newdis < *dis {
                                    *dis = newdis;
                                    *point2centroid = local_ctrdidx + local_ctrdidx_begin;
                                }
                            });
                        println!("finish centroid {} batch distance computing in iter {}", local_ctrdidx, itercnt);    
                    });
                let mut new_assign = self.clone().reduce_cluster(new_assign, mindis).await;

                // update centroid
                local_centroid_pntidcs
                    .par_iter_mut()
                    .for_each(|v| v.lock().unwrap().clear());
                new_assign
                    .par_iter()
                    .enumerate()
                    .for_each(|(pntidx, &ctrdidx)| {
                        if (ctrdidx >= local_ctrdidx_begin) && (ctrdidx < local_ctrdidx_begin + local_centroids_size) {
                            let local_ctrdidx = ctrdidx - local_ctrdidx_begin;
                            local_centroid_pntidcs[local_ctrdidx].lock().unwrap().push(pntidx);
                        }
                    });
                local_centroids
                    .par_iter_mut()
                    .enumerate()
                    .for_each(|(local_ctrdidx, centroid)| {
                        *centroid = Self::average_points_to_centroid(&krwemd_kmeanspp, local_centroid_pntidcs[local_ctrdidx].lock().unwrap().as_ref());
                    });

                // is_conv
                is_conv = new_assign.par_iter().zip(assign.par_iter()).all(|(a,b)| a == b);
                if is_conv {
                    println!("update iter:{:?}--is_conv", itercnt);
                    break;
                }
                else if itercnt%1 == 0 {
                // else {
                    println!("have excute {} iteration", itercnt);
                }
                assign.swap_with_slice(&mut new_assign);
            }
        }

        if !is_conv {
            println!("is not converge until {} iteration", max_iter);
        }

    }

    /// 环循环传递局部产生的centroids和构建这个centroid的点的序号（全局）并把他们拼接到一起
    async fn reduce_pp(self: Arc<Self>, local_centroids: Vec<Vec<Array1<f64>>>) -> Vec<Vec<Array1<f64>>>{
        let machine_size = self.ips.len();       
        let mut circle_tasks = Vec::new();
        
        // 本机驱动的任务 先发，后接
        {
            let next_machine_id = (self.machine_id + machine_size-1)% machine_size;
            let prev_machine_id = (self.machine_id + 1)% machine_size;
            let machine_id = self.machine_id;
            let do_what = format!("Circle initial centroids");
            let self_clone = self.clone();
            let handle = tokio::spawn(async move {
                // 先发
                let next_addr: SocketAddr = SocketAddr::new(self_clone.ips[next_machine_id], Self::BASE_PORT+machine_id as u16);
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, do_what.as_str()).await;
                let bin_local_centroids = bincode::serialize(&local_centroids).unwrap();
                Self::send_large_data(&mut stream, &bin_local_centroids).await;
                drop(stream);

                // 后接
                let expect_addr: IpAddr = self_clone.ips[prev_machine_id];
                let (mut stream, parent_addr) = self_clone.clone().listener_shake_hand(machine_id, do_what.as_str(), expect_addr).await;
                let bin_local_centroids = Self::receive_large_data(&mut stream).await;
                let local_centroids: Vec<Vec<Array1<f64>>> = bincode::deserialize(&bin_local_centroids).unwrap();
                drop(stream);
                
                local_centroids
            });
            circle_tasks.push(handle);
        }

        // peer驱动的任务，先接，后发
        for machine_offset in 1..machine_size {
            let next_machine_id = (self.machine_id + machine_size-1)% machine_size;
            let prev_machine_id = (self.machine_id + 1)% machine_size;
            let machine_id = (self.machine_id + machine_offset) % machine_size;
            let do_what = format!("Circle initial centroids");
            let self_clone = self.clone();
            let handle = tokio::spawn(async move {
                // 先接
                let expect_addr: IpAddr = self_clone.ips[prev_machine_id];
                let (mut stream, parent_addr) = self_clone.clone().listener_shake_hand(machine_id, do_what.as_str(), expect_addr).await;
                let bin_local_centroids = Self::receive_large_data(&mut stream).await;
                let local_centroids: Vec<Vec<Array1<f64>>> = bincode::deserialize(&bin_local_centroids).unwrap();
                drop(stream);

                // 后发
                let next_addr: SocketAddr = SocketAddr::new(self_clone.ips[next_machine_id], Self::BASE_PORT+machine_id as u16);
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, do_what.as_str()).await;
                let bin_local_centroids = bincode::serialize(&local_centroids).unwrap();
                Self::send_large_data(&mut stream, &bin_local_centroids).await;
                drop(stream);

                local_centroids
            });
            circle_tasks.push(handle);
        }
        let mut results = futures::future::join_all(circle_tasks).await;
        results.rotate_right(self.machine_id);

        // Combine results from all machines
        let mut centroids: Vec<Vec<Array1<f64>>> = Vec::new();

        for result in results.into_iter() {
            match result {
                Ok(local_centroids) => {
                    centroids.extend(local_centroids);
                },
                Err(error) => {
                    // Handle the error (e.g., log it, return an error)
                    eprintln!("Error during communication: {}", error);
                    panic!();
                }
            }
        }

        centroids
    }

    /// Combines the per-machine nearest-centroid candidates into one global assignment by
    /// circulating them around the ring of machines twice.
    ///
    /// The points are split into one partition per machine id ("tag") with `local_split`, and
    /// one task is spawned per tag:
    /// * Round 1 ("Circle cluster"): the partition's candidate assignment (bincode + gzip) and
    ///   distances (bincode) travel around the ring; a receiving task keeps, per point, the
    ///   candidate with the strictly smaller distance compared to its own.
    /// * Round 2 ("Circle cluster verified"): the merged assignment of the partition travels
    ///   around the ring once more and is what each task returns.
    ///
    /// The task tagged with this machine's own id starts each round by sending; every other
    /// task starts each round by receiving.
    ///
    /// * `new_assign` - this machine's candidate centroid per point.
    /// * `mindis` - the matching distance per point; must have the same length.
    ///
    /// Returns the verified partitions concatenated in ascending tag order.
    ///
    /// # Panics
    /// Panics if the lengths differ or on connection, compression, (de)serialization or
    /// task-join failures.
    async fn reduce_cluster(self: Arc<Self>, new_assign: Vec<usize>, mindis: Vec<f64>) -> Vec<usize>{
        const DO_WHAT_MERGE: &str = "Circle cluster";
        const DO_WHAT_VERIFIED: &str = "Circle cluster verified";

        // Gzip-compresses `raw` with the default compression level.
        fn gzip_pack(raw: &[u8]) -> Vec<u8> {
            let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
            encoder.write_all(raw).unwrap();
            encoder.finish().unwrap()
        }

        // Inflates a gzip stream back into its raw bytes.
        fn gzip_unpack(packed: &[u8]) -> Vec<u8> {
            let mut decoder = GzDecoder::new(packed);
            let mut raw = Vec::<u8>::new();
            decoder.read_to_end(&mut raw).unwrap();
            raw
        }

        // Per point, adopts the circulated candidate when its distance is strictly smaller.
        fn adopt_closer(assign: &mut [usize], dis: &mut [f64], circled_assign: &[usize], circled_dis: &[f64]) {
            assign.par_iter_mut().zip(dis.par_iter_mut())
                .zip(circled_assign.par_iter().zip(circled_dis.par_iter()))
                .for_each(|((pnt2ctrd, newdis), (pnt2cclctrd, cclnewdis))| {
                    if *cclnewdis < *newdis {
                        *newdis = *cclnewdis;
                        *pnt2ctrd = *pnt2cclctrd;
                    }
                });
        }

        let machine_size = self.ips.len();
        let points_len = new_assign.len();
        assert_eq!(points_len, mindis.len());
        let own_machine_id = self.machine_id;
        // "next" is where this machine sends to, "prev" is where it receives from.
        let next_machine_id = (own_machine_id + machine_size - 1) % machine_size;
        let prev_machine_id = (own_machine_id + 1) % machine_size;
        let mut circle_tasks = Vec::new();

        // Task driven by this machine: send, receive + merge, send verified, receive verified.
        {
            let node = self.clone();
            let (partial_pntidx_begin, partial_points_len) = Self::local_split(points_len, machine_size, own_machine_id);
            let mut partial_new_assign: Vec<usize> = new_assign.iter()
                .skip(partial_pntidx_begin).take(partial_points_len)
                .copied().collect();
            let mut partial_mindis: Vec<f64> = mindis.iter()
                .skip(partial_pntidx_begin).take(partial_points_len)
                .copied().collect();
            circle_tasks.push(tokio::spawn(async move {
                // Round 1: send this machine's candidates.
                let next_addr: SocketAddr = SocketAddr::new(node.ips[next_machine_id], Self::BASE_PORT + own_machine_id as u16);
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, DO_WHAT_MERGE).await;
                let raw_assign = bincode::serialize(&partial_new_assign).unwrap();
                println!("bin_partial_new_assign size original: {}", raw_assign.len());
                let packed_assign = gzip_pack(&raw_assign);
                println!("bin_partial_new_assign size compressed: {}", packed_assign.len());
                Self::send_large_data(&mut stream, &packed_assign).await;
                let raw_mindis = bincode::serialize(&partial_mindis).unwrap();
                println!("bin_partial_mindis size: {}", raw_mindis.len());
                Self::send_large_data(&mut stream, &raw_mindis).await;
                drop(stream);

                // Round 1: receive what came back around the ring.
                let expect_addr: IpAddr = node.ips[prev_machine_id];
                let (mut stream, _peer_addr) = node.clone().listener_shake_hand(own_machine_id, DO_WHAT_MERGE, expect_addr).await;
                let packed_assign = Self::receive_large_data(&mut stream).await;
                let raw_assign = gzip_unpack(&packed_assign[..]);
                let circled_partial_new_assign: Vec<usize> = bincode::deserialize(&raw_assign).unwrap();
                let raw_mindis = Self::receive_large_data(&mut stream).await;
                let circled_partial_mindis: Vec<f64> = bincode::deserialize(&raw_mindis).unwrap();
                drop(stream);

                adopt_closer(&mut partial_new_assign, &mut partial_mindis, &circled_partial_new_assign, &circled_partial_mindis);

                // Round 2: send the merged assignment.
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, DO_WHAT_VERIFIED).await;
                let raw_assign = bincode::serialize(&partial_new_assign).unwrap();
                let packed_assign = gzip_pack(&raw_assign);
                Self::send_large_data(&mut stream, &packed_assign).await;
                drop(stream);

                // Round 2: receive it back after the full circle.
                let (mut stream, _peer_addr) = node.clone().listener_shake_hand(own_machine_id, DO_WHAT_VERIFIED, expect_addr).await;
                let packed_assign = Self::receive_large_data(&mut stream).await;
                let raw_assign = gzip_unpack(&packed_assign[..]);
                let verified_partial_new_assign: Vec<usize> = bincode::deserialize(&raw_assign).unwrap();
                drop(stream);

                verified_partial_new_assign
            }));
        }

        // Tasks driven by the peers: receive + merge, forward, receive verified, forward verified.
        for machine_offset in 1..machine_size {
            let tag_machine_id = (own_machine_id + machine_offset) % machine_size;
            let node = self.clone();
            let (partial_pntidx_begin, partial_points_len) = Self::local_split(points_len, machine_size, tag_machine_id);
            let mut partial_new_assign: Vec<usize> = new_assign.iter()
                .skip(partial_pntidx_begin).take(partial_points_len)
                .copied().collect();
            let mut partial_mindis: Vec<f64> = mindis.iter()
                .skip(partial_pntidx_begin).take(partial_points_len)
                .copied().collect();
            circle_tasks.push(tokio::spawn(async move {
                // Round 1: receive the circulating candidates for this partition.
                let expect_addr: IpAddr = node.ips[prev_machine_id];
                let (mut stream, _peer_addr) = node.clone().listener_shake_hand(tag_machine_id, DO_WHAT_MERGE, expect_addr).await;
                let packed_assign = Self::receive_large_data(&mut stream).await;
                let raw_assign = gzip_unpack(&packed_assign[..]);
                let circled_partial_new_assign: Vec<usize> = bincode::deserialize(&raw_assign).unwrap();
                let raw_mindis = Self::receive_large_data(&mut stream).await;
                let circled_partial_mindis: Vec<f64> = bincode::deserialize(&raw_mindis).unwrap();
                drop(stream);

                adopt_closer(&mut partial_new_assign, &mut partial_mindis, &circled_partial_new_assign, &circled_partial_mindis);

                // Round 1: forward the merged candidates.
                let next_addr: SocketAddr = SocketAddr::new(node.ips[next_machine_id], Self::BASE_PORT + tag_machine_id as u16);
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, DO_WHAT_MERGE).await;
                let raw_assign = bincode::serialize(&partial_new_assign).unwrap();
                println!("bin_partial_new_assign size original: {}", raw_assign.len());
                let packed_assign = gzip_pack(&raw_assign);
                println!("bin_partial_new_assign size compressed: {}", packed_assign.len());
                Self::send_large_data(&mut stream, &packed_assign).await;
                let raw_mindis = bincode::serialize(&partial_mindis).unwrap();
                println!("bin_partial_mindis size: {}", raw_mindis.len());
                Self::send_large_data(&mut stream, &raw_mindis).await;
                drop(stream);

                // Round 2: receive the verified assignment.
                let (mut stream, _peer_addr) = node.clone().listener_shake_hand(tag_machine_id, DO_WHAT_VERIFIED, expect_addr).await;
                let packed_assign = Self::receive_large_data(&mut stream).await;
                let raw_assign = gzip_unpack(&packed_assign[..]);
                let verified_partial_new_assign: Vec<usize> = bincode::deserialize(&raw_assign).unwrap();
                drop(stream);

                // Round 2: forward the verified assignment.
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, DO_WHAT_VERIFIED).await;
                let raw_assign = bincode::serialize(&verified_partial_new_assign).unwrap();
                let packed_assign = gzip_pack(&raw_assign);
                Self::send_large_data(&mut stream, &packed_assign).await;
                drop(stream);

                verified_partial_new_assign
            }));
        }

        // Tasks were spawned starting at this machine's own tag; rotate so index == tag.
        let mut results = futures::future::join_all(circle_tasks).await;
        results.rotate_right(own_machine_id);

        // Concatenate the partitions of all machines.
        let mut combined_assign: Vec<usize> = Vec::new();
        for result in results {
            match result {
                Ok(partition) => combined_assign.extend(partition),
                Err(error) => {
                    eprintln!("Error during communication: {}", error);
                    panic!();
                }
            }
        }

        combined_assign
    }

    async fn reduce_krid2weight(self: Arc<Self>, krid2weight: Vec<usize>) -> Vec<usize>{
        let machine_size = self.ips.len();
        let distsize = krid2weight.len();    
        let mut circle_tasks = Vec::new();
        let krid2weight = Arc::new(krid2weight);

        // 本机驱动的任务 先发，后接，再发
        {
            let next_machine_id = (self.machine_id + machine_size-1)% machine_size;
            let prev_machine_id = (self.machine_id + 1)% machine_size;
            let machine_id = self.machine_id;
            let do_what1 = format!("Circle krid2weight");
            let do_what2 = format!("Circle krid2weight verified");
            let self_clone = self.clone();
            let (partial_krid_begin, partial_krid_len) = Self::local_split(distsize, machine_size, machine_id);
            let krid2weight = krid2weight.clone();
            let handle = tokio::spawn(async move {
                let mut partial_krid2weight = &krid2weight[partial_krid_begin..{partial_krid_begin+partial_krid_len}];
                
                // 先发
                let next_addr: SocketAddr = SocketAddr::new(self_clone.ips[next_machine_id], Self::BASE_PORT+machine_id as u16);
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, do_what1.as_str()).await;
                let bin_partial_krid2weight = bincode::serialize(partial_krid2weight).unwrap();
                println!("bin_partial_krid2weight size original: {}", bin_partial_krid2weight.len());
                let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
                encoder.write_all(&bin_partial_krid2weight).unwrap();
                let compressed_bin_partial_krid2weight = encoder.finish().unwrap();
                println!("bin_partial_krid2weight size compressed: {}", compressed_bin_partial_krid2weight.len());
                Self::send_large_data(&mut stream, &compressed_bin_partial_krid2weight).await;
                drop(stream);

                // 后接
                let expect_addr: IpAddr = self_clone.ips[prev_machine_id];
                let (mut stream, parent_addr) = self_clone.clone().listener_shake_hand(machine_id, do_what1.as_str(), expect_addr).await;
                let compressed_bin_partial_krid2weight = Self::receive_large_data(&mut stream).await;
                let mut decoder = GzDecoder::new(&compressed_bin_partial_krid2weight[..]);
                let mut bin_partial_krid2weight = Vec::<u8>::new();
                decoder.read_to_end(&mut bin_partial_krid2weight).unwrap();
                let mut circled_partial_krid2weight: Vec<usize> = bincode::deserialize(&bin_partial_krid2weight).unwrap();
                drop(stream);
                
                // 发先
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, do_what2.as_str()).await;
                let bin_partial_krid2weight = bincode::serialize(&circled_partial_krid2weight).unwrap();
                let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
                encoder.write_all(&bin_partial_krid2weight).unwrap();
                let compressed_bin_partial_krid2weight = encoder.finish().unwrap();
                Self::send_large_data(&mut stream, &compressed_bin_partial_krid2weight).await;
                drop(stream);

                // 接后
                let (mut stream, parent_addr) = self_clone.clone().listener_shake_hand(machine_id, do_what2.as_str(), expect_addr).await;
                let compressed_bin_partial_krid2weight = Self::receive_large_data(&mut stream).await;
                let mut decoder = GzDecoder::new(&compressed_bin_partial_krid2weight[..]);
                let mut bin_partial_krid2weight = Vec::<u8>::new();
                decoder.read_to_end(&mut bin_partial_krid2weight).unwrap();
                let circled_partial_krid2weight: Vec<usize> = bincode::deserialize(&bin_partial_krid2weight).unwrap();
                drop(stream);

                circled_partial_krid2weight
            });
            circle_tasks.push(handle);
        }

        // peer驱动的任务 先接，后发，再接
        for machine_offset in 1..machine_size {
            let next_machine_id = (self.machine_id + machine_size-1)% machine_size;
            let prev_machine_id = (self.machine_id + 1)% machine_size;
            let machine_id = (self.machine_id + machine_offset) % machine_size;
            let do_what1 = format!("Circle krid2weight");
            let do_what2 = format!("Circle krid2weight verified");
            let self_clone = self.clone();
            let (partial_krid_begin, partial_krid_len) = Self::local_split(distsize, machine_size, machine_id);
            let krid2weight = krid2weight.clone();
            let handle = tokio::spawn(async move {
                let mut partial_krid2weight = &krid2weight[partial_krid_begin..{partial_krid_begin+partial_krid_len}];
                
                // 先接
                let expect_addr: IpAddr = format!("{}", self_clone.ips[prev_machine_id]).parse().unwrap();
                let (mut stream, parent_addr) = self_clone.clone().listener_shake_hand(machine_id, do_what1.as_str(), expect_addr).await;
                let compressed_bin_partial_krid2weight = Self::receive_large_data(&mut stream).await;
                let mut decoder = GzDecoder::new(&compressed_bin_partial_krid2weight[..]);
                let mut bin_partial_krid2weight = Vec::<u8>::new();
                decoder.read_to_end(&mut bin_partial_krid2weight).unwrap();
                let mut circled_partial_krid2weight: Vec<usize> = bincode::deserialize(&bin_partial_krid2weight).unwrap();
                drop(stream);
                
                circled_partial_krid2weight.par_iter_mut().zip(partial_krid2weight.par_iter())
                    .for_each(|(circled_weight, &weight)|{
                        *circled_weight += weight
                    });

                // 后发
                let next_addr: SocketAddr = SocketAddr::new(self_clone.ips[next_machine_id], Self::BASE_PORT+machine_id as u16);
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, do_what1.as_str()).await;
                let bin_partial_krid2weight = bincode::serialize(&circled_partial_krid2weight).unwrap();
                println!("bin_partial_krid2weight size original: {}", bin_partial_krid2weight.len());
                let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
                encoder.write_all(&bin_partial_krid2weight).unwrap();
                let compressed_bin_partial_krid2weight = encoder.finish().unwrap();
                println!("bin_partial_krid2weight size compressed: {}", compressed_bin_partial_krid2weight.len());
                Self::send_large_data(&mut stream, &compressed_bin_partial_krid2weight).await;
                drop(stream);

                // 接先
                let (mut stream, parent_addr) = self_clone.clone().listener_shake_hand(machine_id, do_what2.as_str(), expect_addr).await;
                let compressed_bin_partial_krid2weight = Self::receive_large_data(&mut stream).await;
                let mut decoder = GzDecoder::new(&compressed_bin_partial_krid2weight[..]);
                let mut bin_partial_krid2weight = Vec::<u8>::new();
                decoder.read_to_end(&mut bin_partial_krid2weight).unwrap();
                let circled_partial_krid2weight: Vec<usize> = bincode::deserialize(&bin_partial_krid2weight).unwrap();
                drop(stream);
                
                // 发后
                let mut stream = TcpStream::connect(next_addr).await.unwrap();
                Self::stream_shake_hand(&mut stream, do_what2.as_str()).await;
                let bin_partial_krid2weight = bincode::serialize(&circled_partial_krid2weight).unwrap();
                let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
                encoder.write_all(&bin_partial_krid2weight).unwrap();
                let compressed_bin_partial_krid2weight = encoder.finish().unwrap();
                Self::send_large_data(&mut stream, &compressed_bin_partial_krid2weight).await;
                drop(stream);
                
                circled_partial_krid2weight
            });
            circle_tasks.push(handle);
        }
        let mut results = futures::future::join_all(circle_tasks).await;
        results.rotate_right(self.machine_id);

        // Combine results from all machines
        let mut krid2weight: Vec<usize> = Vec::new();

        for result in results.into_iter() {
            match result {
                Ok(partial_krid2weight) => {
                    krid2weight.extend(partial_krid2weight);
                },
                Err(error) => {
                    // Handle the error (e.g., log it, return an error)
                    eprintln!("Error during communication: {}", error);
                    panic!();
                }
            }
        }

        krid2weight
    }

    /// Loads the table mapping each hand isomorphism of `street` to its
    /// winning(-trace) distribution id.
    ///
    /// The RocksDB database at `data/<GAME_NAME>` is opened read-only and a single
    /// column family is read:
    /// * `<GAME_NAME>_nriso_<street+1>_to_winning_distribution_id` when
    ///   `street == recall_from`;
    /// * `<GAME_NAME>_priso_<street+1>_from_<recall_from+1>_to_winning_trace_distribution_id`
    ///   otherwise.
    ///
    /// Keys and values are 4-byte big-endian `u32`s. The entry whose key equals the
    /// isomorphism count (`isosize`) is a trailer whose value is the number of
    /// distinct distribution ids (`distsize`).
    ///
    /// # Returns
    /// `(kriso2wtdistid, distsize)`; `kriso2wtdistid` has `isosize` entries and
    /// `kriso2wtdistid[iso]` is the distribution id stored for isomorphism `iso`.
    ///
    /// # Panics
    /// Panics if the database or column family cannot be opened, if `isosize` does
    /// not fit in a `u32` key, if the trailer entry is unreadable or missing, if any
    /// key/value is not exactly 4 bytes, if a key indexes past the trailer slot, or
    /// if either consistency check fails (trailer value equals `distsize`, number of
    /// distinct ids equals `distsize`).
    fn load_wtdistid_with_kriso<T>(street: usize, recall_from: usize) -> (Vec<usize>, usize)
        where T: Singleton + Hand + WaughTrait + ShowdownRanker + 'static
    {
        // Every key and value of this column family is a big-endian u32.
        let decode_u32 = |bytes: &[u8]| -> usize {
            let raw: [u8; 4] = bytes.try_into().expect("expected exactly 4 big-endian bytes");
            u32::from_be_bytes(raw) as usize
        };

        // Select the database and column family, then open the database read-only.
        let path = std::format!("data/{}", T::GAME_NAME);
        let mut options = Options::default();
        options.create_if_missing(false);

        let kriso2wtdistid_cf = if street == recall_from {
            format!(
                "{}_nriso_{}_to_winning_distribution_id",
                T::GAME_NAME,
                street + 1
            )
        } else {
            format!(
                "{}_priso_{}_from_{}_to_winning_trace_distribution_id",
                T::GAME_NAME,
                street + 1,
                recall_from + 1
            )
        };

        let cf_descriptors: Vec<_> = ["default", kriso2wtdistid_cf.as_str()]
            .iter()
            .map(|cf_name| {
                let mut cf_opt = Options::default();
                cf_opt.create_if_missing(false);
                ColumnFamilyDescriptor::new(*cf_name, cf_opt)
            })
            .collect();
        // The panic messages are built lazily, only on the failure path.
        let db = DBCommon::<SingleThreaded, _>::open_cf_descriptors_read_only(&options, &path, cf_descriptors, false)
            .unwrap_or_else(|err| panic!("打不开这个数据库{}的列族{}: {:?}", path, kriso2wtdistid_cf, err));
        let cf_handle = db
            .cf_handle(&kriso2wtdistid_cf)
            .unwrap_or_else(|| panic!("没有这个列族:{}", kriso2wtdistid_cf));

        // The trailer entry (key == isosize) stores distsize.
        let isosize = T::instance().hand_isomorphism_size_street(street, recall_from);
        // Refuse to silently truncate the key: a wrapped key would read an unrelated entry.
        let trailer_key: u32 = isosize
            .try_into()
            .expect("isosize does not fit in the 4-byte key space");
        let distsize = db
            .get_cf(&cf_handle, trailer_key.to_be_bytes())
            // A database read error and a missing key are distinct failures; report each as such.
            .unwrap_or_else(|err| panic!("读取{}街的isosize/key:{}失败: {:?}", street, isosize, err))
            .unwrap_or_else(|| panic!("isosize/key:{}在{}街的值为None", isosize, street));
        let distsize = decode_u32(&distsize[..]);

        // Read every (iso, distid) pair. Slots start at the placeholder `distsize + 1`,
        // which is not the trailer value the pop check below expects.
        let mut kriso2wtdistid = vec![distsize + 1; isosize + 1];
        for item in db.iterator_cf(cf_handle, IteratorMode::Start) {
            let (keybytes, valuebytes) = item.unwrap();
            assert!(keybytes.len() == 4 && valuebytes.len() == 4);
            let iso = decode_u32(&keybytes[..]);
            let distid = decode_u32(&valuebytes[..]);
            assert!(iso <= isosize, "iso key {} exceeds isosize {}", iso, isosize);
            kriso2wtdistid[iso] = distid;
        }

        // Validation: the trailer slot must hold distsize, and the remaining entries
        // must contain exactly distsize distinct ids.
        assert_eq!(kriso2wtdistid.pop().unwrap(), distsize);
        let unique: HashSet<_> = kriso2wtdistid.iter().cloned().collect();
        assert_eq!(unique.len(), distsize);

        (kriso2wtdistid, distsize)
    }

    /// Loads, for every distribution id of `street`, the id list describing that
    /// distribution.
    ///
    /// The RocksDB database at `data/<GAME_NAME>` is opened read-only and one column
    /// family is read, chosen by whether any earlier street is recalled:
    /// * `street == recall_from`: `<GAME_NAME>_nrid_<street+1>_to_winning_distribution`.
    ///   Each value is decoded as `[i64; 3]` only to validate it; the row returned for
    ///   id `d` is the single-element vector `[d]`.
    /// * otherwise: `<GAME_NAME>_prid_<street+1>_from_<recall_from+1>_to_winning_trace_distribution`.
    ///   Each value is a bincode-encoded `Vec<u32>`, widened to `Vec<usize>`.
    ///
    /// # Returns
    /// A vector indexed by distribution id.
    ///
    /// # Panics
    /// Panics if the database or column family cannot be opened, if a key is not
    /// 4 bytes, if a value fails to deserialize, or if the keys are not exactly
    /// `0, 1, 2, …` in iteration order.
    fn load_wtdist_with_id<T>(street: usize, recall_from: usize) -> Vec<Vec<usize>> 
        where T: Singleton + Hand + WaughTrait + ShowdownRanker + 'static
    {
        // Select the database and column family, then open the database read-only.
        let path = std::format!("data/{}", T::GAME_NAME);
        let mut options = Options::default();
        options.create_if_missing(false);

        // The two modes differ only in the column-family name and the value encoding.
        let without_trace = street == recall_from;
        let dist_cf = if without_trace {
            format!(
                "{}_nrid_{}_to_winning_distribution",
                T::GAME_NAME,
                street + 1
            )
        } else {
            format!(
                "{}_prid_{}_from_{}_to_winning_trace_distribution",
                T::GAME_NAME,
                street + 1,
                recall_from + 1
            )
        };

        let cf_names = vec!["default", dist_cf.as_str()];
        let cf_descriptors: Vec<_> = cf_names
            .iter()
            .map(|cf_name| {
                let mut cf_opt = Options::default();
                cf_opt.create_if_missing(false);
                ColumnFamilyDescriptor::new(*cf_name, cf_opt)
            })
            .collect();
        // The panic messages are built lazily, only on the failure path.
        let db = DBCommon::<SingleThreaded, _>::open_cf_descriptors_read_only(&options, &path, cf_descriptors, false)
            .unwrap_or_else(|err| panic!("打不开这个数据库{}的列族{:?}: {:?}", path, cf_names, err));
        let cf_handle = db
            .cf_handle(&dist_cf)
            .unwrap_or_else(|| panic!("没有这个列族:{}", dist_cf));

        // Read every entry in key order; the ids must be dense and start at 0 so that
        // the position in the output vector equals the distribution id.
        let mut krid2wtdist: Vec<Vec<usize>> = vec![];
        for item in db.iterator_cf(cf_handle, IteratorMode::Start) {
            let (keybytes, valuebytes) = item.unwrap();
            assert!(keybytes.len() == 4);
            let distid = u32::from_be_bytes((*keybytes).try_into().unwrap()) as usize;
            let dist: Vec<usize> = if without_trace {
                // The stored histogram is decoded purely as a sanity check; the
                // distribution is represented by its own id.
                let _dist: [i64; 3] = bincode::deserialize(&valuebytes).unwrap();
                vec![distid]
            } else {
                let trace: Vec<u32> = bincode::deserialize(&valuebytes).unwrap();
                trace.into_iter().map(|id| id as usize).collect()
            };
            krid2wtdist.push(dist);
            assert_eq!(distid, krid2wtdist.len() - 1);
        }
        krid2wtdist
    }

    /// Loads the normalised winning distributions of every street from `street`
    /// down to `recall_from`.
    ///
    /// The outer vector is ordered in reverse street order: index 0 is `street`,
    /// the last index is `recall_from`.
    ///
    /// For each street `st` the column family
    /// `<GAME_NAME>_nrid_<st+1>_to_winning_distribution` of the RocksDB database at
    /// `data/<GAME_NAME>` is read. Each value is a bincode-encoded `[i64; 3]`, which is
    /// divided component-wise by the sum of its three components.
    ///
    /// # Returns
    /// `result[k][id]` is the normalised 3-component distribution with id `id` on the
    /// `k`-th street counted backwards from `street`.
    ///
    /// # Panics
    /// Panics if the database or a column family cannot be opened, if a key is not
    /// 4 bytes, if a value fails to deserialize, or if the keys of a column family
    /// are not exactly `0, 1, 2, …` in iteration order.
    ///
    /// NOTE(review): a histogram whose components sum to 0 is not guarded against and
    /// produces non-finite entries — confirm such rows cannot occur in the data.
    fn load_wdists_with_ids<T>(street: usize, recall_from: usize)-> Vec<Vec<Array1<f64>>> 
        where T: Singleton + Hand + WaughTrait + ShowdownRanker + 'static
    {
        // Select the database and column families, then open the database read-only.
        let path = std::format!("data/{}", T::GAME_NAME);
        let mut options = Options::default();
        options.create_if_missing(false);

        // One column family per street, listed from `street` down to `recall_from`.
        let nrid2wdist_cfs: Vec<String> = (recall_from..=street)
            .rev()
            .map(|st| {
                format!(
                    "{}_nrid_{}_to_winning_distribution",
                    T::GAME_NAME,
                    st + 1,
                )
            })
            .collect();

        let mut cf_names: Vec<&str> = vec!["default"];
        cf_names.extend(nrid2wdist_cfs.iter().map(|cf| cf.as_str()));

        let cf_descriptors: Vec<_> = cf_names
            .iter()
            .map(|cf_name| {
                let mut cf_opt = Options::default();
                cf_opt.create_if_missing(false);
                ColumnFamilyDescriptor::new(*cf_name, cf_opt)
            })
            .collect();
        // The panic messages are built lazily, only on the failure path.
        let db = DBCommon::<SingleThreaded, _>::open_cf_descriptors_read_only(&options, &path, cf_descriptors, false)
            .unwrap_or_else(|err| panic!("打不开这个数据库{}的列族{:?}: {:?}", path, cf_names, err));

        // Read and normalise one street at a time, so only one raw table is alive at once.
        let mut nrid2wdistsnorm: Vec<Vec<Array1<f64>>> = Vec::with_capacity(nrid2wdist_cfs.len());
        for cf_name in &nrid2wdist_cfs {
            let cf_handle = db
                .cf_handle(cf_name.as_str())
                .unwrap_or_else(|| panic!("没有这个列族:{}", cf_name));

            // Raw histograms; ids must be dense and start at 0 so index == id.
            let mut nrid2wdist: Vec<[i64; 3]> = vec![];
            for item in db.iterator_cf(cf_handle, IteratorMode::Start) {
                let (keybytes, valuebytes) = item.unwrap();
                assert!(keybytes.len() == 4);
                let distid = u32::from_be_bytes((*keybytes).try_into().unwrap()) as usize;
                let dist: [i64; 3] = bincode::deserialize(&valuebytes).unwrap();
                nrid2wdist.push(dist);
                assert_eq!(distid, nrid2wdist.len() - 1);
            }

            // Scale every histogram by the sum of its components.
            let normalised = nrid2wdist
                .par_iter()
                .map(|&wdist| {
                    let total = wdist.iter().sum::<i64>().to_f64().unwrap();
                    let wdist_norm = wdist
                        .iter()
                        .map(|&component| component.to_f64().unwrap() / total)
                        .collect::<Vec<_>>();
                    Array1::<f64>::from(wdist_norm)
                })
                .collect::<Vec<Array1<f64>>>();
            nrid2wdistsnorm.push(normalised);
        }

        nrid2wdistsnorm
    }
}


impl<T> KrwEmdClusterPP<T> 
    where T: Singleton + Hand + WaughTrait + ShowdownRanker + 'static,
{
    /// Bundles the per-street bucket assignments, the per-street bucket counts and
    /// the per-street abstraction configurations into a `KrwEmdClusterPP`.
    fn new (kriso2bucket_street: Vec<Vec<usize>>, bucket_size_street: Vec<usize>, abstr_configs: Vec<AbstractAlgorithmStreet>) -> Self{
        Self {
            kriso2bucket_street,
            bucket_size_street,
            abstr_configs,
            _marker: PhantomData,
        }
    }

    /// Persists this clustering under
    /// `data/CustomCluster/<GAME_NAME>/KrwEmd/<custom_name>`.
    ///
    /// Any directory already present at that path is removed and recreated. A
    /// RocksDB database is then opened there (created if missing),
    /// `save_cluster_bucket_street` is called once per street, and finally
    /// `save_cluster_configs_yaml` is called with the configuration list.
    ///
    /// # Panics
    /// Panics on any filesystem or database error, if a street's configuration is
    /// not one of `KrwEmd`, `Isomorphism`, `Krwi` or `Kroi`, or if a street's
    /// `recall_from` is greater than the street index.
    pub fn save(&self, custom_name: &str) {
        let path_str = std::format!("data/CustomCluster/{}/KrwEmd/{}", T::GAME_NAME, custom_name);
        let path = Path::new(path_str.as_str());
        // Start from an empty directory: wipe a previous save of the same name.
        if path.exists() {
            fs::remove_dir_all(path).unwrap();
        }
        fs::create_dir_all(path).unwrap();

        // Open the database, creating it when it does not exist yet.
        let mut opts = Options::default();
        opts.create_if_missing(true);
        let mut db = DB::open(&opts, path).unwrap();

        let buckets_per_street = self
            .kriso2bucket_street
            .iter()
            .zip(self.bucket_size_street.iter());
        for (street, (kriso2bucket, bucket_size)) in buckets_per_street.enumerate() {
            // Every supported configuration variant carries a `recall_from` field.
            let recall_from = match &self.abstr_configs[street] {
                AbstractAlgorithmStreet::KrwEmd { recall_from, .. }
                | AbstractAlgorithmStreet::Isomorphism { recall_from }
                | AbstractAlgorithmStreet::Krwi { recall_from }
                | AbstractAlgorithmStreet::Kroi { recall_from } => *recall_from,
                _ => panic!("不能到这里"),
            };
            assert!(street >= recall_from, "street:{}, recall_from: {}", street, recall_from);
            save_cluster_bucket_street::<T>(&mut db, street, recall_from, kriso2bucket.as_ref(), *bucket_size);
        }

        save_cluster_configs_yaml(path, self.abstr_configs.as_ref());
    }
}