#pragma once

#include <torch/extension.h>


/// Declares the CPU entry point `gen_cache_mask_cpu`; the definition is not
/// part of this header.
///
/// NOTE(review): the parameter descriptions below are inferred from the names
/// alone -- confirm them against the definition before relying on them.
///
/// @param caches       List of tensors; presumably the cached entries that the
///                     masks are generated for -- TODO confirm.
/// @param dst_id       Integer identifier associated with `dst`.
/// @param dst          Destination tensor.
/// @param dst_bndries  List of tensors; presumably boundary data for `dst`
///                     -- TODO confirm layout and dtype.
/// @param reuse_masks  List of tensors; presumably previously computed reuse
///                     masks -- TODO confirm.
/// @param num_threads  Presumably the number of worker threads the CPU
///                     implementation may use -- TODO confirm.
/// @return A pair of tensor lists. What each list holds is not visible from
///         this declaration; see the definition.
///
/// All arguments are taken by value, so each `std::vector` argument is copied
/// (or moved) at the call site.
std::tuple<std::vector<torch::Tensor>, std::vector<torch::Tensor>> gen_cache_mask_cpu(
    std::vector<torch::Tensor> caches,
    int dst_id,
    torch::Tensor dst,
    std::vector<torch::Tensor> dst_bndries,
    std::vector<torch::Tensor> reuse_masks,
    int num_threads);


/// Declares the CPU entry point `gen_reuse_mask_cpu`; the definition is not
/// part of this header.
///
/// NOTE(review): the parameter descriptions below are inferred from the names
/// alone -- confirm them against the definition before relying on them.
///
/// @param src_id       Integer identifier associated with `src`.
/// @param src          Source tensor.
/// @param src_bndries  List of tensors; presumably boundary data for `src`
///                     -- TODO confirm layout and dtype.
/// @param dst_id       Integer identifier associated with `dst`.
/// @param dst          Destination tensor.
/// @param dst_bndries  List of tensors; presumably boundary data for `dst`
///                     -- TODO confirm layout and dtype.
/// @param num_threads  Presumably the number of worker threads the CPU
///                     implementation may use -- TODO confirm.
/// @return A pair of tensor lists. What each list holds is not visible from
///         this declaration; see the definition.
///
/// All arguments are taken by value, so each `std::vector` argument is copied
/// (or moved) at the call site.
std::tuple<std::vector<torch::Tensor>, std::vector<torch::Tensor>> gen_reuse_mask_cpu(
    int src_id,
    torch::Tensor src,
    std::vector<torch::Tensor> src_bndries,
    int dst_id,
    torch::Tensor dst,
    std::vector<torch::Tensor> dst_bndries,
    int num_threads);