16 #include <Eigen/Dense> 17 #include <Eigen/SparseCore> 49 Mrpt(
const Eigen::Ref<const Eigen::MatrixXf> &X_) :
50 X(Eigen::Map<const Eigen::MatrixXf>(X_.data(), X_.rows(), X_.cols())),
60 Mrpt(
const float *X_,
int dim_,
int n_samples_) :
61 X(Eigen::Map<const Eigen::MatrixXf>(X_, dim_, n_samples_)),
62 n_samples(n_samples_),
84 void grow(
int n_trees_,
int depth_,
float density_ = -1.0,
int seed = 0) {
87 throw std::logic_error(
"The index has already been grown.");
91 throw std::out_of_range(
"The number of trees must be positive.");
94 if (depth_ <= 0 || depth_ > std::log2(n_samples)) {
95 throw std::out_of_range(
"The depth must belong to the set {1, ... , log2(n)}.");
98 if (density_ < -1.0001 || density_ > 1.0001 || (density_ > -0.9999 && density_ < -0.0001)) {
99 throw std::out_of_range(
"The density must be on the interval (0,1].");
104 n_pool = n_trees_ * depth_;
105 n_array = 1 << (depth_ + 1);
108 density = 1.0 / std::sqrt(dim);
113 density < 1 ? build_sparse_random_matrix(sparse_random_matrix, n_pool, dim, density, seed) :
114 build_dense_random_matrix(dense_random_matrix, n_pool, dim, seed);
116 split_points = Eigen::MatrixXf(n_array, n_trees);
117 tree_leaves = std::vector<std::vector<int>>(n_trees);
119 count_first_leaf_indices_all(leaf_first_indices_all, n_samples, depth);
120 leaf_first_indices = leaf_first_indices_all[depth];
122 #pragma omp parallel for 123 for (
int n_tree = 0; n_tree < n_trees; ++n_tree) {
124 Eigen::MatrixXf tree_projections;
127 tree_projections.noalias() = sparse_random_matrix.middleRows(n_tree * depth, depth) * X;
129 tree_projections.noalias() = dense_random_matrix.middleRows(n_tree * depth, depth) * X;
131 tree_leaves[n_tree] = std::vector<int>(n_samples);
132 std::vector<int> &indices = tree_leaves[n_tree];
133 std::iota(indices.begin(), indices.end(), 0);
135 grow_subtree(indices.begin(), indices.end(), 0, 0, n_tree, tree_projections);
178 void grow(
double target_recall,
const Eigen::Ref<const Eigen::MatrixXf> &Q,
int k_,
int trees_max = -1,
179 int depth_max = -1,
int depth_min_ = -1,
int votes_max_ = -1,
180 float density = -1.0,
int seed = 0) {
181 if (target_recall < 0.0 - epsilon || target_recall > 1.0 + epsilon) {
182 throw std::out_of_range(
"Target recall must be on the interval [0,1].");
185 grow(Q, k_, trees_max, depth_max, depth_min_, votes_max_, density, seed);
186 prune(target_recall);
218 void grow(
double target_recall,
const float *Q,
int n_test,
int k_,
int trees_max = -1,
219 int depth_max = -1,
int depth_min_ = -1,
int votes_max_ = -1,
220 float density = -1.0,
int seed = 0,
const std::vector<int> &indices_test = {}) {
221 if (target_recall < 0.0 - epsilon || target_recall > 1.0 + epsilon) {
222 throw std::out_of_range(
"Target recall must be on the interval [0,1].");
225 grow(Q, n_test, k_, trees_max, depth_max, depth_min_, votes_max_, density, seed, indices_test);
226 prune(target_recall);
/**
 * Builds an autotuned index for a target recall level using a validation set
 * drawn from the indexed data itself.
 *
 * The test set size is capped at n_samples, test indices are obtained from
 * sample_indices(n_test, seed), the corresponding points are collected with
 * subset(), and everything is handed to the raw-pointer grow() overload that
 * takes a target recall.
 *
 * NOTE(review): this excerpt is lossy; the condition guarding the throw below
 * is not visible here.
 */
256 void grow_autotune(
double target_recall,
int k_,
int trees_max = -1,
int depth_max = -1,
int depth_min_ = -1,
257 int votes_max_ = -1,
float density_ = -1.0,
int seed = 0,
int n_test = 100) {
// Rejects an unusable test set size (guard condition not visible in this excerpt).
259 throw std::out_of_range(
"Test set size must be > 0.");
// Never ask for more test points than there are samples.
262 n_test = n_test > n_samples ? n_samples : n_test;
263 std::vector<int> indices_test(sample_indices(n_test, seed));
// Q is an owning copy built from the selected indices.
264 const Eigen::MatrixXf Q(
subset(indices_test));
// indices_test is passed along together with the validation points.
266 grow(target_recall, Q.data(), Q.cols(), k_, trees_max,
267 depth_max, depth_min_, votes_max_, density_, seed, indices_test);
282 if (index_type == normal || index_type == autotuned_unpruned) {
299 return index_type == autotuned;
344 void grow(
const float *data,
int n_test,
int k_,
int trees_max = -1,
int depth_max = -1,
345 int depth_min_ = -1,
int votes_max_ = -1,
float density_ = -1.0,
int seed = 0,
346 const std::vector<int> &indices_test = {}) {
348 if (trees_max == - 1) {
349 trees_max = std::min(std::sqrt(n_samples), 1000.0);
352 if (depth_min_ == -1) {
353 depth_min_ = std::max(static_cast<int>(std::log2(n_samples) - 11), 5);
356 if (depth_max == -1) {
357 depth_max = std::max(static_cast<int>(std::log2(n_samples) - 4), depth_min_);
360 if (votes_max_ == -1) {
361 votes_max_ = std::max(trees_max / 10, std::min(trees_max, 10));
364 if (density_ > -1.0001 && density_ < -0.9999) {
365 density_ = 1.0 / std::sqrt(dim);
369 throw std::logic_error(
"The index has already been grown.");
372 if (k_ <= 0 || k_ > n_samples) {
373 throw std::out_of_range(
"k_ must belong to the set {1, ..., n}.");
376 if (trees_max <= 0) {
377 throw std::out_of_range(
"trees_max must be positive.");
380 if (depth_max <= 0 || depth_max > std::log2(n_samples)) {
381 throw std::out_of_range(
"depth_max must belong to the set {1, ... , log2(n)}.");
384 if (depth_min_ <= 0 || depth_min_ > depth_max) {
385 throw std::out_of_range(
"depth_min_ must belong to the set {1, ... , depth_max}");
388 if (votes_max_ <= 0 || votes_max_ > trees_max) {
389 throw std::out_of_range(
"votes_max_ must belong to the set {1, ... , trees_max}.");
392 if (density_ < 0.0 || density_ > 1.0001) {
393 throw std::out_of_range(
"The density must be on the interval (0,1].");
396 if(n_samples < 101) {
397 throw std::out_of_range(
"Sample size must be at least 101 to autotune an index.");
400 depth_min = depth_min_;
401 votes_max = votes_max_;
404 const Eigen::Map<const Eigen::MatrixXf> Q(data, dim, n_test);
406 grow(trees_max, depth_max, density_, seed);
407 Eigen::MatrixXi exact(k, n_test);
408 compute_exact(Q, exact, indices_test);
410 std::vector<Eigen::MatrixXd> recalls(depth_max - depth_min + 1);
411 cs_sizes = std::vector<Eigen::MatrixXd>(depth_max - depth_min + 1);
413 for (
int d = depth_min; d <= depth_max; ++d) {
414 recalls[d - depth_min] = Eigen::MatrixXd::Zero(votes_max, trees_max);
415 cs_sizes[d - depth_min] = Eigen::MatrixXd::Zero(votes_max, trees_max);
418 for (
int i = 0; i < n_test; ++i) {
419 std::vector<Eigen::MatrixXd> recall_tmp(depth_max - depth_min + 1);
420 std::vector<Eigen::MatrixXd> cs_size_tmp(depth_max - depth_min + 1);
422 count_elected(Q.col(i), Eigen::Map<Eigen::VectorXi>(exact.data() + i * k, k),
423 votes_max, recall_tmp, cs_size_tmp);
425 for (
int d = depth_min; d <= depth_max; ++d) {
426 recalls[d - depth_min] += recall_tmp[d - depth_min];
427 cs_sizes[d - depth_min] += cs_size_tmp[d - depth_min];
431 for (
int d = depth_min; d <= depth_max; ++d) {
432 recalls[d - depth_min] /= (k * n_test);
433 cs_sizes[d - depth_min] /= n_test;
437 std::set<Mrpt_Parameters,decltype(is_faster)*> pars = list_parameters(recalls);
438 opt_pars = pareto_frontier(pars);
440 index_type = autotuned_unpruned;
468 void grow(
const Eigen::Ref<const Eigen::MatrixXf> &Q,
int k_,
int trees_max = -1,
int depth_max = -1,
469 int depth_min_ = -1,
int votes_max_ = -1,
float density_ = -1.0,
int seed = 0) {
470 if (Q.rows() != dim) {
471 throw std::invalid_argument(
"Dimensions of the data and the validation set do not match.");
474 grow(Q.data(), Q.cols(), k_, trees_max,
475 depth_max, depth_min_, votes_max_, density_, seed);
/**
 * Builds an autotuned (not yet pruned to a recall level) index using a
 * validation set drawn from the indexed data itself.
 *
 * The test set size is capped at n_samples, test indices are obtained from
 * sample_indices(n_test, seed), the corresponding points are collected with
 * subset(), and everything is handed to the raw-pointer autotuning grow().
 *
 * NOTE(review): this excerpt is lossy; the condition guarding the throw below
 * is not visible here.
 */
503 void grow_autotune(
int k_,
int trees_max = -1,
int depth_max = -1,
int depth_min_ = -1,
504 int votes_max_ = -1,
float density_ = -1.0,
int seed = 0,
int n_test = 100) {
// Rejects an unusable test set size (guard condition not visible in this excerpt).
506 throw std::out_of_range(
"Test set size must be > 0.");
// Never ask for more test points than there are samples.
509 n_test = n_test > n_samples ? n_samples : n_test;
510 std::vector<int> indices_test(sample_indices(n_test, seed));
// Q is an owning copy built from the selected indices.
511 const Eigen::MatrixXf Q(
subset(indices_test));
// indices_test is passed along together with the validation points.
513 grow(Q.data(), Q.cols(), k_, trees_max,
514 depth_max, depth_min_, votes_max_, density_, seed, indices_test);
528 if (target_recall < 0.0 - epsilon || target_recall > 1.0 + epsilon) {
529 throw std::out_of_range(
"Target recall must be on the interval [0,1].");
535 int depth_max = depth;
537 index2.n_trees = index2.par.
n_trees;
538 index2.depth = index2.par.
depth;
539 index2.votes = index2.par.
votes;
540 index2.n_pool = index2.depth * index2.n_trees;
541 index2.n_array = 1 << (index2.depth + 1);
542 index2.tree_leaves.assign(tree_leaves.begin(), tree_leaves.begin() + index2.n_trees);
543 index2.leaf_first_indices_all = leaf_first_indices_all;
544 index2.density = density;
547 index2.split_points = split_points.topLeftCorner(index2.n_array, index2.n_trees);
548 index2.leaf_first_indices = leaf_first_indices_all[index2.depth];
549 if (index2.density < 1) {
550 index2.sparse_random_matrix = Eigen::SparseMatrix<float, Eigen::RowMajor>(index2.n_pool, index2.dim);
551 for (
int n_tree = 0; n_tree < index2.n_trees; ++n_tree)
552 index2.sparse_random_matrix.middleRows(n_tree * index2.depth, index2.depth) =
553 sparse_random_matrix.middleRows(n_tree * depth_max, index2.depth);
555 index2.dense_random_matrix = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>(index2.n_pool, index2.dim);
556 for (
int n_tree = 0; n_tree < index2.n_trees; ++n_tree)
557 index2.dense_random_matrix.middleRows(n_tree * index2.depth, index2.depth) =
558 dense_random_matrix.middleRows(n_tree * depth_max, index2.depth);
560 index2.index_type = autotuned;
578 if (target_recall < 0.0 - epsilon || target_recall > 1.0 + epsilon) {
579 throw std::out_of_range(
"Target recall must be on the interval [0,1].");
585 int depth_max = depth;
587 index2->n_trees = index2->par.
n_trees;
588 index2->depth = index2->par.
depth;
589 index2->votes = index2->par.
votes;
590 index2->n_pool = index2->depth * index2->n_trees;
591 index2->n_array = 1 << (index2->depth + 1);
592 index2->tree_leaves.assign(tree_leaves.begin(), tree_leaves.begin() + index2->n_trees);
593 index2->leaf_first_indices_all = leaf_first_indices_all;
594 index2->density = density;
597 index2->split_points = split_points.topLeftCorner(index2->n_array, index2->n_trees);
598 index2->leaf_first_indices = leaf_first_indices_all[index2->depth];
599 if (index2->density < 1) {
600 index2->sparse_random_matrix = Eigen::SparseMatrix<float, Eigen::RowMajor>(index2->n_pool, index2->dim);
601 for (
int n_tree = 0; n_tree < index2->n_trees; ++n_tree)
602 index2->sparse_random_matrix.middleRows(n_tree * index2->depth, index2->depth) =
603 sparse_random_matrix.middleRows(n_tree * depth_max, index2->depth);
605 index2->dense_random_matrix = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>(index2->n_pool, index2->dim);
606 for (
int n_tree = 0; n_tree < index2->n_trees; ++n_tree)
607 index2->dense_random_matrix.middleRows(n_tree * index2->depth, index2->depth) =
608 dense_random_matrix.middleRows(n_tree * depth_max, index2->depth);
610 index2->index_type = autotuned;
626 if (index_type == normal) {
627 throw std::logic_error(
"The list of optimal parameters cannot be retrieved for the non-autotuned index.");
629 if (index_type == autotuned) {
630 throw std::logic_error(
"The list of optimal parameters cannot be retrieved for the index which has already been subsetted or deleted to the target recall level.");
633 std::vector<Mrpt_Parameters> new_pars;
634 std::copy(opt_pars.begin(), opt_pars.end(), std::back_inserter(new_pars));
661 void query(
const float *data,
int k,
int vote_threshold,
int *out,
662 float *out_distances =
nullptr,
int *out_n_elected =
nullptr)
const {
664 if (k <= 0 || k > n_samples) {
665 throw std::out_of_range(
"k must belong to the set {1, ..., n}.");
668 if (vote_threshold <= 0 || vote_threshold > n_trees) {
669 throw std::out_of_range(
"vote_threshold must belong to the set {1, ... , n_trees}.");
673 throw std::logic_error(
"The index must be built before making queries.");
676 const Eigen::Map<const Eigen::VectorXf> q(data, dim);
678 Eigen::VectorXf projected_query(n_pool);
680 projected_query.noalias() = sparse_random_matrix * q;
682 projected_query.noalias() = dense_random_matrix * q;
684 std::vector<int> found_leaves(n_trees);
690 #pragma omp parallel for 691 for (
int n_tree = 0; n_tree < n_trees; ++n_tree) {
693 for (
int d = 0; d < depth; ++d) {
694 const int j = n_tree * depth + d;
695 const int idx_left = 2 * idx_tree + 1;
696 const int idx_right = idx_left + 1;
697 const float split_point = split_points(idx_tree, n_tree);
698 if (projected_query(j) <= split_point) {
701 idx_tree = idx_right;
704 found_leaves[n_tree] = idx_tree - (1 << depth) + 1;
707 int n_elected = 0, max_leaf_size = n_samples / (1 << depth) + 1;
708 Eigen::VectorXi elected(n_trees * max_leaf_size);
709 Eigen::VectorXi votes = Eigen::VectorXi::Zero(n_samples);
712 for (
int n_tree = 0; n_tree < n_trees; ++n_tree) {
713 int leaf_begin = leaf_first_indices[found_leaves[n_tree]];
714 int leaf_end = leaf_first_indices[found_leaves[n_tree] + 1];
715 const std::vector<int> &indices = tree_leaves[n_tree];
716 for (
int i = leaf_begin; i < leaf_end; ++i) {
717 int idx = indices[i];
718 if (++votes(idx) == vote_threshold)
719 elected(n_elected++) = idx;
724 *out_n_elected = n_elected;
727 exact_knn(q, k, elected, n_elected, out, out_distances);
740 void query(
const Eigen::Ref<const Eigen::VectorXf> &q,
int k,
int vote_threshold,
int *out,
741 float *out_distances =
nullptr,
int *out_n_elected =
nullptr)
const {
742 query(q.data(), k, vote_threshold, out, out_distances, out_n_elected);
767 void query(
const float *q,
int *out,
float *out_distances =
nullptr,
768 int *out_n_elected =
nullptr)
const {
769 if (index_type == normal) {
770 throw std::logic_error(
"The index is not autotuned: k and vote threshold has to be specified.");
773 if (index_type == autotuned_unpruned) {
774 throw std::logic_error(
"The target recall level has to be set before making queries.");
777 query(q, k, votes, out, out_distances, out_n_elected);
788 void query(
const Eigen::Ref<const Eigen::VectorXf> &q,
int *out,
float *out_distances =
nullptr,
789 int *out_n_elected =
nullptr)
const {
790 query(q.data(), out, out_distances, out_n_elected);
814 static void exact_knn(
const float *q_data,
const float *X_data,
int dim,
int n_samples,
815 int k,
int *out,
float *out_distances =
nullptr) {
817 const Eigen::Map<const Eigen::MatrixXf> X(X_data, dim, n_samples);
818 const Eigen::Map<const Eigen::VectorXf> q(q_data, dim);
820 if (k < 1 || k > n_samples) {
821 throw std::out_of_range(
"k must be positive and no greater than the sample size of data X.");
824 Eigen::VectorXf distances(n_samples);
826 #pragma omp parallel for 827 for (
int i = 0; i < n_samples; ++i)
828 distances(i) = (X.col(i) - q).squaredNorm();
831 Eigen::MatrixXf::Index index;
832 distances.minCoeff(&index);
836 out_distances[0] = std::sqrt(distances(index));
841 Eigen::VectorXi idx(n_samples);
842 std::iota(idx.data(), idx.data() + n_samples, 0);
843 std::partial_sort(idx.data(), idx.data() + k, idx.data() + n_samples,
844 [&distances](
int i1,
int i2) {
return distances(i1) < distances(i2); });
846 for (
int i = 0; i < k; ++i)
850 for (
int i = 0; i < k; ++i)
851 out_distances[i] = std::sqrt(distances(idx(i)));
862 static void exact_knn(
const Eigen::Ref<const Eigen::VectorXf> &q,
863 const Eigen::Ref<const Eigen::MatrixXf> &X,
864 int k,
int *out,
float *out_distances =
nullptr) {
865 Mrpt::exact_knn(q.data(), X.data(), X.rows(), X.cols(), k, out, out_distances);
874 void exact_knn(
const float *q,
int k,
int *out,
float *out_distances =
nullptr)
const {
884 void exact_knn(
const Eigen::Ref<const Eigen::VectorXf> &q,
int k,
int *out,
885 float *out_distances =
nullptr)
const {
886 Mrpt::exact_knn(q.data(), X.data(), dim, n_samples, k, out, out_distances);
905 bool save(
const char *path)
const {
907 if ((fd = fopen(path,
"wb")) == NULL)
911 fwrite(&i,
sizeof(
int), 1, fd);
913 if (index_type == 2) {
914 write_parameter_list(opt_pars, fd);
917 write_parameters(&par, fd);
918 fwrite(&n_trees,
sizeof(
int), 1, fd);
919 fwrite(&depth,
sizeof(
int), 1, fd);
920 fwrite(&density,
sizeof(
float), 1, fd);
922 fwrite(split_points.data(),
sizeof(float), n_array * n_trees, fd);
925 for (
int i = 0; i < n_trees; ++i) {
926 int sz = tree_leaves[i].size();
927 fwrite(&sz,
sizeof(
int), 1, fd);
928 fwrite(&tree_leaves[i][0],
sizeof(
int), sz, fd);
933 int non_zeros = sparse_random_matrix.nonZeros();
934 fwrite(&non_zeros,
sizeof(
int), 1, fd);
935 for (
int k = 0; k < sparse_random_matrix.outerSize(); ++k) {
936 for (Eigen::SparseMatrix<float, Eigen::RowMajor>::InnerIterator it(sparse_random_matrix, k); it; ++it) {
937 float val = it.value();
938 int row = it.row(), col = it.col();
939 fwrite(&row,
sizeof(
int), 1, fd);
940 fwrite(&col,
sizeof(
int), 1, fd);
941 fwrite(&val,
sizeof(
float), 1, fd);
945 fwrite(dense_random_matrix.data(),
sizeof(float), n_pool * dim, fd);
960 if ((fd = fopen(path,
"rb")) == NULL)
964 fread(&i,
sizeof(
int), 1, fd);
965 index_type =
static_cast<itype
>(i);
966 if (index_type == autotuned_unpruned) {
967 read_parameter_list(fd);
970 read_parameters(&par, fd);
971 fread(&n_trees,
sizeof(
int), 1, fd);
972 fread(&depth,
sizeof(
int), 1, fd);
973 fread(&density,
sizeof(
float), 1, fd);
975 n_pool = n_trees * depth;
976 n_array = 1 << (depth + 1);
978 count_first_leaf_indices_all(leaf_first_indices_all, n_samples, depth);
979 leaf_first_indices = leaf_first_indices_all[depth];
981 split_points = Eigen::MatrixXf(n_array, n_trees);
982 fread(split_points.data(),
sizeof(float), n_array * n_trees, fd);
985 tree_leaves = std::vector<std::vector<int>>(n_trees);
986 for (
int i = 0; i < n_trees; ++i) {
988 fread(&sz,
sizeof(
int), 1, fd);
989 std::vector<int> leaves(sz);
990 fread(&leaves[0],
sizeof(
int), sz, fd);
991 tree_leaves[i] = leaves;
997 fread(&non_zeros,
sizeof(
int), 1, fd);
999 sparse_random_matrix = Eigen::SparseMatrix<float>(n_pool, dim);
1000 std::vector<Eigen::Triplet<float>> triplets;
1001 for (
int k = 0; k < non_zeros; ++k) {
1004 fread(&row,
sizeof(
int), 1, fd);
1005 fread(&col,
sizeof(
int), 1, fd);
1006 fread(&val,
sizeof(
float), 1, fd);
1007 triplets.push_back(Eigen::Triplet<float>(row, col, val));
1010 sparse_random_matrix.setFromTriplets(triplets.begin(), triplets.end());
1011 sparse_random_matrix.makeCompressed();
1013 dense_random_matrix = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>(n_pool, dim);
1014 fread(dense_random_matrix.data(),
sizeof(float), n_pool * dim, fd);
1030 return n_trees == 0;
1039 friend class MrptTest;
1040 friend class UtilityTest;
1051 void grow_subtree(std::vector<int>::iterator begin, std::vector<int>::iterator end,
1052 int tree_level,
int i,
int n_tree,
const Eigen::MatrixXf &tree_projections) {
1053 int n = end - begin;
1054 int idx_left = 2 * i + 1;
1055 int idx_right = idx_left + 1;
1057 if (tree_level == depth)
return;
1059 std::nth_element(begin, begin + n / 2, end,
1060 [&tree_projections, tree_level] (
int i1,
int i2) {
1061 return tree_projections(tree_level, i1) < tree_projections(tree_level, i2);
1063 auto mid = end - n / 2;
1066 split_points(i, n_tree) = tree_projections(tree_level, *(mid - 1));
1068 auto left_it = std::max_element(begin, mid,
1069 [&tree_projections, tree_level] (
int i1,
int i2) {
1070 return tree_projections(tree_level, i1) < tree_projections(tree_level, i2);
1072 split_points(i, n_tree) = (tree_projections(tree_level, *mid) +
1073 tree_projections(tree_level, *left_it)) / 2.0;
1076 grow_subtree(begin, mid, tree_level + 1, idx_left, n_tree, tree_projections);
1077 grow_subtree(mid, end, tree_level + 1, idx_right, n_tree, tree_projections);
1083 void exact_knn(
const Eigen::Map<const Eigen::VectorXf> &q,
int k,
const Eigen::VectorXi &indices,
1084 int n_elected,
int *out,
float *out_distances =
nullptr)
const {
1087 for (
int i = 0; i < k; ++i)
1090 if (out_distances) {
1091 for (
int i = 0; i < k; ++i)
1092 out_distances[i] = -1;
1098 Eigen::VectorXf distances(n_elected);
1100 #pragma omp parallel for 1101 for (
int i = 0; i < n_elected; ++i)
1102 distances(i) = (X.col(indices(i)) - q).squaredNorm();
1105 Eigen::MatrixXf::Index index;
1106 distances.minCoeff(&index);
1107 out[0] = n_elected ? indices(index) : -1;
1110 out_distances[0] = n_elected ? std::sqrt(distances(index)) : -1;
1115 int n_to_sort = n_elected > k ? k : n_elected;
1116 Eigen::VectorXi idx(n_elected);
1117 std::iota(idx.data(), idx.data() + n_elected, 0);
1118 std::partial_sort(idx.data(), idx.data() + n_to_sort, idx.data() + n_elected,
1119 [&distances](
int i1,
int i2) {
return distances(i1) < distances(i2); });
1121 for (
int i = 0; i < k; ++i)
1122 out[i] = i < n_elected ? indices(idx(i)) : -1;
1124 if (out_distances) {
1125 for (
int i = 0; i < k; ++i)
1126 out_distances[i] = i < n_elected ? std::sqrt(distances(idx(i))) : -1;
1130 void prune(
double target_recall) {
1131 if (target_recall < 0.0 - epsilon || target_recall > 1.0 + epsilon) {
1132 throw std::out_of_range(
"Target recall must be on the interval [0,1].");
1140 int depth_max = depth;
1145 n_pool = depth * n_trees;
1146 n_array = 1 << (depth + 1);
1148 tree_leaves.resize(n_trees);
1149 tree_leaves.shrink_to_fit();
1150 split_points.conservativeResize(n_array, n_trees);
1151 leaf_first_indices = leaf_first_indices_all[depth];
1154 Eigen::SparseMatrix<float, Eigen::RowMajor> srm_new(n_pool, dim);
1155 for (
int n_tree = 0; n_tree < n_trees; ++n_tree)
1156 srm_new.middleRows(n_tree * depth, depth) = sparse_random_matrix.middleRows(n_tree * depth_max, depth);
1157 sparse_random_matrix = srm_new;
1159 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> drm_new(n_pool, dim);
1160 for (
int n_tree = 0; n_tree < n_trees; ++n_tree)
1161 drm_new.middleRows(n_tree * depth, depth) = dense_random_matrix.middleRows(n_tree * depth_max, depth);
1162 dense_random_matrix = drm_new;
1165 index_type = autotuned;
1168 void count_elected(
const Eigen::VectorXf &q,
const Eigen::Map<Eigen::VectorXi> &exact,
int votes_max,
1169 std::vector<Eigen::MatrixXd> &recalls, std::vector<Eigen::MatrixXd> &cs_sizes)
const {
1170 Eigen::VectorXf projected_query(n_pool);
1172 projected_query.noalias() = sparse_random_matrix * q;
1174 projected_query.noalias() = dense_random_matrix * q;
1176 int depth_min = depth - recalls.size() + 1;
1177 std::vector<std::vector<int>> start_indices(n_trees);
1179 #pragma omp parallel for 1180 for (
int n_tree = 0; n_tree < n_trees; ++n_tree) {
1181 start_indices[n_tree] = std::vector<int>(depth - depth_min + 1);
1183 for (
int d = 0; d < depth; ++d) {
1184 const int j = n_tree * depth + d;
1185 const int idx_left = 2 * idx_tree + 1;
1186 const int idx_right = idx_left + 1;
1187 const float split_point = split_points(idx_tree, n_tree);
1188 if (projected_query(j) <= split_point) {
1189 idx_tree = idx_left;
1191 idx_tree = idx_right;
1193 if (d >= depth_min - 1)
1194 start_indices[n_tree][d - depth_min + 1] = idx_tree - (1 << (d + 1)) + 1;
1198 const int *exact_begin = exact.data();
1199 const int *exact_end = exact.data() + exact.size();
1201 for (
int depth_crnt = depth_min; depth_crnt <= depth; ++depth_crnt) {
1202 Eigen::VectorXi votes = Eigen::VectorXi::Zero(n_samples);
1203 const std::vector<int> &leaf_first_indices = leaf_first_indices_all[depth_crnt];
1205 Eigen::MatrixXd recall(votes_max, n_trees);
1206 Eigen::MatrixXd candidate_set_size(votes_max, n_trees);
1207 recall.col(0) = Eigen::VectorXd::Zero(votes_max);
1208 candidate_set_size.col(0) = Eigen::VectorXd::Zero(votes_max);
1211 for (
int n_tree = 0; n_tree < n_trees; ++n_tree) {
1212 std::vector<int> &found_leaves = start_indices[n_tree];
1215 recall.col(n_tree) = recall.col(n_tree - 1);
1216 candidate_set_size.col(n_tree) = candidate_set_size.col(n_tree - 1);
1219 int leaf_begin = leaf_first_indices[found_leaves[depth_crnt - depth_min]];
1220 int leaf_end = leaf_first_indices[found_leaves[depth_crnt - depth_min] + 1];
1222 const std::vector<int> &indices = tree_leaves[n_tree];
1223 for (
int i = leaf_begin; i < leaf_end; ++i) {
1224 int idx = indices[i];
1225 int v = ++votes(idx);
1226 if (v <= votes_max) {
1227 candidate_set_size(v - 1, n_tree)++;
1228 if (std::find(exact_begin, exact_end, idx) != exact_end)
1229 recall(v - 1, n_tree)++;
1234 recalls[depth_crnt - depth_min] = recall;
1235 cs_sizes[depth_crnt - depth_min] = candidate_set_size;
1248 static void build_sparse_random_matrix(Eigen::SparseMatrix<float, Eigen::RowMajor> &sparse_random_matrix,
1249 int n_row,
int n_col,
float density,
int seed = 0) {
1250 sparse_random_matrix = Eigen::SparseMatrix<float, Eigen::RowMajor>(n_row, n_col);
1252 std::random_device rd;
1253 int s = seed ? seed : rd();
1254 std::mt19937 gen(s);
1255 std::uniform_real_distribution<float> uni_dist(0, 1);
1256 std::normal_distribution<float> norm_dist(0, 1);
1258 std::vector<Eigen::Triplet<float>> triplets;
1259 for (
int j = 0; j < n_row; ++j) {
1260 for (
int i = 0; i < n_col; ++i) {
1261 if (uni_dist(gen) > density)
continue;
1262 triplets.push_back(Eigen::Triplet<float>(j, i, norm_dist(gen)));
1266 sparse_random_matrix.setFromTriplets(triplets.begin(), triplets.end());
1267 sparse_random_matrix.makeCompressed();
1274 static void build_dense_random_matrix(Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> &dense_random_matrix,
1275 int n_row,
int n_col,
int seed = 0) {
1276 dense_random_matrix = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>(n_row, n_col);
1278 std::random_device rd;
1279 int s = seed ? seed : rd();
1280 std::mt19937 gen(s);
1281 std::normal_distribution<float> normal_dist(0, 1);
1283 std::generate(dense_random_matrix.data(), dense_random_matrix.data() + n_row * n_col,
1284 [&normal_dist, &gen] {
return normal_dist(gen); });
1287 void compute_exact(
const Eigen::Map<const Eigen::MatrixXf> &Q, Eigen::MatrixXi &out_exact,
1288 const std::vector<int> &indices_test = {})
const {
1289 int n_test = Q.cols();
1291 Eigen::VectorXi idx(n_samples);
1292 std::iota(idx.data(), idx.data() + n_samples, 0);
1294 for (
int i = 0; i < n_test; ++i) {
1295 if(!indices_test.empty()) {
1296 std::remove(idx.data(), idx.data() + n_samples, indices_test[i]);
1298 exact_knn(Eigen::Map<const Eigen::VectorXf>(Q.data() + i * dim, dim), k, idx,
1299 (indices_test.empty() ? n_samples : n_samples - 1), out_exact.data() + i * k);
1300 std::sort(out_exact.data() + i * k, out_exact.data() + i * k + k);
1301 if(!indices_test.empty()) {
1302 idx[n_samples - 1] = indices_test[i];
1311 void vote(
const Eigen::VectorXf &projected_query,
int vote_threshold, Eigen::VectorXi &elected,
1312 int &n_elected,
int n_trees,
int depth_crnt) {
1313 std::vector<int> found_leaves(n_trees);
1314 const std::vector<int> &leaf_first_indices = leaf_first_indices_all[depth_crnt];
1316 #pragma omp parallel for 1317 for (
int n_tree = 0; n_tree < n_trees; ++n_tree) {
1319 for (
int d = 0; d < depth_crnt; ++d) {
1320 const int j = n_tree * depth + d;
1321 const int idx_left = 2 * idx_tree + 1;
1322 const int idx_right = idx_left + 1;
1323 const float split_point = split_points(idx_tree, n_tree);
1324 if (projected_query(j) <= split_point) {
1325 idx_tree = idx_left;
1327 idx_tree = idx_right;
1330 found_leaves[n_tree] = idx_tree - (1 << depth_crnt) + 1;
1333 int max_leaf_size = n_samples / (1 << depth_crnt) + 1;
1334 elected = Eigen::VectorXi(n_trees * max_leaf_size);
1335 Eigen::VectorXi votes = Eigen::VectorXi::Zero(n_samples);
1338 for (
int n_tree = 0; n_tree < n_trees; ++n_tree) {
1339 int leaf_begin = leaf_first_indices[found_leaves[n_tree]];
1340 int leaf_end = leaf_first_indices[found_leaves[n_tree] + 1];
1341 const std::vector<int> &indices = tree_leaves[n_tree];
1342 for (
int i = leaf_begin; i < leaf_end; ++i) {
1343 int idx = indices[i];
1344 if (++votes(idx) == vote_threshold)
1345 elected(n_elected++) = idx;
1350 std::pair<double,double> fit_projection_times(
const Eigen::Map<const Eigen::MatrixXf> &Q,
1351 std::vector<int> &exact_x) {
1352 std::vector<double> projection_times, projection_x;
1353 long double idx_sum = 0;
1355 std::vector<int> tested_trees {1,2,3,4,5,7,10,15,20,25,30,40,50};
1356 generate_x(tested_trees, n_trees, 10, n_trees);
1358 for (
int d = depth_min; d <= depth; ++d) {
1359 for (
int i = 0; i < (int) tested_trees.size(); ++i) {
1360 int t = tested_trees[i];
1361 int n_random_vectors = t * d;
1362 projection_x.push_back(n_random_vectors);
1363 Eigen::SparseMatrix<float, Eigen::RowMajor> sparse_mat;
1364 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> dense_mat;
1367 build_sparse_random_matrix(sparse_mat, n_random_vectors, dim, density);
1369 build_dense_random_matrix(dense_mat, n_random_vectors, dim);
1372 double start_proj = omp_get_wtime();
1373 Eigen::VectorXf projected_query(n_random_vectors);
1376 projected_query.noalias() = sparse_mat * Q.col(0);
1378 projected_query.noalias() = dense_mat * Q.col(0);
1381 double end_proj = omp_get_wtime();
1382 projection_times.push_back(end_proj - start_proj);
1383 idx_sum += projected_query.norm();
1385 int votes_index = votes_max < t ? votes_max : t;
1386 for (
int v = 1; v <= votes_index; ++v) {
1387 int cs_size = get_candidate_set_size(t, d, v);
1388 if (cs_size > 0) exact_x.push_back(cs_size);
1394 projection_x[0] += idx_sum > 1.0 ? 0.0000 : 0.0001;
1395 return fit_theil_sen(projection_x, projection_times);
1398 std::vector<std::map<int,std::pair<double,double>>> fit_voting_times(
const Eigen::Map<const Eigen::MatrixXf> &Q) {
1399 int n_test = Q.cols();
1401 std::random_device rd;
1402 std::mt19937 rng(rd());
1403 std::uniform_int_distribution<int> uni(0, n_test - 1);
1405 std::vector<int> tested_trees {1,2,3,4,5,7,10,15,20,25,30,40,50};
1406 generate_x(tested_trees, n_trees, 10, n_trees);
1407 std::vector<int> vote_thresholds_x {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
1408 generate_x(vote_thresholds_x, votes_max, 10, votes_max);
1410 beta_voting = std::vector<std::map<int,std::pair<double,double>>>();
1412 for (
int d = depth_min; d <= depth; ++d) {
1413 std::map<int,std::pair<double,double>> beta;
1414 for (
const auto &v : vote_thresholds_x) {
1415 long double idx_sum = 0;
1416 std::vector<double> voting_times, voting_x;
1418 for (
int i = 0; i < (int) tested_trees.size(); ++i) {
1419 int t = tested_trees[i];
1421 Eigen::VectorXi elected;
1424 Eigen::VectorXf projected_query(n_trees * depth);
1426 projected_query.noalias() = sparse_random_matrix * Q.col(ri);
1428 projected_query.noalias() = dense_random_matrix * Q.col(ri);
1431 double start_voting = omp_get_wtime();
1432 vote(projected_query, v, elected, n_el, t, d);
1433 double end_voting = omp_get_wtime();
1435 voting_times.push_back(end_voting - start_voting);
1436 voting_x.push_back(t);
1437 for (
int i = 0; i < n_el; ++i)
1438 idx_sum += elected(i);
1440 voting_x[0] += idx_sum > 1.0 ? 0.0 : 0.00001;
1441 beta[v] = fit_theil_sen(voting_x, voting_times);
1443 beta_voting.push_back(beta);
/**
 * Extends the grid `x` with evenly spaced values and then drops every entry
 * larger than `max_val`.
 *
 * The effective number of generated points is `n_tested` when
 * `max_generated > n_tested`, and `max_val` otherwise. Multiples of
 * `max_generated / n_tested` (integer division) are appended when they do
 * not exceed `max_generated` and are not already present in `x`.
 *
 * If the effective number of points is not positive, nothing is generated
 * (previously a value of zero caused an integer division by zero); the
 * filtering by `max_val` is still applied.
 *
 * @param x grid to extend and filter in place
 * @param max_generated largest value that may be generated
 * @param n_tested requested number of generated points
 * @param max_val entries of x above this value are erased
 */
static void generate_x(std::vector<int> &x, int max_generated, int n_tested, int max_val) {
  n_tested = max_generated > n_tested ? n_tested : max_val;

  // Guard the division below: with no points to generate there is no step.
  if (n_tested > 0) {
    const int increment = max_generated / n_tested;
    for (int i = 1; i <= n_tested; ++i) {
      const int candidate = i * increment;
      if (candidate <= max_generated &&
          std::find(x.begin(), x.end(), candidate) == x.end()) {
        x.push_back(candidate);
      }
    }
  }

  x.erase(std::remove_if(x.begin(), x.end(),
                         [max_val](int t) { return t > max_val; }),
          x.end());
}
// Estimates how the exact-search time depends on the candidate-set size:
// for every size in s_tested it times exact_knn on randomly drawn candidate
// sets and returns the (intercept, slope) pair that fit_theil_sen fits to
// (size, mean time).
// NOTE(review): this listing omits some source lines (the declaration of
// `ri` and the body of the loop over `l` are not visible), so the comments
// below describe only the statements that are shown.
1462 std::pair<double,double> fit_exact_times(
const Eigen::Map<const Eigen::MatrixXf> &Q) {
// Fixed grid of candidate-set sizes, extended by generate_x.
1463 std::vector<int> s_tested {1,2,5,10,20,35,50,75,100,150,200,300,400,500};
1464 generate_x(s_tested, n_samples / 20, 20, n_samples);
1466 int n_test = Q.cols();
1467 std::vector<double> exact_times;
1468 long double idx_sum = 0;
// Seeded from std::random_device, so the drawn candidate sets differ
// between calls.
1470 std::random_device rd;
1471 std::mt19937 rng(rd());
// uni draws an index into the columns of Q, uni2 an index into the data set.
1472 std::uniform_int_distribution<int> uni(0, n_test - 1);
1473 std::uniform_int_distribution<int> uni2(0, n_samples - 1);
// ex collects the tested sizes as doubles (the x values of the fit).
1475 std::vector<double> ex;
1477 for (
int i = 0; i < (int) s_tested.size(); ++i) {
1478 double mean_exact_time = 0;
1479 int s_size = s_tested[i];
1480 ex.push_back(s_size);
// n_sim timed repetitions per size.
1482 for (
int m = 0; m < n_sim; ++m) {
// Random candidate set of s_size data indices (duplicates are possible).
1484 Eigen::VectorXi elected(s_size);
1485 for (
int j = 0; j < elected.size(); ++j)
1486 elected(j) = uni2(rng);
// The timed region covers the allocation of res as well as exact_knn.
1488 double start_exact = omp_get_wtime();
1489 std::vector<int> res(k);
// The query is the dim-length vector starting at Q.data() + ri * dim;
// presumably ri is a column index drawn from `uni` -- confirm.
1490 exact_knn(Eigen::Map<const Eigen::VectorXf>(Q.data() + ri * dim, dim), k, elected, s_size, &res[0]);
1491 double end_exact = omp_get_wtime();
1492 mean_exact_time += (end_exact - start_exact);
// Body of this loop is not visible; presumably it folds res into idx_sum
// -- confirm.
1494 for (
int l = 0; l < k; ++l)
// Average over the n_sim repetitions.
1497 mean_exact_time /= n_sim;
1498 exact_times.push_back(mean_exact_time);
// Shifts ex[0] by 1e-5 unless idx_sum > 1.0. NOTE(review): looks like a way
// to keep idx_sum (and hence the exact_knn results) in use -- confirm.
1501 ex[0] += idx_sum > 1.0 ? 0.0 : 0.00001;
1502 return fit_theil_sen(ex, exact_times);
// Builds, for every depth d in [depth_min, depth], a votes_max x n_trees
// matrix of estimated query times, and declares the result set `pars`
// ordered by is_faster.
// NOTE(review): large parts of this function are missing from the listing;
// how `recalls` is used, what is inserted into `pars` and the return
// statement are not visible here.
1505 std::set<Mrpt_Parameters,decltype(is_faster)*> list_parameters(
const std::vector<Eigen::MatrixXd> &recalls) {
1506 std::set<Mrpt_Parameters,decltype(is_faster)*> pars(is_faster);
// One matrix per depth, stored at index d - depth_min.
1507 std::vector<Eigen::MatrixXd> query_times(depth - depth_min + 1);
1508 for (
int d = depth_min; d <= depth; ++d) {
// Entries that are never evaluated below stay zero.
1509 Eigen::MatrixXd query_time = Eigen::MatrixXd::Zero(votes_max, n_trees);
1511 for (
int t = 1; t <= n_trees; ++t) {
// Only vote thresholds v <= min(votes_max, t) are evaluated for t trees.
1512 int votes_index = votes_max < t ? votes_max : t;
1513 for (
int v = 1; v <= votes_index; ++v) {
// t and v are 1-based; the matrix is indexed 0-based as (v - 1, t - 1).
1514 double qt = get_query_time(t, d, v);
1515 query_time(v - 1, t - 1) = qt;
1527 query_times[d - depth_min] = query_time;
// Scans `pars` in the set's iteration order (the set is ordered by
// is_faster) while tracking the highest estimated_recall seen so far.
// opt_pars is reset to an empty set before the scan.
// NOTE(review): the remainder of the `if` body and the return statement are
// missing from this listing; presumably each parameter set that improves on
// best_recall is kept -- confirm.
1533 std::set<Mrpt_Parameters,decltype(is_faster)*> pareto_frontier(
const std::set<
Mrpt_Parameters,decltype(is_faster)*> &pars) {
1534 opt_pars = std::set<Mrpt_Parameters,decltype(is_faster)*>(is_faster);
// Start below any valid recall so the first element always passes the test.
1535 double best_recall = -1.0;
1536 for (
const auto &p : pars) {
1537 if (p.estimated_recall > best_recall) {
1539 best_recall = p.estimated_recall;
1546 void fit_times(
const Eigen::Map<const Eigen::MatrixXf> &Q) {
1547 std::vector<int> exact_x;
1548 beta_projection = fit_projection_times(Q, exact_x);
1549 beta_voting = fit_voting_times(Q);
1550 beta_exact = fit_exact_times(Q);
// Fits a line y = intercept + slope * x from pairwise slopes: the slope is
// the element at position n_slopes / 2 of the pairwise slopes and the
// intercept the element at position n / 2 of the residuals y[i] - slope * x[i]
// (both selected with std::nth_element). Returns (intercept, slope).
// NOTE(review): this listing omits the declaration of `n` (presumably
// x.size()) and the line between the inner `for` and the push_back
// (presumably a condition excluding i == j) -- confirm against the full file.
1553 static std::pair<double,double> fit_theil_sen(
const std::vector<double> &x,
1554 const std::vector<double> &y) {
1556 std::vector<double> slopes;
1557 for (
int i = 0; i < n; ++i) {
1558 for (
int j = 0; j < n; ++j) {
// Slope of the line through points i and j. The divisor is zero whenever
// x[j] == x[i]; nothing in the visible code guards against equal x values.
1560 slopes.push_back((y[j] - y[i]) / (x[j] - x[i]));
// Select the middle slope without fully sorting. If slopes is empty the
// dereference below reads the end iterator.
1564 int n_slopes = slopes.size();
1565 std::nth_element(slopes.begin(), slopes.begin() + n_slopes / 2, slopes.end());
1566 double slope = *(slopes.begin() + n_slopes / 2);
// Residuals of every point with respect to the chosen slope.
1568 std::vector<double> residuals(n);
1569 for (
int i = 0; i < n; ++i)
1570 residuals[i] = y[i] - slope * x[i];
// The middle residual is used as the intercept.
1572 std::nth_element(residuals.begin(), residuals.begin() + n / 2, residuals.end());
1573 double intercept = *(residuals.begin() + n / 2);
1575 return std::make_pair(intercept, slope);
// Writes the fields n_trees, depth, votes and k of *p to fd, in that order,
// each as one raw int. The return values of fwrite are not checked.
// NOTE(review): the enclosing function header is missing from this listing;
// presumably this is the body of write_parameters -- confirm.
1583 fwrite(&p->
n_trees,
sizeof(
int), 1, fd);
1584 fwrite(&p->
depth,
sizeof(
int), 1, fd);
1585 fwrite(&p->
votes,
sizeof(
int), 1, fd);
1586 fwrite(&p->
k,
sizeof(
int), 1, fd);
// Reads the fields n_trees, depth, votes and k of *p from fd, in that
// order, each as one raw int. The return values of fread are not checked,
// so a short read leaves the remaining fields unchanged.
// NOTE(review): the enclosing function header is missing from this listing;
// presumably this is the body of read_parameters -- confirm.
1592 fread(&p->
n_trees,
sizeof(
int), 1, fd);
1593 fread(&p->
depth,
sizeof(
int), 1, fd);
1594 fread(&p->
votes,
sizeof(
int), 1, fd);
1595 fread(&p->
k,
sizeof(
int), 1, fd);
// Serialises a parameter set list to fd: first the number of entries as one
// raw int, then every entry through write_parameters, in the set's
// iteration order.
// NOTE(review): the lines between the signature and the size computation
// are missing from this listing.
1600 void write_parameter_list(
const std::set<
Mrpt_Parameters,decltype(is_faster)*> &pars, FILE *fd)
const {
// The element count is narrowed from size_t to int before being written.
1605 int par_sz = pars.size();
1606 fwrite(&par_sz,
sizeof(
int), 1, fd);
// `const auto p` copies each element; the address of that copy is what
// write_parameters receives.
1608 for (
const auto p : pars)
1609 write_parameters(&p, fd);
// Reads a parameter list from fd: resets opt_pars to an empty set ordered
// by is_faster, reads the entry count as one raw int and then reads that
// many entries through read_parameters.
// NOTE(review): the declarations of `par_sz` and `p`, and whatever is done
// with each entry after it is read (presumably an insert into opt_pars),
// are missing from this listing -- confirm.
1612 void read_parameter_list(FILE *fd) {
1617 opt_pars = std::set<Mrpt_Parameters,decltype(is_faster)*>(is_faster);
// The return value of fread is not checked.
1619 fread(&par_sz,
sizeof(
int), 1, fd);
1621 for (
int i = 0; i < par_sz; ++i) {
1623 read_parameters(&p, fd);
// NOTE(review): the enclosing function header and the body of the loop are
// missing from this listing; only the visible statements are described.
// The target recall is lowered by epsilon before opt_pars is scanned.
1629 double tr = target_recall - epsilon;
1630 for (
const auto &p : opt_pars) {
// Fallback when opt_pars is not empty: return its last element in the
// set's ordering.
1636 if (!opt_pars.empty()) {
1637 return *(opt_pars.rbegin());
// Recursively computes the leaf sizes obtained by repeatedly halving n
// points down to level tree_depth, appending them to out_leaf_sizes. At
// each level the first child gets n - n / 2 points and the second n / 2, so
// sizes are appended depth-first with the first child before the second.
// NOTE(review): the statement that ends the base case is missing from this
// listing; presumably the function returns after the push_back -- confirm.
1648 static void count_leaf_sizes(
int n,
int level,
int tree_depth, std::vector<int> &out_leaf_sizes) {
// Base case: this node is a leaf holding n points.
1649 if (level == tree_depth) {
1650 out_leaf_sizes.push_back(n);
// When n is odd the first child receives the extra point.
1654 count_leaf_sizes(n - n / 2, level + 1, tree_depth, out_leaf_sizes);
1655 count_leaf_sizes(n / 2, level + 1, tree_depth, out_leaf_sizes);
1664 static void count_first_leaf_indices(std::vector<int> &indices,
int n,
int depth) {
1665 std::vector<int> leaf_sizes;
1666 count_leaf_sizes(n, 0, depth, leaf_sizes);
1668 indices = std::vector<int>(leaf_sizes.size() + 1);
1670 for (
int i = 0; i < (int) leaf_sizes.size(); ++i)
1671 indices[i + 1] = indices[i] + leaf_sizes[i];
1674 static void count_first_leaf_indices_all(std::vector<std::vector<int>> &indices,
int n,
int depth_max) {
1675 for (
int d = 0; d <= depth_max; ++d) {
1676 std::vector<int> idx;
1677 count_first_leaf_indices(idx, n, d);
1678 indices.push_back(idx);
/**
 * Evaluates a fitted line at x.
 *
 * @param x point at which to evaluate
 * @param beta pair whose first element is used as the intercept and whose
 * second element is used as the slope
 * @return beta.first + beta.second * x
 */
static double predict_theil_sen(double x, std::pair<double,double> beta) {
  const double intercept = beta.first;
  const double slope = beta.second;
  return intercept + slope * x;
}
1686 double get_candidate_set_size(
int tree,
int depth,
int v)
const {
1687 return cs_sizes[depth - depth_min](v - 1, tree - 1);
1690 double get_projection_time(
int n_trees,
int depth,
int v)
const {
1691 return predict_theil_sen(n_trees * depth, beta_projection);
// Predicts the voting time for n_trees trees from the fitted models stored
// for this depth in beta_voting[depth - depth_min], a map from int keys to
// fitted pairs.
// NOTE(review): the body of the first `if` and the condition inside the
// loop are missing from this listing; presumably the loop returns the
// prediction of the first model whose key is at least v -- confirm.
1694 double get_voting_time(
int n_trees,
int depth,
int v)
const {
1695 const std::map<int,std::pair<double,double>> &beta = beta_voting[depth - depth_min];
// Guard for a non-positive vote threshold or a depth without fitted models.
1697 if (v <= 0 || beta.empty()) {
// std::map iterates in ascending key order.
1701 for (
const auto &b : beta) {
1703 return predict_theil_sen(n_trees, b.second);
// Fallback: use the model stored under the largest key.
1707 return predict_theil_sen(n_trees, beta.rbegin()->second);
1710 double get_exact_time(
int n_trees,
int depth,
int v)
const {
1711 return predict_theil_sen(get_candidate_set_size(n_trees, depth, v), beta_exact);
1714 double get_query_time(
int tree,
int depth,
int v)
const {
1715 return get_projection_time(tree, depth, v)
1716 + get_voting_time(tree, depth, v)
1717 + get_exact_time(tree, depth, v);
1720 std::vector<int> sample_indices(
int n_test,
int seed = 0)
const {
1721 std::random_device rd;
1722 int s = seed ? seed : rd();
1723 std::mt19937 gen(s);
1725 std::vector<int> indices_data(n_samples);
1726 std::iota(indices_data.begin(), indices_data.end(), 0);
1727 std::shuffle(indices_data.begin(), indices_data.end(), gen);
1728 return std::vector<int>(indices_data.begin(), indices_data.begin() + n_test);
// Gathers the data columns listed in `indices` into a new dim x n_test
// matrix: column i of Q is a copy of column indices[i] of X.
// NOTE(review): the return statement is missing from this listing;
// presumably Q is returned -- confirm.
1731 Eigen::MatrixXf
subset(
const std::vector<int> &indices)
const {
1732 int n_test = indices.size();
1733 Eigen::MatrixXf Q = Eigen::MatrixXf(dim, n_test);
// indices[i] is not range-checked against the number of columns of X here.
1734 for(
int i = 0; i < n_test; ++i)
1735 Q.col(i) = X.col(indices[i]);
// Data members. NOTE(review): some member declarations are missing from
// this listing (e.g. dim, n_trees, depth and depth_min are used by the
// methods but not declared here).
// Non-owning view of the caller's data; the constructors map it as a
// dim x n_samples matrix, one data point per column.
1741 const Eigen::Map<const Eigen::MatrixXf> X;
// Allocated in grow() as an n_array x n_trees matrix, one column per tree.
1742 Eigen::MatrixXf split_points;
// One vector per tree; grow() initialises each to 0..n_samples-1 and hands
// it to grow_subtree.
1743 std::vector<std::vector<int>> tree_leaves;
// Random projection matrices; grow() builds the sparse one when
// density < 1 and the dense one otherwise. Each tree uses `depth`
// consecutive rows.
1744 Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> dense_random_matrix;
1745 Eigen::SparseMatrix<float, Eigen::RowMajor> sparse_random_matrix;
// Cumulative leaf offsets per depth, filled by count_first_leaf_indices_all;
// leaf_first_indices is the entry for the grown depth.
1746 std::vector<std::vector<int>> leaf_first_indices_all;
1747 std::vector<int> leaf_first_indices;
// Number of data points (columns of X).
1749 const int n_samples;
// Default is -1.0; NOTE(review): grow() appears to replace it with
// 1 / sqrt(dim) in some case -- confirm the condition.
1754 float density = -1.0;
1759 enum itype {normal, autotuned, autotuned_unpruned};
1760 itype index_type = normal;
// Tolerance subtracted from the target recall before opt_pars is scanned.
1765 const double epsilon = 0.0001;
// Candidate set sizes, indexed as cs_sizes[depth - depth_min](v - 1, tree - 1).
1766 std::vector<Eigen::MatrixXd> cs_sizes;
// Fitted timing models as (intercept, slope) pairs from fit_theil_sen.
1767 std::pair<double,double> beta_projection, beta_exact;
// Voting-time models: one map per depth (index depth - depth_min), keyed by
// an int and holding (intercept, slope) pairs.
1768 std::vector<std::map<int,std::pair<double,double>>> beta_voting;
// Parameter sets ordered by is_faster.
1769 std::set<Mrpt_Parameters,decltype(is_faster)*> opt_pars;
1772 #endif // CPP_MRPT_H_ void query(const Eigen::Ref< const Eigen::VectorXf > &q, int k, int vote_threshold, int *out, float *out_distances=nullptr, int *out_n_elected=nullptr) const
Definition: Mrpt.h:740
void query(const float *q, int *out, float *out_distances=nullptr, int *out_n_elected=nullptr) const
Definition: Mrpt.h:767
bool is_autotuned() const
Definition: Mrpt.h:298
void query(const float *data, int k, int vote_threshold, int *out, float *out_distances=nullptr, int *out_n_elected=nullptr) const
Definition: Mrpt.h:661
int votes
Definition: Mrpt.h:23
void grow(const float *data, int n_test, int k_, int trees_max=-1, int depth_max=-1, int depth_min_=-1, int votes_max_=-1, float density_=-1.0, int seed=0, const std::vector< int > &indices_test={})
Definition: Mrpt.h:344
Mrpt_Parameters parameters() const
Definition: Mrpt.h:281
std::vector< Mrpt_Parameters > optimal_parameters() const
Definition: Mrpt.h:625
bool load(const char *path)
Definition: Mrpt.h:958
int depth
Definition: Mrpt.h:21
double estimated_recall
Definition: Mrpt.h:25
static void exact_knn(const float *q_data, const float *X_data, int dim, int n_samples, int k, int *out, float *out_distances=nullptr)
Definition: Mrpt.h:814
void exact_knn(const float *q, int k, int *out, float *out_distances=nullptr) const
Definition: Mrpt.h:874
void grow_autotune(double target_recall, int k_, int trees_max=-1, int depth_max=-1, int depth_min_=-1, int votes_max_=-1, float density_=-1.0, int seed=0, int n_test=100)
Definition: Mrpt.h:256
Mrpt(const float *X_, int dim_, int n_samples_)
Definition: Mrpt.h:60
int n_trees
Definition: Mrpt.h:20
static void exact_knn(const Eigen::Ref< const Eigen::VectorXf > &q, const Eigen::Ref< const Eigen::MatrixXf > &X, int k, int *out, float *out_distances=nullptr)
Definition: Mrpt.h:862
void grow(double target_recall, const float *Q, int n_test, int k_, int trees_max=-1, int depth_max=-1, int depth_min_=-1, int votes_max_=-1, float density=-1.0, int seed=0, const std::vector< int > &indices_test={})
Definition: Mrpt.h:218
void query(const Eigen::Ref< const Eigen::VectorXf > &q, int *out, float *out_distances=nullptr, int *out_n_elected=nullptr) const
Definition: Mrpt.h:788
Mrpt * subset_pointer(double target_recall) const
Definition: Mrpt.h:577
void grow(int n_trees_, int depth_, float density_=-1.0, int seed=0)
Definition: Mrpt.h:84
bool save(const char *path) const
Definition: Mrpt.h:905
void grow(const Eigen::Ref< const Eigen::MatrixXf > &Q, int k_, int trees_max=-1, int depth_max=-1, int depth_min_=-1, int votes_max_=-1, float density_=-1.0, int seed=0)
Definition: Mrpt.h:468
void exact_knn(const Eigen::Ref< const Eigen::VectorXf > &q, int k, int *out, float *out_distances=nullptr) const
Definition: Mrpt.h:884
bool empty() const
Definition: Mrpt.h:1029
int k
Definition: Mrpt.h:22
void grow(double target_recall, const Eigen::Ref< const Eigen::MatrixXf > &Q, int k_, int trees_max=-1, int depth_max=-1, int depth_min_=-1, int votes_max_=-1, float density=-1.0, int seed=0)
Definition: Mrpt.h:178
Mrpt(const Eigen::Ref< const Eigen::MatrixXf > &X_)
Definition: Mrpt.h:49
Mrpt subset(double target_recall) const
Definition: Mrpt.h:527
double estimated_qtime
Definition: Mrpt.h:24
void grow_autotune(int k_, int trees_max=-1, int depth_max=-1, int depth_min_=-1, int votes_max_=-1, float density_=-1.0, int seed=0, int n_test=100)
Definition: Mrpt.h:503