--- /dev/null
+/***********************************************************************
+ * Software License Agreement (BSD License)
+ *
+ * Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
+ * Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
+ *
+ * THE BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *************************************************************************/
+
+#ifndef KDTREE_H
+#define KDTREE_H
+
+#include <algorithm>
+#include <map>
+#include <cassert>
+
+#include "heap.h"
+#include "common.h"
+#include "constants.h"
+#include "allocator.h"
+#include "matrix.h"
+#include "result_set.h"
+#include "random.h"
+#include "nn_index.h"
+#include "saving.h"
+
+using namespace std;
+
+
+namespace flann
+{
+
+
+/**
+ * Randomized kd-tree index
+ *
+ * Contains the k-d trees and other information for indexing a set of points
+ * for nearest-neighbor matching.
+ */
+class KDTreeIndex : public NNIndex
+{
+
+ enum {
+ /**
+ * To improve efficiency, only SAMPLE_MEAN random values are used to
+ * compute the mean and variance at each level when building a tree.
+ * A value of 100 seems to perform as well as using all values.
+ */
+ SAMPLE_MEAN = 100,
+ /**
+ * Top random dimensions to consider
+ *
+ * When creating random trees, the dimension on which to subdivide is
+ * selected at random from among the top RAND_DIM dimensions with the
+ * highest variance. A value of 5 works well.
+ */
+ RAND_DIM=5
+ };
+
+
+ /**
+ * Number of randomized trees that are used
+ */
+ int numTrees;
+
+ /**
+ * Array of indices to vectors in the dataset. When doing lookup,
+ * this is used instead to mark checkID.
+ */
+ int* vind;
+
+ /**
+ * An unique ID for each lookup.
+ */
+ int checkID;
+
+ /**
+ * The dataset used by this index
+ */
+ const Matrix<float> dataset;
+
+ int size_;
+ int veclen_;
+
+
+ float* mean;
+ float* var;
+
+
+ /*--------------------- Internal Data Structures --------------------------*/
+
+ /**
+ * A node of the binary k-d tree.
+ *
+ * This is All nodes that have vec[divfeat] < divval are placed in the
+ * child1 subtree, else child2., A leaf node is indicated if both children are NULL.
+ */
+ struct TreeSt {
+ /**
+ * Index of the vector feature used for subdivision.
+ * If this is a leaf node (both children are NULL) then
+ * this holds vector index for this leaf.
+ */
+ int divfeat;
+ /**
+ * The value used for subdivision.
+ */
+ float divval;
+ /**
+ * The child nodes.
+ */
+ TreeSt *child1, *child2;
+ };
+ typedef TreeSt* Tree;
+
+ /**
+ * Array of k-d trees used to find neighbors.
+ */
+ Tree* trees;
+ typedef BranchStruct<Tree> BranchSt;
+ typedef BranchSt* Branch;
+ /**
+ * Priority queue storing intermediate branches in the best-bin-first search
+ */
+ Heap<BranchSt>* heap;
+
+
+ /**
+ * Pooled memory allocator.
+ *
+ * Using a pooled memory allocator is more efficient
+ * than allocating memory directly when there is a large
+ * number small of memory allocations.
+ */
+ PooledAllocator pool;
+
+
+
+public:
+
+ flann_algorithm_t getType() const
+ {
+ return KDTREE;
+ }
+
+ /**
+ * KDTree constructor
+ *
+ * Params:
+ * inputData = dataset with the input features
+ * params = parameters passed to the kdtree algorithm
+ */
+ KDTreeIndex(const Matrix<float>& inputData, const KDTreeIndexParams& params = KDTreeIndexParams() ) : dataset(inputData)
+ {
+ size_ = dataset.rows;
+ veclen_ = dataset.cols;
+
+ numTrees = params.trees;
+ trees = new Tree[numTrees];
+
+ // get the parameters
+// if (params.find("trees") != params.end()) {
+// numTrees = (int)params["trees"];
+// trees = new Tree[numTrees];
+// }
+// else {
+// numTrees = -1;
+// trees = NULL;
+// }
+ heap = new Heap<BranchSt>(size_);
+ checkID = -1000;
+
+ // Create a permutable array of indices to the input vectors.
+ vind = new int[size_];
+ for (int i = 0; i < size_; i++) {
+ vind[i] = i;
+ }
+
+ mean = new float[veclen_];
+ var = new float[veclen_];
+ }
+
+ /**
+ * Standard destructor
+ */
+ ~KDTreeIndex()
+ {
+ delete[] vind;
+ if (trees!=NULL) {
+ delete[] trees;
+ }
+ delete heap;
+ delete[] mean;
+ delete[] var;
+ }
+
+
+ /**
+ * Builds the index
+ */
+ void buildIndex()
+ {
+ /* Construct the randomized trees. */
+ for (int i = 0; i < numTrees; i++) {
+ /* Randomize the order of vectors to allow for unbiased sampling. */
+ for (int j = size_; j > 0; --j) {
+// int rand = cast(int) (drand48() * size);
+ int rnd = rand_int(j);
+ assert(rnd >=0 && rnd < size_);
+ swap(vind[j-1], vind[rnd]);
+ }
+ trees[i] = NULL;
+ divideTree(&trees[i], 0, size_ - 1);
+ }
+ }
+
+
+
+ void saveIndex(FILE* stream)
+ {
+ save_header(stream, *this);
+ save_value(stream, numTrees);
+ for (int i=0;i<numTrees;++i) {
+ save_tree(stream, trees[i]);
+ }
+ }
+
+
+
+ void loadIndex(FILE* stream)
+ {
+ IndexHeader header = load_header(stream);
+
+ if (header.rows!=size() || header.cols!=veclen()) {
+ throw FLANNException("The index saved belongs to a different dataset");
+ }
+ load_value(stream, numTrees);
+
+ if (trees!=NULL) {
+ delete[] trees;
+ }
+ trees = new Tree[numTrees];
+ for (int i=0;i<numTrees;++i) {
+ load_tree(stream,trees[i]);
+ }
+ }
+
+
+
+ /**
+ * Returns size of index.
+ */
+ int size() const
+ {
+ return size_;
+ }
+
+ /**
+ * Returns the length of an index feature.
+ */
+ int veclen() const
+ {
+ return veclen_;
+ }
+
+
+ /**
+ * Computes the inde memory usage
+ * Returns: memory used by the index
+ */
+ int usedMemory() const
+ {
+ return pool.usedMemory+pool.wastedMemory+dataset.rows*sizeof(int); // pool memory and vind array memory
+ }
+
+
+ /**
+ * Find set of nearest neighbors to vec. Their indices are stored inside
+ * the result object.
+ *
+ * Params:
+ * result = the result object in which the indices of the nearest-neighbors are stored
+ * vec = the vector for which to search the nearest neighbors
+ * maxCheck = the maximum number of restarts (in a best-bin-first manner)
+ */
+ void findNeighbors(ResultSet& result, const float* vec, const SearchParams& searchParams)
+ {
+ int maxChecks = searchParams.checks;
+
+ if (maxChecks<0) {
+ getExactNeighbors(result, vec);
+ } else {
+ getNeighbors(result, vec, maxChecks);
+ }
+ }
+
+
+ void continueSearch(ResultSet& result, float* vec, int maxCheck)
+ {
+ BranchSt branch;
+
+ int checkCount = 0;
+
+ /* Keep searching other branches from heap until finished. */
+ while ( heap->popMin(branch) && (checkCount < maxCheck || !result.full() )) {
+ searchLevel(result, vec, branch.node,branch.mindistsq, checkCount, maxCheck);
+ }
+
+ assert(result.full());
+ }
+
+
+// Params estimateSearchParams(float precision, Dataset<float>* testset = NULL)
+// {
+// Params params;
+//
+// return params;
+// }
+
+
+private:
+
+
+ void save_tree(FILE* stream, Tree tree)
+ {
+ save_value(stream, *tree);
+ if (tree->child1!=NULL) {
+ save_tree(stream, tree->child1);
+ }
+ if (tree->child2!=NULL) {
+ save_tree(stream, tree->child2);
+ }
+ }
+
+
+ void load_tree(FILE* stream, Tree& tree)
+ {
+ tree = pool.allocate<TreeSt>();
+ load_value(stream, *tree);
+ if (tree->child1!=NULL) {
+ load_tree(stream, tree->child1);
+ }
+ if (tree->child2!=NULL) {
+ load_tree(stream, tree->child2);
+ }
+ }
+
+
+ /**
+ * Create a tree node that subdivides the list of vecs from vind[first]
+ * to vind[last]. The routine is called recursively on each sublist.
+ * Place a pointer to this new tree node in the location pTree.
+ *
+ * Params: pTree = the new node to create
+ * first = index of the first vector
+ * last = index of the last vector
+ */
+ void divideTree(Tree* pTree, int first, int last)
+ {
+ Tree node;
+
+ node = pool.allocate<TreeSt>(); // allocate memory
+ *pTree = node;
+
+ /* If only one exemplar remains, then make this a leaf node. */
+ if (first == last) {
+ node->child1 = node->child2 = NULL; /* Mark as leaf node. */
+ node->divfeat = vind[first]; /* Store index of this vec. */
+ } else {
+ chooseDivision(node, first, last);
+ subdivide(node, first, last);
+ }
+ }
+
+
+
+
+ /**
+ * Choose which feature to use in order to subdivide this set of vectors.
+ * Make a random choice among those with the highest variance, and use
+ * its variance as the threshold value.
+ */
+ void chooseDivision(Tree node, int first, int last)
+ {
+ memset(mean,0,veclen_*sizeof(float));
+ memset(var,0,veclen_*sizeof(float));
+
+ /* Compute mean values. Only the first SAMPLE_MEAN values need to be
+ sampled to get a good estimate.
+ */
+ int end = min(first + SAMPLE_MEAN, last);
+ int count = end - first + 1;
+ for (int j = first; j <= end; ++j) {
+ float* v = dataset[vind[j]];
+ for (int k=0; k<veclen_; ++k) {
+ mean[k] += v[k];
+ }
+ }
+ for (int k=0; k<veclen_; ++k) {
+ mean[k] /= count;
+ }
+
+ /* Compute variances (no need to divide by count). */
+ for (int j = first; j <= end; ++j) {
+ float* v = dataset[vind[j]];
+ for (int k=0; k<veclen_; ++k) {
+ float dist = v[k] - mean[k];
+ var[k] += dist * dist;
+ }
+ }
+ /* Select one of the highest variance indices at random. */
+ node->divfeat = selectDivision(var);
+ node->divval = mean[node->divfeat];
+
+ }
+
+
+ /**
+ * Select the top RAND_DIM largest values from v and return the index of
+ * one of these selected at random.
+ */
+ int selectDivision(float* v)
+ {
+ int num = 0;
+ int topind[RAND_DIM];
+
+ /* Create a list of the indices of the top RAND_DIM values. */
+ for (int i = 0; i < veclen_; ++i) {
+ if (num < RAND_DIM || v[i] > v[topind[num-1]]) {
+ /* Put this element at end of topind. */
+ if (num < RAND_DIM) {
+ topind[num++] = i; /* Add to list. */
+ }
+ else {
+ topind[num-1] = i; /* Replace last element. */
+ }
+ /* Bubble end value down to right location by repeated swapping. */
+ int j = num - 1;
+ while (j > 0 && v[topind[j]] > v[topind[j-1]]) {
+ swap(topind[j], topind[j-1]);
+ --j;
+ }
+ }
+ }
+ /* Select a random integer in range [0,num-1], and return that index. */
+// int rand = cast(int) (drand48() * num);
+ int rnd = rand_int(num);
+ assert(rnd >=0 && rnd < num);
+ return topind[rnd];
+ }
+
+
+ /**
+ * Subdivide the list of exemplars using the feature and division
+ * value given in this node. Call divideTree recursively on each list.
+ */
+ void subdivide(Tree node, int first, int last)
+ {
+ /* Move vector indices for left subtree to front of list. */
+ int i = first;
+ int j = last;
+ while (i <= j) {
+ int ind = vind[i];
+ float val = dataset[ind][node->divfeat];
+ if (val < node->divval) {
+ ++i;
+ } else {
+ /* Move to end of list by swapping vind i and j. */
+ swap(vind[i], vind[j]);
+ --j;
+ }
+ }
+ /* If either list is empty, it means we have hit the unlikely case
+ in which all remaining features are identical. Split in the middle
+ to maintain a balanced tree.
+ */
+ if ( (i == first) || (i == last+1)) {
+ i = (first+last+1)/2;
+ }
+
+ divideTree(& node->child1, first, i - 1);
+ divideTree(& node->child2, i, last);
+ }
+
+
+
+ /**
+ * Performs an exact nearest neighbor search. The exact search performs a full
+ * traversal of the tree.
+ */
+ void getExactNeighbors(ResultSet& result, const float* vec)
+ {
+ checkID -= 1; /* Set a different unique ID for each search. */
+
+ if (numTrees > 1) {
+ fprintf(stderr,"It doesn't make any sense to use more than one tree for exact search");
+ }
+ if (numTrees>0) {
+ searchLevelExact(result, vec, trees[0], 0.0);
+ }
+ assert(result.full());
+ }
+
+ /**
+ * Performs the approximate nearest-neighbor search. The search is approximate
+ * because the tree traversal is abandoned after a given number of descends in
+ * the tree.
+ */
+ void getNeighbors(ResultSet& result, const float* vec, int maxCheck)
+ {
+ int i;
+ BranchSt branch;
+
+ int checkCount = 0;
+ heap->clear();
+ checkID -= 1; /* Set a different unique ID for each search. */
+
+ /* Search once through each tree down to root. */
+ for (i = 0; i < numTrees; ++i) {
+ searchLevel(result, vec, trees[i], 0.0, checkCount, maxCheck);
+ }
+
+ /* Keep searching other branches from heap until finished. */
+ while ( heap->popMin(branch) && (checkCount < maxCheck || !result.full() )) {
+ searchLevel(result, vec, branch.node,branch.mindistsq, checkCount, maxCheck);
+ }
+
+ assert(result.full());
+ }
+
+
+ /**
+ * Search starting from a given node of the tree. Based on any mismatches at
+ * higher levels, all exemplars below this level must have a distance of
+ * at least "mindistsq".
+ */
+ void searchLevel(ResultSet& result, const float* vec, Tree node, float mindistsq, int& checkCount, int maxCheck)
+ {
+ if (result.worstDist()<mindistsq) {
+// printf("Ignoring branch, too far\n");
+ return;
+ }
+
+ float val, diff;
+ Tree bestChild, otherChild;
+
+ /* If this is a leaf node, then do check and return. */
+ if (node->child1 == NULL && node->child2 == NULL) {
+
+ /* Do not check same node more than once when searching multiple trees.
+ Once a vector is checked, we set its location in vind to the
+ current checkID.
+ */
+ if (vind[node->divfeat] == checkID || checkCount>=maxCheck) {
+ if (result.full()) return;
+ }
+ checkCount++;
+ vind[node->divfeat] = checkID;
+
+ result.addPoint(dataset[node->divfeat],node->divfeat);
+ return;
+ }
+
+ /* Which child branch should be taken first? */
+ val = vec[node->divfeat];
+ diff = val - node->divval;
+ bestChild = (diff < 0) ? node->child1 : node->child2;
+ otherChild = (diff < 0) ? node->child2 : node->child1;
+
+ /* Create a branch record for the branch not taken. Add distance
+ of this feature boundary (we don't attempt to correct for any
+ use of this feature in a parent node, which is unlikely to
+ happen and would have only a small effect). Don't bother
+ adding more branches to heap after halfway point, as cost of
+ adding exceeds their value.
+ */
+
+ float new_distsq = flann_dist(&val, &val+1, &node->divval, mindistsq);
+// if (2 * checkCount < maxCheck || !result.full()) {
+ if (new_distsq < result.worstDist() || !result.full()) {
+ heap->insert( BranchSt::make_branch(otherChild, new_distsq) );
+ }
+
+ /* Call recursively to search next level down. */
+ searchLevel(result, vec, bestChild, mindistsq, checkCount, maxCheck);
+ }
+
+ /**
+ * Performs an exact search in the tree starting from a node.
+ */
+ void searchLevelExact(ResultSet& result, const float* vec, Tree node, float mindistsq)
+ {
+ if (mindistsq>result.worstDist()) {
+ return;
+ }
+
+ float val, diff;
+ Tree bestChild, otherChild;
+
+ /* If this is a leaf node, then do check and return. */
+ if (node->child1 == NULL && node->child2 == NULL) {
+
+ /* Do not check same node more than once when searching multiple trees.
+ Once a vector is checked, we set its location in vind to the
+ current checkID.
+ */
+ if (vind[node->divfeat] == checkID)
+ return;
+ vind[node->divfeat] = checkID;
+
+ result.addPoint(dataset[node->divfeat],node->divfeat);
+ return;
+ }
+
+ /* Which child branch should be taken first? */
+ val = vec[node->divfeat];
+ diff = val - node->divval;
+ bestChild = (diff < 0) ? node->child1 : node->child2;
+ otherChild = (diff < 0) ? node->child2 : node->child1;
+
+
+ /* Call recursively to search next level down. */
+ searchLevelExact(result, vec, bestChild, mindistsq);
+ float new_distsq = flann_dist(&val, &val+1, &node->divval, mindistsq);
+ searchLevelExact(result, vec, otherChild, new_distsq);
+ }
+
+}; // class KDTree
+
+}
+
+#endif //KDTREE_H