k-d tree(k-dimensional tree)

k-d tree(k-dimensional tree)는 k-차원의 공간 내에서 점들을 구성하기 위한 공간 분할 자료구조로 binary tree의 특수한 경우입니다. K-d tree는 다차원의 탐색 키를 사용하는 탐색 알고리즘에 유용하게 사용됩니다. 예를 들자면, SIFT나 SURF로 추출한 특징점들은 높은 차원의 벡터로 표시되는데 이 특징점들을 DB에 저장된 특징점들과 비교하여 유사도가 가장 높은 것을 찾기위해 사용됩니다.

Insertion

k-d tree에 노드를 삽입하는 방법은 binary search tree와 유사하게 동작합니다. binary search tree의 insertion을 참고하면 이해가 쉽습니다.

Search

k-d tree에서 노드를 검색하는 방법은 주로 두 가지 방법을 사용합니다. 첫번째가 range search 방법으로 찾고자 하는 키 값의 범위를 정하고 이 범위에 포함되는 노드를 찾게 됩니다. 이 탐색 방법도 binary search tree에서 탐색 방법과 유사합니다. 두번째는 nearest neighbour search 방법으로 주어진 키 값에 가장 근접한 노드를 찾는 것으로 구현이 좀 까다롭습니다. 여기서 사용한 방법은 hyper cube를 사용하여 노드의 양쪽으로 공간을 분할해 가면서 찾는 방법입니다.

Deletion

노드를 tree에서 직접 삭제하는 것은 좀 까다롭습니다. 좀 쉬운 대안으로 노드에 삭제 플래그를 만들고 노드가 삭제될 때 삭제 플래그를 true로 만드는 방법이 있을 것입니다. 여기서는 노드의 삭제는 구현되어있지 않습니다.

k-d tree에서 중요한 이슈로 밸런싱을 유지하는 것이 될 것입니다. 본 예제 프로그램에서는 밸런싱은 구현되어있지 않습니다.

kd_tree.h 파일

#pragma once

#include <math.h>
#include <list>

using namespace std;

// k-D tree 클래스
// T - k-D tree 노드 위치의 데이터 형
// DIM - k-D tree 노드 위치의 차원(데이터 수)
template <class T, int DIM>
class CKdTree {
public:
    // 노드의 위치를 포함하는 하이퍼 큐브 구조체
    struct sHyperCube
    {
        // 하이퍼 큐브의 범위(low, high)
        T _low[DIM], _high[DIM];

        sHyperCube (const T low[DIM], const T high[DIM])
        {
            // 초기화
            memcpy (_low,  low,  DIM*sizeof(T));
            memcpy (_high, high, DIM*sizeof(T));
        }

        void extend (const T pos[DIM])
        {
            // 노드가 tree에 추가될 때, 이 노드의 위치를 하이퍼 큐브가 포함하도록 범위를 조절한다.
            // 즉, 노드의 위치가 하이퍼 큐브 바깥에 있다면 하이퍼 큐브를 노드의 위치까지 확장한다.
            for (int i=0; i<DIM; i++) {
                if (pos[i] < _low[i]) _low[i] = pos[i];
                if (pos[i] > _high[i]) _high[i] = pos[i];
            }
        }

        T distance (const T pos[DIM])
        {
            // 하이퍼 큐브의 표면에서 노드의 위치(pos)까지 떨어진 거리를 계산한다.
            // 만일 노드가 하이퍼 큐브 내에 존재한다면 거리는 0이다.
            T d = T(0);

            for (int i=0; i<DIM; i++) {
                if (pos[i] < _low[i]) {
                    T dx = _low[i] - pos[i];
                    d += dx*dx;
                }
                else if (pos[i] > _high[i]) {
                    T dx = _high[i] - pos[i];
                    d += dx*dx;
                }
            }
            return sqrt(d);
        }
    };

    struct sKdNode
    {
        T _pos[DIM];        // 노드의 위치
        void *_data;        // 노드가 가지는 추가적인 데이터
        sKdNode *_left;     // 노드의 왼쪽(작은 값)
        sKdNode *_right;    // 노드의 오른쪽(큰 값)

        sKdNode (const T pos[DIM], void *data)
            : _data(data), _left(NULL), _right(NULL)
        {
            memcpy (_pos, pos, DIM*sizeof (T));
        }

        ~sKdNode ()
        {
            if (_left)
                delete _left;
            if (_right) delete _right;
        }
    };

public:
    // k-차원의 tree를 생성한다.
    CKdTree () : _root(NULL), _cube(NULL) { }

    ~CKdTree ()
    {
        if (_root) delete _root;
        if (_cube) delete _cube;
    }

    // k-D tree에 하나의 노드를 추가한다.
    void insert (const T pos[DIM], void *data)
    {
        insert_i (_root, pos, data, 0);

        // 하이퍼 큐브가 만들어지지 않았다면 새로 만들고
        // 이미 만들어져 있다면 새로운 점을 포함하도록 확장한다.
        if (!_cube) _cube = new sHyperCube (pos, pos);
        else        _cube->extend (pos);
    }

    // 찾고자 하는 위치(pos)에서 가장 가까운(nearest neighbour) 하나의 노드를 찾는다.
    sKdNode *nn_search (const T pos[DIM])
    {
        if (!_cube || !_root) return NULL;

        // 하이퍼 큐브 복사, 임시로 사용할 큐브를 만든다.
        sHyperCube tmp (_cube->_low, _cube->_high);

        // 가장 가까운 노드를 저장할 변수, 우선 root를 설정해 둔다.
        _res_min_node = _root;
        _res_min_dist = distance (_root->_pos, pos);

        // _root 노드부터 NN(nearest neighbour)를 반복적으로 찾는다.
        nn_search_i (_root, pos, &tmp, 0);

        return _res_min_node;
    }

    // low와 high 범위 안에 포함되는 모든 점을 찾는다.
    list<sKdNode *> *range_search (const T low[DIM], const T high[DIM])
    {
        _res_node.clear ();

        range_search_i (_root, low, high, 0);

        return &_res_node;
    }

private:
    inline T distance (const T pos1[DIM], const T pos2[DIM])
    {
        // 두 점(pos1, pos2)간의 유클리디안 거리를 계산한다.
        T d = T(0);

        for (int i=0; i<DIM; i++) {
            T dx = pos1[i] - pos2[i];
            d += dx*dx;
        }
        return sqrt(d);
    }

    inline bool compare (const T low[DIM], const T high[DIM], const T pos[DIM])
    {
        // 위치(pos)가 low, high 영역에 들어오는지 검사한다.
        // 영역에 들어오면 true를 리턴한다.
        for (int i=0; i<DIM; ++i) {
            if (pos[i] < low[i] || high[i] < pos[i]) {
                return false;
            }
        }
        return true;
    }

    void insert_i (sKdNode *&node, const T pos[DIM], void *data, int dir)
    {
        // 하나의 노드를 tree에 추가하는 실제 구현,
        // 방법은 이진 트리와 유사하다.
        if (!node) {
            node = new sKdNode(pos, data);
        }
        else {
            if (pos[dir] < node->_pos[dir]) {
                insert_i (node->_left, pos, data, (dir + 1) % DIM);
            }
            else {
                insert_i (node->_right, pos, data, (dir + 1) % DIM);
            }
        }
    }

    void nn_search_i (sKdNode *node, const T pos[DIM], sHyperCube *_cube, int dir)
    {
        if (!node) return;

        // tree내의 노드의 위치(node->pos)와 찾는 위치(pos)를 비교하여 노드의 어느 쪽을 먼저 탐색할 지를 결정한다.
        T dp = pos[dir] - node->_pos[dir];
        if (dp <= 0) {
            // 찾는 위치(pos)가 노드의 왼쪽에 있기때문에 왼쪽 노드를 먼저 탐색한다.

            // 하이퍼 큐브를 node->_pos[dir]위치를 기준으로 반으로 나누어
            // 나누어진 왼쪽 하이퍼 큐브로 노드의 왼쪽을 탐색해 나간다.
            T tmp = _cube->_high[dir];
            _cube->_high[dir] = node->_pos[dir];
            nn_search_i (node->_left, pos, _cube, (dir + 1) % DIM);
            _cube->_high[dir] = tmp;
        }
        else {
            // 하이퍼 큐브를 node->_pos[dir]위치를 기준으로 반으로 나누어
            // 나누어진 오른쪽 하이퍼 큐브로 노드의 오른을 탐색해 나간다.
            T tmp = _cube->_low[dir];
            _cube->_low[dir] = node->_pos[dir];
            nn_search_i (node->_right, pos, _cube, (dir + 1) % DIM);
            _cube->_low[dir] = tmp;
        }

        // 현재 노드의 위치와 찾는 위치(pos)간의 거리를 계산하여 이 값을 지금까지의 최소 거리 값과 비교한다.
        // 만일, 더 가깝다면 최소 거리와 노드의 포인터를 업데이트 한다.
        T d = distance (node->_pos, pos);
        if (d < _res_min_dist) {
            _res_min_node = node;
            _res_min_dist = d;
        }

        if (dp <= 0) {
            // dp <= 0인 경우, 위에서 왼쪽 노드를 먼저 탐색했기때문에 이제 오른쪽 노드도 탐색한다.

            T tmp = _cube->_low[dir];
            _cube->_low[dir] = node->_pos[dir];

            // 찾는 위치(pos)와 하이퍼 큐브간의 떨어진 거리를 계산해서 지금까지 찾은 최소 거리보다 길다면,
            // 이 노드의 아래에 있는 tree는 더이상 탐색하지 않아도 된다.
            if (_cube->distance (pos) < _res_min_dist) {
                nn_search_i (node->_right, pos, _cube, (dir + 1) % DIM);
            }
            _cube->_low[dir] = tmp;
        }
        else {
            T tmp = _cube->_high[dir];
            _cube->_high[dir] = node->_pos[dir];
            if (_cube->distance (pos) < _res_min_dist) {
                nn_search_i (node->_left, pos, _cube, (dir + 1) % DIM);
            }
            _cube->_high[dir] = tmp;
        }
    }

    void range_search_i (sKdNode *node, const T low[DIM], const T high[DIM], int dir)
    {
        if (!node) return;

        // 현재 노드의 위치가 찾는 범위(low, hight)안에 들어오면 이 노드를 찾은 결과에 저장한다.
        if (compare (low, high, node->_pos)) {
            _res_node.push_back (node);
        }

        // 노드의 위치가 찾는 범위(low, hight)에 걸쳐 있으면 sub-tree로 찾아들어간다.
        if (low[dir] <= node->_pos[dir]) {
            range_search_i (node->_left, low, high, (dir + 1) % DIM);
        }
        if (node->_pos[dir] <= high[dir]) {
            range_search_i (node->_right, low, high, (dir + 1) % DIM);
        }
    }

private:
    sKdNode *_root;
    sHyperCube *_cube;

    // range search 결과를 저장하는 곳
    list<sKdNode *> _res_node;

    // nearest neighbour search 결과를 저장하는 곳
    sKdNode *_res_min_node;
    T    _res_min_dist;
};

VC++ 2008 프로젝트 파일:

kd_tree.zip

0.01MB