[알고리즘] KD 트리 , KDB 트리

프로그래밍/알고리즘 2019. 4. 30. 10:26

개념

이진검색트리를 확장한 것으로 두개 이상의 key 를 사용합니다. 다른 말로 다차원 검색트리로 동일한 레벨에 있는 노드는 모두 동일한 하나의 필드만 이용해서 분기합니다.

현재 그림은 두개의 key 를 갖고있는 KD 트리이다. 왼쪽 key 를 x, 오른쪽 key 를 y 라고 생각 한다면 깊이 1 위치에 A 에서는 처음에 x 를 기준으로 작으면 왼쪽 크면 오른쪽으로 분리한다. 그리고 깊이 2 위치에 B,C 에서는 y 의 값을 기준으로 깊이 3 에서는 x의 값을 기준으로 삽입을 한다.

삭제

1. 자식이 없는 경우

: 노드만 제거

2. 자식이 하나뿐인 경우

: 자식이 둘인 경우와 같이 해결

: 왼쪽 자식만 있는경우, 왼쪽 서브트리 중 노드 r에서의 분기에 사용한 필드 값이 가장 큰 노드를 찾아서 이동

3. 자식이 둘인 경우

: 오른족 서브트리 중 노드 r에서 분기에 사용한 필드의 값이 가장 작은 노드를 찾아서 이동

KDB 트리

KDB 트리는 KD 트리에서 영역의 개념이 추가되서 노드가 영역 노드와 키 노드로 나뉘는 차이점이 있다.

코드

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
 
#define MAX_DIM 3
struct kd_node_t{
    double x[MAX_DIM];
    struct kd_node_t *left, *right;
};
 
    inline double
dist(struct kd_node_t *a, struct kd_node_t *b, int dim)
{
    double t, d = 0;
    while (dim--) {
        t = a->x[dim] - b->x[dim];
        d += t * t;
    }
    return d;
}
inline void swap(struct kd_node_t *x, struct kd_node_t *y) {
    double tmp[MAX_DIM];
    memcpy(tmp,  x->x, sizeof(tmp));
    memcpy(x->x, y->x, sizeof(tmp));
    memcpy(y->x, tmp,  sizeof(tmp));
}
 
 
/* see quickselect method */
    struct kd_node_t*
find_median(struct kd_node_t *start, struct kd_node_t *end, int idx)
{
    if (end <= start) return NULL;
    if (end == start + 1)
        return start;
 
    struct kd_node_t *p, *store, *md = start + (end - start) / 2;
    double pivot;
    while (1) {
        pivot = md->x[idx];
 
        swap(md, end - 1);
        for (store = p = start; p < end; p++) {
            if (p->x[idx] < pivot) {
                if (p != store)
                    swap(p, store);
                store++;
            }
        }
        swap(store, end - 1);
 
        /* median has duplicate values */
        if (store->x[idx] == md->x[idx])
            return md;
 
        if (store > md) end = store;
        else        start = store;
    }
}
 
    struct kd_node_t*
make_tree(struct kd_node_t *t, int len, int i, int dim)
{
    struct kd_node_t *n;
 
    if (!len) return 0;
 
    if ((n = find_median(t, t + len, i))) {
        i = (i + 1) % dim;
        n->left  = make_tree(t, n - t, i, dim);
        n->right = make_tree(n + 1, t + len - (n + 1), i, dim);
    }
    return n;
}
 
/* global variable, so sue me */
int visited;
 
void nearest(struct kd_node_t *root, struct kd_node_t *nd, int i, int dim,
        struct kd_node_t **best, double *best_dist)
{
    double d, dx, dx2;
 
    if (!root) return;
    d = dist(root, nd, dim);
    dx = root->x[i] - nd->x[i];
    dx2 = dx * dx;
 
    visited ++;
 
    if (!*best || d < *best_dist) {
        *best_dist = d;
        *best = root;
    }
 
    /* if chance of exact match is high */
    if (!*best_dist) return;
 
    if (++i >= dim) i = 0;
 
    nearest(dx > 0 ? root->left : root->right, nd, i, dim, best, best_dist);
    if (dx2 >= *best_dist) return;
    nearest(dx > 0 ? root->right : root->left, nd, i, dim, best, best_dist);
}
 
#define N 1000000
#define rand1() (rand() / (double)RAND_MAX)
#define rand_pt(v) { v.x[0] = rand1(); v.x[1] = rand1(); v.x[2] = rand1(); }
int main(void)
{
    int i;
    struct kd_node_t wp[] = {
        {{2, 3}}, {{5, 4}}, {{9, 6}}, {{4, 7}}, {{8, 1}}, {{7, 2}}
    };
    struct kd_node_t testNode = {{9, 2}};
    struct kd_node_t *root, *found, *million;
    double best_dist;
 
    root = make_tree(wp, sizeof(wp) / sizeof(wp[1]), 0, 2);
 
    visited = 0;
    found = 0;
    nearest(root, &testNode, 0, 2, &found, &best_dist);
 
    printf(">> WP tree\nsearching for (%g, %g)\n"
            "found (%g, %g) dist %g\nseen %d nodes\n\n",
            testNode.x[0], testNode.x[1],
            found->x[0], found->x[1], sqrt(best_dist), visited);
 
    million =(struct kd_node_t*) calloc(N, sizeof(struct kd_node_t));
    srand(time(0));
    for (i = 0; i < N; i++) rand_pt(million[i]);
 
    root = make_tree(million, N, 0, 3);
    rand_pt(testNode);
 
    visited = 0;
    found = 0;
    nearest(root, &testNode, 0, 3, &found, &best_dist);
 
    printf(">> Million tree\nsearching for (%g, %g, %g)\n"
            "found (%g, %g, %g) dist %g\nseen %d nodes\n",
            testNode.x[0], testNode.x[1], testNode.x[2],
            found->x[0], found->x[1], found->x[2],
            sqrt(best_dist), visited);
 
    /* search many random points in million tree to see average behavior.
       tree size vs avg nodes visited:
       10      ~  7
       100     ~ 16.5
       1000        ~ 25.5
       10000       ~ 32.8
       100000      ~ 38.3
       1000000     ~ 42.6
       10000000    ~ 46.7              */
    int sum = 0, test_runs = 100000;
    for (i = 0; i < test_runs; i++) {
        found = 0;
        visited = 0;
        rand_pt(testNode);
        nearest(root, &testNode, 0, 3, &found, &best_dist);
        sum += visited;
    }
    printf("\n>> Million tree\n"
            "visited %d nodes for %d random findings (%f per lookup)\n",
            sum, test_runs, sum/(double)test_runs);
 
    // free(million);
 
    return 0;
}

저작자표시 (새창열림)

'프로그래밍 > 알고리즘' 카테고리의 다른 글

[알고리즘] 깊이 우선 탐색(DFS, Depth-First Search) (0)	2019.05.23
[알고리즘] 해시 테이블 (Hash Table) (0)	2019.04.30
[알고리즘] B 트리 (0)	2019.04.29
[알고리즘] 레드-블랙트리(Red-BlackTree) -Insert (0)	2019.04.08
[알고리즘] 이진탐색트리(BST) (0)	2019.04.07

ABOUT ME

Joon's Blog Joon's Blog

개념

삭제

KDB 트리

코드

'프로그래밍 > 알고리즘' 카테고리의 다른 글

티스토리툴바

ABOUT ME

개념

삭제

KDB 트리

코드

'프로그래밍 > 알고리즘' 카테고리의 다른 글

관련글 관련글 더보기

티스토리툴바