菜鸟笔记
提升您的技术认知

Linux内核中的radix tree

基数树

内核中的基树的节点,使用struct radix_tree_node来表示,其源代码如下:

struct radix_tree_node {
    unsigned int    height;     /* Height from the bottom */
    unsigned int    count;      /* 子节点的个数 */
    union {
        struct radix_tree_node *parent; /* Used when ascending tree */
        struct rcu_head rcu_head;   /* Used when freeing node */
    };
    void __rcu  *slots[RADIX_TREE_MAP_SIZE];
    unsigned long   tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
};

/* root tags are stored in gfp_mask, shifted by __GFP_BITS_SHIFT */
struct radix_tree_root {
    unsigned int        height;
    gfp_t           gfp_mask;
    struct radix_tree_node  __rcu *rnode;
};

slots是指向各个孩子节点的指针,RADIX_TREE_MAP_SIZE通常为64(跟宏定义有关,我们以64为例来说明)。

tags顾名思义是标签,代表此节点的所有孩子节点的标签。tags是二维数组,RADIX_TREE_MAX_TAGS定义为3,即最多支持3种标签。RADIX_TREE_TAG_LONGS的长度使得可以放下所有子节点的tag(一个tag占1位)。

一个典型的基树如下图所示:

不得不说,本来简单的数据结构,加上了RCU的机制后,总是有点难以理解。

初始化

跟Linux内核中其它的数据结构类似,两种初始化方式:

#define RADIX_TREE(name, mask) \
    struct radix_tree_root name = RADIX_TREE_INIT(mask)

#define INIT_RADIX_TREE(root, mask)                 \
do {                                    \
    (root)->height = 0;                     \
    (root)->gfp_mask = (mask);                  \
    (root)->rnode = NULL;                       \
} while (0)

查找

/*
 * is_slot == 1 : search for the slot.
 * is_slot == 0 : search for the node.
 */
static void *radix_tree_lookup_element(struct radix_tree_root *root,
                unsigned long index, int is_slot)
{
    unsigned int height, shift;
    struct radix_tree_node *node, **slot;

    node = rcu_dereference_raw(root->rnode);
    if (node == NULL)
        return NULL;

    if (!radix_tree_is_indirect_ptr(node)) {
        if (index > 0)
            return NULL;
        return is_slot ? (void *)&root->rnode : node;
    }
    node = indirect_to_ptr(node);

    height = node->height;
    if (index > radix_tree_maxindex(height))
        return NULL;

    shift = (height-1) * RADIX_TREE_MAP_SHIFT;

    do {
        slot = (struct radix_tree_node **)
            (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK));
        node = rcu_dereference_raw(*slot);
        if (node == NULL)
            return NULL;

        shift -= RADIX_TREE_MAP_SHIFT;
        height--;
    } while (height > 0);

    return is_slot ? (void *)slot : indirect_to_ptr(node);
}

/**
 *  radix_tree_lookup    -    perform lookup operation on a radix tree
 *  @root:      radix tree root
 *  @index:     index key
 *
 *  Lookup the item at the position @index in the radix tree @root.
 *
 *  This function can be called under rcu_read_lock, however the caller
 *  must manage lifetimes of leaf nodes (eg. RCU may also be used to free
 *  them safely). No RCU barriers are required to access or modify the
 *  returned item, however.
 */
void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index)
{
    return radix_tree_lookup_element(root, index, 0);
}

查找的过程,就是把index从高位开始,每6位(以64位机器为例)为一个单位,分隔成一个个的slot index,然后从radix树的根往下搜索的过程,整体还是比较好理解的。暂时忽略indirect_to_prt,rcu_dereference_raw这几个函数,这不影响对整体流程的理解。

插入

/**
 *  radix_tree_insert    -    insert into a radix tree
 *  @root:      radix tree root
 *  @index:     index key
 *  @item:      item to insert
 *
 *  Insert an item into the radix tree at position @index.
 */
int radix_tree_insert(struct radix_tree_root *root,
            unsigned long index, void *item)
{
    struct radix_tree_node *node = NULL, *slot;
    unsigned int height, shift;
    int offset;
    int error;

    BUG_ON(radix_tree_is_indirect_ptr(item));

    /* Make sure the tree is high enough.  */
    if (index > radix_tree_maxindex(root->height)) {
        error = radix_tree_extend(root, index);
        if (error)
            return error;
    }

    slot = indirect_to_ptr(root->rnode);

    height = root->height;
    shift = (height-1) * RADIX_TREE_MAP_SHIFT;

    offset = 0;         /* uninitialised var warning */
    while (height > 0) {
        if (slot == NULL) {
            /* Have to add a child node.  */
            if (!(slot = radix_tree_node_alloc(root)))
                return -ENOMEM;
            slot->height = height;
            slot->parent = node;
            if (node) {
                rcu_assign_pointer(node->slots[offset], slot);
                node->count++;
            } else
                rcu_assign_pointer(root->rnode, ptr_to_indirect(slot));
        }

        /* Go a level down */
        offset = (index >> shift) & RADIX_TREE_MAP_MASK;
        node = slot;
        slot = node->slots[offset];
        shift -= RADIX_TREE_MAP_SHIFT;
        height--;
    }

    if (slot != NULL)
        return -EEXIST;

    if (node) {
        node->count++;
        rcu_assign_pointer(node->slots[offset], item);
        BUG_ON(tag_get(node, 0, offset));
        BUG_ON(tag_get(node, 1, offset));
    } else {
        rcu_assign_pointer(root->rnode, item);
        BUG_ON(root_tag_get(root, 0));
        BUG_ON(root_tag_get(root, 1));
    }

    return 0;
}

插入的过程跟查找的过程非常相似其实,就是沿着树从上到下地找,某个位置没有元素就创建元素,直到出错或者把元素放到指定的slot中。

Tag

内核的radix tree支持Tag功能,就是给某个元素打一个tag。这个tag会影响该元素到基树根上所有元素。这样通过查看根元素是否有这个tag,就能判断根元素下的子元素中是否存在这种tag的元素。另外内核的基树提供了迭代所有设置过tag的元素的方法:radix_tree_for_each_tagged。