From bd6ce1d77897ba181cd9b5e2b5a935b1d356e457 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Tue, 3 May 2022 01:29:08 +0800 Subject: [PATCH] int16 --- api/index.md | 30 ++- include/binary.h | 3 + include/types.h | 10 +- include/types/int16.h | 35 ++++ include/types/int8.h | 6 +- src/page.c | 39 +++- src/types.c | 61 +++++- src/types/int16.c | 420 ++++++++++++++++++++++++++++++++++++++++++ src/types/int8.c | 15 +- tests/CMakeLists.txt | 2 +- tests/types_test.c | 50 ++++- 11 files changed, 636 insertions(+), 35 deletions(-) create mode 100644 include/types/int16.h create mode 100644 src/types/int16.c diff --git a/api/index.md b/api/index.md index 61ca000..fe7acc3 100644 --- a/api/index.md +++ b/api/index.md @@ -10,7 +10,7 @@ │ ptr 000 │ ptr 001 │ ptr ... │ ptr 255 │ └─────────┴─────────┴─────────┴─────────┘ ``` -特别地,当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。 +当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。 ## int16 > 查找速度为 > - 无该表项:O(1) @@ -18,30 +18,38 @@ 由于总条目仅有65536条,因此使用位图索引+位图+顺序链表进行查找定位。 ### 位图与位图索引 -每一位代表一个槽位,为0表示当前为空,为1表示当前已有值,按顺序排列。 +每一位代表一个槽位,为0表示当前为空,为1表示当前已有值,按顺序排列,占用文件中的`65536/8/4096=2`页空间。 > 下面每格1字节 ``` ┌────────┬────────┬────────┬────────┐ -│00100011│00000000│11001010│11000110│ +│00100011│00000000│........│11000110│ └────────┴────────┴────────┴────────┘ ``` -每256位(32字节)为一组,生成8位(1字节)位图索引,插在该组最前。该值指示在这256个槽位中有多少个已被填充。特别地,如果256个槽位均被填满,索引也为`0`,因此还需要额外判断其对应位图是全空还是全满。只要有一处不为0而位图索引为0,即可判定这256个槽位全满。 +每256位(32字节)为一组,生成8位(1字节)位图索引,其数字值表示在这256个槽位中有多少个已被填充。总共有`256`组索引,作为第一级`index`,单独在一个块存放,并在开头添加3个分别指向2页索引起始和顺序链表起始的指针。特别地,如果256个槽位均被填满,索引也为`0`,因此还需要额外判断其对应位图是全空还是全满。只要有一处不为0而位图索引为0,即可判定这256个槽位全满。 > 下面每格1字节 ``` -┌────────┬────────┬────────┬────────┐ -│ 30 │ No.000 │ No.... │ No.255 │ -└────────┴────────┴────────┴────────┘ +┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐ +│ pointer of first index page start ( this pointer will never be zero ) │ +├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤ +│ pointer of second index page start ( this pointer will never be zero ) │ +├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤ +│ pointer of index chain start ( this pointer will never be zero ) │ +├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤ +│ 0000 │ 0045 │ 0100 │ 0065 │ 0000 │ .... │ 0033 │ 0000 │ +└──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘ ``` ### 顺序链表 -根据位图和位图索引可以很方便地计算出当前值在顺序链表上的位置。顺序链表以256个uint64的位置指针为单位分配,当装满后分配一块新的空间,并将新链表开头的位置指针记录在旧链表开头。当没有下一个节点时,开头置0。特别地,当删除表项时,节点数有可能减少。此时并不归还多余节点所占空间,也不对节点头部指针做任何改动,而是将其保留以备后用。 +根据位图和位图索引可以很方便地计算出当前值在顺序链表上的位置。顺序链表以256个uint64的位置指针(半页)为单位分配,当装满后分配一块新的空间,并将新链表开头的位置指针记录在旧链表开头。当没有下一个节点时,开头置0。特别地,当删除表项时,节点数有可能减少。此时并不归还多余节点所占空间,也不对节点头部指针做任何改动,而是将其保留以备后用。 > 下面每格8字节 ``` ┌────────┬────────┬────────┬────────┐ │next ptr│ ptr000 │ ptr... │ ptr255 │ └────────┴────────┴────────┴────────┘ ``` -特别地,当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。 +当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。 ## int32/float +> 查找速度为O(logn) + 使用B+树建立索引,每个节点大小为`4096`字节,最多可有`n=341`个扇出,`340`个值;最少则有`170`个值(根节点不遵守最少值规则)。 > 下面每格4字节 ``` @@ -62,6 +70,8 @@ 4088 4096 ``` ## int64/double +> 查找速度为O(logn) + 使用B+树建立索引,每个节点大小为`4096`字节,最多可有`n=256`个扇出,`255`个值;最少则有`128`个值(根节点不遵守最少值规则)。 > 下面每格8字节 ``` @@ -82,4 +92,6 @@ 4088 4096 ``` ## string +> 查找速度为O(logn) + 先将其哈希为int64再按int64进行查找。冲突时根据string表项附带存储的[下一个哈希相同的数据项的指针(uint64)](/api/types.md#字符串)进行遍历。 diff --git a/include/binary.h b/include/binary.h index 8c14514..74bf3ff 100644 --- a/include/binary.h +++ b/include/binary.h @@ -100,4 +100,7 @@ #endif #endif +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) + #endif \ No newline at end of file diff --git a/include/types.h b/include/types.h index 65b00c7..0508458 100644 --- a/include/types.h +++ b/include/types.h @@ -29,6 +29,13 @@ void* create_index(int fd, type_t t, void* buf); // 返回:索引头节点的指针 index void* load_index(int fd, type_t t, uint64_t ptr, void* buf); +// 移除 index 并释放空间 +int remove_index(int fd, type_t t, void* index); + +// 统计索引条数 +// 返回:索引条数 +uint64_t count_items(int fd, type_t t, void* index); + // 插入一条索引 int insert_item(int fd, type_t t, void* index, key_t k, uint64_t ptr); @@ -37,6 +44,7 @@ int insert_item(int fd, type_t t, void* index, key_t k, uint64_t ptr); uint64_t find_item_by_key(int fd, type_t t, void* index, key_t k); // 使用索引删除项 -int remove_item_by_key(int fd, type_t t, void* index, key_t k); +// 返回:ptr +uint64_t remove_item_by_key(int fd, type_t t, void* index, key_t k); #endif \ No newline at end of file diff --git a/include/types/int16.h b/include/types/int16.h new file mode 100644 index 0000000..b49dbc0 --- /dev/null +++ b/include/types/int16.h @@ -0,0 +1,35 @@ +#ifndef _TYPE_INT16_H_ +#define _TYPE_INT16_H_ + +#include +#include "../types.h" + +#define INT16_INDEX_SZ ( 8*3 + 256*8 ) +#define INT16_BITMAP_SZ ( 65536/8 ) +#define INT16_CHAIN_SZ ( (256+1)*8 ) + +// len(buf) >= INT16_INDEX_SZ+10 + INT16_BITMAP_SZ+8*2 = 10290 +// &buf[0] ~ &buf[2081] is index, index = buf+10 +// &buf[2082] ~ &buf[6185] is the first page of bitmap, ptr = buf+2090 +// &buf[6186] ~ &buf[10289] is the second page of bitmap, ptr = buf+6194 +// 返回:index = buf+10 +void* create_int16_index(int fd, void* buf); + +// len(buf) >= INT16_INDEX_SZ+10 + INT16_BITMAP_SZ+8*2 = 10290 +// &buf[0] ~ &buf[2081] is index, index = buf+10 +// &buf[2082] ~ &buf[6185] is the first page of bitmap, ptr = buf+2090 +// &buf[6186] ~ &buf[10289] is the second page of bitmap, ptr = buf+6194 +// 返回:index = buf+10 +void* load_int16_index(int fd, uint64_t ptr, void* buf); + +int remove_int16_index(int fd, void* index); + +uint64_t count_int16_items(int fd, void* index); + +int insert_int16_item(int fd, void* index, key_t k, uint64_t ptr); + +uint64_t find_item_by_int16_key(int fd, void* index, key_t k); + +uint64_t remove_item_by_int16_key(int fd, void* index, key_t k); + +#endif \ No newline at end of file diff --git a/include/types/int8.h b/include/types/int8.h index ca449ed..30cfc98 100644 --- a/include/types/int8.h +++ b/include/types/int8.h @@ -12,10 +12,14 @@ void* create_int8_index(int fd, void* buf); // len(buf) >= INT8_INDEX_SZ+10 void* load_int8_index(int fd, uint64_t ptr, void* buf); +int remove_int8_index(int fd, void* index); + +uint64_t count_int8_items(int fd, void* index); + int insert_int8_item(int fd, void* index, key_t k, uint64_t ptr); uint64_t find_item_by_int8_key(int fd, void* index, key_t k); -int remove_item_by_int8_key(int fd, void* index, key_t k); +uint64_t remove_item_by_int8_key(int fd, void* index, key_t k); #endif \ No newline at end of file diff --git a/src/page.c b/src/page.c index 8639c9a..41466eb 100644 --- a/src/page.c +++ b/src/page.c @@ -14,8 +14,13 @@ static const uint8_t nullpage[PAGESZ]; void* alloc_page(int fd, void* page) { uint64_t ptr = 8, prev_ptr = 0, prev_prev_ptr = 0; uint8_t buf[8]; + if(page == NULL) return NULL; // 对于 page,只关心位于第一页 8~15 字节的 ptr of unused blk while(ptr) { + if(unlikely(ptr == prev_ptr)) { // 文件损坏 + errno = ESPIPE; + return NULL; + } if(!(ptr%PAGESZ)) { // 找到符合要求的页 if(lseek(fd, ptr, SEEK_SET) < 0) return NULL; if(read(fd, buf, 8) != 8) return NULL; @@ -46,7 +51,7 @@ void* alloc_page(int fd, void* page) { readle64(fd, ptr); } ptr = lseek(fd, 0, SEEK_END); - if(ptr < 0) return NULL; + if((int)ptr < 0) return NULL; if(ptr%PAGESZ) { // 文件没有页对齐 errno = ESPIPE; return NULL; @@ -57,6 +62,7 @@ void* alloc_page(int fd, void* page) { } void* get_page(int fd, uint64_t ptr, void* page) { + if(page == NULL) return NULL; if(ptr%PAGESZ) return NULL; if(lseek(fd, ptr, SEEK_SET) < 0) return NULL; putle64(page, ptr); @@ -66,16 +72,21 @@ void* get_page(int fd, uint64_t ptr, void* page) { } int sync_page(int fd, void* page) { + if(page == NULL) return EOF; uint64_t ptr = le64(page-8); if(lseek(fd, ptr, SEEK_SET) < 0) return EOF; return write(fd, page, PAGESZ) != PAGESZ; } int free_page(int fd, void* page) { + if(page == NULL) return EOF; uint64_t ptr = 8, prev_ptr = 0, prev_prev_ptr = 0, page_ptr = le64(page-8); uint8_t buf[8]; while(ptr && ptr < page_ptr) { - if(prev_ptr == ptr) return EOF; + if(unlikely(ptr == prev_ptr)) { // 文件损坏 + errno = ESPIPE; + return EOF; + } if(prev_prev_ptr && ptr < prev_ptr) { // 不符合顺序,进行一次调整 lseek(fd, prev_prev_ptr, SEEK_SET); putle64(buf, ptr); @@ -110,11 +121,16 @@ void* alloc_block(int fd, uint16_t size, void* blk) { uint8_t buf[8]; uint16_t blksz; + if(blk == NULL) return NULL; if(size > PAGESZ) return NULL; // 对于 page,只关心位于第一页 8~15 字节的 ptr of unused blk while(ptr) { - if(lseek(fd, ptr, SEEK_SET) < 0) return NULL; - if(read(fd, buf, 8) != 8) return NULL; + if(unlikely(lseek(fd, ptr, SEEK_SET) < 0)) return NULL; + if(unlikely(read(fd, buf, 8) != 8)) return NULL; + if(unlikely(ptr == prev_ptr)) { // 文件损坏 + errno = ESPIPE; + return NULL; + } readle16(fd, blksz); if(blksz >= size) { // 找到符合要求的块 if(blksz - size > 10) { // 分裂块 @@ -153,7 +169,7 @@ void* alloc_block(int fd, uint16_t size, void* blk) { readle64(fd, ptr); } ptr = lseek(fd, 0, SEEK_END); - if(ptr < 0) return NULL; + if((int)ptr < 0) return NULL; if(ptr%PAGESZ) { // 文件没有页对齐 errno = ESPIPE; return NULL; @@ -173,6 +189,7 @@ void* alloc_block(int fd, uint16_t size, void* blk) { } void* get_block(int fd, uint16_t size, uint64_t ptr, void* blk) { + if(blk == NULL) return NULL; if(lseek(fd, ptr, SEEK_SET) < 0) return NULL; putle64(blk, ptr); putle16(blk+8, size); @@ -182,6 +199,7 @@ void* get_block(int fd, uint16_t size, uint64_t ptr, void* blk) { } int sync_block(int fd, void* blk) { + if(blk == NULL) return EOF; uint64_t off = le64(blk-10); uint16_t size = le16(blk-2); if(size > PAGESZ) { @@ -193,11 +211,15 @@ int sync_block(int fd, void* blk) { } int free_block(int fd, void* blk) { + if(blk == NULL) return EOF; uint64_t ptr = 8, prev_ptr = 0, prev_prev_ptr = 0, off = le64(blk-10); uint8_t buf[8]; uint16_t size = le16(blk-2), sz; while(ptr && ptr < off) { - if(prev_ptr == ptr) return EOF; + if(unlikely(ptr == prev_ptr)) { // 文件损坏 + errno = ESPIPE; + return EOF; + } if(prev_prev_ptr && ptr < prev_ptr) { // 不符合顺序,进行一次调整 lseek(fd, prev_prev_ptr, SEEK_SET); putle64(buf, ptr); @@ -247,7 +269,10 @@ int add_block(int fd, uint16_t size, uint64_t off) { uint8_t buf[8]; uint16_t sz; while(ptr && ptr < off) { - if(prev_ptr == ptr) return EOF; + if(unlikely(ptr == prev_ptr)) { // 文件损坏 + errno = ESPIPE; + return EOF; + } if(prev_prev_ptr && ptr < prev_ptr) { // 不符合顺序,进行一次调整 lseek(fd, prev_prev_ptr, SEEK_SET); putle64(buf, ptr); diff --git a/src/types.c b/src/types.c index 2729b77..cf9ff5d 100644 --- a/src/types.c +++ b/src/types.c @@ -1,16 +1,21 @@ #include "../include/types.h" #include "../include/types/int8.h" +#include "../include/types/int16.h" // ptr = init(fd) typedef void* (*_type_init_t)(int, void*); // ptr = load(fd, ptr) typedef void* (*_type_load_t)(int, uint64_t, void*); +// ret = rm(fd, index) +typedef int (*_type_remove_t)(int, void*); +// n = count(fd, index) +typedef uint64_t (*_type_count_t)(int, void*); // ret = insert_item(fd, index, k, ptr) typedef int (*_insert_item_t)(int, void*, key_t, uint64_t); // ptr = find_by_key(fd, index, k) typedef uint64_t (*_find_by_key_t)(int, void*, key_t); // ret = remove_by_key(fd, index, k) -typedef int (*_remove_by_key_t)(int, void*, key_t); +typedef uint64_t (*_remove_by_key_t)(int, void*, key_t); // Function not implemented static void* create_not_impl_index(int fd, void* buf) { @@ -24,6 +29,18 @@ static void* load_not_impl_index(int fd, uint64_t ptr, void* buf) { return 0; } +// Function not implemented +static int remove_not_impl_index(int fd, void* index) { + errno = ENOSYS; + return 0; +} + +// Function not implemented +static uint64_t count_not_impl_items(int fd, void* index) { + errno = ENOSYS; + return 0; +} + // Function not implemented static int insert_not_impl_item(int fd, void* index, key_t k, uint64_t ptr) { errno = ENOSYS; @@ -37,14 +54,14 @@ static uint64_t find_item_by_not_impl_key(int fd, void* index, key_t k) { } // Function not implemented -static int remove_item_by_not_impl_key(int fd, void* index, key_t k) { +static uint64_t remove_item_by_not_impl_key(int fd, void* index, key_t k) { errno = ENOSYS; return 0; } static _type_init_t _types_init[] = { create_int8_index, - create_not_impl_index, + create_int16_index, create_not_impl_index, create_not_impl_index, create_not_impl_index, @@ -54,7 +71,7 @@ static _type_init_t _types_init[] = { static _type_load_t _types_load[] = { load_int8_index, - load_not_impl_index, + load_int16_index, load_not_impl_index, load_not_impl_index, load_not_impl_index, @@ -62,9 +79,29 @@ static _type_load_t _types_load[] = { load_not_impl_index }; +static _type_remove_t _types_remove[] = { + remove_int8_index, + remove_int16_index, + remove_not_impl_index, + remove_not_impl_index, + remove_not_impl_index, + remove_not_impl_index, + remove_not_impl_index +}; + +static _type_count_t _types_count[] = { + count_int8_items, + count_int16_items, + count_not_impl_items, + count_not_impl_items, + count_not_impl_items, + count_not_impl_items, + count_not_impl_items +}; + static _insert_item_t _insert_item[] = { insert_int8_item, - insert_not_impl_item, + insert_int16_item, insert_not_impl_item, insert_not_impl_item, insert_not_impl_item, @@ -74,7 +111,7 @@ static _insert_item_t _insert_item[] = { static _find_by_key_t _find_item_by_key[] = { find_item_by_int8_key, - find_item_by_not_impl_key, + find_item_by_int16_key, find_item_by_not_impl_key, find_item_by_not_impl_key, find_item_by_not_impl_key, @@ -84,7 +121,7 @@ static _find_by_key_t _find_item_by_key[] = { static _remove_by_key_t _remove_item_by_key[] = { remove_item_by_int8_key, - remove_item_by_not_impl_key, + remove_item_by_int16_key, remove_item_by_not_impl_key, remove_item_by_not_impl_key, remove_item_by_not_impl_key, @@ -100,6 +137,14 @@ void* load_index(int fd, type_t t, uint64_t ptr, void* buf) { return _types_load[t&7](fd, ptr, buf); } +int remove_index(int fd, type_t t, void* index) { + return _types_remove[t&7](fd, index); +} + +uint64_t count_items(int fd, type_t t, void* index) { + return _types_count[t&7](fd, index); +} + int insert_item(int fd, type_t t, void* index, key_t k, uint64_t ptr) { return _insert_item[t&7](fd, index, k, ptr); } @@ -108,6 +153,6 @@ uint64_t find_item_by_key(int fd, type_t t, void* index, key_t k) { return _find_item_by_key[t&7](fd, index, k); } -int remove_item_by_key(int fd, type_t t, void* index, key_t k) { +uint64_t remove_item_by_key(int fd, type_t t, void* index, key_t k) { return _remove_item_by_key[t&7](fd, index, k); } diff --git a/src/types/int16.c b/src/types/int16.c new file mode 100644 index 0000000..8a9b6e2 --- /dev/null +++ b/src/types/int16.c @@ -0,0 +1,420 @@ +#include +#include +#include +#include +#include +#include "../../include/binary.h" +#include "../../include/page.h" +#include "../../include/types/int16.h" + +// len(buf) >= INT16_INDEX_SZ+10 + INT16_BITMAP_SZ+8*2 = 10290 +// &buf[0] ~ &buf[2081] is index, index = buf+10 +// &buf[2082] ~ &buf[6185] is the first page of bitmap, ptr = buf+2090 +// &buf[6186] ~ &buf[10289] is the second page of bitmap, ptr = buf+6194 +// 返回:index = buf+10 +void* create_int16_index(int fd, void* buf) { + buf = alloc_block(fd, INT16_INDEX_SZ, buf); + if(buf == NULL) return NULL; + memset(buf, 0, INT16_INDEX_SZ); + + void* page = alloc_page(fd, buf+INT16_INDEX_SZ); + if(page == NULL) { + free_block(fd, buf); + return NULL; + } + memset(page, 0, PAGESZ); + sync_page(fd, page); + + void* page2 = alloc_page(fd, page+PAGESZ); + if(page2 == NULL) { + free_block(fd, buf); + free_page(fd, page); + return NULL; + } + memset(page2, 0, PAGESZ); + sync_page(fd, page2); + + putle64(buf, le64(page-8)); + putle64(buf+8, le64(page2-8)); + sync_block(fd, buf); + return buf; +} + +// len(buf) >= INT16_INDEX_SZ+10 + INT16_BITMAP_SZ+8*2 = 10290 +// &buf[0] ~ &buf[2081] is index, index = buf+10 +// &buf[2082] ~ &buf[6185] is the first page of bitmap, ptr = buf+2090 +// &buf[6186] ~ &buf[10289] is the second page of bitmap, ptr = buf+6194 +// 返回:index = buf+10 +void* load_int16_index(int fd, uint64_t ptr, void* buf) { + buf = get_block(fd, INT16_INDEX_SZ, ptr, buf); + if(get_page(fd, le64(buf+8), get_page(fd, le64(buf), buf+INT16_INDEX_SZ)+PAGESZ) == NULL) return NULL; + return buf; +} + +int remove_int16_index(int fd, void* index) { + uint64_t ptr = le64(index+16); // 链表头 + while(ptr) { + uint64_t tmp; + if(unlikely(lseek(fd, ptr, SEEK_SET) < 0)) return EOF; + readle64(fd, tmp); + add_block(fd, INT16_CHAIN_SZ, ptr); + ptr = tmp; + } + if(free_page(fd, index+INT16_INDEX_SZ+8)) return 2; // 第一页位图 + if(free_page(fd, index+INT16_INDEX_SZ+8+PAGESZ+8)) return 3; // 第二页位图 + return free_block(fd, index); // 位图索引 +} + +uint64_t count_int16_items(int fd, void* index) { + int total = 0; + // 计算总的条目数 + for(int i = 0; i < 128; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) { + total += 256; + continue; + } + total += s; + } + for(int i = 128; i < 256; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[i*32])) { + total += 256; + continue; + } + total += s; + } + return total; +} + +int insert_int16_item(int fd, void* index, key_t k, uint64_t ptr) { + int isexist, sum = 0, total = count_int16_items(fd, index); + uint16_t key = (uint16_t)k; + char buf[8]; + char tmp[10+INT16_CHAIN_SZ]; + putle64(buf, ptr); + + #ifdef DEBUG + printf("No.%u: ", (int)key); + #endif + + if(key < 32768) { + // key是否已存在 + isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & (128>>(key%8)); + // 查找 key 之前共有多少索引 + for(int i = key/256*32; i < key/8; i++) { // 从未计算的32位组开始算起 + sum += __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8))[i]); + } + sum += __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & ~(0xff>>(key%8))); + #ifdef DEBUG + printf("popc: %d, ", sum); + #endif + for(int i = 0; i < key/256; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) { + sum += 256; + continue; + } + sum += s; + } + #ifdef DEBUG + printf("sum: %d, totl: %d, ", sum, total); + #endif + } else { + // key是否已存在 + isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & (128>>(key%8)); + // 查找 key 之前共有多少索引 + for(int i = key/256*32; i < key/8; i++) { // 从未计算的32位组开始算起 + sum += __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8+8))[i]); + } + sum += __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & ~(0xff>>(key%8))); + #ifdef DEBUG + printf("popc: %d, ", sum); + #endif + for(int i = 0; i < 128; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) { + sum += 256; + continue; + } + sum += s; + } + for(int i = 128; i < key/256; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[i*32])) { + sum += 256; + continue; + } + sum += s; + } + #ifdef DEBUG + printf("sum: %d, totl: %d, ", sum, total); + #endif + } + + if(!isexist) { + // 写入位图 + if(key<32768) { + ((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] |= 128>>(key%8); + if(unlikely(sync_page(fd, index+INT16_INDEX_SZ+8))) { // 失败,撤销更改 + ((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] &= ~(128>>(key%8)); + return EOF; + } + } else { + ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] |= 128>>(key%8); + if(unlikely(sync_page(fd, index+INT16_INDEX_SZ+8+PAGESZ+8))) { // 失败,撤销更改 + ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] &= ~(128>>(key%8)); + return EOF; + } + } + ((uint8_t*)(index+24))[key/256]++; // 写入位图索引 + if(unlikely(sync_block(fd, index))) { // 失败,撤销更改 + ((uint8_t*)(index+24))[key/256]--; + return EOF; + } + #ifdef DEBUG + printf("i: %d, sumblk: %d, map: %02x, ", key/256, ((uint8_t*)(index+24))[key/256], ((uint8_t*)(index+INT16_INDEX_SZ+8+((key<32768)?0:8)))[key/8]); + #endif + } + + if(unlikely(!le64(index+16))) { // 插入的是本索引的第一个值 + if(alloc_block(fd, INT16_CHAIN_SZ, tmp) == NULL) return EOF; + memcpy(index+16, tmp, 8); // 记录第一个链表的指针 + sync_block(fd, index); // 同步索引到文件 + memset(tmp+10, 0, INT16_CHAIN_SZ); // 清空 + memcpy(tmp+10+8, buf, 8); // 写入 item 位置 + return sync_block(fd, tmp+10); // 同步链表到文件 + } + + if(isexist) { // 索引已存在,仅替换指针 + ptr = le64(index+16); + for(int i = 0; i < sum/256; i++) { + if(lseek(fd, ptr, SEEK_SET) < 0) return EOF; + readle64(fd, ptr); + if(unlikely(!ptr)) { + errno = ESPIPE; + return EOF; + } + } + lseek(fd, 8*(sum%256+1), SEEK_CUR); + #ifdef DEBUG + puts("replace"); + #endif + return write(fd, buf, 8) != 8; + } + + // 索引不存在,需要搬移,统一向后移一个指针 + uint64_t prev_ptr, first_ptr = 1; + ptr = le64(index+16); + if(total%256 == 255) { // 旧链表刚好装满,需要新分配一个 + while(ptr && first_ptr) { // 遍历到末尾 + prev_ptr = ptr; + if(lseek(fd, ptr, SEEK_SET) < 0) return EOF; + readle64(fd, ptr); + readle64(fd, first_ptr); + if(unlikely(ptr == prev_ptr)) { // 文件损坏 + errno = ESPIPE; + return EOF; + } + } + if(first_ptr) { // 需要分配 + if(alloc_block(fd, INT16_CHAIN_SZ, tmp) == NULL) return EOF; + lseek(fd, prev_ptr, SEEK_SET); + write(fd, tmp, 8); // 将新分配的块附加到链表 + prev_ptr = ptr; + ptr = le64(tmp); + memset(tmp+10, 0, INT16_CHAIN_SZ); // 清空新链表 + sync_block(fd, tmp+10); + } + // 存在之前分配好的,但是由于删除索引而弃用的块,因此无需新分配 + } + if(sum == total) { // 恰好在最后添加 + while(ptr && first_ptr) { // 遍历到末尾 + prev_ptr = ptr; + if(lseek(fd, ptr, SEEK_SET) < 0) return EOF; + readle64(fd, ptr); + readle64(fd, first_ptr); + if(unlikely(ptr == prev_ptr)) { // 文件损坏 + errno = ESPIPE; + return EOF; + } + } + // 定位到最后一个未满块或第一个空块上的最后 + lseek(fd, prev_ptr+8*(sum%256+1), SEEK_SET); + #ifdef DEBUG + puts("append"); + #endif + return write(fd, buf, 8) != 8; // 写入 + } + // 定位回链表头 + lseek(fd, le64(index+16), SEEK_SET); + // 跳转到应当存入的块 + for(int i = 0; i < sum/256; i++) { + readle64(fd, ptr); + if(unlikely(!ptr)) { + errno = ESPIPE; + return EOF; // 不应当出现,如果出现说明文件损坏 + } + lseek(fd, ptr, SEEK_SET); + } + // 搬移 + int offset = sum%256; // 搬移开始的位置,也是应当存入的位置 + readle64(fd, ptr); // 下一个块指针 + #ifdef DEBUG + printf("off: %d, ", offset); + #endif + if(offset) { // 具有偏移,先定位到偏移 + lseek(fd, offset*8, SEEK_CUR); + } + readle64(fd, prev_ptr); // 读取第一个 item 指针 + #ifdef DEBUG + printf("first item: %llu\n", prev_ptr); + #endif + lseek(fd, -8, SEEK_CUR); // 返回 + write(fd, buf, 8); // 插入 + while(prev_ptr) { // 一直搬移到末尾 + if(unlikely(offset && !(offset++%256))) { // 进入新的块 + lseek(fd, ptr, SEEK_SET); + readle64(fd, first_ptr); // 下一个块指针 + if(unlikely(first_ptr == ptr)) { // 文件损坏 + errno = ESPIPE; + return EOF; + } + ptr = first_ptr; + } + putle64(buf, prev_ptr); // 以备写入 + readle64(fd, prev_ptr); // 读取下一个 item 指针 + lseek(fd, -8, SEEK_CUR); // 返回 + write(fd, buf, 8); // 搬移一个指针 + } + return 0; +} + +uint64_t find_item_by_int16_key(int fd, void* index, key_t k) { + uint64_t ptr; + uint16_t key = (uint16_t)k; + uint16_t sum; + if(key < 32768) { + int isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & (128>>(key%8)); + if(!isexist) return 0; + sum = __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & ~(0xff>>(key%8))); + for(int i = 0; i < key/256; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) { + sum += 256; + continue; + } + sum += s; + } + } else { + int isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & (128>>(key%8)); + if(!isexist) return 0; + sum = __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & ~(0xff>>(key%8))); + for(int i = 0; i < 128; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) { + sum += 256; + continue; + } + sum += s; + } + for(int i = 128; i < key/256; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[i*32])) { + sum += 256; + continue; + } + sum += s; + } + } + ptr = le64(index+16); + for(int i = 0; i < sum/256 && ptr; i++) { + if(lseek(fd, ptr, SEEK_SET) < 0) return EOF; + readle64(fd, ptr); + } + if(!ptr) return EOF; + ptr += 8*(sum%256+1); + if(lseek(fd, ptr, SEEK_SET) < 0) return EOF; + readle64(fd, ptr); + return ptr; +} + +uint64_t remove_item_by_int16_key(int fd, void* index, key_t k) { + uint64_t ptr; + uint16_t key = (uint16_t)k; + uint16_t sum; + char buf[8]; + if(key < 32768) { + int isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & (128>>(key%8)); + if(!isexist) return 0; + sum = __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & ~(0xff>>(key%8))); + for(int i = 0; i < key/256; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) { + sum += 256; + continue; + } + sum += s; + } + } else { + int isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & (128>>(key%8)); + if(!isexist) return 0; + sum = __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & ~(0xff>>(key%8))); + for(int i = 0; i < 128; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) { + sum += 256; + continue; + } + sum += s; + } + for(int i = 128; i < key/256; i++) { + int s = ((uint8_t*)(index+24))[i]; + if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[i*32])) { + sum += 256; + continue; + } + sum += s; + } + } + ptr = le64(index+16); + for(int i = 0; i < sum/256 && ptr; i++) { + if(lseek(fd, ptr, SEEK_SET) < 0) return EOF; + readle64(fd, ptr); + } + if(!ptr) return EOF; + + int offset = sum%256; + uint64_t cur_ptr = ptr+8*(offset+1), next_ptr, first_ptr = 0; + if(lseek(fd, ptr, SEEK_SET) < 0) return EOF; + readle64(fd, next_ptr); + lseek(fd, cur_ptr, SEEK_SET); // 回到开头 + readle64(fd, ptr); // 返回值 + cur_ptr += 8; + + do { // 一直循环到末尾 + if(unlikely(offset++%256 == 255)) { // 当前位于末尾,需要从下一页取值 + // 换下一页 + if(lseek(fd, next_ptr+8, SEEK_SET) < 0) return EOF; + readle64(fd, first_ptr); // 读后一个值 + // 回上一页 + lseek(fd, cur_ptr, SEEK_SET); + putle64(buf, first_ptr); + write(fd, buf, 8); // 覆盖 + // 换下一页 + cur_ptr = lseek(fd, next_ptr, SEEK_SET)+8; + readle64(fd, next_ptr); + lseek(fd, 8, SEEK_CUR); + continue; + } + // 当前后方至少还有一个指针,可以直接取值 + readle64(fd, first_ptr); // 读下一个值 + lseek(fd, -16, SEEK_CUR); // 回原处 + putle64(buf, first_ptr); + write(fd, buf, 8); // 覆盖 + cur_ptr += 8; + lseek(fd, 8, SEEK_CUR); + } while(first_ptr); + + return ptr; +} diff --git a/src/types/int8.c b/src/types/int8.c index b770676..9b015b9 100644 --- a/src/types/int8.c +++ b/src/types/int8.c @@ -16,6 +16,16 @@ void* load_int8_index(int fd, uint64_t ptr, void* buf) { return get_block(fd, INT8_INDEX_SZ, ptr, buf); } +int remove_int8_index(int fd, void* index) { + return free_block(fd, index); +} + +uint64_t count_int8_items(int fd, void* index) { + uint64_t cnt = 0; + for(int i = 0; i < 256; i++) cnt += !!(((uint64_t*)index)[i]); + return cnt; +} + int insert_int8_item(int fd, void* index, key_t k, uint64_t ptr) { uint8_t key = (uint8_t)k; ((uint64_t*)index)[key] = ptr; @@ -27,8 +37,9 @@ uint64_t find_item_by_int8_key(int fd, void* index, key_t k) { return ((uint64_t*)index)[key]; } -int remove_item_by_int8_key(int fd, void* index, key_t k) { +uint64_t remove_item_by_int8_key(int fd, void* index, key_t k) { uint8_t key = (uint8_t)k; + uint64_t ptr = ((uint64_t*)index)[key]; ((uint64_t*)index)[key] = 0; - return 0; + return ptr; } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 1129b8d..185c054 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -3,7 +3,7 @@ project(fumidb_test VERSION 1.0) add_executable(binary_test binary_test.c) add_executable(page_test page_test.c ../src/page.c ../src/file.c) -add_executable(types_test types_test.c ../src/types.c ../src/types/int8.c ../src/page.c ../src/file.c) +add_executable(types_test types_test.c ../src/types.c ../src/types/int8.c ../src/types/int16.c ../src/page.c ../src/file.c) add_test(test_binary binary_test COMMAND binary_test) add_test(test_page page_test COMMAND page_test) diff --git a/tests/types_test.c b/tests/types_test.c index 52b8f39..1cd90fd 100644 --- a/tests/types_test.c +++ b/tests/types_test.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -8,7 +8,7 @@ #include "../include/types.h" #include "../include/types/int8.h" -uint8_t int8buf[INT8_INDEX_SZ+10]; +uint8_t buf[10290]; int main() { /* test int8 */ @@ -18,9 +18,9 @@ int main() { return 1; } if(init_file_header_page(fd) < 0) return 2; - void* index = create_index(fd, TYPE_INT8, int8buf); + void* index = create_index(fd, TYPE_INT8, buf); if(!index) { - perror("create_index"); + perror("create_int8_index"); return 3; } if(le64(index-10) != HEADERSZ) { @@ -36,6 +36,7 @@ int main() { insert_item(fd, TYPE_INT8, index, 45, 345743415); insert_item(fd, TYPE_INT8, index, 67, 56787145); insert_item(fd, TYPE_INT8, index, 123, 123567854424); + if(count_items(fd, TYPE_INT8, index) != 5) return 6; if(find_item_by_key(fd, TYPE_INT8, index, 1) != 3456432) return 6; if(find_item_by_key(fd, TYPE_INT8, index, 3) != 7654323456) return 7; if(find_item_by_key(fd, TYPE_INT8, index, 45) != 345743415) return 8; @@ -45,8 +46,8 @@ int main() { index = NULL; close(fd); fd = open("types_test_tmp.bin", O_RDWR, 0644); - memset(int8buf, 0, INT8_INDEX_SZ+10); - index = load_index(fd, TYPE_INT8, HEADERSZ, int8buf); + memset(buf, 0, INT8_INDEX_SZ+10); + index = load_index(fd, TYPE_INT8, HEADERSZ, buf); if(find_item_by_key(fd, TYPE_INT8, index, 1) != 3456432) return 6; if(find_item_by_key(fd, TYPE_INT8, index, 3) != 7654323456) return 7; if(find_item_by_key(fd, TYPE_INT8, index, 45) != 345743415) return 8; @@ -55,7 +56,44 @@ int main() { if(find_item_by_key(fd, TYPE_INT8, index, 255) != 0) return 11; remove_item_by_key(fd, TYPE_INT8, index, 123); if(find_item_by_key(fd, TYPE_INT8, index, 123) != 0) return 12; + if(count_items(fd, TYPE_INT8, index) != 4) return 13; + if(remove_index(fd, TYPE_INT8, index)) return 14; + index = create_index(fd, TYPE_INT8, buf); + if(!index) { + perror("create_int8_index"); + return 3; + } + if(count_items(fd, TYPE_INT8, index) != 0) return 15; close(fd); /* end test int8 */ + + /* test int16 */ + fd = open("types_test_tmp.bin", O_RDWR | O_CREAT | O_TRUNC, 0644); + if(fd < 0) { + perror("create"); + return 1; + } + if(init_file_header_page(fd) < 0) return 2; + index = create_index(fd, TYPE_INT16, buf); + if(!index) { + perror("create_int16_index"); + return 3; + } + + for(int i = 57344, cnt = 0; i < 65536+4099; i++, cnt++) { + int n; + if((n=count_items(fd, TYPE_INT16, index)) != cnt) { + printf("%d != %d\n", cnt, n); + return 4; + } + if(insert_item(fd, TYPE_INT16, index, (key_t)i, i)) { + printf("%u ", (uint16_t)i); + fflush(stdout); + perror("insert_int16_item"); + return 4; + } + } + close(fd); + /* end test int16 */ // remove("types_test_tmp.bin"); }