1
0
mirror of https://github.com/fumiama/fumidb.git synced 2026-06-05 08:40:30 +08:00
This commit is contained in:
源文雨
2022-05-03 01:29:08 +08:00
parent e24f761f34
commit bd6ce1d778
11 changed files with 636 additions and 35 deletions

View File

@@ -10,7 +10,7 @@
│ ptr 000 │ ptr 001 │ ptr ... │ ptr 255 │
└─────────┴─────────┴─────────┴─────────┘
```
特别地,当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。
当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。
## int16
> 查找速度为
> - 无该表项O(1)
@@ -18,30 +18,38 @@
由于总条目仅有65536条因此使用位图索引+位图+顺序链表进行查找定位。
### 位图与位图索引
每一位代表一个槽位为0表示当前为空为1表示当前已有值按顺序排列。
每一位代表一个槽位为0表示当前为空为1表示当前已有值按顺序排列,占用文件中的`65536/8/4096=2`页空间
> 下面每格1字节
```
┌────────┬────────┬────────┬────────┐
│00100011│00000000│11001010│11000110│
│00100011│00000000│........│11000110│
└────────┴────────┴────────┴────────┘
```
每256位32字节为一组生成8位1字节位图索引插在该组最前。该值指示在这256个槽位中有多少个已被填充。特别地如果256个槽位均被填满索引也为`0`因此还需要额外判断其对应位图是全空还是全满。只要有一处不为0而位图索引为0即可判定这256个槽位全满。
每256位32字节为一组生成8位1字节位图索引其数字值表示在这256个槽位中有多少个已被填充。总共有`256`组索引,作为第一级`index`单独在一个块存放并在开头添加3个分别指向2页索引起始和顺序链表起始的指针。特别地如果256个槽位均被填满索引也为`0`因此还需要额外判断其对应位图是全空还是全满。只要有一处不为0而位图索引为0即可判定这256个槽位全满。
> 下面每格1字节
```
┌────────────────┬────────────────┐
30 │ No.000 │ No.... │ No.255
────────────────┴────────┴────────
┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐
│ pointer of first index page start ( this pointer will never be zero ) │
├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤
│ pointer of second index page start ( this pointer will never be zero ) │
├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤
│ pointer of index chain start ( this pointer will never be zero ) │
├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤
│ 0000 │ 0045 │ 0100 │ 0065 │ 0000 │ .... │ 0033 │ 0000 │
└──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘
```
### 顺序链表
根据位图和位图索引可以很方便地计算出当前值在顺序链表上的位置。顺序链表以256个uint64的位置指针为单位分配当装满后分配一块新的空间并将新链表开头的位置指针记录在旧链表开头。当没有下一个节点时开头置0。特别地当删除表项时节点数有可能减少。此时并不归还多余节点所占空间也不对节点头部指针做任何改动而是将其保留以备后用。
根据位图和位图索引可以很方便地计算出当前值在顺序链表上的位置。顺序链表以256个uint64的位置指针(半页)为单位分配当装满后分配一块新的空间并将新链表开头的位置指针记录在旧链表开头。当没有下一个节点时开头置0。特别地当删除表项时节点数有可能减少。此时并不归还多余节点所占空间也不对节点头部指针做任何改动而是将其保留以备后用。
> 下面每格8字节
```
┌────────┬────────┬────────┬────────┐
│next ptr│ ptr000 │ ptr... │ ptr255 │
└────────┴────────┴────────┴────────┘
```
特别地,当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。
当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。
## int32/float
> 查找速度为O(logn)
使用B+树建立索引,每个节点大小为`4096`字节,最多可有`n=341`个扇出,`340`个值;最少则有`170`个值(根节点不遵守最少值规则)。
> 下面每格4字节
```
@@ -62,6 +70,8 @@
4088 4096
```
## int64/double
> 查找速度为O(logn)
使用B+树建立索引,每个节点大小为`4096`字节,最多可有`n=256`个扇出,`255`个值;最少则有`128`个值(根节点不遵守最少值规则)。
> 下面每格8字节
```
@@ -82,4 +92,6 @@
4088 4096
```
## string
> 查找速度为O(logn)
先将其哈希为int64再按int64进行查找。冲突时根据string表项附带存储的[下一个哈希相同的数据项的指针(uint64)](/api/types.md#字符串)进行遍历。

View File

@@ -100,4 +100,7 @@
#endif
#endif
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
#endif

View File

@@ -29,6 +29,13 @@ void* create_index(int fd, type_t t, void* buf);
// 返回:索引头节点的指针 index
void* load_index(int fd, type_t t, uint64_t ptr, void* buf);
// 移除 index 并释放空间
int remove_index(int fd, type_t t, void* index);
// 统计索引条数
// 返回:索引条数
uint64_t count_items(int fd, type_t t, void* index);
// 插入一条索引
int insert_item(int fd, type_t t, void* index, key_t k, uint64_t ptr);
@@ -37,6 +44,7 @@ int insert_item(int fd, type_t t, void* index, key_t k, uint64_t ptr);
uint64_t find_item_by_key(int fd, type_t t, void* index, key_t k);
// 使用索引删除项
int remove_item_by_key(int fd, type_t t, void* index, key_t k);
// 返回ptr
uint64_t remove_item_by_key(int fd, type_t t, void* index, key_t k);
#endif

35
include/types/int16.h Normal file
View File

@@ -0,0 +1,35 @@
#ifndef _TYPE_INT16_H_
#define _TYPE_INT16_H_
#include <stdint.h>
#include "../types.h"
// Size in bytes of the index block: three 8-byte pointers (first bitmap page,
// second bitmap page, head of the pointer chain) followed by a 256*8-byte area
// for the per-group counters.
// NOTE(review): the design notes describe the counters as one byte per group,
// which would only need 256 bytes here — confirm the intended size.
#define INT16_INDEX_SZ ( 8*3 + 256*8 )
// Size in bytes of the key bitmap: one bit for each of the 65536 possible keys.
#define INT16_BITMAP_SZ ( 65536/8 )
// Size in bytes of one chain block: (256+1) eight-byte words
// (per the design notes: one next-block pointer followed by 256 item pointers).
#define INT16_CHAIN_SZ ( (256+1)*8 )
// Create a new int16 index.
// len(buf) >= INT16_INDEX_SZ+10 + INT16_BITMAP_SZ+8*2 = 10290
// &buf[0] ~ &buf[2081] is index, index = buf+10
// &buf[2082] ~ &buf[6185] is the first page of bitmap, ptr = buf+2090
// &buf[6186] ~ &buf[10289] is the second page of bitmap, ptr = buf+6194
// Returns: index = buf+10
void* create_int16_index(int fd, void* buf);
// Load an existing int16 index whose index block is stored at file offset ptr.
// len(buf) >= INT16_INDEX_SZ+10 + INT16_BITMAP_SZ+8*2 = 10290
// &buf[0] ~ &buf[2081] is index, index = buf+10
// &buf[2082] ~ &buf[6185] is the first page of bitmap, ptr = buf+2090
// &buf[6186] ~ &buf[10289] is the second page of bitmap, ptr = buf+6194
// Returns: index = buf+10
void* load_int16_index(int fd, uint64_t ptr, void* buf);
// Remove the index and release its space.
int remove_int16_index(int fd, void* index);
// Count the entries held by the index.
uint64_t count_int16_items(int fd, void* index);
// Insert one entry that maps key k to the item pointer ptr.
int insert_int16_item(int fd, void* index, key_t k, uint64_t ptr);
// Look up the item pointer stored for key k.
uint64_t find_item_by_int16_key(int fd, void* index, key_t k);
// Remove the entry for key k; returns the item pointer that was stored for it.
uint64_t remove_item_by_int16_key(int fd, void* index, key_t k);
#endif

View File

@@ -12,10 +12,14 @@ void* create_int8_index(int fd, void* buf);
// len(buf) >= INT8_INDEX_SZ+10
void* load_int8_index(int fd, uint64_t ptr, void* buf);
int remove_int8_index(int fd, void* index);
uint64_t count_int8_items(int fd, void* index);
int insert_int8_item(int fd, void* index, key_t k, uint64_t ptr);
uint64_t find_item_by_int8_key(int fd, void* index, key_t k);
int remove_item_by_int8_key(int fd, void* index, key_t k);
uint64_t remove_item_by_int8_key(int fd, void* index, key_t k);
#endif

View File

@@ -14,8 +14,13 @@ static const uint8_t nullpage[PAGESZ];
void* alloc_page(int fd, void* page) {
uint64_t ptr = 8, prev_ptr = 0, prev_prev_ptr = 0;
uint8_t buf[8];
if(page == NULL) return NULL;
// 对于 page只关心位于第一页 8~15 字节的 ptr of unused blk
while(ptr) {
if(unlikely(ptr == prev_ptr)) { // 文件损坏
errno = ESPIPE;
return NULL;
}
if(!(ptr%PAGESZ)) { // 找到符合要求的页
if(lseek(fd, ptr, SEEK_SET) < 0) return NULL;
if(read(fd, buf, 8) != 8) return NULL;
@@ -46,7 +51,7 @@ void* alloc_page(int fd, void* page) {
readle64(fd, ptr);
}
ptr = lseek(fd, 0, SEEK_END);
if(ptr < 0) return NULL;
if((int)ptr < 0) return NULL;
if(ptr%PAGESZ) { // 文件没有页对齐
errno = ESPIPE;
return NULL;
@@ -57,6 +62,7 @@ void* alloc_page(int fd, void* page) {
}
void* get_page(int fd, uint64_t ptr, void* page) {
if(page == NULL) return NULL;
if(ptr%PAGESZ) return NULL;
if(lseek(fd, ptr, SEEK_SET) < 0) return NULL;
putle64(page, ptr);
@@ -66,16 +72,21 @@ void* get_page(int fd, uint64_t ptr, void* page) {
}
// Write an in-memory page back to the file.
// The file offset of the page is taken from the 8 bytes that precede `page`.
// Returns 0 on success, EOF when `page` is NULL or the seek fails,
// and 1 when fewer than PAGESZ bytes could be written.
int sync_page(int fd, void* page) {
    if(!page) return EOF;
    const uint64_t file_off = le64(page-8);
    if(lseek(fd, file_off, SEEK_SET) < 0) return EOF;
    if(write(fd, page, PAGESZ) == PAGESZ) return 0;
    return 1;
}
int free_page(int fd, void* page) {
if(page == NULL) return EOF;
uint64_t ptr = 8, prev_ptr = 0, prev_prev_ptr = 0, page_ptr = le64(page-8);
uint8_t buf[8];
while(ptr && ptr < page_ptr) {
if(prev_ptr == ptr) return EOF;
if(unlikely(ptr == prev_ptr)) { // 文件损坏
errno = ESPIPE;
return EOF;
}
if(prev_prev_ptr && ptr < prev_ptr) { // 不符合顺序,进行一次调整
lseek(fd, prev_prev_ptr, SEEK_SET);
putle64(buf, ptr);
@@ -110,11 +121,16 @@ void* alloc_block(int fd, uint16_t size, void* blk) {
uint8_t buf[8];
uint16_t blksz;
if(blk == NULL) return NULL;
if(size > PAGESZ) return NULL;
// 对于 page只关心位于第一页 8~15 字节的 ptr of unused blk
while(ptr) {
if(lseek(fd, ptr, SEEK_SET) < 0) return NULL;
if(read(fd, buf, 8) != 8) return NULL;
if(unlikely(lseek(fd, ptr, SEEK_SET) < 0)) return NULL;
if(unlikely(read(fd, buf, 8) != 8)) return NULL;
if(unlikely(ptr == prev_ptr)) { // 文件损坏
errno = ESPIPE;
return NULL;
}
readle16(fd, blksz);
if(blksz >= size) { // 找到符合要求的块
if(blksz - size > 10) { // 分裂块
@@ -153,7 +169,7 @@ void* alloc_block(int fd, uint16_t size, void* blk) {
readle64(fd, ptr);
}
ptr = lseek(fd, 0, SEEK_END);
if(ptr < 0) return NULL;
if((int)ptr < 0) return NULL;
if(ptr%PAGESZ) { // 文件没有页对齐
errno = ESPIPE;
return NULL;
@@ -173,6 +189,7 @@ void* alloc_block(int fd, uint16_t size, void* blk) {
}
void* get_block(int fd, uint16_t size, uint64_t ptr, void* blk) {
if(blk == NULL) return NULL;
if(lseek(fd, ptr, SEEK_SET) < 0) return NULL;
putle64(blk, ptr);
putle16(blk+8, size);
@@ -182,6 +199,7 @@ void* get_block(int fd, uint16_t size, uint64_t ptr, void* blk) {
}
int sync_block(int fd, void* blk) {
if(blk == NULL) return EOF;
uint64_t off = le64(blk-10);
uint16_t size = le16(blk-2);
if(size > PAGESZ) {
@@ -193,11 +211,15 @@ int sync_block(int fd, void* blk) {
}
int free_block(int fd, void* blk) {
if(blk == NULL) return EOF;
uint64_t ptr = 8, prev_ptr = 0, prev_prev_ptr = 0, off = le64(blk-10);
uint8_t buf[8];
uint16_t size = le16(blk-2), sz;
while(ptr && ptr < off) {
if(prev_ptr == ptr) return EOF;
if(unlikely(ptr == prev_ptr)) { // 文件损坏
errno = ESPIPE;
return EOF;
}
if(prev_prev_ptr && ptr < prev_ptr) { // 不符合顺序,进行一次调整
lseek(fd, prev_prev_ptr, SEEK_SET);
putle64(buf, ptr);
@@ -247,7 +269,10 @@ int add_block(int fd, uint16_t size, uint64_t off) {
uint8_t buf[8];
uint16_t sz;
while(ptr && ptr < off) {
if(prev_ptr == ptr) return EOF;
if(unlikely(ptr == prev_ptr)) { // 文件损坏
errno = ESPIPE;
return EOF;
}
if(prev_prev_ptr && ptr < prev_ptr) { // 不符合顺序,进行一次调整
lseek(fd, prev_prev_ptr, SEEK_SET);
putle64(buf, ptr);

View File

@@ -1,16 +1,21 @@
#include "../include/types.h"
#include "../include/types/int8.h"
#include "../include/types/int16.h"
// ptr = init(fd)
typedef void* (*_type_init_t)(int, void*);
// ptr = load(fd, ptr)
typedef void* (*_type_load_t)(int, uint64_t, void*);
// ret = rm(fd, index)
typedef int (*_type_remove_t)(int, void*);
// n = count(fd, index)
typedef uint64_t (*_type_count_t)(int, void*);
// ret = insert_item(fd, index, k, ptr)
typedef int (*_insert_item_t)(int, void*, key_t, uint64_t);
// ptr = find_by_key(fd, index, k)
typedef uint64_t (*_find_by_key_t)(int, void*, key_t);
// ret = remove_by_key(fd, index, k)
typedef int (*_remove_by_key_t)(int, void*, key_t);
typedef uint64_t (*_remove_by_key_t)(int, void*, key_t);
// Function not implemented
static void* create_not_impl_index(int fd, void* buf) {
@@ -24,6 +29,18 @@ static void* load_not_impl_index(int fd, uint64_t ptr, void* buf) {
return 0;
}
// Placeholder used for index types whose removal is not implemented yet.
// Sets errno to ENOSYS and returns -1: callers treat a non-zero result of the
// remove functions as failure, so returning 0 here would report a removal
// that never happened.
static int remove_not_impl_index(int fd, void* index) {
    (void)fd;
    (void)index;
    errno = ENOSYS;
    return -1;
}
// Placeholder used for index types whose counting is not implemented yet.
// Always reports zero entries and sets errno to ENOSYS.
static uint64_t count_not_impl_items(int fd, void* index) {
    (void)fd;
    (void)index;
    const uint64_t none = 0;
    errno = ENOSYS;
    return none;
}
// Function not implemented
static int insert_not_impl_item(int fd, void* index, key_t k, uint64_t ptr) {
errno = ENOSYS;
@@ -37,14 +54,14 @@ static uint64_t find_item_by_not_impl_key(int fd, void* index, key_t k) {
}
// Function not implemented
static int remove_item_by_not_impl_key(int fd, void* index, key_t k) {
static uint64_t remove_item_by_not_impl_key(int fd, void* index, key_t k) {
errno = ENOSYS;
return 0;
}
static _type_init_t _types_init[] = {
create_int8_index,
create_not_impl_index,
create_int16_index,
create_not_impl_index,
create_not_impl_index,
create_not_impl_index,
@@ -54,7 +71,7 @@ static _type_init_t _types_init[] = {
static _type_load_t _types_load[] = {
load_int8_index,
load_not_impl_index,
load_int16_index,
load_not_impl_index,
load_not_impl_index,
load_not_impl_index,
@@ -62,9 +79,29 @@ static _type_load_t _types_load[] = {
load_not_impl_index
};
// Index removal handlers, selected with `t&7` by remove_index().
// The mask yields values 0..7, so the table carries eight slots; the last one
// is a not-implemented placeholder so that index 7 never reads past the array.
static _type_remove_t _types_remove[] = {
    remove_int8_index,
    remove_int16_index,
    remove_not_impl_index,
    remove_not_impl_index,
    remove_not_impl_index,
    remove_not_impl_index,
    remove_not_impl_index,
    remove_not_impl_index
};
// Entry counting handlers, selected with `t&7` by count_items().
// The mask yields values 0..7, so the table carries eight slots; the last one
// is a not-implemented placeholder so that index 7 never reads past the array.
static _type_count_t _types_count[] = {
    count_int8_items,
    count_int16_items,
    count_not_impl_items,
    count_not_impl_items,
    count_not_impl_items,
    count_not_impl_items,
    count_not_impl_items,
    count_not_impl_items
};
static _insert_item_t _insert_item[] = {
insert_int8_item,
insert_not_impl_item,
insert_int16_item,
insert_not_impl_item,
insert_not_impl_item,
insert_not_impl_item,
@@ -74,7 +111,7 @@ static _insert_item_t _insert_item[] = {
static _find_by_key_t _find_item_by_key[] = {
find_item_by_int8_key,
find_item_by_not_impl_key,
find_item_by_int16_key,
find_item_by_not_impl_key,
find_item_by_not_impl_key,
find_item_by_not_impl_key,
@@ -84,7 +121,7 @@ static _find_by_key_t _find_item_by_key[] = {
static _remove_by_key_t _remove_item_by_key[] = {
remove_item_by_int8_key,
remove_item_by_not_impl_key,
remove_item_by_int16_key,
remove_item_by_not_impl_key,
remove_item_by_not_impl_key,
remove_item_by_not_impl_key,
@@ -100,6 +137,14 @@ void* load_index(int fd, type_t t, uint64_t ptr, void* buf) {
return _types_load[t&7](fd, ptr, buf);
}
// Remove an index of type `t` and release its space by forwarding to the
// handler registered for that type.
int remove_index(int fd, type_t t, void* index) {
    _type_remove_t handler = _types_remove[t&7];
    return handler(fd, index);
}
// Count the entries of an index of type `t` by forwarding to the handler
// registered for that type.
uint64_t count_items(int fd, type_t t, void* index) {
    _type_count_t handler = _types_count[t&7];
    return handler(fd, index);
}
// Insert the mapping k -> ptr into an index of type `t` by forwarding to the
// handler registered for that type.
int insert_item(int fd, type_t t, void* index, key_t k, uint64_t ptr) {
    _insert_item_t handler = _insert_item[t&7];
    return handler(fd, index, k, ptr);
}
@@ -108,6 +153,6 @@ uint64_t find_item_by_key(int fd, type_t t, void* index, key_t k) {
return _find_item_by_key[t&7](fd, index, k);
}
int remove_item_by_key(int fd, type_t t, void* index, key_t k) {
uint64_t remove_item_by_key(int fd, type_t t, void* index, key_t k) {
return _remove_item_by_key[t&7](fd, index, k);
}

420
src/types/int16.c Normal file
View File

@@ -0,0 +1,420 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include "../../include/binary.h"
#include "../../include/page.h"
#include "../../include/types/int16.h"
// Create an empty int16 index: one index block plus two zeroed bitmap pages.
// len(buf) >= INT16_INDEX_SZ+10 + INT16_BITMAP_SZ+8*2 = 10290
// &buf[0] ~ &buf[2081] is index, index = buf+10
// &buf[2082] ~ &buf[6185] is the first page of bitmap, ptr = buf+2090
// &buf[6186] ~ &buf[10289] is the second page of bitmap, ptr = buf+6194
// Returns: index = buf+10, or NULL on failure. On failure everything that was
// allocated so far is released again and errno of the failing step is kept.
void* create_int16_index(int fd, void* buf) {
    void* index;
    void* page = NULL;
    void* page2 = NULL;
    int saved_errno;

    index = alloc_block(fd, INT16_INDEX_SZ, buf);
    if(index == NULL) return NULL;
    memset(index, 0, INT16_INDEX_SZ);

    // first bitmap page lives right behind the index block in buf
    page = alloc_page(fd, index+INT16_INDEX_SZ);
    if(page == NULL) goto fail;
    memset(page, 0, PAGESZ);
    if(sync_page(fd, page)) goto fail;

    // second bitmap page follows the first one
    page2 = alloc_page(fd, page+PAGESZ);
    if(page2 == NULL) goto fail;
    memset(page2, 0, PAGESZ);
    if(sync_page(fd, page2)) goto fail;

    // record the file offsets of both bitmap pages at the start of the index
    putle64(index, le64(page-8));
    putle64(index+8, le64(page2-8));
    if(sync_block(fd, index)) goto fail;
    return index;

fail:
    saved_errno = errno; // the cleanup calls below may overwrite errno
    if(page2 != NULL) free_page(fd, page2);
    if(page != NULL) free_page(fd, page);
    free_block(fd, index);
    errno = saved_errno;
    return NULL;
}
// Load an int16 index whose index block is stored at file offset ptr,
// together with its two bitmap pages.
// len(buf) >= INT16_INDEX_SZ+10 + INT16_BITMAP_SZ+8*2 = 10290
// &buf[0] ~ &buf[2081] is index, index = buf+10
// &buf[2082] ~ &buf[6185] is the first page of bitmap, ptr = buf+2090
// &buf[6186] ~ &buf[10289] is the second page of bitmap, ptr = buf+6194
// Returns: index = buf+10, or NULL if any of the three loads fails.
void* load_int16_index(int fd, uint64_t ptr, void* buf) {
    void* index = get_block(fd, INT16_INDEX_SZ, ptr, buf);
    if(index == NULL) return NULL;
    // the index starts with the file offsets of the two bitmap pages
    void* page = get_page(fd, le64(index), index+INT16_INDEX_SZ);
    if(page == NULL) return NULL; // must not be used as a base for the second page buffer
    if(get_page(fd, le64(index+8), page+PAGESZ) == NULL) return NULL;
    return index;
}
// Remove an int16 index and release its space: every block of the pointer
// chain, both bitmap pages and finally the index block itself.
// Returns 0 on success, EOF when `index` is NULL or the chain cannot be read
// or released, 2 / 3 when the first / second bitmap page cannot be freed,
// otherwise the result of freeing the index block.
int remove_int16_index(int fd, void* index) {
    if(index == NULL) return EOF;
    uint64_t ptr = le64(index+16); // head of the chain
    uint8_t raw[8];
    while(ptr) {
        if(unlikely(lseek(fd, ptr, SEEK_SET) < 0)) return EOF;
        if(unlikely(read(fd, raw, 8) != 8)) return EOF;
        uint64_t next = le64(raw); // must be read before the block is released
        if(unlikely(next == ptr)) { // block points at itself: corrupted file
            errno = ESPIPE;
            return EOF;
        }
        if(unlikely(add_block(fd, INT16_CHAIN_SZ, ptr))) return EOF;
        ptr = next;
    }
    if(free_page(fd, index+INT16_INDEX_SZ+8)) return 2; // first bitmap page
    if(free_page(fd, index+INT16_INDEX_SZ+8+PAGESZ+8)) return 3; // second bitmap page
    return free_block(fd, index); // index block
}
// Count the entries of the index by summing the 256 per-group counters.
// A counter is a single byte, so a completely filled group (256 keys) wraps to
// 0; such a group is recognised by a zero counter whose first bitmap byte is
// not zero and is counted as 256.
uint64_t count_int16_items(int fd, void* index) {
    const uint8_t* counters = (const uint8_t*)(index+24);
    int entries = 0;
    for(int group = 0; group < 256; group++) {
        // groups 128..255 are in the second bitmap page, which sits 8 bytes
        // further because of its own page header
        const uint8_t* bitmap = (const uint8_t*)(index+INT16_INDEX_SZ+8 + (group < 128 ? 0 : 8));
        int filled = counters[group];
        if(unlikely(!filled && bitmap[group*32])) filled = 256;
        entries += filled;
    }
    return entries;
}
int insert_int16_item(int fd, void* index, key_t k, uint64_t ptr) {
int isexist, sum = 0, total = count_int16_items(fd, index);
uint16_t key = (uint16_t)k;
char buf[8];
char tmp[10+INT16_CHAIN_SZ];
putle64(buf, ptr);
#ifdef DEBUG
printf("No.%u: ", (int)key);
#endif
if(key < 32768) {
// key是否已存在
isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & (128>>(key%8));
// 查找 key 之前共有多少索引
for(int i = key/256*32; i < key/8; i++) { // 从未计算的32位组开始算起
sum += __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8))[i]);
}
sum += __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & ~(0xff>>(key%8)));
#ifdef DEBUG
printf("popc: %d, ", sum);
#endif
for(int i = 0; i < key/256; i++) {
int s = ((uint8_t*)(index+24))[i];
if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) {
sum += 256;
continue;
}
sum += s;
}
#ifdef DEBUG
printf("sum: %d, totl: %d, ", sum, total);
#endif
} else {
// key是否已存在
isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & (128>>(key%8));
// 查找 key 之前共有多少索引
for(int i = key/256*32; i < key/8; i++) { // 从未计算的32位组开始算起
sum += __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8+8))[i]);
}
sum += __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & ~(0xff>>(key%8)));
#ifdef DEBUG
printf("popc: %d, ", sum);
#endif
for(int i = 0; i < 128; i++) {
int s = ((uint8_t*)(index+24))[i];
if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) {
sum += 256;
continue;
}
sum += s;
}
for(int i = 128; i < key/256; i++) {
int s = ((uint8_t*)(index+24))[i];
if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[i*32])) {
sum += 256;
continue;
}
sum += s;
}
#ifdef DEBUG
printf("sum: %d, totl: %d, ", sum, total);
#endif
}
if(!isexist) {
// 写入位图
if(key<32768) {
((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] |= 128>>(key%8);
if(unlikely(sync_page(fd, index+INT16_INDEX_SZ+8))) { // 失败,撤销更改
((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] &= ~(128>>(key%8));
return EOF;
}
} else {
((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] |= 128>>(key%8);
if(unlikely(sync_page(fd, index+INT16_INDEX_SZ+8+PAGESZ+8))) { // 失败,撤销更改
((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] &= ~(128>>(key%8));
return EOF;
}
}
((uint8_t*)(index+24))[key/256]++; // 写入位图索引
if(unlikely(sync_block(fd, index))) { // 失败,撤销更改
((uint8_t*)(index+24))[key/256]--;
return EOF;
}
#ifdef DEBUG
printf("i: %d, sumblk: %d, map: %02x, ", key/256, ((uint8_t*)(index+24))[key/256], ((uint8_t*)(index+INT16_INDEX_SZ+8+((key<32768)?0:8)))[key/8]);
#endif
}
if(unlikely(!le64(index+16))) { // 插入的是本索引的第一个值
if(alloc_block(fd, INT16_CHAIN_SZ, tmp) == NULL) return EOF;
memcpy(index+16, tmp, 8); // 记录第一个链表的指针
sync_block(fd, index); // 同步索引到文件
memset(tmp+10, 0, INT16_CHAIN_SZ); // 清空
memcpy(tmp+10+8, buf, 8); // 写入 item 位置
return sync_block(fd, tmp+10); // 同步链表到文件
}
if(isexist) { // 索引已存在,仅替换指针
ptr = le64(index+16);
for(int i = 0; i < sum/256; i++) {
if(lseek(fd, ptr, SEEK_SET) < 0) return EOF;
readle64(fd, ptr);
if(unlikely(!ptr)) {
errno = ESPIPE;
return EOF;
}
}
lseek(fd, 8*(sum%256+1), SEEK_CUR);
#ifdef DEBUG
puts("replace");
#endif
return write(fd, buf, 8) != 8;
}
// 索引不存在,需要搬移,统一向后移一个指针
uint64_t prev_ptr, first_ptr = 1;
ptr = le64(index+16);
if(total%256 == 255) { // 旧链表刚好装满,需要新分配一个
while(ptr && first_ptr) { // 遍历到末尾
prev_ptr = ptr;
if(lseek(fd, ptr, SEEK_SET) < 0) return EOF;
readle64(fd, ptr);
readle64(fd, first_ptr);
if(unlikely(ptr == prev_ptr)) { // 文件损坏
errno = ESPIPE;
return EOF;
}
}
if(first_ptr) { // 需要分配
if(alloc_block(fd, INT16_CHAIN_SZ, tmp) == NULL) return EOF;
lseek(fd, prev_ptr, SEEK_SET);
write(fd, tmp, 8); // 将新分配的块附加到链表
prev_ptr = ptr;
ptr = le64(tmp);
memset(tmp+10, 0, INT16_CHAIN_SZ); // 清空新链表
sync_block(fd, tmp+10);
}
// 存在之前分配好的,但是由于删除索引而弃用的块,因此无需新分配
}
if(sum == total) { // 恰好在最后添加
while(ptr && first_ptr) { // 遍历到末尾
prev_ptr = ptr;
if(lseek(fd, ptr, SEEK_SET) < 0) return EOF;
readle64(fd, ptr);
readle64(fd, first_ptr);
if(unlikely(ptr == prev_ptr)) { // 文件损坏
errno = ESPIPE;
return EOF;
}
}
// 定位到最后一个未满块或第一个空块上的最后
lseek(fd, prev_ptr+8*(sum%256+1), SEEK_SET);
#ifdef DEBUG
puts("append");
#endif
return write(fd, buf, 8) != 8; // 写入
}
// 定位回链表头
lseek(fd, le64(index+16), SEEK_SET);
// 跳转到应当存入的块
for(int i = 0; i < sum/256; i++) {
readle64(fd, ptr);
if(unlikely(!ptr)) {
errno = ESPIPE;
return EOF; // 不应当出现,如果出现说明文件损坏
}
lseek(fd, ptr, SEEK_SET);
}
// 搬移
int offset = sum%256; // 搬移开始的位置,也是应当存入的位置
readle64(fd, ptr); // 下一个块指针
#ifdef DEBUG
printf("off: %d, ", offset);
#endif
if(offset) { // 具有偏移,先定位到偏移
lseek(fd, offset*8, SEEK_CUR);
}
readle64(fd, prev_ptr); // 读取第一个 item 指针
#ifdef DEBUG
printf("first item: %llu\n", prev_ptr);
#endif
lseek(fd, -8, SEEK_CUR); // 返回
write(fd, buf, 8); // 插入
while(prev_ptr) { // 一直搬移到末尾
if(unlikely(offset && !(offset++%256))) { // 进入新的块
lseek(fd, ptr, SEEK_SET);
readle64(fd, first_ptr); // 下一个块指针
if(unlikely(first_ptr == ptr)) { // 文件损坏
errno = ESPIPE;
return EOF;
}
ptr = first_ptr;
}
putle64(buf, prev_ptr); // 以备写入
readle64(fd, prev_ptr); // 读取下一个 item 指针
lseek(fd, -8, SEEK_CUR); // 返回
write(fd, buf, 8); // 搬移一个指针
}
return 0;
}
uint64_t find_item_by_int16_key(int fd, void* index, key_t k) {
uint64_t ptr;
uint16_t key = (uint16_t)k;
uint16_t sum;
if(key < 32768) {
int isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & (128>>(key%8));
if(!isexist) return 0;
sum = __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & ~(0xff>>(key%8)));
for(int i = 0; i < key/256; i++) {
int s = ((uint8_t*)(index+24))[i];
if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) {
sum += 256;
continue;
}
sum += s;
}
} else {
int isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & (128>>(key%8));
if(!isexist) return 0;
sum = __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & ~(0xff>>(key%8)));
for(int i = 0; i < 128; i++) {
int s = ((uint8_t*)(index+24))[i];
if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) {
sum += 256;
continue;
}
sum += s;
}
for(int i = 128; i < key/256; i++) {
int s = ((uint8_t*)(index+24))[i];
if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[i*32])) {
sum += 256;
continue;
}
sum += s;
}
}
ptr = le64(index+16);
for(int i = 0; i < sum/256 && ptr; i++) {
if(lseek(fd, ptr, SEEK_SET) < 0) return EOF;
readle64(fd, ptr);
}
if(!ptr) return EOF;
ptr += 8*(sum%256+1);
if(lseek(fd, ptr, SEEK_SET) < 0) return EOF;
readle64(fd, ptr);
return ptr;
}
uint64_t remove_item_by_int16_key(int fd, void* index, key_t k) {
uint64_t ptr;
uint16_t key = (uint16_t)k;
uint16_t sum;
char buf[8];
if(key < 32768) {
int isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & (128>>(key%8));
if(!isexist) return 0;
sum = __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8))[key/8] & ~(0xff>>(key%8)));
for(int i = 0; i < key/256; i++) {
int s = ((uint8_t*)(index+24))[i];
if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) {
sum += 256;
continue;
}
sum += s;
}
} else {
int isexist = ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & (128>>(key%8));
if(!isexist) return 0;
sum = __builtin_popcount(((uint8_t*)(index+INT16_INDEX_SZ+8+8))[key/8] & ~(0xff>>(key%8)));
for(int i = 0; i < 128; i++) {
int s = ((uint8_t*)(index+24))[i];
if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8))[i*32])) {
sum += 256;
continue;
}
sum += s;
}
for(int i = 128; i < key/256; i++) {
int s = ((uint8_t*)(index+24))[i];
if(unlikely(!s && ((uint8_t*)(index+INT16_INDEX_SZ+8+8))[i*32])) {
sum += 256;
continue;
}
sum += s;
}
}
ptr = le64(index+16);
for(int i = 0; i < sum/256 && ptr; i++) {
if(lseek(fd, ptr, SEEK_SET) < 0) return EOF;
readle64(fd, ptr);
}
if(!ptr) return EOF;
int offset = sum%256;
uint64_t cur_ptr = ptr+8*(offset+1), next_ptr, first_ptr = 0;
if(lseek(fd, ptr, SEEK_SET) < 0) return EOF;
readle64(fd, next_ptr);
lseek(fd, cur_ptr, SEEK_SET); // 回到开头
readle64(fd, ptr); // 返回值
cur_ptr += 8;
do { // 一直循环到末尾
if(unlikely(offset++%256 == 255)) { // 当前位于末尾,需要从下一页取值
// 换下一页
if(lseek(fd, next_ptr+8, SEEK_SET) < 0) return EOF;
readle64(fd, first_ptr); // 读后一个值
// 回上一页
lseek(fd, cur_ptr, SEEK_SET);
putle64(buf, first_ptr);
write(fd, buf, 8); // 覆盖
// 换下一页
cur_ptr = lseek(fd, next_ptr, SEEK_SET)+8;
readle64(fd, next_ptr);
lseek(fd, 8, SEEK_CUR);
continue;
}
// 当前后方至少还有一个指针,可以直接取值
readle64(fd, first_ptr); // 读下一个值
lseek(fd, -16, SEEK_CUR); // 回原处
putle64(buf, first_ptr);
write(fd, buf, 8); // 覆盖
cur_ptr += 8;
lseek(fd, 8, SEEK_CUR);
} while(first_ptr);
return ptr;
}

View File

@@ -16,6 +16,16 @@ void* load_int8_index(int fd, uint64_t ptr, void* buf) {
return get_block(fd, INT8_INDEX_SZ, ptr, buf);
}
// Remove an int8 index: the whole index is a single block, so releasing that
// block is all that is needed. Returns the result of free_block().
int remove_int8_index(int fd, void* index) {
    const int rc = free_block(fd, index);
    return rc;
}
// Count the entries of an int8 index: the index is a table of 256 item
// pointers, and every non-zero pointer is one stored key.
uint64_t count_int8_items(int fd, void* index) {
    const uint64_t* slot = (const uint64_t*)index;
    const uint64_t* const end = slot + 256;
    uint64_t used = 0;
    while(slot != end) {
        if(*slot++ != 0) used++;
    }
    return used;
}
int insert_int8_item(int fd, void* index, key_t k, uint64_t ptr) {
uint8_t key = (uint8_t)k;
((uint64_t*)index)[key] = ptr;
@@ -27,8 +37,9 @@ uint64_t find_item_by_int8_key(int fd, void* index, key_t k) {
return ((uint64_t*)index)[key];
}
int remove_item_by_int8_key(int fd, void* index, key_t k) {
uint64_t remove_item_by_int8_key(int fd, void* index, key_t k) {
uint8_t key = (uint8_t)k;
uint64_t ptr = ((uint64_t*)index)[key];
((uint64_t*)index)[key] = 0;
return 0;
return ptr;
}

View File

@@ -3,7 +3,7 @@ project(fumidb_test VERSION 1.0)
add_executable(binary_test binary_test.c)
add_executable(page_test page_test.c ../src/page.c ../src/file.c)
add_executable(types_test types_test.c ../src/types.c ../src/types/int8.c ../src/page.c ../src/file.c)
add_executable(types_test types_test.c ../src/types.c ../src/types/int8.c ../src/types/int16.c ../src/page.c ../src/file.c)
add_test(test_binary binary_test COMMAND binary_test)
add_test(test_page page_test COMMAND page_test)

View File

@@ -1,4 +1,4 @@
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
@@ -8,7 +8,7 @@
#include "../include/types.h"
#include "../include/types/int8.h"
uint8_t int8buf[INT8_INDEX_SZ+10];
uint8_t buf[10290];
int main() {
/* test int8 */
@@ -18,9 +18,9 @@ int main() {
return 1;
}
if(init_file_header_page(fd) < 0) return 2;
void* index = create_index(fd, TYPE_INT8, int8buf);
void* index = create_index(fd, TYPE_INT8, buf);
if(!index) {
perror("create_index");
perror("create_int8_index");
return 3;
}
if(le64(index-10) != HEADERSZ) {
@@ -36,6 +36,7 @@ int main() {
insert_item(fd, TYPE_INT8, index, 45, 345743415);
insert_item(fd, TYPE_INT8, index, 67, 56787145);
insert_item(fd, TYPE_INT8, index, 123, 123567854424);
if(count_items(fd, TYPE_INT8, index) != 5) return 6;
if(find_item_by_key(fd, TYPE_INT8, index, 1) != 3456432) return 6;
if(find_item_by_key(fd, TYPE_INT8, index, 3) != 7654323456) return 7;
if(find_item_by_key(fd, TYPE_INT8, index, 45) != 345743415) return 8;
@@ -45,8 +46,8 @@ int main() {
index = NULL;
close(fd);
fd = open("types_test_tmp.bin", O_RDWR, 0644);
memset(int8buf, 0, INT8_INDEX_SZ+10);
index = load_index(fd, TYPE_INT8, HEADERSZ, int8buf);
memset(buf, 0, INT8_INDEX_SZ+10);
index = load_index(fd, TYPE_INT8, HEADERSZ, buf);
if(find_item_by_key(fd, TYPE_INT8, index, 1) != 3456432) return 6;
if(find_item_by_key(fd, TYPE_INT8, index, 3) != 7654323456) return 7;
if(find_item_by_key(fd, TYPE_INT8, index, 45) != 345743415) return 8;
@@ -55,7 +56,44 @@ int main() {
if(find_item_by_key(fd, TYPE_INT8, index, 255) != 0) return 11;
remove_item_by_key(fd, TYPE_INT8, index, 123);
if(find_item_by_key(fd, TYPE_INT8, index, 123) != 0) return 12;
if(count_items(fd, TYPE_INT8, index) != 4) return 13;
if(remove_index(fd, TYPE_INT8, index)) return 14;
index = create_index(fd, TYPE_INT8, buf);
if(!index) {
perror("create_int8_index");
return 3;
}
if(count_items(fd, TYPE_INT8, index) != 0) return 15;
close(fd);
/* end test int8 */
/* test int16 */
fd = open("types_test_tmp.bin", O_RDWR | O_CREAT | O_TRUNC, 0644);
if(fd < 0) {
perror("create");
return 1;
}
if(init_file_header_page(fd) < 0) return 2;
index = create_index(fd, TYPE_INT16, buf);
if(!index) {
perror("create_int16_index");
return 3;
}
for(int i = 57344, cnt = 0; i < 65536+4099; i++, cnt++) {
int n;
if((n=count_items(fd, TYPE_INT16, index)) != cnt) {
printf("%d != %d\n", cnt, n);
return 4;
}
if(insert_item(fd, TYPE_INT16, index, (key_t)i, i)) {
printf("%u ", (uint16_t)i);
fflush(stdout);
perror("insert_int16_item");
return 4;
}
}
close(fd);
/* end test int16 */
// remove("types_test_tmp.bin");
}