From 67b9176615d0fbd701629bc98b171a2ce7561164 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=BA=90=E6=96=87=E9=9B=A8?= <41315874+fumiama@users.noreply.github.com> Date: Sun, 1 May 2022 21:16:17 +0800 Subject: [PATCH] add types --- api/dbfile.md | 2 +- api/index.md | 3 +- api/table.md | 2 +- api/types.md | 8 ++-- include/file.h | 14 +++++++ include/page.h | 12 ++++++ include/table.h | 6 +++ include/types.h | 38 ++++++++++++++++++ include/types/int8.h | 16 ++++++++ src/file.c | 10 ++++- src/page.c | 30 ++++++++++++++- src/table.c | 0 src/types.c | 91 ++++++++++++++++++++++++++++++++++++++++++++ src/types/int8.c | 29 ++++++++++++++ tests/CMakeLists.txt | 6 ++- tests/page_test.c | 48 +++++++++++++++-------- tests/types_test.c | 21 ++++++++++ 17 files changed, 310 insertions(+), 26 deletions(-) create mode 100644 include/table.h create mode 100644 include/types.h create mode 100644 include/types/int8.h create mode 100644 src/table.c create mode 100644 src/types.c create mode 100644 src/types/int8.c create mode 100644 tests/types_test.c diff --git a/api/dbfile.md b/api/dbfile.md index a4ecef1..143aeaa 100644 --- a/api/dbfile.md +++ b/api/dbfile.md @@ -3,7 +3,7 @@ 由于文件内普遍以uint64作为指针,因此理论最大支持文件大小为`16384PB`,在现有条件下完全足够使用。 ## 文件头 -文件最开头填充了固定的8字节ascii编码`FUMIDB\1\0`。前6字节必须为`FUMIDB`,表明本文件为`fumidb`数据库文件格式。7, 8字节`\1\0`作为数据库的版本,是一个小端的uint16,在这里表示第1版,之后将依次递增。今后可能会在文件头增加更多字段。 +文件最开头填充了固定的8字节ascii编码`FUMIDB\1\0`。前6字节必须为`FUMIDB`,表明本文件为`fumidb`数据库文件格式。7, 8字节`\1\0`作为数据库的版本,是一个小端的uint16,在这里表示第1版,之后将依次递增。今后可能会在文件头增加更多字段,因此前`256`字节保留以备后用。 ``` 0 8 16 ┌───────────────────┬───────────────────┐ diff --git a/api/index.md b/api/index.md index 469fdee..61ca000 100644 --- a/api/index.md +++ b/api/index.md @@ -10,6 +10,7 @@ │ ptr 000 │ ptr 001 │ ptr ... │ ptr 255 │ └─────────┴─────────┴─────────┴─────────┘ ``` +特别地,当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。 ## int16 > 查找速度为 > - 无该表项:O(1) @@ -39,7 +40,7 @@ │next ptr│ ptr000 │ ptr... │ ptr255 │ └────────┴────────┴────────┴────────┘ ``` - +特别地,当值可重复时,索引指向的是一个链表的头,详见[types](/api/types.md#数字)。 ## int32/float 使用B+树建立索引,每个节点大小为`4096`字节,最多可有`n=341`个扇出,`340`个值;最少则有`170`个值(根节点不遵守最少值规则)。 > 下面每格4字节 diff --git a/api/table.md b/api/table.md index 101cf10..9d5eea4 100644 --- a/api/table.md +++ b/api/table.md @@ -1,6 +1,6 @@ # 数据表格式 ## 表头 -如下所示,加上文件中附加的`ptr of next table`,表头永远是`4k`对齐的,其中行类型列表的No.1自动成为主键,强制应用`unique`类型修饰符;`data blocks`可以为任意数据,如索引,表项等。由于数据块均为定长,增加时直接添加或重用已删除区块,修改时直接覆盖,删除时直接在索引中移除该项,将块首地址附加到已删除块的链表即可。 +如下所示,加上文件中附加的`ptr of next table`,表头永远是`4k`对齐的,其中行类型列表的No.1自动成为主键,强制应用`unique`+`nonnull`类型修饰符;`data blocks`可以为任意数据,如索引,表项等。由于数据块均为定长,增加时直接添加或重用已删除区块,修改时直接覆盖,删除时直接在索引中移除该项,将块首地址附加到已删除块的链表即可。 ``` ┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐ │ 0 - 7 │ 8 - 15 │ 16 -- 23 │ 24 -- 31 │ 32 -- 39 │ 40 -- 47 │ 48 -- 55 │ 56 -- 63 │ diff --git a/api/types.md b/api/types.md index c63d70c..3246cf8 100644 --- a/api/types.md +++ b/api/types.md @@ -7,9 +7,9 @@ ### 类型列表 | 类型代号 | 类型 | 存储方式 | | --- | --- | --- | -| 0 | int8 | 直接存储 | -| 1 | int16 | 直接存储 | -| 2 | int32 | 直接存储 | +| 0 | int8 | 下一个哈希相同的数据项的指针(当值可重复时)+int8 | +| 1 | int16 | 下一个哈希相同的数据项的指针(当值可重复时)+int16 | +| 2 | int32 | 直接存储 | | 3 | int64 | 直接存储 | | 4 | float | 直接存储 | | 5 | double | 直接存储 | @@ -19,7 +19,7 @@ ### 类型列表 | 类型代号 | 类型 | 存储方式 | | --- | --- | --- | -| 6 | string | 下一个哈希相同的数据项的指针(uint64)+该长度的数据(字节数组) | +| 6 | string | 下一个哈希相同的数据项的指针+该长度的数据(字节数组) | ## 二进制数据块 > 二进制数据块为定长(不超过uint64)的不参与索引的字节数组。 diff --git a/include/file.h b/include/file.h index 15fc1c5..2e2fb03 100644 --- a/include/file.h +++ b/include/file.h @@ -3,19 +3,33 @@ #include +#define HEADERSZ 256 + // 初始化并写入数据库文件头 +// 返回: +// 0 成功 +// 1 write 失败 +// EOF lseek 失败 int init_file_header_page(int fd); // 获得数据库版本 uint16_t get_db_version(int fd); // 设置 ptr of unused blk 字段 +// 返回: +// 0 成功 +// 1 write 失败 +// EOF lseek 失败 int set_first_unused_block(int fd, uint64_t ptr); // 获得 ptr of unused blk 字段 uint64_t get_first_unused_block(int fd); // 设置 ptr of next table 字段 +// 返回: +// 0 成功 +// 1 write 失败 +// EOF lseek 失败 int set_first_table(int fd, uint64_t ptr); // 获得 ptr of next table 字段 diff --git a/include/page.h b/include/page.h index 2cee124..9b1e7b9 100644 --- a/include/page.h +++ b/include/page.h @@ -16,6 +16,12 @@ // page 指针 void* alloc_page(int fd); +// 获取 ptr 处的页 +// 返回: +// NULL 错误,参见 errno +// page 指针 +void* get_page(int fd, uint64_t ptr); + // 刷新一页 // 返回: // EOF lseek 错误,参见 errno @@ -44,6 +50,12 @@ int free_page(int fd, void* page); // blk 指针 void* alloc_block(int fd, uint16_t size); +// 获取 ptr 处的块 +// 返回: +// NULL 错误,参见 errno +// blk 指针 +void* get_block(int fd, uint16_t size, uint64_t ptr); + // 刷新 block 到文件 // 返回: // EOF lseek 错误或 size 过大,参见 errno diff --git a/include/table.h b/include/table.h new file mode 100644 index 0000000..d419a77 --- /dev/null +++ b/include/table.h @@ -0,0 +1,6 @@ +#ifndef _TABLE_H_ +#define _TABLE_H_ + + + +#endif \ No newline at end of file diff --git a/include/types.h b/include/types.h new file mode 100644 index 0000000..eee9d1c --- /dev/null +++ b/include/types.h @@ -0,0 +1,38 @@ +#ifndef _TYPES_H_ +#define _TYPES_H_ + +#include +#include + +#define TYPE_INT8 0 +#define TYPE_INT16 1 +#define TYPE_INT32 2 +#define TYPE_INT64 3 +#define TYPE_FLOAT 4 +#define TYPE_DOUBLE 5 +#define TYPE_STRING 6 +#define TYPE_BINARY 7 + +#define EXTYPE_NULL 0x00 +#define EXTYPE_UNIQUE 0x40 +#define EXTYPE_NONNULL 0x80 +#define EXTYPE_FOREIGNKEY 0xc0 + +typedef uint8_t type_t; +typedef uint64_t key_t; + +// 为类型 type 创建索引 +// 返回:索引头节点的指针 index +uint64_t create_index(int fd, type_t t); + +// 插入一条索引 +int insert_item(int fd, type_t t, uint64_t index, key_t k, uint64_t ptr); + +// 使用索引查找目标 +// 返回:ptr +uint64_t find_item_by_key(int fd, type_t t, uint64_t index, key_t k); + +// 使用索引删除项 +int remove_item_by_key(int fd, type_t t, uint64_t index, key_t k); + +#endif \ No newline at end of file diff --git a/include/types/int8.h b/include/types/int8.h new file mode 100644 index 0000000..fee602f --- /dev/null +++ b/include/types/int8.h @@ -0,0 +1,16 @@ +#ifndef _TYPE_INT8_H_ +#define _TYPE_INT8_H_ + +#include "../types.h" + +#define INT8_INDEX_SZ (256*8) + +uint64_t create_int8_index(int fd); + +int insert_int8_item(int fd, uint64_t index, key_t k, uint64_t ptr); + +uint64_t find_item_by_int8_key(int fd, uint64_t index, key_t k); + +int remove_item_by_int8_key(int fd, uint64_t index, key_t k); + +#endif \ No newline at end of file diff --git a/src/file.c b/src/file.c index 687a8be..e359dab 100644 --- a/src/file.c +++ b/src/file.c @@ -9,8 +9,16 @@ uint8_t header[PAGESZ] = {'F', 'U', 'M', 'I', 'D', 'B', 1, 0}; int init_file_header_page(int fd) { + uint8_t buf[8]; lseek(fd, 0, SEEK_SET); - return write(fd, header, PAGESZ) != PAGESZ; + if(write(fd, header, PAGESZ) != PAGESZ) return 1; + // 将头的 HEADERSZ 字节之后的空间纳入空闲块 + if(lseek(fd, 8, SEEK_SET) < 0) return EOF; + putle64(buf, HEADERSZ); + if(write(fd, buf, 8) != 8) return 1; + if(lseek(fd, HEADERSZ+8, SEEK_SET) < 0) return EOF; + putle16(buf, PAGESZ - HEADERSZ); + return write(fd, buf, 2) != 2; } uint16_t get_db_version(int fd) { diff --git a/src/page.c b/src/page.c index 6b9d0a2..8c5fa11 100644 --- a/src/page.c +++ b/src/page.c @@ -10,7 +10,7 @@ #include "../include/binary.h" #include "../include/page.h" -uint8_t nullpage[PAGESZ]; +static const uint8_t nullpage[PAGESZ]; void* alloc_page(int fd) { uint64_t ptr = 8, prev_ptr = 0, prev_prev_ptr = 0; @@ -62,6 +62,20 @@ void* alloc_page(int fd) { return page+8; } +void* get_page(int fd, uint64_t ptr) { + if(ptr%PAGESZ) return NULL; + if(lseek(fd, ptr, SEEK_SET) < 0) return NULL; + void* page = malloc(PAGESZ+8); + if(page == NULL) return NULL; + putle64(page, ptr); + page += 8; + if(read(fd, page, PAGESZ) != PAGESZ) { + free(page-8); + return NULL; + } + return page; +} + int sync_page(int fd, void* page) { uint64_t ptr = le64(page-8); if(lseek(fd, ptr, SEEK_SET) < 0) return EOF; @@ -181,6 +195,20 @@ void* alloc_block(int fd, uint16_t size) { return blk+10; } +void* get_block(int fd, uint16_t size, uint64_t ptr) { + if(lseek(fd, ptr, SEEK_SET) < 0) return NULL; + void* blk = malloc(size+10); + if(blk == NULL) return NULL; + putle64(blk, ptr); + putle16(blk+8, size); + blk += 10; + if(read(fd, blk, size) != size) { + free(blk); + return NULL; + } + return blk; +} + int sync_block(int fd, void* blk) { uint64_t off = le64(blk-10); uint16_t size = le16(blk-2); diff --git a/src/table.c b/src/table.c new file mode 100644 index 0000000..e69de29 diff --git a/src/types.c b/src/types.c new file mode 100644 index 0000000..577aada --- /dev/null +++ b/src/types.c @@ -0,0 +1,91 @@ +#include "../include/types.h" +#include "../include/types/int8.h" + +// ptr = init(fd) +typedef uint64_t (*_type_init_t)(int); +// ret = insert_item(fd, index, k, ptr) +typedef int (*_insert_item_t)(int, uint64_t, key_t, uint64_t); +// ptr = find_by_key(fd, index, k) +typedef uint64_t (*_find_by_key_t)(int, uint64_t, key_t); +// ret = remove_by_key(fd, index, k) +typedef int (*_remove_by_key_t)(int, uint64_t, key_t); + +// Function not implemented +static uint64_t create_not_impl_index(int fd) { + errno = ENOSYS; + return 0; +} + +// Function not implemented +static int insert_not_impl_item(int fd, uint64_t index, key_t k, uint64_t ptr) { + errno = ENOSYS; + return 0; +} + +// Function not implemented +static uint64_t find_item_by_not_impl_key(int fd, uint64_t index, key_t k) { + errno = ENOSYS; + return 0; +} + +// Function not implemented +static int remove_item_by_not_impl_key(int fd, uint64_t index, key_t k) { + errno = ENOSYS; + return 0; +} + +static _type_init_t _types_init[] = { + create_int8_index, + create_not_impl_index, + create_not_impl_index, + create_not_impl_index, + create_not_impl_index, + create_not_impl_index, + create_not_impl_index +}; + +static _insert_item_t _insert_item[] = { + insert_int8_item, + insert_not_impl_item, + insert_not_impl_item, + insert_not_impl_item, + insert_not_impl_item, + insert_not_impl_item, + insert_not_impl_item +}; + +static _find_by_key_t _find_item_by_key[] = { + find_item_by_int8_key, + find_item_by_not_impl_key, + find_item_by_not_impl_key, + find_item_by_not_impl_key, + find_item_by_not_impl_key, + find_item_by_not_impl_key, + find_item_by_not_impl_key +}; + +static _remove_by_key_t _remove_item_by_key[] = { + remove_item_by_int8_key, + remove_item_by_not_impl_key, + remove_item_by_not_impl_key, + remove_item_by_not_impl_key, + remove_item_by_not_impl_key, + remove_item_by_not_impl_key, + remove_item_by_not_impl_key +}; + +uint64_t create_index(int fd, type_t t) { + return _types_init[t&7](fd); +} + +int insert_item(int fd, type_t t, uint64_t index, key_t k, uint64_t ptr) { + return _insert_item[t&7](fd, index, k, ptr); +} + +uint64_t find_item_by_key(int fd, type_t t, uint64_t index, key_t k) { + return _find_item_by_key[t&7](fd, index, k); +} + +int remove_item_by_key(int fd, type_t t, uint64_t index, key_t k) { + return _remove_item_by_key[t&7](fd, index, k); +} diff --git a/src/types/int8.c b/src/types/int8.c new file mode 100644 index 0000000..c0de574 --- /dev/null +++ b/src/types/int8.c @@ -0,0 +1,29 @@ +#include +#include +#include +#include "../../include/page.h" +#include "../../include/types/int8.h" + +uint64_t create_int8_index(int fd) { + uint64_t* blk = alloc_block(fd, INT8_INDEX_SZ); + if(blk == NULL) return 0; + memset(blk, 0, INT8_INDEX_SZ); + uint64_t index = blk[-1]; + unmount_block(fd, blk); + return index; +} + +int insert_int8_item(int fd, uint64_t index, key_t k, uint64_t ptr) { + errno = ENOSYS; + return 0; +} + +uint64_t find_item_by_int8_key(int fd, uint64_t index, key_t k) { + errno = ENOSYS; + return 0; +} + +int remove_item_by_int8_key(int fd, uint64_t index, key_t k) { + errno = ENOSYS; + return 0; +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a3c4272..1129b8d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -2,7 +2,9 @@ cmake_minimum_required(VERSION 3.0.0) project(fumidb_test VERSION 1.0) add_executable(binary_test binary_test.c) -add_executable(page_test page_test.c ../src/page.c ../src/file.c) +add_executable(page_test page_test.c ../src/page.c ../src/file.c) +add_executable(types_test types_test.c ../src/types.c ../src/types/int8.c ../src/page.c ../src/file.c) add_test(test_binary binary_test COMMAND binary_test) -add_test(test_page page_test COMMAND page_test) \ No newline at end of file +add_test(test_page page_test COMMAND page_test) +add_test(test_types types_test COMMAND types_test) \ No newline at end of file diff --git a/tests/page_test.c b/tests/page_test.c index 8f5e195..8f8b9c0 100644 --- a/tests/page_test.c +++ b/tests/page_test.c @@ -8,6 +8,14 @@ void* pages[16]; uint8_t nullpage[PAGESZ]; +static uint64_t get_second_unused_block(int fd) { + uint64_t ptr = get_first_unused_block(fd); + if(!ptr) return 0; + if(lseek(fd, ptr, SEEK_SET) < 0) return 0; + readle64(fd, ptr); + return ptr; +} + int main() { int fd = open("page_test_tmp.bin", O_RDWR | O_CREAT | O_TRUNC, 0644); if(fd < 0) { @@ -25,15 +33,20 @@ int main() { } puts("free!"); free_page(fd, pages[15]); - if(get_first_unused_block(fd) != 16*PAGESZ) return 5; + if(get_second_unused_block(fd) != 16*PAGESZ) return 5; + puts("free 15!"); free_page(fd, pages[12]); - if(get_first_unused_block(fd) != 13*PAGESZ) return 6; + if(get_second_unused_block(fd) != 13*PAGESZ) return 6; + puts("free 12!"); free_page(fd, pages[1]); - if(get_first_unused_block(fd) != 2*PAGESZ) return 7; + if(get_second_unused_block(fd) != 2*PAGESZ) return 7; + puts("free 1!"); free_page(fd, pages[10]); - if(get_first_unused_block(fd) != 2*PAGESZ) return 8; + if(get_second_unused_block(fd) != 2*PAGESZ) return 8; + puts("free 10!"); free_page(fd, pages[9]); - if(get_first_unused_block(fd) != 2*PAGESZ) return 8; + if(get_second_unused_block(fd) != 2*PAGESZ) return 8; + puts("free 9!"); pages[1] = alloc_page(fd); if(le64(pages[1]-8) != (uint64_t)(2*PAGESZ)) { printf("1: %016llx != %016llx\n", le64(pages[1]-8), (uint64_t)(2*PAGESZ)); @@ -72,30 +85,35 @@ int main() { uint8_t* blk4 = alloc_block(fd, 4095); memcpy(blk1, "hello world!", 13); sync_block(fd, blk1); - lseek(fd, PAGESZ, SEEK_SET); + lseek(fd, HEADERSZ, SEEK_SET); read(fd, blk2, 13); + puts("hello world!"); if(strcmp((const char *)blk2, (const char *)blk1)) { return 16; } + puts("hello world 1!"); sync_block(fd, blk2); - lseek(fd, PAGESZ+40, SEEK_SET); + lseek(fd, HEADERSZ+40, SEEK_SET); read(fd, blk3, 13); if(strcmp((const char *)blk3, (const char *)blk1)) { return 17; } + puts("hello world 2!"); sync_block(fd, blk3); - lseek(fd, PAGESZ+40+22, SEEK_SET); + lseek(fd, HEADERSZ+40+22, SEEK_SET); read(fd, blk4+222, 13); sync_block(fd, blk4); if(strcmp((const char *)&blk4[222], (const char *)blk1)) { return 18; } + puts("hello world 3!"); memset(blk1, 0, 40); - lseek(fd, PAGESZ*2+222, SEEK_SET); + lseek(fd, PAGESZ+222, SEEK_SET); read(fd, blk1, 13); if(strcmp((const char *)blk1, (const char *)blk2)) { return 19; } + puts("hello world4!"); if(free_block(fd, blk1)) { perror("free_block(fd, blk1)"); return 20; @@ -115,7 +133,7 @@ int main() { blk1 = alloc_block(fd, 40+22+33); memcpy(blk1+44, "hello world!", 13); sync_block(fd, blk1); - lseek(fd, PAGESZ+44, SEEK_SET); + lseek(fd, HEADERSZ+44, SEEK_SET); char buf[13]; read(fd, buf, 13); if(strcmp((const char *)&blk1[44], (const char *)buf)) { @@ -135,26 +153,26 @@ int main() { blk4 = alloc_block(fd, 4095); memcpy(blk1, "hello world!", 13); sync_block(fd, blk1); - lseek(fd, PAGESZ, SEEK_SET); + lseek(fd, HEADERSZ, SEEK_SET); read(fd, blk2, 13); if(strcmp((const char *)blk2, (const char *)blk1)) { return 16; } sync_block(fd, blk2); - lseek(fd, PAGESZ+40, SEEK_SET); + lseek(fd, HEADERSZ+40, SEEK_SET); read(fd, blk3, 13); if(strcmp((const char *)blk3, (const char *)blk1)) { return 17; } sync_block(fd, blk3); - lseek(fd, PAGESZ+40+22, SEEK_SET); + lseek(fd, HEADERSZ+40+22, SEEK_SET); read(fd, blk4+222, 13); sync_block(fd, blk4); if(strcmp((const char *)&blk4[222], (const char *)blk1)) { return 18; } memset(blk1, 0, 40); - lseek(fd, PAGESZ*2+222, SEEK_SET); + lseek(fd, PAGESZ+222, SEEK_SET); read(fd, blk1, 13); if(strcmp((const char *)blk1, (const char *)blk2)) { return 19; @@ -178,7 +196,7 @@ int main() { blk1 = alloc_block(fd, 40+22+33); memcpy(blk1+44, "hello world!", 13); sync_block(fd, blk1); - lseek(fd, PAGESZ+44, SEEK_SET); + lseek(fd, HEADERSZ+44, SEEK_SET); memset(buf, 0, 13); read(fd, buf, 13); if(strcmp((const char *)&blk1[44], (const char *)buf)) { diff --git a/tests/types_test.c b/tests/types_test.c new file mode 100644 index 0000000..10036b0 --- /dev/null +++ b/tests/types_test.c @@ -0,0 +1,21 @@ +#include +#include +#include "../include/binary.h" +#include "../include/file.h" +#include "../include/types.h" + +int main() { + int fd = open("types_test_tmp.bin", O_RDWR | O_CREAT | O_TRUNC, 0644); + if(fd < 0) { + perror("create"); + return 1; + } + if(init_file_header_page(fd) < 0) return 2; + uint64_t ptr = create_index(fd, TYPE_INT8); + if(!ptr) { + perror("create_index"); + return 3; + } + close(fd); + // remove("types_test_tmp.bin"); +}