赞
踩
英文连接:https://cstack.github.io/db_tutorial/parts/part6.html
为了实现B-tree,这一节先对当前的实现进行一点重构。
我们增加一个概念:Cursor(游标),代表了对象在数据库中的位置。那么关于cursor有几件事需要完成
1、在表的开头创建cursor
2、在表的末尾创建cursor
3、访问cursor所指向的行
4、访问cursor的下一行
完成这些之后,我们还会继续实现:
1、使用cursor删除行
2、使用cursor修改行
3、使用ID查询表,并创建指向该行的cursor
闲话少说,Cursor类型暂时按如下定义:
- 批注:英文原文此处的“without further ado”是习语,意为“闲话少说”,并非指 ADO 连接对象(ADO Connection Object);以下两条关于 ADO 的说明与本节内容无关。
- ADO 连接对象用来创建到某个数据源的开放连接。通过此连接,您可以对此数据库进行访问和操作。
- 查看此连接对象的所有方法和属性
- struct Cursor_t {
- Table* table; // Table this cursor belongs to
- uint32_t row_num; // Index of the row the cursor currently refers to
- bool end_of_table; // Indicates a position one past the last element
- };
- typedef struct Cursor_t Cursor;
table_start() 和 table_end() 用于创建新的cursor:
- /* Create a cursor on the first row of the table. The caller owns the
-  * returned cursor and must free() it. Exits the process if out of memory. */
- Cursor* table_start(Table* table) {
-     Cursor* cursor = malloc(sizeof *cursor);
-     if (cursor == NULL) {
-         printf("Error allocating cursor: out of memory\n");
-         exit(EXIT_FAILURE);
-     }
-     cursor->table = table;
-     cursor->row_num = 0;
-     /* An empty table has no first row, so the cursor starts at the end. */
-     cursor->end_of_table = (table->num_rows == 0);
-
-     return cursor;
- }
-
- /* Create a cursor one past the last row of the table (the insert position).
-  * The caller owns the returned cursor and must free() it. Exits the process
-  * if out of memory. */
- Cursor* table_end(Table* table) {
-     Cursor* cursor = malloc(sizeof *cursor);
-     if (cursor == NULL) {
-         printf("Error allocating cursor: out of memory\n");
-         exit(EXIT_FAILURE);
-     }
-     cursor->table = table;
-     cursor->row_num = table->num_rows;
-     cursor->end_of_table = true;
-
-     return cursor;
- }
row_slot()修改为cursor_value(),该函数的作用:返回指向cursor所在位置的指针。
定义1个函数cursor_advance,实现对cursor的row_num加1;当row_num到达表尾(不小于num_rows)时,将end_of_table置为true。
- /* Move the cursor to the next row and flag the end of the table once the
-  * new position is at or past the table's row count. */
- void cursor_advance(Cursor* cursor) {
-     /* Must increment; assigning the constant 1 would pin the cursor on row 1. */
-     cursor->row_num += 1;
-     if (cursor->row_num >= cursor->table->num_rows) {
-         cursor->end_of_table = true;
-     }
- }
最后,我们修改“virtual machine”,改用抽象的对象:Cursor。当插入一行时,我们打开一个Cursor,指向表尾,在cursor所指的位置写入该行后,关闭(释放)Cursor。
- Row* row_to_insert = &(statement->row_to_insert);
- + Cursor* cursor = table_end(table);
-
- - serialize_row(row_to_insert, row_slot(table, table->num_rows));
- + serialize_row(row_to_insert, cursor_value(cursor));
- table->num_rows += 1;
-
- + free(cursor);
- +
- return EXECUTE_SUCCESS;
- }
同理,修改execute_select的实现,使用cursor替换row_slot
- ExecuteResult execute_select(Statement* statement, Table* table) {
- + Cursor* cursor = table_start(table);
- +
- Row row;
- - for (uint32_t i = 0; i < table->num_rows; i++) {
- - deserialize_row(row_slot(table, i), &row);
- + while (!(cursor->end_of_table)) {
- + deserialize_row(cursor_value(cursor), &row);
- print_row(&row);
- + cursor_advance(cursor);
- }
- +
- + free(cursor);
- +
- return EXECUTE_SUCCESS;
- }
至此,execute_select和execute_insert就不需要再做任何关于表存储方式的假设,只需通过cursor即可与table进行交互。
还是使用上节的用例测试下:
- db > insert 1 cstack foo@bar.com
- Executed.
- db > insert 2 hello hello@126.com
- Executed.
- db > select
- (1, cstack, foo@bar.com)
- (2, hello, hello@126.com)
- (1, cstack, foo@bar.com)
- (2, hello, hello@126.com)
- Executed.
- db >
至此最新代码:
- #include <errno.h>
- #include <fcntl.h>
- #include <inttypes.h>
- #include <stdbool.h>
- #include <stdint.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <strings.h>
- #include <sys/stat.h>
- #include <unistd.h>
-
- /* Result of handling a meta command (an input line starting with '.') */
- enum MetaCommandResult_t {
- META_COMMAND_SUCCESS,
- META_COMMAND_UNRECOGNIZED_COMMAND
- };
- typedef enum MetaCommandResult_t MetaCommandResult;
-
- /* Result of executing a statement */
- enum ExecuteResult_t { EXECUTE_SUCCESS, EXECUTE_TABLE_FULL };
- typedef enum ExecuteResult_t ExecuteResult;
-
- /* Result of parsing (preparing) a SQL statement */
- enum PrepareResult_t {
- PREPARE_SUCCESS,
- PREPARE_NEGATIVE_ID,
- PREPARE_STRING_TOO_LONG,
- PREPARE_SYNTAX_ERROR,
- PREPARE_UNRECOGNIZED_STATEMENT
- };
- typedef enum PrepareResult_t PrepareResult;
-
- /* 行定义,对应具体的业务 */
- const uint32_t COLUMN_USERNAME_SIZE = 32;
- const uint32_t COLUMN_EMAIL_SIZE = 255;
- struct Row_t {
- uint32_t id;
- char username[COLUMN_USERNAME_SIZE + 1];
- char email[COLUMN_EMAIL_SIZE + 1];
- };
- typedef struct Row_t Row;
-
-
- /* Kinds of SQL statement the parser recognises */
- enum StatementType_t{
- STATEMENT_INSERT,
- STATEMENT_SELECT
- };
-
- typedef enum StatementType_t StatementType;
-
- struct Statement_t {
- StatementType type; /* which kind of statement was parsed */
- Row row_to_insert; /* only used by insert statement */
-
- };
- typedef struct Statement_t Statement;
-
- /**/
-
- /* Size in bytes of member Attribute of struct type Struct; no instance is needed. */
- #define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)
-
- /* Serialized row layout: id, username and email stored back to back. */
- const uint32_t ID_SIZE = size_of_attribute(Row, id);
- const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
- const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
- const uint32_t ID_OFFSET = 0;
- const uint32_t USERNAME_OFFSET = ID_OFFSET + ID_SIZE;
- const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
- const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;
-
- /* Most architectures use a 4 KiB page, so the same size is used here to avoid conversions. */
- const uint32_t PAGE_SIZE = 4096;
- /* A macro rather than a const variable: it is the dimension of the
-  * Pager.pages array, which must be an integer constant expression in C. */
- #define TABLE_MAX_PAGES 100
- const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
- const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;
-
- /*
- * Pager and table definitions
- */
- struct Pager_t {
- int file_descriptor; // Descriptor of the open database file
- uint32_t file_length; // Length of the file in bytes, measured when the pager was opened
- void *pages[TABLE_MAX_PAGES]; // Page cache; a NULL entry means the page is not loaded
- };
- typedef struct Pager_t Pager;
-
- struct Table_t {
- Pager *pager; // Pager that supplies this table's pages
- uint32_t num_rows; // Number of rows currently in the table
- };
- typedef struct Table_t Table;
-
- /* Serialize: copy the fields of a Row into the compact layout at destination,
-  * which must have room for ROW_SIZE bytes. */
- void serialize_row(Row* source, void* destination) {
-     /* Arithmetic on void* is a GNU extension; use a byte pointer instead. */
-     unsigned char* out = destination;
-
-     memcpy(out + ID_OFFSET, &(source->id), ID_SIZE);
-     memcpy(out + USERNAME_OFFSET, &(source->username), USERNAME_SIZE);
-     memcpy(out + EMAIL_OFFSET, &(source->email), EMAIL_SIZE);
- }
-
- /* Deserialize: copy the ROW_SIZE bytes of compact layout at source back
-  * into the fields of a Row. */
- void deserialize_row(void* source, Row* destination) {
-     /* Arithmetic on void* is a GNU extension; use a byte pointer instead. */
-     const unsigned char* in = source;
-
-     memcpy(&(destination->id), in + ID_OFFSET, ID_SIZE);
-     memcpy(&(destination->username), in + USERNAME_OFFSET, USERNAME_SIZE);
-     memcpy(&(destination->email), in + EMAIL_OFFSET, EMAIL_SIZE);
- }
-
- /**
-  * Return the in-memory copy of a page, loading it on first access.
-  * On a cache miss a zero-filled buffer is allocated and, if the page already
-  * exists in the file, filled from disk. Exits the process on any error.
-  * @param pager    pager that owns the page cache and the file descriptor
-  * @param page_num zero-based page index, must be below TABLE_MAX_PAGES
-  * @return pointer to the PAGE_SIZE-byte buffer cached for that page
-  */
- void *get_page(Pager *pager, uint32_t page_num)
- {
-     /* Valid indices are 0 .. TABLE_MAX_PAGES - 1, so the limit itself is out of bounds. */
-     if (page_num >= TABLE_MAX_PAGES) {
-         printf("Tried to fetch page number out of bounds. %u >= %u\n",
-                (unsigned)page_num, (unsigned)TABLE_MAX_PAGES);
-         exit(EXIT_FAILURE);
-     }
-
-     if (pager->pages[page_num] == NULL) {
-         // Cache miss. Allocate zeroed memory so that bytes never written by a
-         // row are not uninitialised when the page is flushed to the file.
-         void *page = calloc(1, PAGE_SIZE);
-         if (page == NULL) {
-             printf("Error allocating page: out of memory\n");
-             exit(EXIT_FAILURE);
-         }
-
-         uint32_t num_pages = pager->file_length / PAGE_SIZE;
-
-         // We might save a partial page at the end of the file
-         if (pager->file_length % PAGE_SIZE) {
-             num_pages += 1;
-         }
-
-         // Only pages that already exist in the file have anything to read.
-         if (page_num < num_pages) {
-             off_t offset = lseek(pager->file_descriptor,
-                                  (off_t)page_num * PAGE_SIZE, SEEK_SET);
-             if (offset == -1) {
-                 printf("Error seeking: %d\n", errno);
-                 exit(EXIT_FAILURE);
-             }
-             ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
-             if (bytes_read == -1) {
-                 printf("Error reading file: %d\n", errno);
-                 exit(EXIT_FAILURE);
-             }
-         }
-
-         pager->pages[page_num] = page;
-     }
-
-     return pager->pages[page_num];
- }
-
-
- /**
-  * Open the database file (creating it if it does not exist) and build a
-  * Pager whose page cache is empty. Exits the process on any error.
-  * @param filename path of the database file
-  * @return heap-allocated Pager holding the descriptor and the file length
-  */
- Pager *pager_open(const char *filename)
- {
-     int fd = open(filename,
-                   O_RDWR |      // Read/Write mode
-                       O_CREAT,  // Create file if it does not exist
-                   S_IWUSR |     // User write permission
-                       S_IRUSR   // User read permission
-     );
-
-     if (fd == -1) {
-         printf("Unable to open file\n");
-         exit(EXIT_FAILURE);
-     }
-
-     /* Seeking to the end yields the file length; -1 signals failure. */
-     off_t file_length = lseek(fd, 0, SEEK_END);
-     if (file_length == -1) {
-         printf("Error seeking: %d\n", errno);
-         exit(EXIT_FAILURE);
-     }
-
-     Pager *pager = malloc(sizeof *pager);
-     if (pager == NULL) {
-         printf("Error allocating pager: out of memory\n");
-         exit(EXIT_FAILURE);
-     }
-     pager->file_descriptor = fd;
-     pager->file_length = file_length;
-
-     /* No page is loaded until get_page is asked for it. */
-     for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
-         pager->pages[i] = NULL;
-     }
-
-     return pager;
- }
-
- /**
- * Cursor definition: a position within a table
- */
- struct Cursor_t {
- Table* table; // Table this cursor belongs to
- uint32_t row_num; // Index of the row the cursor currently refers to
- bool end_of_table; // Indicates a position one past the last element
- };
- typedef struct Cursor_t Cursor;
-
- /**
-  * Create a cursor on the first row of the table.
-  * The caller owns the returned cursor and must free() it.
-  * Exits the process if out of memory.
-  * @param table table to iterate over
-  * @return newly allocated cursor at row 0
-  */
- Cursor* table_start(Table* table) {
-     Cursor* cursor = malloc(sizeof *cursor);
-     if (cursor == NULL) {
-         printf("Error allocating cursor: out of memory\n");
-         exit(EXIT_FAILURE);
-     }
-     cursor->table = table;
-     cursor->row_num = 0;
-     /* An empty table has no first row, so the cursor starts at the end. */
-     cursor->end_of_table = (table->num_rows == 0);
-
-     return cursor;
- }
-
- /**
-  * Create a cursor one past the last row of the table (the insert position).
-  * The caller owns the returned cursor and must free() it.
-  * Exits the process if out of memory.
-  * @param table table to position in
-  * @return newly allocated cursor with end_of_table set
-  */
- Cursor* table_end(Table* table) {
-     Cursor* cursor = malloc(sizeof *cursor);
-     if (cursor == NULL) {
-         printf("Error allocating cursor: out of memory\n");
-         exit(EXIT_FAILURE);
-     }
-     cursor->table = table;
-     cursor->row_num = table->num_rows;
-     cursor->end_of_table = true;
-
-     return cursor;
- }
-
-
- /**
-  * Return the address of the row slot the cursor refers to, loading the
-  * containing page through the pager if necessary.
-  * @param cursor cursor whose row_num selects the slot
-  * @return pointer to ROW_SIZE bytes inside the cached page
-  */
- void* cursor_value(Cursor* cursor)
- {
-     uint32_t row_num = cursor->row_num;
-     uint32_t page_num = row_num / ROWS_PER_PAGE;
-     /* Arithmetic on void* is a GNU extension; use a byte pointer instead. */
-     unsigned char* page = get_page(cursor->table->pager, page_num);
-     uint32_t row_offset = row_num % ROWS_PER_PAGE;
-     uint32_t byte_offset = row_offset * ROW_SIZE;
-     return page + byte_offset;
- }
-
- /**
-  * Step the cursor forward by one row; once the new position is no longer
-  * below the table's row count, mark the cursor as being at the end.
-  * @param cursor cursor to move
-  */
- void cursor_advance(Cursor* cursor)
- {
-     uint32_t next_row = cursor->row_num + 1;
-
-     cursor->row_num = next_row;
-     if (!(next_row < cursor->table->num_rows)) {
-         cursor->end_of_table = true;
-     }
- }
-
-
-
- /* Print one row to stdout as "(id, username, email)". */
- void print_row(Row* row) {
-     /* id is a uint32_t, so use the matching unsigned conversion, not %d. */
-     printf("(%" PRIu32 ", %s, %s)\n", row->id, row->username, row->email);
- }
-
- /**
-  * Open the database file and build the Table that describes it.
-  * Exits the process on failure.
-  * @param filename path of the database file
-  * @return heap-allocated Table; release it with db_close
-  */
- Table *db_open(const char *filename)
- {
-     Pager *pager = pager_open(filename);
-
-     /* db_close writes every full page as PAGE_SIZE bytes (rows followed by
-      * unused padding) and only the trailing partial page as whole rows, so
-      * the row count has to be derived per page. Dividing the file length by
-      * ROW_SIZE would count the padding of full pages as extra rows. */
-     uint32_t full_pages = pager->file_length / PAGE_SIZE;
-     uint32_t tail_bytes = pager->file_length % PAGE_SIZE;
-     uint32_t num_rows = full_pages * ROWS_PER_PAGE + tail_bytes / ROW_SIZE;
-
-     Table *table = malloc(sizeof *table);
-     if (table == NULL) {
-         printf("Error allocating table: out of memory\n");
-         exit(EXIT_FAILURE);
-     }
-     table->pager = pager;
-     table->num_rows = num_rows;
-
-     return table;
- }
-
- /* Holds one line of user input */
- struct InputBuffer_t {
- char* buffer;
- size_t buffer_length;
- ssize_t input_length;
- };
- typedef struct InputBuffer_t InputBuffer;
-
- /* Allocate an empty input buffer; getline allocates the text buffer on the
-  * first read. Exits the process if out of memory. */
- InputBuffer* new_input_buffer()
- {
-     InputBuffer* input_buffer = malloc(sizeof *input_buffer);
-     if (input_buffer == NULL) {
-         printf("Error allocating input buffer: out of memory\n");
-         exit(EXIT_FAILURE);
-     }
-     input_buffer->buffer = NULL;
-     input_buffer->buffer_length = 0;
-     input_buffer->input_length = 0;
-
-     return input_buffer;
- }
-
- /* Write the interactive prompt to stdout (no trailing newline). */
- void print_prompt()
- {
-     fputs("db > ", stdout);
- }
-
- /* Read one line from standard input into input_buffer, without its trailing
-  * newline. Exits the process on read error or end of input. */
- void read_input(InputBuffer* input_buffer)
- {
-     ssize_t bytes_read =
-         getline(&(input_buffer->buffer), &(input_buffer->buffer_length), stdin);
-
-     if (bytes_read <= 0) {
-         printf("Error reading input\n");
-         exit(EXIT_FAILURE);
-     }
-
-     // Drop the trailing newline only when there is one: the last line before
-     // end of input may lack it, and its final character must be kept.
-     if (input_buffer->buffer[bytes_read - 1] == '\n') {
-         bytes_read -= 1;
-         input_buffer->buffer[bytes_read] = 0;
-     }
-     input_buffer->input_length = bytes_read;
- }
-
- /**
-  * Write the first `size` bytes of a cached page to its position in the file.
-  * Exits the process if the page is not cached or if seeking/writing fails.
-  * @param pager    pager that owns the cache and the file descriptor
-  * @param page_num index of the page to write
-  * @param size     number of bytes of the page to write
-  */
- void pager_flush(Pager *pager, uint32_t page_num, uint32_t size)
- {
-     void *page_data = pager->pages[page_num];
-
-     if (!page_data) {
-         printf("Tried to flush null page\n");
-         exit(EXIT_FAILURE);
-     }
-
-     if (lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET) == -1) {
-         printf("Error seeking: %d\n", errno);
-         exit(EXIT_FAILURE);
-     }
-
-     if (write(pager->file_descriptor, page_data, size) == -1) {
-         printf("Error writing: %d\n", errno);
-         exit(EXIT_FAILURE);
-     }
- }
-
- /**
-  * Close the database: flush cached pages to the file, close the file and
-  * release the pager, its pages and the table itself. The table pointer must
-  * not be used afterwards. Exits the process if flushing or closing fails.
-  */
- void db_close(Table *table) {
-     Pager *pager = table->pager;
-     uint32_t num_full_pages = table->num_rows / ROWS_PER_PAGE;
-
-     /* Full pages are written whole; pages never loaded are skipped. */
-     for (uint32_t i = 0; i < num_full_pages; i++) {
-         if (pager->pages[i] == NULL) {
-             continue;
-         }
-         pager_flush(pager, i, PAGE_SIZE);
-         free(pager->pages[i]);
-         pager->pages[i] = NULL;
-     }
-
-     // There may be a partial page to write to the end of the file
-     // This should not be needed after we switch to a B-tree
-     uint32_t num_additional_rows = table->num_rows % ROWS_PER_PAGE;
-     if (num_additional_rows > 0) {
-         uint32_t page_num = num_full_pages;
-         if (pager->pages[page_num] != NULL) {
-             pager_flush(pager, page_num, num_additional_rows * ROW_SIZE);
-             free(pager->pages[page_num]);
-             pager->pages[page_num] = NULL;
-         }
-     }
-
-     int result = close(pager->file_descriptor);
-     if (result == -1) {
-         printf("Error closing db file.\n");
-         exit(EXIT_FAILURE);
-     }
-
-     /* Release any page that is still cached; free(NULL) is a no-op. */
-     for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
-         free(pager->pages[i]);
-         pager->pages[i] = NULL;
-     }
-     free(pager);
-     /* The table was allocated by db_open and is owned by this module. */
-     free(table);
- }
-
-
- /* Handle a meta command. ".exit" closes the database and terminates the
-  * process; anything else is reported as unrecognized. */
- MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table)
- {
-     if (strcmp(input_buffer->buffer, ".exit") != 0) {
-         return META_COMMAND_UNRECOGNIZED_COMMAND;
-     }
-
-     db_close(table);
-     exit(EXIT_SUCCESS);
- }
-
- /* Parse and validate "insert <id> <username> <email>".
-  * The input buffer is tokenised in place by strtok. On success the parsed
-  * row is stored in statement->row_to_insert.
-  * Returns PREPARE_SYNTAX_ERROR for missing fields or an id that is not a
-  * number in range, PREPARE_NEGATIVE_ID for a negative id,
-  * PREPARE_STRING_TOO_LONG when username or email exceeds its column size. */
- PrepareResult prepare_insert(InputBuffer *input_buffer, Statement *statement) {
-     statement->type = STATEMENT_INSERT;
-
-     /* The first token is the keyword itself; it only has to be consumed. */
-     (void)strtok(input_buffer->buffer, " ");
-     char *id_string = strtok(NULL, " ");
-     char *username = strtok(NULL, " ");
-     char *email = strtok(NULL, " ");
-
-     if (id_string == NULL || username == NULL || email == NULL) {
-         return PREPARE_SYNTAX_ERROR;
-     }
-
-     /* strtol reports non-numeric and out-of-range input, which atoi cannot. */
-     errno = 0;
-     char *end = NULL;
-     long id = strtol(id_string, &end, 10);
-     if (end == id_string || *end != '\0' || errno == ERANGE) {
-         return PREPARE_SYNTAX_ERROR;
-     }
-     if (id < 0) {
-         return PREPARE_NEGATIVE_ID;
-     }
-     /* The id column is a uint32_t; larger values cannot be stored. */
-     if ((unsigned long)id > UINT32_MAX) {
-         return PREPARE_SYNTAX_ERROR;
-     }
-     if (strlen(username) > COLUMN_USERNAME_SIZE) {
-         return PREPARE_STRING_TOO_LONG;
-     }
-     if (strlen(email) > COLUMN_EMAIL_SIZE) {
-         return PREPARE_STRING_TOO_LONG;
-     }
-
-     statement->row_to_insert.id = (uint32_t)id;
-     /* Lengths were checked above, so both copies fit including the NUL. */
-     strcpy(statement->row_to_insert.username, username);
-     strcpy(statement->row_to_insert.email, email);
-
-     return PREPARE_SUCCESS;
- }
-
- /* Recognise the statement by its first six characters (case-insensitive):
-  * "insert" is handed to prepare_insert, "select" needs no further parsing. */
- PrepareResult prepare_statement(InputBuffer* input_buffer,Statement* statement)
- {
-     const char *text = input_buffer->buffer;
-
-     if (strncasecmp(text, "insert", 6) == 0) {
-         return prepare_insert(input_buffer, statement);
-     }
-
-     if (strncasecmp(text, "select", 6) != 0) {
-         return PREPARE_UNRECOGNIZED_STATEMENT;
-     }
-
-     statement->type = STATEMENT_SELECT;
-     return PREPARE_SUCCESS;
- }
-
- /* Execute an insert: serialize the statement's row into the slot at the end
-  * of the table and bump the row count. Returns EXECUTE_TABLE_FULL when the
-  * table already holds TABLE_MAX_ROWS rows. */
- ExecuteResult execute_insert(Statement *statement, Table *table)
- {
-     if (!(table->num_rows < TABLE_MAX_ROWS)) {
-         return EXECUTE_TABLE_FULL;
-     }
-
-     Cursor* end = table_end(table);
-     void* slot = cursor_value(end);
-
-     serialize_row(&(statement->row_to_insert), slot);
-     table->num_rows += 1;
-
-     free(end);
-
-     return EXECUTE_SUCCESS;
- }
-
- /* Execute a select: walk a cursor from the first row to the end of the
-  * table, deserializing and printing every row. */
- ExecuteResult execute_select(Statement *statement, Table *table)
- {
-     Row current;
-     Cursor* it = table_start(table);
-
-     for (; !it->end_of_table; cursor_advance(it)) {
-         deserialize_row(cursor_value(it), &current);
-         print_row(&current);
-     }
-
-     free(it);
-
-     return EXECUTE_SUCCESS;
- }
- /* Execute a prepared statement by dispatching on its type. */
- ExecuteResult execute_statement(Statement* statement , Table* table)
- {
-     switch (statement->type)
-     {
-         case (STATEMENT_INSERT):
-             return execute_insert(statement, table);
-         case (STATEMENT_SELECT):
-             return execute_select(statement, table);
-     }
-
-     /* Only reached when type holds a value outside StatementType; falling
-      * off the end of a non-void function would be undefined behaviour. */
-     printf("Unknown statement type: %d\n", (int)statement->type);
-     exit(EXIT_FAILURE);
- }
-
-
-
- /* Entry point: open the database file named by argv[1] and run the prompt / read / prepare / execute loop */
- int main(int argc, char* argv[])
- {
- if (argc < 2) {
- printf("Must supply a database filename.\n");
- exit(EXIT_FAILURE);
- }
-
- char *filename = argv[1];
- Table *table = db_open(filename);
-
- InputBuffer *input_buffer = new_input_buffer();
- while (true)
- {
- print_prompt();
- read_input(input_buffer);
-
- if (input_buffer->buffer[0] == '.') /* lines starting with '.' are meta commands */
- {
- switch (do_meta_command(input_buffer,table))
- {
- case (META_COMMAND_SUCCESS):
- continue;
- case (META_COMMAND_UNRECOGNIZED_COMMAND):
- printf("Unrecognized command '%s'\n", input_buffer->buffer);
- continue;
- }
- }
-
- Statement statement;
- switch (prepare_statement(input_buffer, &statement))
- {
- case (PREPARE_SUCCESS):
- break; /* go on to execute the statement */
- case (PREPARE_NEGATIVE_ID):
- printf("ID must be positive.\n");
- continue;
- case (PREPARE_STRING_TOO_LONG):
- printf("String is too long.\n");
- continue;
- case (PREPARE_SYNTAX_ERROR):
- printf("Syntax error. Could not parse statement.\n");
- continue;
- case (PREPARE_UNRECOGNIZED_STATEMENT):
- printf("Unrecognized keyword at start of '%s'.\n",
- input_buffer->buffer);
- continue;
- }
-
- switch (execute_statement(&statement, table))
- {
- case (EXECUTE_SUCCESS):
- printf("Executed.\n");
- break;
- case (EXECUTE_TABLE_FULL):
- printf("Error: Table full.\n");
- break;
- }
- }
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。