赞
踩
PostgreSQL缓冲区管理器由三层组成,即缓冲表层、缓冲区描述符层和缓冲池层。缓冲表层是一个散列表,它存储着页面的buffer_tag与描述符的buffer_id之间的映射关系;缓冲区描述符层是一个由缓冲区描述符组成的数组(每个描述符与缓冲池槽一一对应,并保存着相应槽的元数据);缓冲池层是一个数组(每个槽都存储一个数据文件页,数组槽的索引为buffer_id)。
缓冲池只是一个用于存储关系数据文件(例如表或索引)页面的简单数组。缓冲池数组的序号索引就是buffer_id。缓冲池槽的大小为8KB,等于页面大小,因而每个槽都能存储整个页面Page(src/include/storage/bufpage.h)。如上图(熊灿灿大神出品)中Shared Buffers中的Buffer Pool框图,其包含的页面数量定义在NBuffers GUC参数中。
Buffer Pool初始化代码定义在src/backend/storage/buffer/buf_init.c文件中,BufferShmemSize函数用于计算buffer pool所需共享内存的大小(包含data pages、buffer descriptors、hash tables等)。把I/O锁直接放在BufferDesc中本来更好,但这会使BufferDesc的大小超过一个缓存行,而基准测试表明,让每个BufferDesc都对齐到缓存行边界对性能很重要。因此,I/O锁数组被单独分配在另一个tranche中。由于这些锁的竞争并不激烈,该数组采用最小填充(LWLockMinimallyPadded)布局。
Size BufferShmemSize(void) {
Size size = 0;
size = add_size(size, mul_size(NBuffers, sizeof(BufferDescPadded))); /* size of buffer descriptors */ <--- 缓冲区描述符层
size = add_size(size, PG_CACHE_LINE_SIZE); /* to allow aligning buffer descriptors */
size = add_size(size, mul_size(NBuffers, sizeof(LWLockMinimallyPadded))); /* It would be nice to include the I/O locks in the BufferDesc, but that would increase the size of a BufferDesc to more than one cache line, and benchmarking has shown that keeping every BufferDesc aligned on a cache line boundary is important for performance. So, instead, the array of I/O locks is allocated in a separate tranche. Because those locks are not highly contended, we lay out the array with minimal padding. */
size = add_size(size, PG_CACHE_LINE_SIZE); /* to allow aligning the above */
size = add_size(size, mul_size(NBuffers, BLCKSZ)); /* size of data pages */ <--- 缓冲池层
size = add_size(size, StrategyShmemSize()); /* size of stuff controlled by freelist.c */ <-- 缓冲表层+freelist
size = add_size(size, mul_size(NBuffers, sizeof(CkptSortItem))); /* size of checkpoint sort array in bufmgr.c */
return size;
}
从InitBufferPool中可以看出Buffer Descriptors数组指针为BufferDescriptors、Buffer Blocks数组指针为BufferBlocks。共享内存申请完毕后,对BufferDescriptors进行初始化:将buf_id设置为元素在数组中的序号,将freeNext设置为下一个元素在数组中的序号(最后一个设置为FREENEXT_END_OF_LIST),并初始化buffer_content和buffer_io两个LWLock(buffer_content位于BufferDesc中,buffer_io位于BufferIOLWLockArray中);随后调用StrategyInitialize初始化缓冲表层(大小为NBuffers + NUM_BUFFER_PARTITIONS),并为StrategyControl申请共享内存(设置firstFreeBuffer为0,lastFreeBuffer为NBuffers - 1,设置bgwprocno为-1[No pending notification])。
void InitBufferPool(void) {
bool foundBufs,foundDescs,foundIOLocks,foundBufCkpt;
BufferDescriptors = (BufferDescPadded *)ShmemInitStruct("Buffer Descriptors",NBuffers * sizeof(BufferDescPadded),&foundDescs); /* Align descriptors to a cacheline boundary. */
BufferIOLWLockArray = (LWLockMinimallyPadded *)ShmemInitStruct("Buffer IO Locks",NBuffers * (Size) sizeof(LWLockMinimallyPadded),&foundIOLocks); /* Align lwlocks to cacheline boundary */
BufferBlocks = (char *)ShmemInitStruct("Buffer Blocks",NBuffers * (Size) BLCKSZ, &foundBufs);
LWLockRegisterTranche(LWTRANCHE_BUFFER_IO_IN_PROGRESS, "buffer_io");
LWLockRegisterTranche(LWTRANCHE_BUFFER_CONTENT, "buffer_content");
CkptBufferIds = (CkptSortItem *)ShmemInitStruct("Checkpoint BufferIds",NBuffers * sizeof(CkptSortItem), &foundBufCkpt); /* The array used to sort to-be-checkpointed buffer ids is located in shared memory, to avoid having to allocate significant amounts of memory at runtime. As that'd be in the middle of a checkpoint, or when the checkpointer is restarted, memory allocation failures would be painful. */
if (foundDescs || foundBufs || foundIOLocks || foundBufCkpt){/* should find all of these, or none of them */
Assert(foundDescs && foundBufs && foundIOLocks && foundBufCkpt);/* note: this path is only taken in EXEC_BACKEND case */
}else{
for (int i = 0; i < NBuffers; i++) { /* Initialize all the buffer headers. */
BufferDesc *buf = GetBufferDescriptor(i);
CLEAR_BUFFERTAG(buf->tag);
pg_atomic_init_u32(&buf->state, 0);
buf->wait_backend_pid = 0;
buf->buf_id = i;
/* Initially link all the buffers together as unused. Subsequent management of this list is done by freelist.c. */
buf->freeNext = i + 1;
LWLockInitialize(BufferDescriptorGetContentLock(buf), LWTRANCHE_BUFFER_CONTENT);
LWLockInitialize(BufferDescriptorGetIOLock(buf), LWTRANCHE_BUFFER_IO_IN_PROGRESS);
}
GetBufferDescriptor(NBuffers - 1)->freeNext = FREENEXT_END_OF_LIST; /* Correct last entry of linked list */
}
StrategyInitialize(!foundDescs); /* Init other shared buffer-management stuff */
WritebackContextInit(&BackendWritebackContext, &backend_flush_after); /* Initialize per-backend file flush context */
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。