2.1 MemoryContext结构

typedef struct MemoryContextData


       NodeTag             type;                    /* identifies exact kind of context */

       /* these two fields are placed here to minimize alignment wastage: */

       bool              isReset;                      /* T = no space alloced since last reset */

       bool              allowInCritSection;   /* allow palloc in critical section */

       Size       mem_allocated;                /* track memory allocated for this context */

       const MemoryContextMethods *methods;  /* virtual function table */

       MemoryContext parent;                 /* NULL if no parent (toplevel context) */

       MemoryContext firstchild;            /* head of linked list of children */

       MemoryContext prevchild;           /* previous child of same parent */

       MemoryContext nextchild;           /* next child of same parent */

       const char *name;                         /* context name (just for debugging) */

       const char *ident;                          /* context ID if any (just for debugging) */

       MemoryContextCallback *reset_cbs;   /* list of reset/delete callbacks */

} MemoryContextData;


/* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */

typedef struct MemoryContextData *MemoryContext


  1. type是枚举类型变量,表示内存上下文的类型:
/* src/include/nodes/nodes.h */

typedef enum NodeTag


       T_Invalid = 0,


#include "nodes/nodetags.h"  // src/include/nodes/nodetags.h

} NodeTag;




5.methods记录了内存上下文使用的函数指针。methods的数据类型是MemoryContextMethods ,其结构如下:

/* src/include/nodes/memnodes.h */

typedef struct MemoryContextMethods


       void          *(*alloc) (MemoryContext context, Size size);

       /* call this free_p in case someone #define's free() */

       void              (*free_p) (void *pointer);

       void          *(*realloc) (void *pointer, Size size);

       void              (*reset) (MemoryContext context);

       void              (*delete_context) (MemoryContext context);

       Size       (*get_chunk_space) (void *pointer);

       bool              (*is_empty) (MemoryContext context);

       void              (*stats) (MemoryContext context,

                                            MemoryStatsPrintFunc printfunc, void *passthru,

                                            MemoryContextCounters *totals,

                                            bool print_to_stderr);

} MemoryContextMethods;




8.prevchild 指向前一个兄弟节点




12.reset_cbs回调函数列表。在下一次重置或删除上下文之前,将调用一次此类函数。它可以用来放弃在某种意义上与上下文中分配的对象相关联的资源。其数据类型为MemoryContextCallback ,结构如下所示:

/* src/include/utils/palloc.h


 * A memory context can have callback functions registered on it.  Any such

 * function will be called once just before the context is next reset or

 * deleted.  The MemoryContextCallback struct describing such a callback

 * typically would be allocated within the context itself, thereby avoiding

 * any need to manage it explicitly (the reset/delete action will free it).


typedef void (*MemoryContextCallbackFunction) (void *arg);


typedef struct MemoryContextCallback


       MemoryContextCallbackFunction func; /* function to call */

       void          *arg;                     /* argument to pass it */

       struct MemoryContextCallback *next; /* next in list of callbacks */

} MemoryContextCallback;


2.2 AllocSetContext

/* src/backend/utils/mmgr/aset.c */


 * AllocSetContext is our standard implementation of MemoryContext.


 * Note: header.isReset means there is nothing for AllocSetReset to do.

 * This is different from the aset being physically empty (empty blocks list)

 * because we will still have a keeper block.  It's also different from the set

 * being logically empty, because we don't attempt to detect pfree'ing the

 * last active chunk.


typedef struct AllocSetContext


       MemoryContextData header;  /* Standard memory-context fields */

       /* Info about storage allocated in this context: */

       AllocBlock   blocks;                /* head of list of blocks in this set */

       MemoryChunk *freelist[ALLOCSET_NUM_FREELISTS]; /* free chunk lists */

       /* Allocation parameters for this context: */

       Size       initBlockSize;     /* initial block size */

       Size       maxBlockSize;   /* maximum block size */

       Size       nextBlockSize;   /* next block size to allocate */

       Size       allocChunkLimit;      /* effective chunk size limit */

       AllocBlock   keeper;                /* keep this block over resets */

       /* freelist this context could be put in, or -1 if not a candidate: */

       int                 freeListIndex;     /* index in context_freelists[], or -1 */

} AllocSetContext;


typedef AllocSetContext *AllocSet;



3.freelist组织该内存上下文里所有内存块中已释放的内存片的链表结构。它是一个数组成员,其数组大小是ALLOCSET_NUM_FREELISTS(该值是11)。freelist数组中各成员分别表示不同大小的AllocChunkData。在AllocSetContext的实现中,对于小的内存块(8 ~ 8192Byte)来说,当释放的时候不会归还给OS,而是将其缓存到freelist中。






9.freeListIndex表示该内存上下文在context_freelists全局数组中对应的下标:0表示默认参数的freelist,1表示小内存参数的freelist,-1表示该内存上下文不会被放入任何freelist(即创建时的minContextSize和initBlockSize与这两组参数都不匹配)。context_freelists的数据类型是AllocSetFreeList,结构如下所示:

/* src/backend/utils/mmgr/aset.c */

  // 为了提升效率,避免重复的删除和创建内存上下文,维护了一个freelist存放一些之前撤下来的内存上下文。当把一个内存上下文放入freelist之前,必须对这个内存上下文执行reset操作,这样就能保证内存上下文只有初始化时的内存片;同时要保证这个内存上下文要和list中的其他内存上下文在minContextSize和initBlockSize有相同的值。MaxBlockSize只是对我们初始化时与其他值做比较使用,与此操作并不相关。

typedef struct AllocSetFreeList


       int                 num_free;            /* current list length */

       AllocSetContext *first_free;   /* list header */

} AllocSetFreeList;

/* context_freelists[0] is for default params, [1] for small params */

static AllocSetFreeList context_freelists[2] =



              0, NULL



              0, NULL




2.3 AllocBlockData
PostgreSQL向OS申请内存分配的基本单位是Block(块),一个Block可能被拆分为若干个Chunk,也可能只包含一个Chunk(比如较大块内存)。在chunk释放时候 “某些chunk” 会放入freelist链表中,以便于下次分配,最后由Block统一释放归还给OS。


/* src/backend/utils/mmgr/aset.c */


 * AllocBlock

 *          An AllocBlock is the unit of memory that is obtained by aset.c

 *          from malloc().  It contains one or more MemoryChunks, which are

 *          the units requested by palloc() and freed by pfree(). MemoryChunks

 *          cannot be returned to malloc() individually, instead they are put

 *          on freelists by pfree() and re-used by the next palloc() that has

 *          a matching request size.


 *          AllocBlockData is the header data for a block --- the usable space

 *          within the block begins at the next alignment boundary.


typedef struct AllocBlockData


       AllocSet       aset;                   /* aset that owns this block */

       AllocBlock   prev;                    /* prev block in aset's blocks list, if any */

       AllocBlock   next;             /* next block in aset's blocks list, if any */

       char    *freeptr;            /* start of free space in this block */

       char    *endptr;                /* end of space in this block */

} AllocBlockData;

typedef struct AllocBlockData *AllocBlock;    /* forward reference */


  1. aset指向该AllocBlockData所属的AllocSetContext。

  2. prev指向AllocSetContext块列表中的上一块(如果有的话)。

  3. next指向AllocSetContext块列表中的下一块(如果有的话)。

  4. freeptr指向块中可用空闲区域的起始地址。

  5. endptr指向块中可用空闲区域的结束地址。

2.4 AllocChunkData

/* src/backend/utils/mmgr/aset.c */


 * AllocChunk

 *           The prefix of each piece of memory in an AllocBlock


 * Note: to meet the memory context APIs, the payload area of the chunk must

 * be maxaligned, and the "aset" link must be immediately adjacent to the

 * payload area (cf. GetMemoryChunkContext).  We simplify matters for this

 * module by requiring sizeof(AllocChunkData) to be maxaligned, and then

 * we can ensure things work by adding any required alignment padding before

 * the "aset" field.  There is a static assertion below that the alignment

 * is done correctly.


typedef struct AllocChunkData


       /* size is always the size of the usable space in the chunk */

       Size        size;  // 分配出去的大小


       /* when debugging memory usage, also store actual requested size */

       /* this is zero in a free chunk */

       Size        requested_size; // 用户实际需求大小





#endif                                               /* MEMORY_CONTEXT_CHECKING */


       /* ensure proper alignment by adding padding if needed */


       char        padding[MAXIMUM_ALIGNOF - ALLOCCHUNK_RAWSIZE % MAXIMUM_ALIGNOF]; //  为了保证对齐,padding数组用来做填充



       /* aset is the owning aset if allocated, or the freelist link if free */

       void    *aset;  // 该chunk如果被分配出去,aset指向一个AllocSetContext;如果没有被分配,aset指向一个freelist link。

       /* there must not be any padding to reach a MAXALIGN boundary here! */

} AllocChunkData;


3.1 初始化内存上下文

   /* src/backend/main/main.c */



        * Fire up essential subsystems: error and memory management

        * Code after this point is allowed to use elog/ereport, though

        * localization of messages may not work right away, and messages won't go

        * anywhere but stderr until GUC settings get loaded.



/* src/backend/utils/mmgr/mcxt.c */


 * MemoryContextInit

 *           Start up the memory-context subsystem.


 * This must be called before creating contexts or allocating memory in

 * contexts.  TopMemoryContext and ErrorContext are initialized here;

 * other contexts must be created afterwards.


 * In normal multi-backend operation, this is called once during

 * postmaster startup, and not at all by individual backend startup

 * (since the backends inherit an already-initialized context subsystem

 * by virtue of being forked off the postmaster).  But in an EXEC_BACKEND

 * build, each process must do this for itself.


 * In a standalone backend this must be called during backend startup.





       AssertState(TopMemoryContext == NULL);



        * First, initialize TopMemoryContext, which is the parent of all others.


       TopMemoryContext = AllocSetContextCreate((MemoryContext) NULL,





        * Not having any other place to point CurrentMemoryContext, make it point

        * to TopMemoryContext.  Caller should change this soon!


       CurrentMemoryContext = TopMemoryContext;



        * Initialize ErrorContext as an AllocSetContext with slow growth rate ---

        * we don't really expect much to be allocated in it. More to the point,

        * require it to contain at least 8K at all times. This is the only case

        * where retained memory in a context is *essential* --- we want to be

        * sure ErrorContext still has some memory even if we've run out

        * elsewhere! Also, allow allocations in ErrorContext within a critical

        * section. Otherwise a PANIC will cause an assertion failure in the error

        * reporting code, before printing out the real cause of the failure.


        * This should be the last step in this function, as elog.c assumes memory

        * management works once ErrorContext is non-null.


       ErrorContext = AllocSetContextCreate(TopMemoryContext,


                                                                       8 * 1024,

                                                                       8 * 1024,

                                                                       8 * 1024);

       MemoryContextAllowInCriticalSection(ErrorContext, true);



/* src/include/utils/memutils.h */


 * Recommended default alloc parameters, suitable for "ordinary" contexts

 * that might hold quite a lot of data.



#define ALLOCSET_DEFAULT_INITSIZE   (8 * 1024)

#define ALLOCSET_DEFAULT_MAXSIZE  (8 * 1024 * 1024)




/* src/include/utils/memutils.h */


 * This wrapper macro exists to check for non-constant strings used as context

 * names; that's no longer supported.  (Use MemoryContextSetIdentifier if you

 * want to provide a variable identifier.)



#define AllocSetContextCreate(parent, name, ...) \

       (StaticAssertExpr(__builtin_constant_p(name), \

                                     "memory context names must be constant strings"), \

        AllocSetContextCreateInternal(parent, name, __VA_ARGS__))


#define AllocSetContextCreate \


/* src/backend/utils/mmgr/aset.c */


 * AllocSetContextCreateInternal

 *           Create a new AllocSet context.


 * parent: parent context, or NULL if top-level context

 * name: name of context (must be statically allocated)

 * minContextSize: minimum context size

 * initBlockSize: initial allocation block size

 * maxBlockSize: maximum allocation block size


 * Most callers should abstract the context size parameters using a macro



 * Note: don't call this directly; go through the wrapper macro

 * AllocSetContextCreate.



AllocSetContextCreateInternal(MemoryContext parent,

                                                   const char *name,

                                                   Size minContextSize,

                                                   Size initBlockSize,

                                                   Size maxBlockSize)


       int          freeListIndex;

       Size        firstBlockSize;

       AllocSet  set;

       AllocBlock      block;


       /* Assert we padded AllocChunkData properly */


                                    "sizeof(AllocChunkData) is not maxaligned");

       StaticAssertStmt(offsetof(AllocChunkData, aset) + sizeof(MemoryContext) ==


                                    "padding calculation in AllocChunkData is wrong");



        * First, validate allocation parameters.  Once these were regular runtime

        * test and elog's, but in practice Asserts seem sufficient because nobody

        * varies their parameters at runtime.  We somewhat arbitrarily enforce a

        * minimum 1K block size.


       Assert(initBlockSize == MAXALIGN(initBlockSize) &&

                 initBlockSize >= 1024);

       Assert(maxBlockSize == MAXALIGN(maxBlockSize) &&

                 maxBlockSize >= initBlockSize &&

                 AllocHugeSizeIsValid(maxBlockSize)); /* must be safe to double */

       Assert(minContextSize == 0 ||

                 (minContextSize == MAXALIGN(minContextSize) &&

                     minContextSize >= 1024 &&

                     minContextSize <= maxBlockSize));



        * Check whether the parameters match either available freelist.  We do

        * not need to demand a match of maxBlockSize.


       if (minContextSize == ALLOCSET_DEFAULT_MINSIZE &&

              initBlockSize == ALLOCSET_DEFAULT_INITSIZE)

              freeListIndex = 0;

       else if (minContextSize == ALLOCSET_SMALL_MINSIZE &&

                      initBlockSize == ALLOCSET_SMALL_INITSIZE)

              freeListIndex = 1;


              freeListIndex = -1;



        * If a suitable freelist entry exists, just recycle that context.


       if (freeListIndex >= 0)


              AllocSetFreeList *freelist = &context_freelists[freeListIndex];


              if (freelist->first_free != NULL)


                     /* Remove entry from freelist */

                     set = freelist->first_free;

                     freelist->first_free = (AllocSet) set->header.nextchild;



                     /* Update its maxBlockSize; everything else should be OK */

                     set->maxBlockSize = maxBlockSize;


                     /* Reinitialize its header, installing correct name and parent */

                     MemoryContextCreate((MemoryContext) set,






                     ((MemoryContext) set)->mem_allocated =

                            set->keeper->endptr - ((char *) set);


                     return (MemoryContext) set;




       /* Determine size of initial block */

       firstBlockSize = MAXALIGN(sizeof(AllocSetContext)) + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;

       if (minContextSize != 0)

              firstBlockSize = Max(firstBlockSize, minContextSize);


              firstBlockSize = Max(firstBlockSize, initBlockSize);



        * Allocate the initial block.  Unlike other aset.c blocks, it starts with

        * the context header and its block header follows that.


       set = (AllocSet) malloc(firstBlockSize);

       if (set == NULL)


              if (TopMemoryContext)




                             errmsg("out of memory"),

                             errdetail("Failed while creating memory context \"%s\".",





        * Avoid writing code that can fail between here and MemoryContextCreate;

        * we'd leak the header/initial block if we ereport in this stretch.



       /* Fill in the initial block's block header */

       block = (AllocBlock) (((char *) set) + MAXALIGN(sizeof(AllocSetContext)));

       block->aset = set;

       block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;

       block->endptr = ((char *) set) + firstBlockSize;

       block->prev = NULL;

       block->next = NULL;


       /* Mark unallocated space NOACCESS; leave the block header alone. */

       VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, block->endptr - block->freeptr);


       /* Remember block as part of block list */

       set->blocks = block;

       /* Mark block as not to be released at reset time */

       set->keeper = block;


       /* Finish filling in aset-specific parts of the context header */

       MemSetAligned(set->freelist, 0, sizeof(set->freelist));


       set->initBlockSize = initBlockSize;

       set->maxBlockSize = maxBlockSize;

       set->nextBlockSize = initBlockSize;

       set->freeListIndex = freeListIndex;



        * Compute the allocation chunk size limit for this context.  It can't be

        * more than ALLOC_CHUNK_LIMIT because of the fixed number of freelists.

        * If maxBlockSize is small then requests exceeding the maxBlockSize, or

        * even a significant fraction of it, should be treated as large chunks

        * too.  For the typical case of maxBlockSize a power of 2, the chunk size

        * limit will be at most 1/8th maxBlockSize, so that given a stream of

        * requests that are all the maximum chunk size we will waste at most

        * 1/8th of the allocated space.


        * We have to have allocChunkLimit a power of two, because the requested

        * and actually-allocated sizes of any chunk must be on the same side of

        * the limit, else we get confused about whether the chunk is "big".


        * Also, allocChunkLimit must not exceed ALLOCSET_SEPARATE_THRESHOLD.



                                    "ALLOC_CHUNK_LIMIT != ALLOCSET_SEPARATE_THRESHOLD");


       set->allocChunkLimit = ALLOC_CHUNK_LIMIT;


       while ((Size) (set->allocChunkLimit + ALLOC_CHUNKHDRSZ) >

                 (Size) ((maxBlockSize - ALLOC_BLOCKHDRSZ) / ALLOC_CHUNK_FRACTION))

              set->allocChunkLimit >>= 1;


       /* Finally, do the type-independent part of context creation */

       MemoryContextCreate((MemoryContext) set,






       ((MemoryContext) set)->mem_allocated = firstBlockSize;


       return (MemoryContext) set;


3.2.1 静态断言是否正确填充AllocChunkData

#define StaticAssertStmt(condition, errmessage) \

static_assert(condition, errmessage)

【注】关于static_assert,在编译时测试断言。 如果指定的常数表达式为 false,则编译器显示指定的消息,并且编译失败;否则,不会产生任何影响。 C11 中的新增功能。_Static_assert 是 C11 中引入的关键字。 static_assert 是 C11 中引入的宏,它映射到 _Static_assert 关键字:

/* assert.h */

#if defined __USE_ISOC11 && !defined __cplusplus

/* Static assertion.  Requires support in the compiler.  */

# undef static_assert

# define static_assert _Static_assert



可在编译时计算的整型常数表达式。 如果表达式为零 (false),则显示 string_literal 参数,并且编译因出错而失败。 如果表达式不为零 (true),则不会产生任何影响。


如果 constant-expression 计算结果为零 (false),则显示此消息。 此消息必须使用编译器的基本字符集来生成。 字符不能为多字节字符或宽字符。_Static_assert 关键字和 static_assert 宏均在编译时测试软件断言。 它们可用于全局或函数范围。相反,assert 宏、_assert 和 _wassert 函数在运行时测试软件断言,并产生运行时成本。

#define ALLOC_CHUNKHDRSZ sizeof(struct AllocChunkData)



       (((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) & ~((uintptr_t) ((ALIGNVAL) - 1)))

/* Define as the maximum alignment requirement of any C data type. */


TYPEALIGN先给LEN加上(ALIGNVAL - 1),再用&运算把结果的低位清零,从而将LEN向上取整到ALIGNVAL的整数倍(要求ALIGNVAL是2的幂)。

StaticAssertStmt(offsetof(AllocChunkData, aset) + sizeof(MemoryContext) ==
 "padding calculation in AllocChunkData is wrong");


/* Offset of member MEMBER in a struct of type TYPE. */

#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)

#define offsetof(TYPE, MEMBER)   ((size_t) &((TYPE *)0)->MEMBER)


3.2.2 对参数进行校验


        * First, validate allocation parameters.  Once these were regular runtime

        * test and elog's, but in practice Asserts seem sufficient because nobody

        * varies their parameters at runtime.  We somewhat arbitrarily enforce a

        * minimum 1K block size.


       Assert(initBlockSize == MAXALIGN(initBlockSize) &&

                 initBlockSize >= 1024);

       Assert(maxBlockSize == MAXALIGN(maxBlockSize) &&

                 maxBlockSize >= initBlockSize &&

                 AllocHugeSizeIsValid(maxBlockSize)); /* must be safe to double */

       Assert(minContextSize == 0 ||

                 (minContextSize == MAXALIGN(minContextSize) &&

                     minContextSize >= 1024 &&

              minContextSize <= maxBlockSize));


#define MaxAllocHugeSize     (SIZE_MAX / 2)

#define AllocHugeSizeIsValid(size) ((Size) (size) <= MaxAllocHugeSize)

/* Limit of `size_t' type.  */

#if __WORDSIZE == 64

#define SIZE_MAX        (18446744073709551615UL)



#define SIZE_MAX            (4294967295UL)


#define SIZE_MAX            (4294967295U)



MaxAllocHugeSize的值为SIZE_MAX / 2,在64位平台上即: (unsigned long long)(-1) >> 1

3.2.3 检查参数是否与freelist匹配


        * Check whether the parameters match either available freelist.  We do

        * not need to demand a match of maxBlockSize.


       if (minContextSize == ALLOCSET_DEFAULT_MINSIZE &&

              initBlockSize == ALLOCSET_DEFAULT_INITSIZE)

              freeListIndex = 0;

       else if (minContextSize == ALLOCSET_SMALL_MINSIZE &&

                      initBlockSize == ALLOCSET_SMALL_INITSIZE)

              freeListIndex = 1;


              freeListIndex = -1;



// 关于这几个宏的定义:


 * Recommended default alloc parameters, suitable for "ordinary" contexts

 * that might hold quite a lot of data.




#define ALLOCSET_DEFAULT_MAXSIZE   (8 * 1024 * 1024)






 * Recommended alloc parameters for "small" contexts that are never expected

 * to contain much data (for example, a context to contain a query plan).



#define ALLOCSET_SMALL_INITSIZE  (1 * 1024)

#define ALLOCSET_SMALL_MAXSIZE      (8 * 1024)



这里是对TopMemoryContext内存上下文进行初始化,且其实参是minContextSize = 0,initBlockSize = 8 * 1024。所以freeListIndex = 0。freeListIndex用作全局结构体数组变量(context_freelists)的下标,用来选取对应的freelist,该结构体后面会详细说明。

3.2.4 如果freelist中有可用的内存上下文,则直接复用

由于这里freeListIndex = 0,且得到的freelist指针变量中其成员变量first_free的值为NULL,不会进入if (freelist->first_free != NULL)的函数体中。freelist的内容如何来的,在4.5节中有详细说明。


        * If a suitable freelist entry exists, just recycle that context.


     // 如果在已有的freelist中有合适的项,直接拿来用,不再进行后续malloc操作。

       if (freeListIndex >= 0)


              AllocSetFreeList *freelist = &context_freelists[freeListIndex];

              if (freelist->first_free != NULL)


                     /* Remove entry from freelist */

                     set = freelist->first_free;

                     freelist->first_free = (AllocSet) set->header.nextchild;



                     /* Update its maxBlockSize; everything else should be OK */

                     set->maxBlockSize = maxBlockSize;


                     /* Reinitialize its header, installing correct name and parent */

             // 注释中Reinitialize代表这个freelist中的内存上下文已经被初始化一次了,实际上freelist中的内存上下文就是之前用过的,在删除的时候没有归还给系统,直接放到了freelist中,所以这里是根据当前的参数再次初始化。

                     MemoryContextCreate((MemoryContext) set,






                     ((MemoryContext) set)->mem_allocated =

                            set->keeper->endptr - ((char *) set);

                     return (MemoryContext) set;



3.2.5 确定初始化块大小

   /* Determine size of initial block */

   firstBlockSize = MAXALIGN(sizeof(AllocSetContext)) +


   if (minContextSize != 0)

          firstBlockSize = Max(firstBlockSize, minContextSize);


          firstBlockSize = Max(firstBlockSize, initBlockSize);


#define ALLOC_BLOCKHDRSZ   MAXALIGN(sizeof(AllocBlockData))

#define ALLOC_CHUNKHDRSZ   sizeof(struct AllocChunkData)

实参minContextSize 的值为0,所以走else{}分支。其中initBlockSize的值是8KB,显然这里的MAXALIGN(sizeof(AllocSetContext)) + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ值是小于8KB的,因此firstBlockSize 最终的值是8KB。

3.2.6 分配AllocSetContext块



        * Allocate the initial block.  Unlike other aset.c blocks, it starts with

        * the context header and its block header follows that.


       set = (AllocSet) malloc(firstBlockSize);

       if (set == NULL)


              if (TopMemoryContext)




                             errmsg("out of memory"),

                             errdetail("Failed while creating memory context \"%s\".",





 * MemoryContextStats

 *           Print statistics about the named context and all its descendants.


 * This is just a debugging utility, so it's not very fancy.  However, we do

 * make some effort to summarize when the output would otherwise be very long.

 * The statistics are sent to stderr.



MemoryContextStats(MemoryContext context)


       /* A hard-wired limit on the number of children is usually good enough */

       MemoryContextStatsDetail(context, 100, true);




 * MemoryContextStatsDetail


 * Entry point for use if you want to vary the number of child contexts shown.


 * If print_to_stderr is true, print statistics about the memory contexts

 * with fprintf(stderr), otherwise use ereport().



MemoryContextStatsDetail(MemoryContext context, int max_children,

                                           bool print_to_stderr)


       MemoryContextCounters grand_totals;


       memset(&grand_totals, 0, sizeof(grand_totals));


       MemoryContextStatsInternal(context, 0, true, max_children, &grand_totals, print_to_stderr);


       if (print_to_stderr)


                            "Grand total: %zu bytes in %zu blocks; %zu free (%zu chunks); %zu used\n",

                            grand_totals.totalspace, grand_totals.nblocks,

                            grand_totals.freespace, grand_totals.freechunks,

                            grand_totals.totalspace - grand_totals.freespace);




               * Use LOG_SERVER_ONLY to prevent the memory contexts from being sent

               * to the connected client.


               * We don't buffer the information about all memory contexts in a

               * backend into StringInfo and log it as one message. Otherwise which

               * may require the buffer to be enlarged very much and lead to OOM

               * error since there can be a large number of memory contexts in a

               * backend. Instead, we log one message per memory context.





                             errmsg_internal("Grand total: %zu bytes in %zu blocks; %zu free (%zu chunks); %zu used",

                                                         grand_totals.totalspace, grand_totals.nblocks,

                                                         grand_totals.freespace, grand_totals.freechunks,

                                                         grand_totals.totalspace - grand_totals.freespace)));




 * MemoryContextCounters

 *           Summarization state for MemoryContextStats collection.


 * The set of counters in this struct is biased towards AllocSet; if we ever

 * add any context types that are based on fundamentally different approaches,

 * we might need more or different counters here.  A possible API spec then

 * would be to print only nonzero counters, but for now we just summarize in

 * the format historically used by AllocSet.


typedef struct MemoryContextCounters


       Size        nblocks;          /* Total number of malloc blocks */

       Size        freechunks;            /* Total number of free chunks */

       Size        totalspace;              /* Total bytes requested from malloc */

       Size        freespace;       /* The unused portion of totalspace */

} MemoryContextCounters;

3.2.7 填写初始块的块头
申请了firstBlockSize(8 * 1024Byte)大小的内存,并把这段内存的首地址转换为AllocSetContext的指针,接下来在AllocSetContext后面分配一个AllocBlockData大小的区域,对此区域做初始化工作。


        * Avoid writing code that can fail between here and MemoryContextCreate;

        * we'd leak the header/initial block if we ereport in this stretch.



       /* Fill in the initial block's block header */

       block = (AllocBlock) (((char *) set) + MAXALIGN(sizeof(AllocSetContext)));

       block->aset = set;


    // freeptr指向block+MAXALIGN(sizeof(AllocBlockData))的位置处

       block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;


       block->endptr = ((char *) set) + firstBlockSize;

       block->prev = NULL;

       block->next = NULL;


       /* Mark unallocated space NOACCESS; leave the block header alone. */

VALGRIND_MAKE_MEM_NOACCESS(block->freeptr, block->endptr - block->freeptr);


/* Remember block as part of block list */

       set->blocks = block;

       /* Mark block as not to be released at reset time */

       set->keeper = block;

       /* Finish filling in aset-specific parts of the context header */

       MemSetAligned(set->freelist, 0, sizeof(set->freelist));


       set->initBlockSize = initBlockSize;

       set->maxBlockSize = maxBlockSize;

       set->nextBlockSize = initBlockSize;

       set->freeListIndex = freeListIndex;


3.2.8 计算内存上下文分配内存片大小限制

接下来计算该内存上下文中内存片(chunk)大小的上限allocChunkLimit。由于freelist数组的大小固定(数组大小为11),该上限不能超过ALLOC_CHUNK_LIMIT(8192 Byte)。不超过8 * 1024 Byte的请求当作普通chunk处理,超过的请求会申请专用block,专用block同样有block header和chunk header。如果maxBlockSize很小,那么超过maxBlockSize、甚至只是占其相当大比例的请求也应该被视为大内存片。对于maxBlockSize为2的幂的典型情况,内存片大小的上限最多为maxBlockSize的1/8,因此,对于一串全部为最大内存片大小的请求,我们最多会浪费1/8的分配空间。


        * Compute the allocation chunk size limit for this context.  It can't be

        * more than ALLOC_CHUNK_LIMIT because of the fixed number of freelists.

        * If maxBlockSize is small then requests exceeding the maxBlockSize, or

        * even a significant fraction of it, should be treated as large chunks

        * too.  For the typical case of maxBlockSize a power of 2, the chunk size

        * limit will be at most 1/8th maxBlockSize, so that given a stream of

        * requests that are all the maximum chunk size we will waste at most

        * 1/8th of the allocated space.


        * We have to have allocChunkLimit a power of two, because the requested

        * and actually-allocated sizes of any chunk must be on the same side of

        * the limit, else we get confused about whether the chunk is "big".


        * Also, allocChunkLimit must not exceed ALLOCSET_SEPARATE_THRESHOLD.



                                    "ALLOC_CHUNK_LIMIT != ALLOCSET_SEPARATE_THRESHOLD");


       set->allocChunkLimit = ALLOC_CHUNK_LIMIT;

       while ((Size) (set->allocChunkLimit + ALLOC_CHUNKHDRSZ) >

                 (Size) ((maxBlockSize - ALLOC_BLOCKHDRSZ) / ALLOC_CHUNK_FRACTION))

              set->allocChunkLimit >>= 1;



 * Chunk freelist k holds chunks of size 1 << (k + ALLOC_MINBITS),

 * for k = 0 .. ALLOCSET_NUM_FREELISTS-1.


 * Note that all chunks in the freelists have power-of-2 sizes.  This

 * improves recyclability: we may waste some space, but the wasted space

 * should stay pretty constant as requests are made and released.


 * A request too large for the last freelist is handled by allocating a

 * dedicated block from malloc().  The block still has a block header and

 * chunk header, but when the chunk is freed we'll return the whole block

 * to malloc(), not put it on our freelists.


 * CAUTION: ALLOC_MINBITS must be large enough so that

 * 1<<ALLOC_MINBITS is at least MAXALIGN,

 * or we may fail to align the smallest chunks adequately.

 * 8-byte alignment is enough on all currently known machines.


 * With the current parameters, request sizes up to 8K are treated as chunks,

 * larger requests go into dedicated blocks.  Change ALLOCSET_NUM_FREELISTS

 * to adjust the boundary point; and adjust ALLOCSET_SEPARATE_THRESHOLD in

 * memutils.h to agree.  (Note: in contexts with small maxBlockSize, we may

 * set the allocChunkLimit to less than 8K, so as to avoid space wastage.)




#define ALLOC_MINBITS            3     /* smallest chunk size is 8 bytes */



/* Size of largest chunk that we use a fixed size for */


/* We allow chunks to be at most 1/4 of maxBlockSize (less overhead) */

3.2.9 执行与类型无关的上下文创建部分



 * This is the virtual function table for AllocSet contexts.


static const MemoryContextMethods AllocSetMethods = {














/*
 * MemoryContextCreate
 *           Context-type-independent part of context creation.
 *
 * Fills in the standard header fields of the caller-supplied node and, when
 * a parent is given, pushes the node onto the front of the parent's child
 * list.
 *
 * node: context header to initialize
 * tag: NodeTag stored in node->type
 * methods: virtual function table stored in node->methods
 * parent: parent context, or NULL for a top-level context
 * name: stored in node->name (the pointer itself is kept, not a copy)
 */
void
MemoryContextCreate(MemoryContext node,
                                   NodeTag tag,
                                   const MemoryContextMethods *methods,
                                   MemoryContext parent,
                                   const char *name)
{
       /* Creating new memory contexts is not allowed in a critical section */
       Assert(CritSectionCount == 0);

       /* Initialize all standard fields of memory context header */
       node->type = tag;
       node->isReset = true;
       node->methods = methods;
       node->parent = parent;
       node->firstchild = NULL;
       node->mem_allocated = 0;
       node->prevchild = NULL;
       node->name = name;
       node->ident = NULL;
       node->reset_cbs = NULL;

       /* OK to link node into context tree */
       if (parent)
       {
              /* Head insertion: the old first child becomes our next sibling. */
              node->nextchild = parent->firstchild;
              if (parent->firstchild != NULL)
                     parent->firstchild->prevchild = node;
              parent->firstchild = node;
              /* inherit allowInCritSection flag from parent */
              node->allowInCritSection = parent->allowInCritSection;
       }
       else
       {
              /* Top-level context: no siblings, and nothing to inherit. */
              node->nextchild = NULL;
              node->allowInCritSection = false;
       }

       VALGRIND_CREATE_MEMPOOL(node, 0, false);
}




 ((MemoryContext) set)->mem_allocated = firstBlockSize;

return (MemoryContext) set;


ErrorContext = AllocSetContextCreate(TopMemoryContext,


                                   8 * 1024, // minContextSize

                                   8 * 1024, // initBlockSize

                                   8 * 1024); // maxBlockSize


4、AllocSet contexts虚函数表的具体实现
4.1 AllocSetAlloc–内存分配


/* source file: src/backend/utils/mmgr/aset.c */


 * AllocSetAlloc

 *           Returns pointer to allocated memory of given size or NULL if

 *           request could not be completed; memory is added to the set.


 * No request may exceed:


 * All callers use a much-lower limit.


 * Note: when using valgrind, it doesn't matter how the returned allocation

 * is marked, as mcxt.c will set it to UNDEFINED.  In some paths we will

 * return space that is marked NOACCESS - AllocSetRealloc has to beware!


void *

AllocSetAlloc(MemoryContext context, Size size)


       AllocSet  set = (AllocSet) context;

       AllocBlock      block;

       AllocChunk *chunk;

       int                 fidx;

       Size        chunk_size;

       Size        blksize;





        * If requested size exceeds maximum for chunks, allocate an entire block

        * for this request.


       if (size > set->allocChunkLimit)



              /* ensure there's always space for the sentinel byte */

              chunk_size = MAXALIGN(size + 1);


              chunk_size = MAXALIGN(size);



              blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;

              block = (AllocBlock) malloc(blksize);

              if (block == NULL)

                     return NULL;


              context->mem_allocated += blksize;


              block->aset = set;

              block->freeptr = block->endptr = ((char *) block) + blksize;


              chunk = (MemoryChunk *) (((char *) block) + ALLOC_BLOCKHDRSZ);


              /* mark the MemoryChunk as externally managed */

              MemoryChunkSetHdrMaskExternal(chunk, MCTX_ASET_ID);



              chunk->requested_size = size;

              /* set mark to catch clobber of "unused" space */

              Assert(size < chunk_size);

              set_sentinel(MemoryChunkGetPointer(chunk), size);



              /* fill the allocated space with junk */

              randomize_mem((char *) MemoryChunkGetPointer(chunk), size);




               * Stick the new block underneath the active allocation block, if any,

               * so that we don't lose the use of the space remaining therein.


              if (set->blocks != NULL)


                     block->prev = set->blocks;

                     block->next = set->blocks->next;

                     if (block->next)

                            block->next->prev = block;

                     set->blocks->next = block;




                     block->prev = NULL;

                     block->next = NULL;

                     set->blocks = block;



              /* Ensure any padding bytes are marked NOACCESS. */

              VALGRIND_MAKE_MEM_NOACCESS((char *) MemoryChunkGetPointer(chunk) + size,

                                                           chunk_size - size);


              /* Disallow external access to private part of chunk header. */



              return MemoryChunkGetPointer(chunk);




#define SIZE_MAX (~(size_t)0) // 当前系统所能表示的最大整数所需的二进制位都为1



       (((uintptr_t) (LEN)) & ~((uintptr_t) ((ALIGNVAL) - 1)))

/* Define as the maximum alignment requirement of any C data type. */


4.1.1 变量初始化和断言

 AllocSet  set = (AllocSet) context; /* view the generic context as an AllocSet */
   AllocBlock      block;
   AllocChunk      chunk;  /* AllocChunk is itself a pointer type, so no '*' here */
   int                 fidx;
   Size        chunk_size;
   Size        blksize;

   /* Sanity-check the context pointer before touching any of its fields. */
   Assert(AllocSetIsValid(set));

4.1.2 待申请内存大于set->allocChunkLimit

        * If requested size exceeds maximum for chunks, allocate an entire block

        * for this request.


       if (size > set->allocChunkLimit)
       {
              /* Round the request up to a MAXALIGN boundary; may be slightly larger than size. */
              chunk_size = MAXALIGN(size);

              /* Whole block = block header + chunk header + aligned payload. */
              blksize = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;

              /* The AllocBlockData header occupies the start of the malloc'd region. */
              block = (AllocBlock) malloc(blksize);
              if (block == NULL)
                     return NULL;

              /* Record the new block in the context's running total. */
              context->mem_allocated += blksize;

              /* Back-link the block to the set that owns it. */
              block->aset = set;

              /*
               * freeptr and endptr both point at the end of the block: the whole
               * block serves this one request, so no further chunks are carved
               * out of it.
               */
              block->freeptr = block->endptr = ((char *) block) + blksize;

              /* The chunk header sits directly after the block header. */
              chunk = (AllocChunk) (((char *) block) + ALLOC_BLOCKHDRSZ);
              chunk->aset = set;

              /*
               * size holds the aligned size, requested_size what the caller asked
               * for; hence chunk->size >= chunk->requested_size.
               */
              chunk->size = chunk_size;
              chunk->requested_size = size;

              /* set mark to catch clobber of "unused" space */
              if (size < chunk_size)
                     set_sentinel(AllocChunkGetPointer(chunk), size);

              /* fill the allocated space with junk */
              randomize_mem((char *) AllocChunkGetPointer(chunk), size);

              /*
               * Stick the new block underneath the active allocation block, if any,
               * so that we don't lose the use of the space remaining therein.
               */
              if (set->blocks != NULL)
              {
                     /* Insert as the second block, right behind the active one. */
                     block->prev = set->blocks;
                     block->next = set->blocks->next;
                     if (block->next)
                            block->next->prev = block;
                     set->blocks->next = block;
              }
              else
              {
                     /* The set has no blocks yet: this one becomes the list head. */
                     block->prev = NULL;
                     block->next = NULL;
                     set->blocks = block;
              }

              /* Ensure any padding bytes are marked NOACCESS. */
              VALGRIND_MAKE_MEM_NOACCESS((char *) AllocChunkGetPointer(chunk) + size,
                                                           chunk_size - size);

              /*
               * Disallow external access to private part of chunk header.
               * (The corresponding Valgrind request is not part of this excerpt.)
               */

              /* The caller gets the address just past the chunk header. */
              return AllocChunkGetPointer(chunk);
       }


#define AllocChunkGetPointer(chk) \

                            ((AllocPointer)(((char *)(chk)) + ALLOC_CHUNKHDRSZ))


4.1.3 待申请内存小于等于set->allocChunkLimit


        * Request is small enough to be treated as a chunk.  Look in the

        * corresponding free list to see if there is a free chunk we could reuse.

        * If one is found, remove it from the free list, make it again a member

        * of the alloc set and return its data address.


       /* Map the request size to the index of the freelist that serves it. */
       fidx = AllocSetFreeIndex(size);

       /* Head of that freelist, or NULL if the list is empty. */
       chunk = set->freelist[fidx];

       if (chunk != NULL)
       {
              Assert(chunk->size >= size);

              /*
               * Pop the chunk off the singly linked freelist.  The aset field does
               * double duty: while a chunk is on a freelist it holds the link to
               * the next free chunk; once handed out it points to the owning set.
               */
              set->freelist[fidx] = (AllocChunk) chunk->aset;
              chunk->aset = (void *) set;

              return AllocChunkGetPointer(chunk);
       }



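根据给定的size查询该size对应的freelist数组的下标,下标为k(0,1,2,…,10),下标k对应的chunk大小为 chunk_size = f(k) = 1 << (k + ALLOC_MINBITS),即 $2^{k}\cdot 2^{\mathrm{ALLOC\_MINBITS}}$。分配出去的chunk必须不小于size,所以 $2^{k}\cdot 2^{\mathrm{ALLOC\_MINBITS}} \ge \mathrm{size}$,即 $k \ge \log_2\left(\mathrm{size}/2^{\mathrm{ALLOC\_MINBITS}}\right)$,取满足条件的最小整数,得 $k=\left\lceil \log_2\left(\mathrm{size}/2^{\mathrm{ALLOC\_MINBITS}}\right)\right\rceil$(size ≤ 8 时 k = 0)。

case 1:假设 $\mathrm{size}=2^{k}\cdot 2^{\mathrm{ALLOC\_MINBITS}}$(size > 8),则 $k=\log_2(\mathrm{size} \gg \mathrm{ALLOC\_MINBITS})$。从位运算的角度来考虑,size >> ALLOC_MINBITS = 1 << k,所以以2为底的对数函数求值可以转换为位运算,在这种情况下 size >> ALLOC_MINBITS = 00…001[0…0],k的值就是这个1右边0的个数。

case 2:假设 $2^{k-1}\cdot 2^{\mathrm{ALLOC\_MINBITS}} < \mathrm{size} < 2^{k}\cdot 2^{\mathrm{ALLOC\_MINBITS}}$(size > 8),此时 $k > \log_2\left(\mathrm{size}/2^{\mathrm{ALLOC\_MINBITS}}\right)$,对 size >> ALLOC_MINBITS 统计最高位1右边的位数(即 $\lfloor\log_2(\mathrm{size} \gg \mathrm{ALLOC\_MINBITS})\rfloor$)之后,结果值要+1。比如 size = 9 = $(1001)_2$,右移3位值为00…01,最高位1右边的位数为0,结果加1得1,即 $2^{\mathrm{ALLOC\_MINBITS}+1}=16 > 9$,所以size为9,会从freelist中分配下标为1也就是chunk_size为16的片给用户。

case 3:特殊情况,上述分类如果使用函数实现大概率会考虑条件分支,但是可以通过调整参数来避免条件分支。比如将case 1的这种情况通过算术运算让它归属到case 2的情况:在case 1中要让 $\mathrm{size}=2^{k}\cdot 2^{\mathrm{ALLOC\_MINBITS}}$ 变为 $\mathrm{size}<2^{k}\cdot 2^{\mathrm{ALLOC\_MINBITS}}$,只需让size - 1。从size的位级表示考虑,如果size-1在位级表示上没有对最高位产生借位,这种情况本来就属于case 2,size-1没有影响,因为-1操作没有改变最高位的1;如果size-1对最高位的1产生了借位(即size恰好是2的幂),最高位由1变0,这时统计最高位1右边的位数将少1个,而case 2的+1正好把它补了回来。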


/* Smallest chunk size is 1 << ALLOC_MINBITS = 8 bytes. */
#ifndef ALLOC_MINBITS
#define ALLOC_MINBITS 3
#endif

/*
 * AllocSetFreeIndex
 *           Map a request size to the index of the freelist that serves it.
 *
 * Freelist k holds chunks of 1 << (k + ALLOC_MINBITS) bytes.  The result is
 * 0 for any size up to 1 << ALLOC_MINBITS; for larger sizes it is the
 * smallest k whose chunk size is >= size.
 */
static inline int
AllocSetFreeIndex(Size size)
{
       int                 idx;

       if (size > (1 << ALLOC_MINBITS))
       {
              /*
               * 31 - __builtin_clz(x) is the bit position of the highest set bit
               * of x.  Applying it to size - 1 (rather than size) lets an exact
               * power of two land on its own freelist instead of the next larger
               * one; the trailing "+ 1" then rounds every other size up.
               */
              idx = 31 - __builtin_clz((uint32) size - 1) - ALLOC_MINBITS + 1;
       }
       else
              idx = 0;

       return idx;
}

4.1.4 context的第一个活动块有可用空间,空间不够本次size


        * Choose the actual chunk size to allocate.


       chunk_size = (1 << ALLOC_MINBITS) << fidx;

       Assert(chunk_size >= size);



        * If there is enough room in the active allocation block, we will put the

        * chunk into that block.  Else must start a new one.


       if ((block = set->blocks) != NULL)
       {
              /* Free space still left in the active (first) block. */
              Size        availspace = block->endptr - block->freeptr;

              /*
               * Not enough room for this request: a new block is about to take
               * over as the active block, so whatever is left here can only be
               * reached through the freelists from now on.
               */
              if (availspace < (chunk_size + ALLOC_CHUNKHDRSZ))
              {
                     /*
                      * The existing active (top) block does not have enough room for
                      * the requested allocation, but it might still have a useful
                      * amount of space in it.  Once we push it down in the block list,
                      * we'll never try to allocate more space from it. So, before we
                      * do that, carve up its free space into chunks that we can put on
                      * the set's freelists.
                      *
                      * Because we can only get here when there's less than
                      * ALLOC_CHUNK_LIMIT left in the block, this loop cannot iterate
                      * more than ALLOCSET_NUM_FREELISTS-1 times.
                      */

                     /*
                      * Carve from largest to smallest; keep going while the leftover
                      * can still hold at least one minimum-size chunk plus its header.
                      */
                     while (availspace >= ((1 << ALLOC_MINBITS) + ALLOC_CHUNKHDRSZ))
                     {
                            Size        availchunk = availspace - ALLOC_CHUNKHDRSZ;
                            int          a_fidx = AllocSetFreeIndex(availchunk);

                            /*
                             * In most cases, we'll get back the index of the next larger
                             * freelist than the one we need to put this chunk on.  The
                             * exception is when availchunk is exactly a power of 2.
                             */
                            if (availchunk != ((Size) 1 << (a_fidx + ALLOC_MINBITS)))
                            {
                                   /* Step down one list so the chunk fits in the space left. */
                                   a_fidx--;
                                   Assert(a_fidx >= 0);
                                   availchunk = ((Size) 1 << (a_fidx + ALLOC_MINBITS));
                            }

                            chunk = (AllocChunk) (block->freeptr);

                            /* Prepare to initialize the chunk header. */
                            VALGRIND_MAKE_MEM_UNDEFINED(chunk, ALLOC_CHUNKHDRSZ);

                            /* From the block's point of view this space is now used. */
                            block->freeptr += (availchunk + ALLOC_CHUNKHDRSZ);
                            availspace -= (availchunk + ALLOC_CHUNKHDRSZ);

                            chunk->size = availchunk;
                            chunk->requested_size = 0;     /* mark it free */

                            /*
                             * Push onto the head of freelist[a_fidx]; while the chunk is
                             * free, aset stores the link to the next free chunk rather
                             * than the owning set.
                             */
                            chunk->aset = (void *) set->freelist[a_fidx];
                            set->freelist[a_fidx] = chunk;
                     }

                     /* Mark that we need to create a new block */
                     block = NULL;
              }
       }



假设用户需要的size是7KB,那么chunk_size就是8KB,现有的available space是7KB+ ALLOC_CHUNKHDRSZ,如下图所示,(此处没有根据ErrorContext详细的去算available space是多少,所以实际运行当中可能不会产生这样的数值),旨在了解freelist的一个分配过程。




        * Time to create a new regular (multi-chunk) block?


       if (block == NULL)
       {
              Size        required_size;

              /*
               * The first such block has size initBlockSize, and we double the
               * space in each succeeding block, but not more than maxBlockSize.
               */
              blksize = set->nextBlockSize;
              set->nextBlockSize <<= 1;
              if (set->nextBlockSize > set->maxBlockSize)
                     set->nextBlockSize = set->maxBlockSize;

              /*
               * If initBlockSize is less than ALLOC_CHUNK_LIMIT, we could need more
               * space... but try to keep it a power of 2.
               */
              required_size = chunk_size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;
              while (blksize < required_size)
                     blksize <<= 1;

              /* Try to allocate it */
              block = (AllocBlock) malloc(blksize);

              /*
               * We could be asking for pretty big blocks here, so cope if malloc
               * fails.  But give up if there's less than 1 MB or so available...
               */
              while (block == NULL && blksize > 1024 * 1024)
              {
                     blksize >>= 1;
                     /* A block too small for this request is useless; stop retrying. */
                     if (blksize < required_size)
                            break;
                     block = (AllocBlock) malloc(blksize);
              }

              if (block == NULL)
                     return NULL;

              /* Record the new block in the context's running total. */
              context->mem_allocated += blksize;

              /* Initialize the block header; usable space starts right after it. */
              block->aset = set;
              block->freeptr = ((char *) block) + ALLOC_BLOCKHDRSZ;
              block->endptr = ((char *) block) + blksize;

              /* Mark unallocated space NOACCESS. */
              VALGRIND_MAKE_MEM_NOACCESS(block->freeptr,
                                                           blksize - ALLOC_BLOCKHDRSZ);

              /* Link at the head of the list: this is now the active block. */
              block->prev = NULL;
              block->next = set->blocks;
              if (block->next)
                     block->next->prev = block;
              set->blocks = block;
       }





        * OK, do the allocation



       chunk = (AllocChunk) (block->freeptr);


       /* Prepare to initialize the chunk header. */


       // 移动block空闲指针, freeptr - (void *) chunk就是包含chunk header在内的chunk大小

       block->freeptr += (chunk_size + ALLOC_CHUNKHDRSZ);

       Assert(block->freeptr <= block->endptr);

       // 初始化chunk

       chunk->aset = (void *) set;

       chunk->size = chunk_size;


       chunk->requested_size = size;

       /* set mark to catch clobber of "unused" space */

       if (size < chunk->size)

              set_sentinel(AllocChunkGetPointer(chunk), size);



       /* fill the allocated space with junk */

       randomize_mem((char *) AllocChunkGetPointer(chunk), size);



       /* Ensure any padding bytes are marked NOACCESS. */

       VALGRIND_MAKE_MEM_NOACCESS((char *) AllocChunkGetPointer(chunk) + size,

                                                    chunk_size - size);


       /* Disallow external access to private part of chunk header. */


 // 返回的地址

return AllocChunkGetPointer(chunk);

4.2 AllocSetFree–释放内存


 * AllocSetFree

 *           Frees allocated memory; memory is removed from the set.


/* Step back from a user pointer to the chunk header that precedes it. */
#define AllocPointerGetChunk(ptr) \
                                   ((AllocChunk)(((char *)(ptr)) - ALLOC_CHUNKHDRSZ))

/*
 * AllocSetFree
 *           Frees allocated memory; memory is removed from the set.
 *
 * context: the AllocSet the chunk belongs to
 * pointer: user pointer previously handed out by that set
 *
 * A chunk larger than set->allocChunkLimit occupies a block of its own; that
 * block is unlinked and returned to malloc.  Any other chunk is pushed onto
 * the freelist matching its size so it can be reused.
 */
static void
AllocSetFree(MemoryContext context, void *pointer)
{
       AllocSet  set = (AllocSet) context;
       AllocChunk     chunk = AllocPointerGetChunk(pointer);

       /*
        * Allow access to private part of chunk header.
        * (The corresponding Valgrind request is not part of this excerpt.)
        */

       /* Test for someone scribbling on unused space in chunk */
       if (chunk->requested_size < chunk->size)
              if (!sentinel_ok(pointer, chunk->requested_size))
                     elog(WARNING, "detected write past chunk end in %s %p",
                             set->header.name, chunk);

       if (chunk->size > set->allocChunkLimit)
       {
              /*
               * Big chunks are certain to have been allocated as single-chunk
               * blocks.  Just unlink that block and return it to malloc().
               */

              /* The block header sits directly in front of the chunk header. */
              AllocBlock      block = (AllocBlock) (((char *) chunk) - ALLOC_BLOCKHDRSZ);

              /*
               * Try to verify that we have a sane block pointer: it should
               * reference the correct aset, and freeptr and endptr should point
               * just past the chunk.  (pointer comes from the caller, so it may
               * not be a real chunk address.)
               */
              if (block->aset != set ||
                     block->freeptr != block->endptr ||
                     block->freeptr != ((char *) block) +
                     (chunk->size + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ))
                     elog(ERROR, "could not find block containing chunk %p", chunk);

              /* OK, remove block from aset's list and free it */
              if (block->prev)
                     block->prev->next = block->next;
              else
                     set->blocks = block->next;      /* block was the list head */
              if (block->next)
                     block->next->prev = block->prev;

              /* Subtract the block's size from the context's running total. */
              context->mem_allocated -= block->endptr - ((char *) block);

              wipe_mem(block, block->freeptr - ((char *) block));

              /* Hand the whole block back to malloc. */
              free(block);
       }
       else
       {
              /*
               * Normal case, put the chunk into appropriate freelist.  The
               * block's freeptr is not touched, so the block's own accounting is
               * unaffected.
               */
              int                 fidx = AllocSetFreeIndex(chunk->size);

              /*
               * Head insertion into the singly linked freelist.  While a chunk is
               * handed out, aset points to its owning set; while it is on a
               * freelist, aset is reused as the link to the next free chunk.
               */
              chunk->aset = (void *) set->freelist[fidx];

              wipe_mem(pointer, chunk->size);

              /* Reset requested_size to 0 in chunks that are on freelist */
              chunk->requested_size = 0;

              set->freelist[fidx] = chunk;
       }
}



4.3 AllocSetRealloc–重新分配内存


 * AllocSetRealloc

 *           Returns new pointer to allocated memory of given size or NULL if

 *           request could not be completed; this memory is added to the set.

 *           Memory associated with given pointer is copied into the new memory,

 *           and the old memory is freed.


 * Without MEMORY_CONTEXT_CHECKING, we don't know the old request size.  This

 * makes our Valgrind client requests less-precise, hazarding false negatives.

 * (In principle, we could use VALGRIND_GET_VBITS() to rediscover the old

 * request size.)


/*
 * AllocSetRealloc
 *           Resize a chunk; returns the (possibly moved) user pointer, or NULL
 *           if the request could not be completed.
 *
 * context: the AllocSet the chunk belongs to
 * pointer: user pointer previously handed out by that set
 * size: new requested size
 *
 * Three cases are handled:
 *   1. the chunk owns a whole block: realloc() that block;
 *   2. the existing chunk is already large enough: just record the new size;
 *   3. otherwise: allocate a new chunk, copy the data, free the old chunk.
 */
static void *
AllocSetRealloc(MemoryContext context, void *pointer, Size size)
{
       AllocSet  set = (AllocSet) context;
       AllocChunk     chunk = AllocPointerGetChunk(pointer);
       Size        oldsize;

       oldsize = chunk->size;

       /* Case 1: the old chunk lives in a dedicated single-chunk block. */
       if (oldsize > set->allocChunkLimit)
       {
              /*
               * The chunk must have been allocated as a single-chunk block.  Use
               * realloc() to make the containing block bigger, or smaller, with
               * minimum space wastage.
               */
              AllocBlock      block = (AllocBlock) (((char *) chunk) - ALLOC_BLOCKHDRSZ);
              Size        chksize;
              Size        blksize;
              Size        oldblksize;

              /*
               * Try to verify that we have a sane block pointer: it should
               * reference the correct aset, and freeptr and endptr should point
               * just past the chunk.
               */
              if (block->aset != set ||
                     block->freeptr != block->endptr ||
                     block->freeptr != ((char *) block) +
                     (oldsize + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ))
                     elog(ERROR, "could not find block containing chunk %p", chunk);

              /*
               * Even if the new request is less than set->allocChunkLimit, we stick
               * with the single-chunk block approach.  Therefore we need
               * chunk->size to be bigger than set->allocChunkLimit, so we don't get
               * confused about the chunk's status in future calls.
               */
              chksize = Max(size, set->allocChunkLimit + 1);
              chksize = MAXALIGN(chksize);

              /* Do the realloc */
              blksize = chksize + ALLOC_BLOCKHDRSZ + ALLOC_CHUNKHDRSZ;
              oldblksize = block->endptr - ((char *) block);

              /*
               * On failure realloc() leaves the old block intact, and it is still
               * linked into the set's list, so returning NULL loses nothing.
               */
              block = (AllocBlock) realloc(block, blksize);
              if (block == NULL)
                     return NULL;

              /* updated separately, not to underflow when (oldblksize > blksize) */
              context->mem_allocated -= oldblksize;
              context->mem_allocated += blksize;

              block->freeptr = block->endptr = ((char *) block) + blksize;

              /* Update pointers since block has likely been moved */
              chunk = (AllocChunk) (((char *) block) + ALLOC_BLOCKHDRSZ);
              pointer = AllocChunkGetPointer(chunk);

              /* Re-point the neighbours (or the list head) at the new address. */
              if (block->prev)
                     block->prev->next = block;
              else
                     set->blocks = block;
              if (block->next)
                     block->next->prev = block;

              chunk->size = chksize;
              chunk->requested_size = size;

              return pointer;
       }

       /*
        * Chunk sizes are aligned to power of 2 in AllocSetAlloc().  Maybe the
        * allocated area already is >= the new size.  (In particular, we will
        * fall out here if the requested size is a decrease.)
        */

       /* Case 2: the existing chunk is big enough; only record the new size. */
       else if (oldsize >= size)
       {
              chunk->requested_size = size;

              return pointer;
       }

       /* Case 3: size > oldsize; allocate anew, copy, then free the old chunk. */
       else
       {
              /*
               * Enlarge-a-small-chunk case.  We just do this by brute force, ie,
               * allocate a new chunk and copy the data.  Since we know the existing
               * data isn't huge, this won't involve any great memcpy expense, so
               * it's not worth being smarter.  (At one time we tried to avoid
               * memcpy when it was possible to enlarge the chunk in-place, but that
               * turns out to misbehave unpleasantly for repeated cycles of
               * palloc/repalloc/pfree: the eventually freed chunks go into the
               * wrong freelist for the next initial palloc request, and so we leak
               * memory indefinitely.  See pgsql-hackers archives for 2007-08-11.)
               */
              AllocPointer newPointer;

              /* allocate new chunk */
              newPointer = AllocSetAlloc((MemoryContext) set, size);

              /* leave immediately if request was not completed */
              if (newPointer == NULL)
                     return NULL;

              /* transfer existing data (certain to fit) */
              memcpy(newPointer, pointer, oldsize);

              /* free old chunk */
              AllocSetFree((MemoryContext) set, pointer);

              return newPointer;
       }
}



4.4 AllocSetReset–释放set中除keeper指向的block之外的所有内存


 * AllocSetReset

 *           Frees all memory which is allocated in the given set.


 * Actually, this routine has some discretion about what to do.

 * It should mark all allocated chunks freed, but it need not necessarily

 * give back all the resources the set owns.  Our actual implementation is

 * that we give back all but the "keeper" block (which we must keep, since

 * it shares a malloc chunk with the context header).  In this way, we don't

 * thrash malloc() when a context is repeatedly reset after small allocations,

 * which is typical behavior for per-tuple contexts.


/*
 * AllocSetReset
 *           Frees all memory which is allocated in the given set, except the
 *           keeper block, which is reset to empty and kept.
 *
 * context: the AllocSet to reset
 */
static void
AllocSetReset(MemoryContext context)
{
       AllocSet  set = (AllocSet) context;
       AllocBlock      block;
       Size        keepersize = set->keeper->endptr - ((char *) set);

       /*
        * Clear chunk freelists.  Blocks are about to be freed, so the freelist
        * heads must be zeroed first to avoid leaving dangling pointers.
        */
       MemSetAligned(set->freelist, 0, sizeof(set->freelist));

       /* Start the walk at the first (active) block. */
       block = set->blocks;

       /* New blocks list will be just the keeper block */
       set->blocks = set->keeper;

       while (block != NULL)
       {
              /* Fetch the successor first: block may be freed below. */
              AllocBlock      next = block->next;

              if (block == set->keeper)
              {
                     /* Reset the block, but don't return it to malloc */
                     char    *datastart = ((char *) block) + ALLOC_BLOCKHDRSZ;

                     block->freeptr = datastart;
                     block->prev = NULL;
                     block->next = NULL;
              }
              else
              {
                     /* Normal case, release the block */
                     context->mem_allocated -= block->endptr - ((char *) block);
                     free(block);
              }

              block = next;
       }

       Assert(context->mem_allocated == keepersize);

       /* Reset block size allocation sequence, too */
       set->nextBlockSize = set->initBlockSize;
}


4.5 AllocSetDelete–释放set中所有的内存


 * AllocSetDelete

 *           Frees all memory which is allocated in the given set,

 *           in preparation for deletion of the set.


 * Unlike AllocSetReset, this *must* free all resources of the set.


/*
 * AllocSetDelete
 *           Frees all memory which is allocated in the given set,
 *           in preparation for deletion of the set.
 *
 * context: the AllocSet to delete
 *
 * Unlike AllocSetReset, this *must* free all resources of the set, unless
 * the set qualifies for a context freelist, in which case it is reset and
 * parked there instead of being destroyed.
 */
static void
AllocSetDelete(MemoryContext context)
{
       AllocSet  set = (AllocSet) context;
       AllocBlock      block = set->blocks;
       Size        keepersize = set->keeper->endptr - ((char *) set);

       /*
        * If the context is a candidate for a freelist, put it into that freelist
        * instead of destroying it.  (Per the original notes, freeListIndex is
        * chosen at creation time from minContextSize/initBlockSize: 0 or 1 for
        * recyclable contexts, -1 otherwise.)
        */
       if (set->freeListIndex >= 0)
       {
              AllocSetFreeList *freelist = &context_freelists[set->freeListIndex];

              /*
               * Reset the context, if it needs it, so that we aren't hanging on to
               * more than the initial malloc chunk.
               */
              if (!context->isReset)
                     MemoryContextResetOnly(context);

              /*
               * If the freelist is full, just discard what's already in it.  See
               * comments with context_freelists[].
               */
              if (freelist->num_free >= MAX_FREE_CONTEXTS)
              {
                     while (freelist->first_free != NULL)
                     {
                            AllocSetContext *oldset = freelist->first_free;

                            freelist->first_free = (AllocSetContext *) oldset->header.nextchild;
                            freelist->num_free--;

                            /* All that remains is to free the header/initial block */
                            free(oldset);
                     }
                     Assert(freelist->num_free == 0);
              }

              /*
               * Now add the just-deleted context to the freelist.  The nextchild
               * field is reused as the freelist link.
               */
              set->header.nextchild = (MemoryContext) freelist->first_free;
              freelist->first_free = set;
              freelist->num_free++;

              /* The context was recycled, not destroyed; nothing more to free. */
              return;
       }

       /*
        * Free all blocks, except the keeper which is part of context header.
        * The keeper was allocated together with the AllocSetContext, so the
        * address malloc returned is that of the set, not of the keeper block;
        * it can therefore only be released through free(set) below.
        */
       while (block != NULL)
       {
              /* Fetch the successor first: block may be freed below. */
              AllocBlock      next = block->next;

              if (block != set->keeper)
              {
                     context->mem_allocated -= block->endptr - ((char *) block);
                     free(block);
              }

              block = next;
       }

       Assert(context->mem_allocated == keepersize);

       /* Finally, free the context header, including the keeper block */
       free(set);
}



 * MemoryContextResetOnly

 *           Release all space allocated within a context.

 *           Nothing is done to the context's descendant contexts.


 // MemoryContextResetOnly

 // 只释放由参数指定的内存上下文中的已分配的空间,该函数不会作用其后代的内存上下文


MemoryContextResetOnly(MemoryContext context)




       /* Nothing to do if no pallocs since startup or last reset */

    // 如果在该内存上下文初始化之后,没有进行任何的分配(包括注册回调函数)等操作,那么就没有必要进行reset操作,不会进入到下面if的代码块。

       if (!context->isReset)





               * If context->ident points into the context's memory, it will become

               * a dangling pointer.  We could prevent that by setting it to NULL

               * here, but that would break valid coding patterns that keep the

               * ident elsewhere, e.g. in a parent context.  So for now we assume

               * the programmer got it right.



              context->isReset = true;



4.6 AllocSetGetChunkSpace


 * AllocSetGetChunkSpace

 *           Given a currently-allocated chunk, determine the total space

 *           it occupies (including all memory-allocation overhead).


static Size
AllocSetGetChunkSpace(MemoryContext context, void *pointer)
{
	/* Find the chunk header belonging to this pointer; context is unused. */
	AllocChunk	chunk = AllocPointerGetChunk(pointer);
	Size		result;

	/* Size recorded in the chunk header plus the header's own overhead. */
	result = chunk->size + ALLOC_CHUNKHDRSZ;
	return result;
}


4.7 AllocSetIsEmpty


/*
 * AllocSetIsEmpty
 *		Is an allocset empty of any allocated space?
 */


static bool
AllocSetIsEmpty(MemoryContext context)
{
	/*
	 * For now, we say "empty" only if the context is new or just reset. We
	 * could examine the freelists to determine if all space has been freed,
	 * but it's not really worth the trouble for present uses of this
	 * functionality.
	 */
	if (context->isReset)
		return true;
	return false;
}


4.8 AllocSetStats

/*
 * Callback type used to emit one context's statistics.  AllocSetStats calls
 * it with the context, the caller's opaque passthru pointer, the formatted
 * stats string, and the print_to_stderr flag.
 */
typedef void (*MemoryStatsPrintFunc) (MemoryContext context, void *passthru,

                                                                 const char *stats_string,

                                                                 bool print_to_stderr);


/*
 * AllocSetStats
 *		Compute stats about memory consumption of an allocset.
 *
 * printfunc: if not NULL, pass a human-readable stats string to this.
 * passthru: pass this pointer through to printfunc.
 * totals: if not NULL, add stats about this context into *totals.
 * print_to_stderr: print stats to stderr if true, elog otherwise.
 */


static void
AllocSetStats(MemoryContext context,
			  MemoryStatsPrintFunc printfunc, void *passthru,
			  MemoryContextCounters *totals, bool print_to_stderr)
{
	AllocSet	set = (AllocSet) context;
	Size		nblocks = 0;
	Size		freechunks = 0;
	Size		totalspace;
	Size		freespace = 0;
	AllocBlock	block;
	int			fidx;

	/* Include context header in totalspace (header size rounded by MAXALIGN) */
	totalspace = MAXALIGN(sizeof(AllocSetContext));

	/*
	 * Walk the block list: count each block, add its full extent to
	 * totalspace and its not-yet-carved-out tail to freespace.
	 */
	for (block = set->blocks; block != NULL; block = block->next)
	{
		nblocks++;
		totalspace += block->endptr - ((char *) block);
		freespace += block->endptr - block->freeptr;
	}

	/*
	 * Add the chunks sitting on the freelists.  Each of the
	 * ALLOCSET_NUM_FREELISTS slots heads a singly linked list; the loop
	 * follows a free chunk's aset field to reach the next free chunk.
	 */
	for (fidx = 0; fidx < ALLOCSET_NUM_FREELISTS; fidx++)
	{
		AllocChunk	chunk;

		for (chunk = set->freelist[fidx]; chunk != NULL;
			 chunk = (AllocChunk) chunk->aset)
		{
			freechunks++;
			freespace += chunk->size + ALLOC_CHUNKHDRSZ;
		}
	}

	/* If a print callback was supplied, format the summary and hand it over. */
	if (printfunc)
	{
		char		stats_string[200];

		snprintf(stats_string, sizeof(stats_string),
				 "%zu total in %zd blocks; %zu free (%zd chunks); %zu used",
				 totalspace, nblocks, freespace, freechunks,
				 totalspace - freespace);
		printfunc(context, passthru, stats_string, print_to_stderr);
	}

	/*
	 * totals, when supplied, points to a caller-owned counter struct (not an
	 * array); add this context's four figures into it.
	 */
	if (totals)
	{
		totals->nblocks += nblocks;
		totals->freechunks += freechunks;
		totals->totalspace += totalspace;
		totals->freespace += freespace;
	}
}




SELECT * FROM pg_backend_memory_contexts WHERE name = 'ErrorContext'


1 创建一个内存上下文

CronLoopContext = AllocSetContextCreate(CurrentMemoryContext,

                                                                               "pg_cron loop context",






static inline MemoryContext
MemoryContextSwitchTo(MemoryContext context)
{
	/* Remember the context that was current so the caller can restore it. */
	MemoryContext old = CurrentMemoryContext;

	/* Make the given context current and hand back the previous one. */
	CurrentMemoryContext = context;
	return old;
}



void *
palloc(Size size)
{
	/* duplicates MemoryContextAlloc to avoid increased overhead */
	void	   *ret;

	/*
	 * Allocate from whatever context is current.  In this walkthrough
	 * CurrentMemoryContext already points at the newly created context.
	 */
	MemoryContext context = CurrentMemoryContext;

	/* Validation: reject request sizes that AllocSizeIsValid refuses. */
	if (!AllocSizeIsValid(size))
		elog(ERROR, "invalid memory alloc request size %zu", size);

	/* An allocation is about to happen, so the context is no longer reset. */
	context->isReset = false;

	/* Dispatch to the context type's allocator (see section 4.1). */
	ret = context->methods->alloc(context, size);

	/*
	 * On failure, dump memory statistics and raise an out-of-memory error.
	 * NOTE(review): the stats call and the ereport/errcode opener were missing
	 * from the listing and are restored to match upstream PostgreSQL mcxt.c --
	 * confirm against the release being quoted.
	 */
	if (ret == NULL)
	{
		MemoryContextStats(TopMemoryContext);
		ereport(ERROR,
				(errcode(ERRCODE_OUT_OF_MEMORY),
				 errmsg("out of memory"),
				 errdetail("Failed on request of size %zu in memory context \"%s\".",
						   size, context->name)));
	}

	return ret;
}

  1. 释放内存:

/*
 * pfree
 *		Release an allocated chunk.
 */



pfree(void *pointer)


       MemoryContext context = GetMemoryChunkContext(pointer);

       context->methods->free_p(context, pointer);


pointer是chunk数据区的首地址(紧跟在chunk头之后),减去sizeof(void *)得到的是AllocChunkData中aset成员的地址,该地址处存放的内容是所属AllocSetContext的地址。因此先把这个地址强转为MemoryContext *再解引用,即*(MemoryContext *) (((char *) pointer) - sizeof(void *)),就能取得该chunk所属的内存上下文。

static inline MemoryContext

GetMemoryChunkContext(void *pointer)


       MemoryContext context;



        * Try to detect bogus pointers handed to us, poorly though we can.

        * Presumably, a pointer that isn't MAXALIGNED isn't pointing at an

        * allocated chunk.


       Assert(pointer != NULL);

       Assert(pointer == (void *) MAXALIGN(pointer));



        * OK, it's probably safe to look at the context.


       context = *(MemoryContext *) (((char *) pointer) - sizeof(void *));




       return context;



MemoryContextReset(CronLoopContext); / MemoryContextDelete(CronLoopContext);





