- 论坛徽章:
- 0
|
现在只了解fsm是一个二叉树,pages也是一个树
MaxFSMRequestSize是什么作用?- #include "postgres.h"
- #include "access/htup.h"
- #include "access/xlogutils.h"
- #include "miscadmin.h"
- #include "storage/bufmgr.h"
- #include "storage/bufpage.h"
- #include "storage/freespace.h"
- #include "storage/fsm_internals.h"
- #include "storage/lmgr.h"
- #include "storage/lwlock.h"
- #include "storage/smgr.h"
- #include "utils/rel.h"
- /*
- * We use just one byte to store the amount of free space on a page, so we
- * divide the amount of free space a page can have into 256 different
- * categories. The highest category, 255, represents a page with at least
- * MaxFSMRequestSize bytes of free space, and the second highest category
- * represents the range from 254 * FSM_CAT_STEP, inclusive, to
- * MaxFSMRequestSize, exclusive.
- *
- * MaxFSMRequestSize depends on the architecture and BLCKSZ, but assuming
- * default 8k BLCKSZ, and that MaxFSMRequestSize is 24 bytes, the categories
- * look like this
- *
- *
- * Range Category
- * 0 - 31 0
- * 32 - 63 1
- * ... ... ...
- * 8096 - 8127 253
- * 8128 - 8163 254
- * 8164 - 8192 255
- *
- * The reason that MaxFSMRequestSize is special is that if MaxFSMRequestSize
- * isn't equal to a range boundary, a page with exactly MaxFSMRequestSize
- * bytes of free space wouldn't satisfy a request for MaxFSMRequestSize
- * bytes. If there isn't more than MaxFSMRequestSize bytes of free space on a
- * completely empty page, that would mean that we could never satisfy a
- * request of exactly MaxFSMRequestSize bytes.
- */
- #define FSM_CATEGORIES 256
- #define FSM_CAT_STEP (BLCKSZ / FSM_CATEGORIES)
- #define MaxFSMRequestSize MaxHeapTupleSize
- /*
- * Depth of the on-disk tree. We need to be able to address 2^32-1 blocks,
- * and 1626 is the smallest number that satisfies X^3 >= 2^32-1. Likewise,
- * 216 is the smallest number that satisfies X^4 >= 2^32-1. In practice,
- * this means that 4096 bytes is the smallest BLCKSZ that we can get away
- * with a 3-level tree, and 512 is the smallest we support.
- */
- #define FSM_TREE_DEPTH ((SlotsPerFSMPage >= 1626) ? 3 : 4)
- #define FSM_ROOT_LEVEL (FSM_TREE_DEPTH - 1)
- #define FSM_BOTTOM_LEVEL 0
- /*
- * The internal FSM routines work on a logical addressing scheme. Each
- * level of the tree can be thought of as a separately addressable file.
- */
- typedef struct
- {
- int level; /* level */
- int logpageno; /* page number within the level */
- } FSMAddress;
- /* Address of the root page. */
- static const FSMAddress FSM_ROOT_ADDRESS = {FSM_ROOT_LEVEL, 0};
- /* functions to navigate the tree */
- static FSMAddress fsm_get_child(FSMAddress parent, uint16 slot);
- static FSMAddress fsm_get_parent(FSMAddress child, uint16 *slot);
- static FSMAddress fsm_get_location(BlockNumber heapblk, uint16 *slot);
- static BlockNumber fsm_get_heap_blk(FSMAddress addr, uint16 slot);
- static BlockNumber fsm_logical_to_physical(FSMAddress addr);
- static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend);
- static void fsm_extend(Relation rel, BlockNumber fsm_nblocks);
- /* functions to convert amount of free space to a FSM category */
- static uint8 fsm_space_avail_to_cat(Size avail);
- static uint8 fsm_space_needed_to_cat(Size needed);
- static Size fsm_space_cat_to_avail(uint8 cat);
- /* workhorse functions for various operations */
- static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
- uint8 newValue, uint8 minValue);
- static BlockNumber fsm_search(Relation rel, uint8 min_cat);
- static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
- /******** Public API ********/
- /*
- * GetPageWithFreeSpace - try to find a page in the given relation with
- * at least the specified amount of free space.
- *
- * If successful, return the block number; if not, return InvalidBlockNumber.
- *
- * The caller must be prepared for the possibility that the returned page
- * will turn out to have too little space available by the time the caller
- * gets a lock on it. In that case, the caller should report the actual
- * amount of free space available on that page and then try again (see
- * RecordAndGetPageWithFreeSpace). If InvalidBlockNumber is returned,
- * extend the relation.
- */
- BlockNumber
- GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
- {
- uint8 min_cat = fsm_space_needed_to_cat(spaceNeeded);
- return fsm_search(rel, min_cat);
- }
- /*
- * RecordAndGetPageWithFreeSpace - update info about a page and try again.
- *
- * We provide this combo form to save some locking overhead, compared to
- * separate RecordPageWithFreeSpace + GetPageWithFreeSpace calls. There's
- * also some effort to return a page close to the old page; if there's a
- * page with enough free space on the same FSM page where the old one page
- * is located, it is preferred.
- */
- BlockNumber
- RecordAndGetPageWithFreeSpace(Relation rel, BlockNumber oldPage,
- Size oldSpaceAvail, Size spaceNeeded)
- {
- int old_cat = fsm_space_avail_to_cat(oldSpaceAvail);
- int search_cat = fsm_space_needed_to_cat(spaceNeeded);
- FSMAddress addr;
- uint16 slot;
- int search_slot;
- /* Get the location of the FSM byte representing the heap block */
- addr = fsm_get_location(oldPage, &slot);
- search_slot = fsm_set_and_search(rel, addr, slot, old_cat, search_cat);
- /*
- * If fsm_set_and_search found a suitable new block, return that.
- * Otherwise, search as usual.
- */
- if (search_slot != -1)
- return fsm_get_heap_blk(addr, search_slot);
- else
- return fsm_search(rel, search_cat);
- }
- /*
- * RecordPageWithFreeSpace - update info about a page.
- *
- * Note that if the new spaceAvail value is higher than the old value stored
- * in the FSM, the space might not become visible to searchers until the next
- * FreeSpaceMapVacuum call, which updates the upper level pages.
- */
- void
- RecordPageWithFreeSpace(Relation rel, BlockNumber heapBlk, Size spaceAvail)
- {
- int new_cat = fsm_space_avail_to_cat(spaceAvail);
- FSMAddress addr;
- uint16 slot;
- /* Get the location of the FSM byte representing the heap block */
- addr = fsm_get_location(heapBlk, &slot);
- fsm_set_and_search(rel, addr, slot, new_cat, 0);
- }
- /*
- * XLogRecordPageWithFreeSpace - like RecordPageWithFreeSpace, for use in
- * WAL replay
- */
- /*
- * GetRecordedFreePage - return the amount of free space on a particular page,
- * according to the FSM.
- */
- Size
- GetRecordedFreeSpace(Relation rel, BlockNumber heapBlk)
- {
- FSMAddress addr;
- uint16 slot;
- Buffer buf;
- uint8 cat;
- /* Get the location of the FSM byte representing the heap block */
- addr = fsm_get_location(heapBlk, &slot);
- buf = fsm_readbuf(rel, addr, false);
- if (!BufferIsValid(buf))
- return 0;
- cat = fsm_get_avail(BufferGetPage(buf), slot);
- ReleaseBuffer(buf);
- return fsm_space_cat_to_avail(cat);
- }
- /*
- * FreeSpaceMapVacuum - scan and fix any inconsistencies in the FSM
- */
- void
- FreeSpaceMapVacuum(Relation rel)
- {
- bool dummy;
- /*
- * Traverse the tree in depth-first order. The tree is stored physically
- * in depth-first order, so this should be pretty I/O efficient.
- */
- fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy);
- }
- /******** Internal routines ********/
- /*
- * Return category corresponding x bytes of free space
- */
- static uint8
- fsm_space_avail_to_cat(Size avail)
- {
- int cat;
- Assert(avail < BLCKSZ);
- if (avail >= MaxFSMRequestSize)
- return 255;
- cat = avail / FSM_CAT_STEP;
- /*
- * The highest category, 255, is reserved for MaxFSMRequestSize bytes or
- * more.
- */
- if (cat > 254)
- cat = 254;
- return (uint8) cat;
- }
- /*
- * Return the lower bound of the range of free space represented by given
- * category.
- */
- static Size
- fsm_space_cat_to_avail(uint8 cat)
- {
- /* The highest category represents exactly MaxFSMRequestSize bytes. */
- if (cat == 255)
- return MaxFSMRequestSize;
- else
- return cat * FSM_CAT_STEP;
- }
- /*
- * Which category does a page need to have, to accommodate x bytes of data?
- * While fsm_size_to_avail_cat() rounds down, this needs to round up.
- */
- static uint8
- fsm_space_needed_to_cat(Size needed)
- {
- int cat;
- /* Can't ask for more space than the highest category represents */
- if (needed > MaxFSMRequestSize)
- elog(ERROR, "invalid FSM request size %lu",
- (unsigned long) needed);
- if (needed == 0)
- return 1;
- cat = (needed + FSM_CAT_STEP - 1) / FSM_CAT_STEP;
- if (cat > 255)
- cat = 255;
- return (uint8) cat;
- }
- /*
- * Returns the physical block number an FSM page
- */
- static BlockNumber
- fsm_logical_to_physical(FSMAddress addr)
- {
- BlockNumber pages;
- int leafno;
- int l;
- /*
- * Calculate the logical page number of the first leaf page below the
- * given page.
- */
- leafno = addr.logpageno;
- for (l = 0; l < addr.level; l++)
- leafno *= SlotsPerFSMPage;
- /* Count upper level nodes required to address the leaf page */
- pages = 0;
- for (l = 0; l < FSM_TREE_DEPTH; l++)
- {
- pages += leafno + 1;
- leafno /= SlotsPerFSMPage;
- }
- /*
- * If the page we were asked for wasn't at the bottom level, subtract the
- * additional lower level pages we counted above.
- */
- pages -= addr.level;
- /* Turn the page count into 0-based block number */
- return pages - 1;
- }
- /*
- * Return the FSM location corresponding to given heap block.
- */
- static FSMAddress
- fsm_get_location(BlockNumber heapblk, uint16 *slot)
- {
- FSMAddress addr;
- addr.level = FSM_BOTTOM_LEVEL;
- addr.logpageno = heapblk / SlotsPerFSMPage;
- *slot = heapblk % SlotsPerFSMPage;
- return addr;
- }
- /*
- * Return the heap block number corresponding to given location in the FSM.
- */
- static BlockNumber
- fsm_get_heap_blk(FSMAddress addr, uint16 slot)
- {
- Assert(addr.level == FSM_BOTTOM_LEVEL);
- return ((unsigned int) addr.logpageno) * SlotsPerFSMPage + slot;
- }
- /*
- * Given a logical address of a child page, get the logical page number of
- * the parent, and the slot within the parent corresponding to the child.
- */
- static FSMAddress
- fsm_get_parent(FSMAddress child, uint16 *slot)
- {
- FSMAddress parent;
- Assert(child.level < FSM_ROOT_LEVEL);
- parent.level = child.level + 1;
- parent.logpageno = child.logpageno / SlotsPerFSMPage;
- *slot = child.logpageno % SlotsPerFSMPage;
- return parent;
- }
- /*
- * Given a logical address of a parent page, and a slot number get the
- * logical address of the corresponding child page.
- */
- static FSMAddress
- fsm_get_child(FSMAddress parent, uint16 slot)
- {
- FSMAddress child;
- Assert(parent.level > FSM_BOTTOM_LEVEL);
- child.level = parent.level - 1;
- child.logpageno = parent.logpageno * SlotsPerFSMPage + slot;
- return child;
- }
- /*
- * Read a FSM page.
- *
- * If the page doesn't exist, InvalidBuffer is returned, or if 'extend' is
- * true, the FSM file is extended.
- */
- static Buffer
- fsm_readbuf(Relation rel, FSMAddress addr, bool extend)
- {
- BlockNumber blkno = fsm_logical_to_physical(addr);
- Buffer buf;
- RelationOpenSmgr(rel);
- /*
- * If we haven't cached the size of the FSM yet, check it first. Also
- * recheck if the requested block seems to be past end, since our cached
- * value might be stale. (We send smgr inval messages on truncation, but
- * not on extension.)
- */
- if (rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber ||
- blkno >= rel->rd_smgr->smgr_fsm_nblocks)
- {
- if (smgrexists(rel->rd_smgr, FSM_FORKNUM))
- rel->rd_smgr->smgr_fsm_nblocks = smgrnblocks(rel->rd_smgr,
- FSM_FORKNUM);
- else
- rel->rd_smgr->smgr_fsm_nblocks = 0;
- }
- /* Handle requests beyond EOF */
- if (blkno >= rel->rd_smgr->smgr_fsm_nblocks)
- {
- if (extend)
- fsm_extend(rel, blkno + 1);
- else
- return InvalidBuffer;
- }
- /*
- * Use ZERO_ON_ERROR mode, and initialize the page if necessary. The FSM
- * information is not accurate anyway, so it's better to clear corrupt
- * pages than error out. Since the FSM changes are not WAL-logged, the
- * so-called torn page problem on crash can lead to pages with corrupt
- * headers, for example.
- */
- buf = ReadBufferExtended(rel, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL);
- if (PageIsNew(BufferGetPage(buf)))
- PageInit(BufferGetPage(buf), BLCKSZ, 0);
- return buf;
- }
- /*
- * Ensure that the FSM fork is at least fsm_nblocks long, extending
- * it if necessary with empty pages. And by empty, I mean pages filled
- * with zeros, meaning there's no free space.
- */
- static void
- fsm_extend(Relation rel, BlockNumber fsm_nblocks)
- {
- BlockNumber fsm_nblocks_now;
- Page pg;
- pg = (Page) palloc(BLCKSZ);
- PageInit(pg, BLCKSZ, 0);
- /*
- * We use the relation extension lock to lock out other backends trying to
- * extend the FSM at the same time. It also locks out extension of the
- * main fork, unnecessarily, but extending the FSM happens seldom enough
- * that it doesn't seem worthwhile to have a separate lock tag type for
- * it.
- *
- * Note that another backend might have extended or created the relation
- * by the time we get the lock.
- */
- LockRelationForExtension(rel, ExclusiveLock);
- /* Might have to re-open if a cache flush happened */
- RelationOpenSmgr(rel);
- /*
- * Create the FSM file first if it doesn't exist. If smgr_fsm_nblocks is
- * positive then it must exist, no need for an smgrexists call.
- */
- if ((rel->rd_smgr->smgr_fsm_nblocks == 0 ||
- rel->rd_smgr->smgr_fsm_nblocks == InvalidBlockNumber) &&
- !smgrexists(rel->rd_smgr, FSM_FORKNUM))
- smgrcreate(rel->rd_smgr, FSM_FORKNUM, false);
- fsm_nblocks_now = smgrnblocks(rel->rd_smgr, FSM_FORKNUM);
- while (fsm_nblocks_now < fsm_nblocks)
- {
- smgrextend(rel->rd_smgr, FSM_FORKNUM, fsm_nblocks_now,
- (char *) pg, rel->rd_istemp);
- fsm_nblocks_now++;
- }
- /* Update local cache with the up-to-date size */
- rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now;
- UnlockRelationForExtension(rel, ExclusiveLock);
- pfree(pg);
- }
- /*
- * Set value in given FSM page and slot.
- *
- * If minValue > 0, the updated page is also searched for a page with at
- * least minValue of free space. If one is found, its slot number is
- * returned, -1 otherwise.
- */
- static int
- fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
- uint8 newValue, uint8 minValue)
- {
- Buffer buf;
- Page page;
- int newslot = -1;
- buf = fsm_readbuf(rel, addr, true);
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- page = BufferGetPage(buf);
- if (fsm_set_avail(page, slot, newValue))
- MarkBufferDirty(buf);
- if (minValue != 0)
- {
- /* Search while we still hold the lock */
- newslot = fsm_search_avail(buf, minValue,
- addr.level == FSM_BOTTOM_LEVEL,
- true);
- }
- UnlockReleaseBuffer(buf);
- return newslot;
- }
- /*
- * Search the tree for a heap page with at least min_cat of free space
- */
- static BlockNumber
- fsm_search(Relation rel, uint8 min_cat)
- {
- int restarts = 0;
- FSMAddress addr = FSM_ROOT_ADDRESS;
- for (;;)
- {
- int slot;
- Buffer buf;
- uint8 max_avail = 0;
- /* Read the FSM page. */
- buf = fsm_readbuf(rel, addr, false);
- /* Search within the page */
- if (BufferIsValid(buf))
- {
- LockBuffer(buf, BUFFER_LOCK_SHARE);
- slot = fsm_search_avail(buf, min_cat,
- (addr.level == FSM_BOTTOM_LEVEL),
- false);
- if (slot == -1)
- max_avail = fsm_get_max_avail(BufferGetPage(buf));
- UnlockReleaseBuffer(buf);
- }
- else
- slot = -1;
- if (slot != -1)
- {
- /*
- * Descend the tree, or return the found block if we're at the
- * bottom.
- */
- if (addr.level == FSM_BOTTOM_LEVEL)
- return fsm_get_heap_blk(addr, slot);
- addr = fsm_get_child(addr, slot);
- }
- else if (addr.level == FSM_ROOT_LEVEL)
- {
- /*
- * At the root, failure means there's no page with enough free
- * space in the FSM. Give up.
- */
- return InvalidBlockNumber;
- }
- else
- {
- uint16 parentslot;
- FSMAddress parent;
- /*
- * At lower level, failure can happen if the value in the upper-
- * level node didn't reflect the value on the lower page. Update
- * the upper node, to avoid falling into the same trap again, and
- * start over.
- *
- * There's a race condition here, if another backend updates this
- * page right after we release it, and gets the lock on the parent
- * page before us. We'll then update the parent page with the now
- * stale information we had. It's OK, because it should happen
- * rarely, and will be fixed by the next vacuum.
- */
- parent = fsm_get_parent(addr, &parentslot);
- fsm_set_and_search(rel, parent, parentslot, max_avail, 0);
- /*
- * If the upper pages are badly out of date, we might need to loop
- * quite a few times, updating them as we go. Any inconsistencies
- * should eventually be corrected and the loop should end. Looping
- * indefinitely is nevertheless scary, so provide an emergency
- * valve.
- */
- if (restarts++ > 10000)
- return InvalidBlockNumber;
- /* Start search all over from the root */
- addr = FSM_ROOT_ADDRESS;
- }
- }
- }
- /*
- * Recursive guts of FreeSpaceMapVacuum
- */
- static uint8
- fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
- {
- Buffer buf;
- Page page;
- uint8 max_avail;
- /* Read the page if it exists, or return EOF */
- buf = fsm_readbuf(rel, addr, false);
- if (!BufferIsValid(buf))
- {
- *eof_p = true;
- return 0;
- }
- else
- *eof_p = false;
- page = BufferGetPage(buf);
- /*
- * Recurse into children, and fix the information stored about them at
- * this level.
- */
- if (addr.level > FSM_BOTTOM_LEVEL)
- {
- int slot;
- bool eof = false;
- for (slot = 0; slot < SlotsPerFSMPage; slot++)
- {
- int child_avail;
- CHECK_FOR_INTERRUPTS();
- /* After we hit end-of-file, just clear the rest of the slots */
- if (!eof)
- child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot), &eof);
- else
- child_avail = 0;
- /* Update information about the child */
- if (fsm_get_avail(page, slot) != child_avail)
- {
- LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
- fsm_set_avail(BufferGetPage(buf), slot, child_avail);
- MarkBufferDirty(buf);
- LockBuffer(buf, BUFFER_LOCK_UNLOCK);
- }
- }
- }
- max_avail = fsm_get_max_avail(BufferGetPage(buf));
- /*
- * Reset the next slot pointer. This encourages the use of low-numbered
- * pages, increasing the chances that a later vacuum can truncate the
- * relation.
- */
- ((FSMPage) PageGetContents(page))->fp_next_slot = 0;
- ReleaseBuffer(buf);
- return max_avail;
- }
复制代码 |
|