三里屯摇滚 发表于 2012-03-03 11:32

Linux缓存机制之块缓存 2.。。。。。。

    Linux缓存机制之块缓存 2.。。。。。。









   除了读操作之外,页面的写操作也可以划分为更小的单位。只有页中实际修改的内容需要回写,而不用回写整页的内容。遗憾的是,从缓冲区的角度来看,写操作的实现比上述的读操作复杂得多。

下面是 __block_write_full_page 函数中回写脏页面所涉及的缓冲区相关操作:
/*
 * NOTE! All mapped/uptodate combinations are valid:
 *
 *	Mapped	Uptodate	Meaning
 *
 *	No	No		"unknown" - must do get_block()
 *	No	Yes		"hole" - zero-filled
 *	Yes	No		"allocated" - allocated on disk, not read in
 *	Yes	Yes		"valid" - allocated and up-to-date in memory.
 *
 * "Dirty" is valid only with the last case (mapped+uptodate).
 */

/*
 * __block_write_full_page - write back the dirty buffers of a locked page.
 *
 * @inode:     inode owning the page; its i_blkbits gives the block size and
 *             its i_size gives the last block that lies inside the file.
 * @page:      page to write.  Must be locked on entry (BUG_ON otherwise);
 *             it is unlocked before this function returns, on both the
 *             normal and the recovery path.
 * @get_block: callback invoked as get_block(inode, block, bh, 1) for every
 *             dirty buffer inside i_size that is unmapped or delayed.
 * @wbc:       writeback control.  sync_mode selects the write operation,
 *             and sync_mode together with nonblocking selects between
 *             lock_buffer() and trylock_buffer().
 * @handler:   completion handler handed to mark_buffer_async_write_endio()
 *             for every buffer that is going to be submitted.
 *
 * Returns 0 on success, or the non-zero value returned by get_block() once
 * the recovery path has run.
 *
 * While block_write_full_page is writing back the dirty buffers under
 * the page lock, whoever dirtied the buffers may decide to clean them
 * again at any time.  We handle that by only looking at the buffer
 * state inside lock_buffer().
 *
 * If block_write_full_page() is called for regular writeback
 * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
 * locked buffer.   This only can happen if someone has written the buffer
 * directly, with submit_bh().  At the address_space level PageWriteback
 * prevents this contention from occurring.
 *
 * If block_write_full_page() is called with wbc->sync_mode ==
 * WB_SYNC_ALL, the writes are posted using WRITE_SYNC_PLUG; this
 * causes the writes to be flagged as synchronous writes, but the
 * block device queue will NOT be unplugged, since usually many pages
 * will be pushed to the out before the higher-level caller actually
 * waits for the writes to be completed.  The various wait functions,
 * such as wait_on_writeback_range() will ultimately call sync_page()
 * which will ultimately call blk_run_backing_dev(), which will end up
 * unplugging the device queue.
 */
static int __block_write_full_page(struct inode *inode, struct page *page,
            get_block_t *get_block, struct writeback_control *wbc,
            bh_end_io_t *handler)
{
    int err;
    sector_t block;
    sector_t last_block;
    struct buffer_head *bh, *head;
    const unsigned blocksize = 1 << inode->i_blkbits;
    /* Number of buffers handed to submit_bh() by this call. */
    int nr_underway = 0;
    /* WB_SYNC_ALL writeback uses WRITE_SYNC_PLUG, everything else WRITE. */
    int write_op = (wbc->sync_mode == WB_SYNC_ALL ?
            WRITE_SYNC_PLUG : WRITE);

    BUG_ON(!PageLocked(page));

    /* Index of the block that holds the last byte of the file. */
    last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
    /*
     * If the page has no buffers attached yet, create them, passing the
     * BH_Dirty and BH_Uptodate bits as their initial state.
     */
    if (!page_has_buffers(page)) {
      create_empty_buffers(page, blocksize,
                  (1 << BH_Dirty)|(1 << BH_Uptodate));
    }

    /*
   * Be very careful.  We have no exclusion from __set_page_dirty_buffers
   * here, and the (potentially unmapped) buffers may become dirty at
   * any time.  If a buffer becomes dirty here after we've inspected it
   * then we just miss that fact, and the page stays dirty.
   *
   * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
   * handle that here by just cleaning them.
   */

    /* File-relative block number of the first buffer in this page. */
    block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
    head = page_buffers(page);
    bh = head;

    /*
   * Get all the dirty buffers mapped to disk addresses and
   * handle any aliases from the underlying blockdev's mapping.
   */
    /*
     * First pass: for every dirty buffer that is still unmapped (or
     * delayed), establish the mapping between the buffer and the block
     * device.
     */
    do {
      if (block > last_block) {
            /*
             * mapped buffers outside i_size will occur, because
             * this page can be outside i_size when there is a
             * truncate in progress.
             */
            /*
             * The buffer was zeroed by block_write_full_page()
             */
            clear_buffer_dirty(bh);
            set_buffer_uptodate(bh);
      } else if ((!buffer_mapped(bh) || buffer_delay(bh)) &&
               buffer_dirty(bh)) {
            WARN_ON(bh->b_size != blocksize);
            /*
             * Ask the caller-supplied get_block() to find the
             * on-device block for this buffer; the final argument
             * 1 is presumably the "create" flag - confirm against
             * the get_block_t contract.
             */
            err = get_block(inode, block, bh, 1);
            if (err)
                goto recover;
            clear_buffer_delay(bh);
            if (buffer_new(bh)) {
                /* blockdev mappings never come here */
                clear_buffer_new(bh);
                unmap_underlying_metadata(bh->b_bdev,
                            bh->b_blocknr);
            }
      }
      bh = bh->b_this_page;
      block++;
    } while (bh != head);
    /*
     * Second pass: pick out the dirty buffers.  bh is back at head here
     * because the first loop only exits once it has wrapped around.
     * Note that `continue` in a do-while jumps to the loop condition,
     * which is what advances bh to the next buffer.
     */
    do {
      if (!buffer_mapped(bh))
            continue;
      /*
         * If it's a fully non-blocking write attempt and we cannot
         * lock the buffer then redirty the page.  Note that this can
         * potentially cause a busy-wait loop from writeback threads
         * and kswapd activity, but those code paths have their own
         * higher-level throttling.
         */
      if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
            lock_buffer(bh);
      } else if (!trylock_buffer(bh)) {
            redirty_page_for_writepage(wbc, page);
            continue;
      }
      /*
       * If the buffer's dirty flag is set it is cleared by this very
       * call, because the buffer contents are about to be written back.
       */
      if (test_clear_buffer_dirty(bh)) {
            /*
             * Mark the buffer for asynchronous write and register
             * `handler` as its completion routine.  Presumably this
             * sets the BH_Async_Write state bit and b_end_io (the
             * helper is not shown here); the later passes test
             * buffer_async_write() to find these buffers.
             */
            mark_buffer_async_write_endio(bh, handler);
      } else {
            /* Clean buffer: nothing to write, release the lock. */
            unlock_buffer(bh);
      }
    } while ((bh = bh->b_this_page) != head);

    /*
   * The page and its buffers are protected by PageWriteback(), so we can
   * drop the bh refcounts early.
   */
    BUG_ON(PageWriteback(page));
    set_page_writeback(page);
    /* Final pass: submit everything that was marked in the second pass. */
    do {
      /* Read the successor before submitting the current buffer. */
      struct buffer_head *next = bh->b_this_page;
      if (buffer_async_write(bh)) {
            /*
             * Hand each buffer marked for async write in the
             * previous pass over to the block layer, which performs
             * the actual write; submit_bh() submits the
             * corresponding request.
             */
            submit_bh(write_op, bh);
            nr_underway++;
      }
      bh = next;
    } while (bh != head);
    unlock_page(page);

    err = 0;
done:
    if (nr_underway == 0) {
      /*
         * The page was marked dirty, but the buffers were
         * clean.  Someone wrote them back by hand with
         * ll_rw_block/submit_bh.  A rare case.
         */
      end_page_writeback(page);

      /*
         * The page and buffer_heads can be released at any time from
         * here on.
         */
    }
    return err;

recover:
    /*
   * ENOSPC, or some other error.  We may already have added some
   * blocks to the file, so we need to write these out to avoid
   * exposing stale data.
   * The page is currently locked and not marked for writeback
   */
    /* Restart from the first buffer of the page. */
    bh = head;
    /* Recovery: lock and submit the mapped buffers */
    do {
      if (buffer_mapped(bh) && buffer_dirty(bh) &&
            !buffer_delay(bh)) {
            lock_buffer(bh);
            mark_buffer_async_write_endio(bh, handler);
      } else {
            /*
             * The buffer may have been set dirty during
             * attachment to a dirty page.
             */
            clear_buffer_dirty(bh);
      }
    } while ((bh = bh->b_this_page) != head);
    /* Record the failure on both the page and its mapping. */
    SetPageError(page);
    BUG_ON(PageWriteback(page));
    mapping_set_error(page->mapping, err);
    set_page_writeback(page);
    /* Submit the buffers marked above, clearing their dirty bit first. */
    do {
      struct buffer_head *next = bh->b_this_page;
      if (buffer_async_write(bh)) {
            clear_buffer_dirty(bh);
            submit_bh(write_op, bh);
            nr_underway++;
      }
      bh = next;
    } while (bh != head);
    unlock_page(page);
    /* Share the nr_underway check and return path; err still holds the get_block() error. */
    goto done;
}

如果有一天21 发表于 2012-03-03 11:32

谢谢分享
页: [1]
查看完整版本: Linux缓存机制之块缓存 2.。。。。。。