块缓冲区最终要提交到通用块层进行IO操作,相关的API如下
int submit_bh(int rw, struct buffer_head *bh)
void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
脏页回写
在脏页回写机制中,内核使用一个backing_dev_info对象来描述一个bdi设备,即backing device info——非易失存储设备。这些backing_dev_info都会挂接到bdi_list链表中,这一点可以从其注册函数bdi_register()中看出。由于bdi设备速度较慢,所以才有了本文讨论的页缓冲机制以及脏页回写问题:当一个高速缓存页被修改时,内核就会在相应的page对象中设置PG_dirty标志,将其标记为"脏页",脏页需要在合适的时候回写到磁盘。对于脏页回写,2.6.2x/3x以前的内核通过动态地创建/删除pdflush线程来实现,而2.6.2x/3x之后的内核对这方面进行了改进,采用writeback机制进行回写。writeback机制需要的核心结构和方法的关系如下。
可以看出,一个逻辑磁盘对应一个gendisk对象,一个gendisk对象对应一个request_queue对象,一个request_queue对象又对应一个backing_dev_info对象;这个backing_dev_info对象就是脏页回写的核心结构。
64 struct backing_dev_info {
65
struct list_head bdi_list;
97
struct bdi_writeback wb;
/* default writeback info for this bdi */
100
struct list_head work_list;
102
struct device *dev;
110 };
struct backing_dev_info
--65-->将所有的backing_dev_info链接起来的链表节点
--97-->bdi_writeback对象,使用延迟工作进行脏页回写
--100-->这个bdi设备上等待被处理的回写任务(wb_writeback_work)组成的链表
--102-->指向该bdi所对应的device对象的指针
37 /*
38 * Passed into wb_writeback(), essentially a subset of writeback_control
39 */
40 struct wb_writeback_work {
41
long nr_pages;
42
struct super_block *sb;
43
unsigned long *older_than_this;
44
enum writeback_sync_modes sync_mode;
45
unsigned int tagged_writepages:
1;
46
unsigned int for_kupdate:
1;
47
unsigned int range_cyclic:
1;
48
unsigned int for_background:
1;
49
unsigned int for_sync:
1;
/* sync(2) WB_SYNC_ALL writeback */
50
enum wb_reason reason;
/* why was writeback initiated? */
51
52
struct list_head
list;
/* pending work list */
53
struct completion *done;
/* set if the caller waits */
54 };
51 struct bdi_writeback {
52
struct backing_dev_info *bdi;
/* our parent bdi */
53
unsigned int nr;
54
55
unsigned long last_old_flush;
/* last old data flush */
56
57
struct delayed_work dwork;
/* work item used for writeback */
58
struct list_head b_dirty;
/* dirty inodes */
59
struct list_head b_io;
/* parked for writeback */
60
struct list_head b_more_io;
/* parked for more writeback */
61
spinlock_t list_lock;
/* protects the b_* lists */
62 };
struct bdi_writeback
--57-->延迟工作对象,最终会调用下面的函数处理脏页
778 /*
779  * Explicit flushing or periodic writeback of "old" data.
780  *
781  * Define "old": the first time one of an inode's pages is dirtied, we mark the
782  * dirtying-time in the inode's address_space. So this periodic writeback code
783  * just walks the superblock inode list, writing back any inodes which are
784  * older than a specific point in time.
785  *
786  * Try to run once per dirty_writeback_interval. But if a writeback event
787  * takes longer than a dirty_writeback_interval interval, then leave a
788  * one-second gap.
789  *
790  * older_than_this takes precedence over nr_to_write. So we'll only write back
791  * all dirty pages if they are all attached to "old" mappings.
792  */
793 static long wb_writeback(struct bdi_writeback *wb,
794
struct wb_writeback_work *work)