- 论坛徽章:
- 0
|
本部分主要讲述的是文件I/O操作的2.6.17.9内核版本实现,包括了主要的数据结构、宏定义和函数流程。以下分别讲述open,create,close,read,write,lseek系统调用。
1 重要数据结构
1.1 struct file
struct file {
/*
* fu_list becomes invalid after file_free is called and queued via
* fu_rcuhead for RCU freeing
*/
union {
struct list_head fu_list; //文件链表指针
struct rcu_head fu_rcuhead; //rcu链表
} f_u;
struct dentry *f_dentry; // 文件对应的目录结构
struct vfsmount *f_vfsmnt; // 虚拟文件系统挂载点
const struct file_operations *f_op; // 文件操作函数指针
atomic_t f_count; // 引用计数
unsigned int f_flags;
mode_t f_mode; // 文件模式
loff_t f_pos; // 文件offset
struct fown_struct f_owner; //文件owner 结构
unsigned int f_uid, f_gid;//文件用户id,组id
struct file_ra_state f_ra; // 跟踪上次文件操作状态的结构指针
unsigned long f_version;
void *f_security; // hook 文件操作的security结构指针
/* needed for tty driver, and maybe others */
void *private_data; // tty 驱动器所需数据
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct list_head f_ep_links; // EPOLL 机制检测所需链表结构
spinlock_t f_ep_lock; // 兼容早期gcc bug 的标志
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping; // 地址映射表
};
1.2 struct fown_struct
struct fown_struct {
rwlock_t lock; /* protects pid, uid, euid fields */
int pid; /* pid or -pgrp where SIGIO should be sent */
uid_t uid, euid; /* uid/euid of process setting the owner */
void *security; /*hook 文件操作的security结构指针*/
int signum; /* posix.1b rt signal to be delivered on IO */
};
1.3 struct file_ra_state
/*
* Track a single file's readahead state
*/
struct file_ra_state {
unsigned long start; /* Current window */
unsigned long size;
unsigned long flags; /* ra flags RA_FLAG_xxx*/
unsigned long cache_hit; /* cache hit count*/
unsigned long prev_page; /* Cache last read() position */
unsigned long ahead_start; /* Ahead window */
unsigned long ahead_size;
unsigned long ra_pages; /* Maximum readahead window */
unsigned long mmap_hit; /* Cache hit stat for mmap accesses */
unsigned long mmap_miss; /* Cache miss stat for mmap accesses */
};
1.4 struct address_space
struct address_space {
struct inode *host; /* owner: inode, block_device */
struct radix_tree_root page_tree; /* radix tree of all pages */
rwlock_t tree_lock; /* and rwlock protecting it */
unsigned int i_mmap_writable;/* count VM_SHARED mappings */
struct prio_tree_root i_mmap; /* tree of private and shared mappings */
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
spinlock_t i_mmap_lock; /* protect tree, count, list */
unsigned int truncate_count; /* Cover race condition with truncate */
unsigned long nrpages; /* number of total pages */
pgoff_t writeback_index;/* writeback starts here */
struct address_space_operations *a_ops; /* methods */
unsigned long flags; /* error bits/gfp mask */
struct backing_dev_info *backing_dev_info; /* device readahead, etc */
spinlock_t private_lock; /* for use by the address_space */
struct list_head private_list; /* ditto */
struct address_space *assoc_mapping; /* ditto */
} __attribute__((aligned(sizeof(long))));
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
int (*readpage)(struct file *, struct page *);
void (*sync_page)(struct page *);
/* Write back some dirty pages from this mapping. */
int (*writepages)(struct address_space *, struct writeback_control *);
/* Set a page dirty. Return true if this dirtied it */
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages);
/*
* ext3 requires that a successful prepare_write() call be followed
* by a commit_write() call - they must be balanced
*/
int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
sector_t (*bmap)(struct address_space *, sector_t);
void (*invalidatepage) (struct page *, unsigned long);
int (*releasepage) (struct page *, gfp_t);
ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
loff_t offset, unsigned long nr_segs);
struct page* (*get_xip_page)(struct address_space *, sector_t,
int);
/* migrate the contents of a page to the specified target */
int (*migratepage) (struct page *, struct page *);
};
1.5 struct block_device
struct block_device {
dev_t bd_dev; /* not a kdev_t - it's a search key */
struct inode * bd_inode; /* will die */
int bd_openers;
struct mutex bd_mutex; /* open/close mutex */
struct mutex bd_mount_mutex; /* mount mutex */
struct list_head bd_inodes;
void * bd_holder;
int bd_holders;
#ifdef CONFIG_SYSFS
struct list_head bd_holder_list;
#endif
struct block_device * bd_contains;
unsigned bd_block_size;
struct hd_struct * bd_part;
/* number of times partitions within this device have been opened. */
unsigned bd_part_count;
int bd_invalidated;
struct gendisk * bd_disk;
struct list_head bd_list;
struct backing_dev_info *bd_inode_backing_dev_info;
/*
* Private data. You must have bd_claim'ed the block_device
* to use this. NOTE: bd_claim allows an owner to claim
* the same device multiple times, the owner must take special
* care to not mess up bd_private for that case.
*/
unsigned long bd_private;
};
1.6 struct backing_dev_info
struct backing_dev_info {
unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */
unsigned long state; /* Always use atomic bitops on this */
unsigned int capabilities; /* Device capabilities */
congested_fn *congested_fn; /* Function pointer if device is md/dm */
void *congested_data; /* Pointer to aux data for congested func */
void (*unplug_io_fn)(struct backing_dev_info *, struct page *);
void *unplug_io_data;
};
1.7 struct files_struct
对于内核而言,所有打开文件都由文件描述符引用。文件描述符是一个非负整数。当打开一个现存文件或创建一个新文件时,内核向进程返回一个文件描述符。
当读、写一个文件时,用open或creat返回的文件描述符标识该文件,将其作为参数传送给read或write。在POSIX.1应用程序中,文件描述符为常数0、1和2分别代表STDIN_FILENO、STDOUT_FILENO和STDERR_FILENO,意即标准输入,标准输出和标准出错输出,这些常数都定义在头文件;中。
文件描述符的范围是0~OPEN_MAX,在目前常用的linux系统中,是32位整形所能表示的整数,即65535,64位机上则更多。
/*
* Open file table structure
*/
struct files_struct {
/*
* read mostly part
*/
atomic_t count; /* 引用计数 */
struct fdtable *fdt; /* 文件表指针,指向fdtab */
struct fdtable fdtab;/* 文件表 */
/*
* written part on a separate cache line in SMP
*/
spinlock_t file_lock ____cacheline_aligned_in_smp;
int next_fd; // 下一个空闲fd
struct embedded_fd_set close_on_exec_init; /* 可执行close的fd集合 */
struct embedded_fd_set open_fds_init;/* 打开的fd集合 */
struct file * fd_array[NR_OPEN_DEFAULT]; /*打开的文件列表*/
};
struct fdtable {
unsigned int max_fds; // 最大文件句柄数目
int max_fdset; // 最大的fd集合容量
struct file ** fd; /* current fd array */
fd_set *close_on_exec; // 可执行close的fd集合
fd_set *open_fds; // 打开的fd集合
struct rcu_head rcu;
struct files_struct *free_files; /*反向指针 */
struct fdtable *next; /*链表*/
};
本文来自ChinaUnix博客,如果查看原文请点:http://blog.chinaunix.net/u/27691/showart_376678.html |
|