免费注册 查看新帖 |

Chinaunix

  平台 论坛 博客 文库
最近访问板块 发新帖
查看: 836 | 回复: 0
打印 上一主题 下一主题

Linux 2.6.17.9内核文件系统调用详解 [复制链接]

论坛徽章:
0
跳转到指定楼层
1 [收藏(0)] [报告]
发表于 2007-09-07 20:38 |只看该作者 |倒序浏览
本部分主要讲述的是文件I/O操作在2.6.17.9内核版本中的实现,包括了主要的数据结构、宏定义和函数流程。以下分别讲述open、creat、close、read、write、lseek系统调用。
1 重要数据结构
1.1 struct file
/*
 * struct file as listed in the article for kernel 2.6.17.9. It carries the
 * offset, flags, operations table and related state for one file object.
 */
struct file {
    /*
    * fu_list becomes invalid after file_free is called and queued via
    * fu_rcuhead for RCU freeing
    */
    union {
        struct list_head    fu_list; // list linkage for this file
        struct rcu_head    fu_rcuhead; // RCU head, used once the file is queued for RCU freeing
    } f_u;
    struct dentry        *f_dentry; // directory entry (dentry) associated with this file
    struct vfsmount        *f_vfsmnt; // VFS mount point this file belongs to
    const struct file_operations    *f_op; // table of file operation function pointers
    atomic_t        f_count; // reference count
    unsigned int        f_flags; // presumably the flags given at open time -- confirm
    mode_t            f_mode; // file mode
    loff_t            f_pos; // file offset
    struct fown_struct    f_owner; // owner information (struct fown_struct)
    unsigned int        f_uid, f_gid;// user id and group id for this file
    struct file_ra_state    f_ra; // embedded readahead state (a struct, not a pointer)
    unsigned long        f_version;
    void            *f_security; // opaque pointer for the security hooks on file operations
    /* needed for tty driver, and maybe others */
    void            *private_data; // private data for the tty driver (and possibly others)
#ifdef CONFIG_EPOLL
    /* Used by fs/eventpoll.c to link all the hooks to this file */
    struct list_head    f_ep_links; // list of epoll hooks linked to this file
    spinlock_t        f_ep_lock; // NOTE(review): a spinlock, presumably guarding f_ep_links -- confirm; it is not a gcc-bug workaround flag
#endif /* #ifdef CONFIG_EPOLL */
    struct address_space    *f_mapping; // pointer to the struct address_space used for this file
};
1.2 struct fown_struct
/*
 * Owner information for a file; embedded in struct file as f_owner.
 * Records which process (or process group) should receive I/O signals.
 */
struct fown_struct {
    rwlock_t lock;          /* protects pid, uid, euid fields */
    int pid;        /* pid or -pgrp where SIGIO should be sent */
    uid_t uid, euid;    /* uid/euid of process setting the owner */
    void *security; /* opaque pointer for the security hooks on file operations */
    int signum;        /* posix.1b rt signal to be delivered on IO */
};
1.3 struct file_ra_state
/*
* Track a single file's readahead state
*
* Two windows are described: the current window (start/size) and the
* ahead window (ahead_start/ahead_size).
*/
struct file_ra_state {
    unsigned long start;        /* Current window */
    unsigned long size;         /* presumably the size of the current window -- confirm units */
    unsigned long flags;        /* ra flags RA_FLAG_xxx*/
    unsigned long cache_hit;    /* cache hit count*/
    unsigned long prev_page;    /* Cache last read() position */
    unsigned long ahead_start;    /* Ahead window */
    unsigned long ahead_size;     /* presumably the size of the ahead window -- confirm units */
    unsigned long ra_pages;        /* Maximum readahead window */
    unsigned long mmap_hit;        /* Cache hit stat for mmap accesses */
    unsigned long mmap_miss;    /* Cache miss stat for mmap accesses */
};
1.4 struct address_space
/*
 * Describes the pages and memory mappings that belong to one owner object
 * (an inode or a block_device, see host). The operations that act on the
 * pages are reached through a_ops.
 */
struct address_space {
    struct inode        *host;        /* owner: inode, block_device */
    struct radix_tree_root    page_tree;    /* radix tree of all pages */
    rwlock_t        tree_lock;    /* and rwlock protecting it */
    unsigned int        i_mmap_writable;/* count VM_SHARED mappings */
    struct prio_tree_root    i_mmap;        /* tree of private and shared mappings */
    struct list_head    i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
    spinlock_t        i_mmap_lock;    /* protect tree, count, list */
    unsigned int        truncate_count;    /* Cover race condition with truncate */
    unsigned long        nrpages;    /* number of total pages */
    pgoff_t            writeback_index;/* writeback starts here */
    struct address_space_operations *a_ops;    /* methods */
    unsigned long        flags;        /* error bits/gfp mask */
    struct backing_dev_info *backing_dev_info; /* device readahead, etc */
    spinlock_t        private_lock;    /* for use by the address_space */
    struct list_head    private_list;    /* ditto: for use by the address_space */
    struct address_space    *assoc_mapping;    /* ditto: for use by the address_space */
} __attribute__((aligned(sizeof(long))));
/*
 * Table of function pointers implementing the operations on an
 * address_space (referenced from struct address_space via a_ops).
 * Which entries are mandatory is not visible here.
 */
struct address_space_operations {
    int (*writepage)(struct page *page, struct writeback_control *wbc);
    int (*readpage)(struct file *, struct page *);
    void (*sync_page)(struct page *);
    /* Write back some dirty pages from this mapping. */
    int (*writepages)(struct address_space *, struct writeback_control *);
    /* Set a page dirty.  Return true if this dirtied it */
    int (*set_page_dirty)(struct page *page);
    int (*readpages)(struct file *filp, struct address_space *mapping,
            struct list_head *pages, unsigned nr_pages);
    /*
    * ext3 requires that a successful prepare_write() call be followed
    * by a commit_write() call - they must be balanced
    */
    int (*prepare_write)(struct file *, struct page *, unsigned, unsigned);
    int (*commit_write)(struct file *, struct page *, unsigned, unsigned);
    /* Unfortunately this kludge is needed for FIBMAP. Don't use it */
    sector_t (*bmap)(struct address_space *, sector_t);
    void (*invalidatepage) (struct page *, unsigned long);
    int (*releasepage) (struct page *, gfp_t);
    ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
            loff_t offset, unsigned long nr_segs);
    struct page* (*get_xip_page)(struct address_space *, sector_t,
            int);
    /* migrate the contents of a page to the specified target */
    int (*migratepage) (struct page *, struct page *);
};
1.5 struct block_device
/*
 * Kernel object describing a block device, as listed in the article for
 * kernel 2.6.17.9. bd_dev is the search key used to look the device up.
 */
struct block_device {
    dev_t            bd_dev;  /* not a kdev_t - it's a search key */
    struct inode *        bd_inode;    /* will die */
    int            bd_openers;    /* presumably the number of current openers -- confirm */
    struct mutex        bd_mutex;    /* open/close mutex */
    struct mutex        bd_mount_mutex;    /* mount mutex */
    struct list_head    bd_inodes;
    void *            bd_holder;    /* presumably the owner set by bd_claim -- confirm */
    int            bd_holders;
#ifdef CONFIG_SYSFS
    struct list_head    bd_holder_list;
#endif
    struct block_device *    bd_contains;
    unsigned        bd_block_size;
    struct hd_struct *    bd_part;
    /* number of times partitions within this device have been opened. */
    unsigned        bd_part_count;
    int            bd_invalidated;
    struct gendisk *    bd_disk;
    struct list_head    bd_list;
    struct backing_dev_info *bd_inode_backing_dev_info;
    /*
    * Private data.  You must have bd_claim'ed the block_device
    * to use this.  NOTE:  bd_claim allows an owner to claim
    * the same device multiple times, the owner must take special
    * care to not mess up bd_private for that case.
    */
    unsigned long        bd_private;
};
1.6 struct backing_dev_info
/*
 * Per-device information used by the page-handling code: readahead limit,
 * state bits, capabilities, and optional congestion/unplug callbacks.
 */
struct backing_dev_info {
    unsigned long ra_pages;    /* max readahead in PAGE_CACHE_SIZE units */
    unsigned long state;    /* Always use atomic bitops on this */
    unsigned int capabilities; /* Device capabilities */
    congested_fn *congested_fn; /* Function pointer if device is md/dm */
    void *congested_data;    /* Pointer to aux data for congested func */
    void (*unplug_io_fn)(struct backing_dev_info *, struct page *);
    void *unplug_io_data;    /* presumably aux data for unplug_io_fn -- confirm */
};
1.7 struct files_struct
对于内核而言,所有打开文件都由文件描述符引用。文件描述符是一个非负整数。当打开一个现存文件或创建一个新文件时,内核向进程返回一个文件描述符。
当读、写一个文件时,用open或creat返回的文件描述符标识该文件,将其作为参数传送给read或write。在POSIX.1应用程序中,文件描述符0、1和2分别对应常量STDIN_FILENO、STDOUT_FILENO和STDERR_FILENO,即标准输入、标准输出和标准出错输出,这些常量都定义在头文件<unistd.h>中。
文件描述符的范围是0~OPEN_MAX-1。在目前常用的Linux系统中,文件描述符用int类型表示,但每个进程实际可用的上限由资源限制RLIMIT_NOFILE(可用ulimit -n查看)决定,而不是由整型的表示范围决定;另外,65535是16位无符号整数的最大值,并非32位整型所能表示的最大值。
/*
* Open file table structure
*
* Holds a reference count, the fd table (fdt / fdtab) and the embedded
* initial fd sets and fd array.
*/
struct files_struct {
  /*
  * read mostly part
  */
    atomic_t count; /* reference count */
    struct fdtable *fdt; /* pointer to the fd table; per the original note it points at fdtab */
    struct fdtable fdtab;/* embedded fd table */
  /*
  * written part on a separate cache line in SMP
  */
    spinlock_t file_lock ____cacheline_aligned_in_smp;
    int next_fd; // next free fd
    struct embedded_fd_set close_on_exec_init; /* embedded close-on-exec fd set (the "_init" suffix suggests initial storage -- confirm) */
    struct embedded_fd_set open_fds_init;/* embedded set of open fds (initial storage, presumably -- confirm) */
    struct file * fd_array[NR_OPEN_DEFAULT]; /* array of NR_OPEN_DEFAULT struct file pointers for open files */
};
/*
 * File descriptor table: the array of struct file pointers plus the fd sets
 * that track open and close-on-exec descriptors.
 */
struct fdtable {
    unsigned int max_fds; // maximum number of file descriptors
    int max_fdset; // maximum capacity of the fd sets
    struct file ** fd;      /* current fd array */
    fd_set *close_on_exec; // fd set for close-on-exec descriptors (per the field name)
    fd_set *open_fds; // fd set of open descriptors
    struct rcu_head rcu;
    struct files_struct *free_files; /* back pointer to a files_struct */
    struct fdtable *next; /* linked-list link to another fdtable */
};


本文来自ChinaUnix博客,如果查看原文请点:http://blog.chinaunix.net/u/27691/showart_376678.html
您需要登录后才可以回帖 登录 | 注册

本版积分规则 发表回复

  

北京盛拓优讯信息技术有限公司. 版权所有 京ICP备16024965号-6 北京市公安局海淀分局网监中心备案编号:11010802020122 niuxiaotong@pcpop.com 17352615567
未成年举报专区
中国互联网协会会员  联系我们:huangweiwei@itpub.net
感谢所有关心和支持过ChinaUnix的朋友们 转载本站内容请注明原作者名及出处

清除 Cookies - ChinaUnix - Archiver - WAP - TOP