Linux内核源码分析-安装普通文件系统-sys_mount 本文主要参考《深入理解Linux内核》,结合2.6.11.1版的内核代码,分析内核文件子系统中的安装普通文件系统函数。 注意: 1、不描述内核同步、错误处理、参数合法性验证相关的内容 2、源码摘自Linux内核2.6.11.1版 3、阅读本文请结合《深入理解Linux内核》第三版相关章节 4、本文会不定时更新 1、sys_mount函数源码: - asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name,
- char __user * type, unsigned long flags,
- void __user * data)
- { /* Entry point of the mount system call: copies the user-space arguments into kernel memory and hands them to do_mount(). */
- int retval;
- unsigned long data_page; /* page holding the copied data argument; 0 when data is NULL */
- unsigned long type_page; /* page holding the copied filesystem type string; 0 when type is NULL */
- unsigned long dev_page; /* page holding the copied device name; 0 when dev_name is NULL */
- char *dir_page; /* kernel copy of the mount point path, obtained from getname() */
- // Copy the filesystem type string from user space into a page whose address is stored in type_page
- retval = copy_mount_options (type, &type_page);
- if (retval < 0)
- return retval;
- // Fetch the mount point path name from user space
- dir_page = getname(dir_name);
- retval = PTR_ERR(dir_page); /* only returned when IS_ERR() holds; otherwise overwritten below */
- if (IS_ERR(dir_page))
- goto out1;
- // Copy the block device file name from user space into kernel space
- retval = copy_mount_options (dev_name, &dev_page);
- if (retval < 0)
- goto out2;
- // Copy the filesystem-specific data (at most one page) from user space
- retval = copy_mount_options (data, &data_page);
- if (retval < 0)
- goto out3;
- lock_kernel(); /* do_mount() runs between lock_kernel() and unlock_kernel() */
- // do_mount() is analysed later in this article
- retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
- flags, (void*)data_page);
- unlock_kernel();
- free_page(data_page); /* from here on, resources are released in reverse order of acquisition */
- out3:
- free_page(dev_page);
- out2:
- putname(dir_page);
- out1:
- free_page(type_page);
- return retval; /* result of do_mount(), or the error of the step that failed */
- }
复制代码函数处理流程: 从用户空间复制数据到内核空间(准备参数)、获取大内核锁、调用do_mount函数 2、copy_mount_options
函数源码: - int copy_mount_options(const void __user *data, unsigned long *where)
- { /* Copies up to one page from the user address data into a newly allocated page and stores that page's address in *where. */
- int i; /* number of bytes actually copied */
- unsigned long page;
- unsigned long size;
-
- *where = 0; /* default result: no page */
- if (!data)
- return 0; /* a NULL user pointer is not an error; *where stays 0 */
- if (!(page = __get_free_page(GFP_KERNEL)))
- return -ENOMEM;
- /* We only care that *some* data at the address the user
- * gave us is valid. Just in case, we'll zero
- * the remainder of the page.
- */
- /* copy_from_user cannot cross TASK_SIZE ! */
- size = TASK_SIZE - (unsigned long)data;
- if (size > PAGE_SIZE)
- size = PAGE_SIZE; /* never copy more than one page */
- i = size - exact_copy_from_user((void *)page, data, size); /* requested size minus the bytes left uncopied */
- if (!i) { /* not a single byte could be read */
- free_page(page);
- return -EFAULT;
- }
- if (i != PAGE_SIZE)
- memset((char *)page + i, 0, PAGE_SIZE - i); /* zero the tail of the page after the copied bytes */
- *where = page; /* the caller is responsible for free_page() on this page */
- return 0;
- }
复制代码函数处理流程: 1、调用函数__get_free_page分配一个空闲页框并返回页框的线性地址 2、调用函数exact_copy_from_user把用户空间的数据从data复制到该页框中 3、把页剩余的空间置0,并用*where返回页框的起始线性地址 3、exact_copy_from_user
函数源码: - /*
- * Some copy_from_user() implementations do not return the exact number of
- * bytes remaining to copy on a fault. But copy_mount_options() requires that.
- * Note that this function differs from copy_from_user() in that it will oops
- * on bad values of `to', rather than returning a short copy.
- */
- static long
- exact_copy_from_user(void *to, const void __user *from, unsigned long n)
- { /* Byte-by-byte copy from user space; returns the exact number of bytes that were NOT copied. */
- char *t = to; /* write cursor in the kernel buffer */
- const char __user *f = from; /* read cursor in user space */
- char c;
- if (!access_ok(VERIFY_READ, from, n))
- return n; /* range rejected: nothing copied, all n bytes remain */
- while (n) {
- if (__get_user(c, f)) { /* non-zero result: reading this byte failed */
- memset(t, 0, n); /* zero the part of the destination that was not filled */
- break;
- }
- *t++ = c;
- f++;
- n--;
- }
- return n; /* 0 when everything was copied */
- }
复制代码函数处理流程: 调用函数__get_user一次从用户空间复制一个字符,返回未完成复制的字节数,分析参见后续文章。 4、do_mount
函数源码: - /*
- * Flags is a 32-bit value that allows up to 31 non-fs dependent flags to
- * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
- *
- * data is a (void *) that can point to any structure up to
- * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
- * information (or be NULL).
- *
- * Pre-0.97 versions of mount() didn't have a flags word.
- * When the flags word was introduced its top half was required
- * to have the magic value 0xC0ED, and this remained so until 2.4.0-test9.
- * Therefore, if this magic number is present, it carries no information
- * and must be discarded.
- */
- long do_mount(char * dev_name, char * dir_name, char *type_page,
- unsigned long flags, void *data_page)
- { /* Validates the arguments, looks up the mount point and dispatches to the remount / bind / move / new-mount handler. */
- struct nameidata nd; /* receives the result of the mount point lookup */
- int retval = 0;
- int mnt_flags = 0; /* per-mountpoint MNT_* flags derived from flags */
- /* Discard magic */
- if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
- flags &= ~MS_MGC_MSK;
- /* Basic sanity checks */
- if (!dir_name || !*dir_name || !memchr(dir_name, 0, PAGE_SIZE)) /* path must be non-empty and NUL-terminated within PAGE_SIZE bytes */
- return -EINVAL;
- if (dev_name && !memchr(dev_name, 0, PAGE_SIZE)) /* dev_name may be NULL, but if present must be NUL-terminated */
- return -EINVAL;
- if (data_page)
- ((char *)data_page)[PAGE_SIZE - 1] = 0; /* force termination of the data page */
- /* Separate the per-mountpoint flags */
- if (flags & MS_NOSUID)
- mnt_flags |= MNT_NOSUID;
- if (flags & MS_NODEV)
- mnt_flags |= MNT_NODEV;
- if (flags & MS_NOEXEC)
- mnt_flags |= MNT_NOEXEC;
- flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV|MS_ACTIVE); /* these bits are not passed on in flags */
- /* ... and get the mountpoint */
- retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd);
- if (retval)
- return retval; /* lookup failed: there is no nd to release */
- retval = security_sb_mount(dev_name, &nd, type_page, flags, data_page);
- if (retval)
- goto dput_out;
- if (flags & MS_REMOUNT) /* MS_REMOUNT is tested first, then MS_BIND, then MS_MOVE */
- retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
- data_page);
- else if (flags & MS_BIND)
- retval = do_loopback(&nd, dev_name, flags & MS_REC);
- else if (flags & MS_MOVE)
- retval = do_move_mount(&nd, dev_name);
- else
- retval = do_new_mount(&nd, type_page, flags, mnt_flags,
- dev_name, data_page); /* ordinary mount: the only case covered by this article */
- dput_out:
- path_release(&nd); /* release the lookup result on success and on failure alike */
- return retval;
- }
复制代码函数处理流程: 1、把安装标志flags中的MS_NOSUID、MS_NODEV、MS_NOEXEC分别转换为已安装文件系统对象使用的MNT_NOSUID、MNT_NODEV、MNT_NOEXEC标志,存入mnt_flags,并从flags中清除这些标志 2、调用path_lookup函数对安装点路径名进行查找,查找结果存放在nameidata类型的nd局部变量中 3、根据安装标识是重新安装、绑定安装、移动安装、新安装分别调用不同的函数进行处理,本文仅描述新安装,即调用do_new_mount 5、do_new_mount
函数源码: - /*
- * create a new mount for userspace and request it to be added into the
- * namespace's tree
- */
- static int do_new_mount(struct nameidata *nd, char *type, int flags,
- int mnt_flags, char *name, void *data)
- { /* Builds a new mount with do_kern_mount() and hands it to do_add_mount() for insertion at nd. */
- struct vfsmount *mnt;
- if (!type || !memchr(type, 0, PAGE_SIZE)) /* a NUL-terminated filesystem type name is mandatory */
- return -EINVAL;
- /* we need capabilities... */
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
- mnt = do_kern_mount(type, flags, name, data);
- if (IS_ERR(mnt))
- return PTR_ERR(mnt); /* do_kern_mount() reports failure as an ERR_PTR value */
- return do_add_mount(mnt, nd, mnt_flags, NULL); /* NULL: no expiration list */
- }
复制代码函数处理流程: 1、 调用函数do_kern_mount进行实际安装操作,返回新安装文件系统描述符的地址 2、 调用函数do_add_mount把新安装文件系统描述符插入到相关数据结构中 6、do_kern_mount函数源码: - struct vfsmount *
- do_kern_mount(const char *fstype, int flags, const char *name, void *data)
- { /* Creates a vfsmount for filesystem type fstype and binds it to the superblock returned by the type's get_sb(); returns the vfsmount or an ERR_PTR value. */
- struct file_system_type *type = get_fs_type(fstype);
- struct super_block *sb = ERR_PTR(-ENOMEM); /* doubles as the error return value on the failure paths */
- struct vfsmount *mnt;
- int error;
- char *secdata = NULL;
- if (!type)
- return ERR_PTR(-ENODEV); /* get_fs_type() found no such filesystem type */
- mnt = alloc_vfsmnt(name);
- if (!mnt)
- goto out; /* sb still holds ERR_PTR(-ENOMEM) from its initializer */
- if (data) {
- secdata = alloc_secdata();
- if (!secdata) {
- sb = ERR_PTR(-ENOMEM);
- goto out_mnt;
- }
- error = security_sb_copy_data(type, data, secdata);
- if (error) {
- sb = ERR_PTR(error);
- goto out_free_secdata;
- }
- }
- sb = type->get_sb(type, flags, name, data); /* filesystem-specific superblock setup */
- if (IS_ERR(sb))
- goto out_free_secdata;
- error = security_sb_kern_mount(sb, secdata);
- if (error)
- goto out_sb;
- mnt->mnt_sb = sb; // superblock of the new filesystem
- mnt->mnt_root = dget(sb->s_root); // root dentry of the new filesystem, obtained through dget()
- mnt->mnt_mountpoint = sb->s_root; // mount point: provisionally the filesystem's own root (attach_mnt() sets the real one)
- mnt->mnt_parent = mnt; // parent mount: provisionally the mount itself (attach_mnt() sets the real one)
- mnt->mnt_namespace = current->namespace; // namespace of the current process
- up_write(&sb->s_umount); /* NOTE(review): presumably get_sb() returns with s_umount write-held - confirm */
- put_filesystem(type);
- return mnt;
- out_sb:
- up_write(&sb->s_umount);
- deactivate_super(sb);
- sb = ERR_PTR(error);
- out_free_secdata:
- free_secdata(secdata);
- out_mnt:
- free_vfsmnt(mnt);
- out:
- put_filesystem(type);
- return (struct vfsmount *)sb; /* sb holds an ERR_PTR value on every path that reaches here */
- }
复制代码函数处理流程: 1、根据文件系统类型名称,调用函数get_fs_type获得类型为file_system_type的文件系统类型对象的地址,存入局部变量type中 2、调用alloc_vfsmnt从mnt_cache slab高速缓存中分配一个新的已安装文件系统描述符(vfsmount对象) 3、调用依赖于文件系统的type->get_sb函数分配并初始化一个超级块,具体分析参见后续文章“Linux内核源码分析-ext2分配初始化超级块-ext2_get_sb” 4、初始化mnt相关字段,具体参见注释 7、do_add_mount
函数源码: - /*
- * add a mount into a namespace's mount tree
- * - provide the option of adding the new mount to an expiration list
- */
- int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
- int mnt_flags, struct list_head *fslist)
- { /* Grafts newmnt onto the mount point described by nd; optionally queues it on the expiration list fslist. Returns 0 or a negative errno. */
- int err;
-
- down_write(&current->namespace->sem); /* namespace semaphore is held for writing until the unlock label */
- /* Something was mounted here while we slept */
- while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry))
- ; /* empty body: follow_down() advances nd->mnt and nd->dentry */
- err = -EINVAL;
- if (!check_mnt(nd->mnt))
- goto unlock;
-
- /* Refuse the same filesystem on the same mount point */
- err = -EBUSY;
- if (nd->mnt->mnt_sb == newmnt->mnt_sb &&
- nd->mnt->mnt_root == nd->dentry)
- goto unlock;
-
- err = -EINVAL;
- if (S_ISLNK(newmnt->mnt_root->d_inode->i_mode)) /* the root of the new filesystem must not be a symbolic link */
- goto unlock;
-
- newmnt->mnt_flags = mnt_flags;
- err = graft_tree(newmnt, nd);
-
- if (err == 0 && fslist) {
- /* add to the specified expiration list */
- spin_lock(&vfsmount_lock);
- list_add_tail(&newmnt->mnt_fslink, fslist);
- spin_unlock(&vfsmount_lock);
- }
-
- unlock:
- up_write(&current->namespace->sem);
- mntput(newmnt); /* executed on success and on failure alike */
- return err;
- }
复制代码
函数处理流程: 1、当安装点路径目录项的安装文件系统数不为0时,调用follow_down更新安装的目录项对象和安装点对应的文件系统对象,具体分析见后续文章(路径名查找) 2、验证安装点的命名空间是否还是当前命名空间,如果不是,返回错误 3、如果同一文件系统已安装在该安装点上,或者新文件系统的根目录项是一个符号链接,返回错误 4、初始化do_kern_mount分配的vfsmount 对象newmnt的mnt_flags
5、调用函数graft_tree把新分配的文件对象插入到namespace链表、散列表、父文件系统的子链表中 8、alloc_vfsmnt函数功能: 从mnt_cache slab高速缓存分配一个vfsmount对象,并初始化相关字段,具体信息见注释 函数源码: - struct vfsmount *alloc_vfsmnt(const char *name)
- { /* Allocates a zeroed vfsmount from mnt_cache, initialises its list heads and stores a private copy of name; returns NULL if the vfsmount allocation fails. */
- struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
- if (mnt) {
- memset(mnt, 0, sizeof(struct vfsmount)); /* every field not set below starts out as zero */
- atomic_set(&mnt->mnt_count,1); // reference counter, starts at 1
- INIT_LIST_HEAD(&mnt->mnt_hash); // link into the mount_hashtable hash table
- INIT_LIST_HEAD(&mnt->mnt_child); // link into the parent's list of child mounts
- INIT_LIST_HEAD(&mnt->mnt_mounts); // head of this mount's own list of child mounts
- INIT_LIST_HEAD(&mnt->mnt_list); // link into the namespace's list of mounts
- INIT_LIST_HEAD(&mnt->mnt_fslink); // link into an expiration list
- if (name) {
- int size = strlen(name)+1; /* length including the terminating NUL */
- char *newname = kmalloc(size, GFP_KERNEL);
- if (newname) { /* if kmalloc() fails, mnt_devname simply stays NULL; no error is reported */
- memcpy(newname, name, size);
- mnt->mnt_devname = newname; // device file name of the filesystem
- }
- }
- }
- return mnt;
- }
复制代码 9、graft_tree
函数源码(分析参见注释): - static int graft_tree(struct vfsmount *mnt, struct nameidata *nd)
- { /* Attaches mnt at the mount point nd and links it into the current namespace's mount list; returns 0 or a negative errno. */
- int err;
- if (mnt->mnt_sb->s_flags & MS_NOUSER)
- return -EINVAL;
- if (S_ISDIR(nd->dentry->d_inode->i_mode) !=
- S_ISDIR(mnt->mnt_root->d_inode->i_mode)) /* mount point and new root must both be directories, or both be non-directories */
- return -ENOTDIR;
- err = -ENOENT;
- down(&nd->dentry->d_inode->i_sem); /* the mount point inode's i_sem is held until out_unlock */
- if (IS_DEADDIR(nd->dentry->d_inode))
- goto out_unlock;
- err = security_sb_check_sb(mnt, nd);
- if (err)
- goto out_unlock;
- err = -ENOENT; /* returned when the condition below is false */
- spin_lock(&vfsmount_lock);
- if (IS_ROOT(nd->dentry) || !d_unhashed(nd->dentry)) {
- struct list_head head; /* temporary list head used only for the splice below */
- attach_mnt(mnt, nd); // see attach_mnt() below
- list_add_tail(&head, &mnt->mnt_list);
- list_splice(&head, current->namespace->list.prev); /* NOTE(review): appears to splice mnt at the tail of the namespace list - confirm against list.h */
- mntget(mnt); // the mount is now linked into the namespace list; its reference counter is incremented
- err = 0;
- }
- spin_unlock(&vfsmount_lock);
- out_unlock:
- up(&nd->dentry->d_inode->i_sem);
- if (!err)
- security_sb_post_addmount(mnt, nd); /* called only after a successful attach */
- return err;
- }
- static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd)
- { /* Links mnt below the mount nd->mnt at the dentry nd->dentry. */
- mnt->mnt_parent = mntget(nd->mnt); // parent mount, obtained through mntget()
- mnt->mnt_mountpoint = dget(nd->dentry); // dentry of the mount point, obtained through dget()
- list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry)); // insert into the mount_hashtable bucket selected by (parent mount, mount point dentry)
- list_add_tail(&mnt->mnt_child, &nd->mnt->mnt_mounts);// append to the parent's list of child mounts
- nd->dentry->d_mounted++; // count of filesystems mounted on this mount point dentry
- }
复制代码
|