当前位置:   article > 正文

Linux mount 流程详解_sb_silent

sb_silent

本文代码基于Linux 5.10。

Linux mount 主要通过mount 命令或者mount api来实现, 本文主要介绍mount 调用在内核中的实现。

数据结构

fs_context

fs_context 是mount 流程中的重要数据结构, 其定义如下

  1. include/linux/fs_context.h
  2. struct fs_context {
  3. const struct fs_context_operations *ops;
  4. struct mutex uapi_mutex; /* Userspace access mutex */
  5. struct file_system_type *fs_type;
  6. void *fs_private; /* The filesystem's context */
  7. void *sget_key;
  8. struct dentry *root; /* The root and superblock */
  9. struct user_namespace *user_ns; /* The user namespace for this mount */
  10. struct net *net_ns; /* The network namespace for this mount */
  11. const struct cred *cred; /* The mounter's credentials */
  12. struct p_log log; /* Logging buffer */
  13. const char *source; /* The source name (eg. dev path) */
  14. void *security; /* Linux S&M options */
  15. void *s_fs_info; /* Proposed s_fs_info */
  16. unsigned int sb_flags; /* Proposed superblock flags (SB_*) */
  17. unsigned int sb_flags_mask; /* Superblock flags that were changed */
  18. unsigned int s_iflags; /* OR'd with sb->s_iflags */
  19. unsigned int lsm_flags; /* Information flags from the fs to the LSM */
  20. enum fs_context_purpose purpose:8;
  21. enum fs_context_phase phase:8; /* The phase the context is in */
  22. bool need_free:1; /* Need to call ops->free() */
  23. bool global:1; /* Goes into &init_user_ns */
  24. bool oldapi:1; /* Coming from mount(2) */
  25. };

Linux 对于这个结构体的注释是:

/*
* Filesystem context for holding the parameters used in the creation or
* reconfiguration of a superblock.
*
* Superblock creation fills in ->root whereas reconfiguration begins with this
* already set.
*
* See Documentation/filesystems/mount_api.rst
*/

我的理解是: 这个结构是 file_system_type 与 super_block 之间的桥梁, 控制了整个mount 流程。

fs_type: 对应的fs_type 结构体

ops: 这个比较重要, 指向了fs_context_operations, 一般会在文件系统的init_fs_context回调中对其进行赋值

  1. struct fs_context_operations {
  2. void (*free)(struct fs_context *fc);
  3. int (*dup)(struct fs_context *fc, struct fs_context *src_fc);
  4. int (*parse_param)(struct fs_context *fc, struct fs_parameter *param);
  5. int (*parse_monolithic)(struct fs_context *fc, void *data);
  6. int (*get_tree)(struct fs_context *fc);
  7. int (*reconfigure)(struct fs_context *fc);
  8. };

处理流程

mount 的整体调用栈如下, 下面我们一个一个分析:

  1. #0 exfat_fill_super (sb=0xffff888004865000, fc=0xffff888003053d80) at fs/exfat/super.c:599
  2. #1 0xffffffff8120a2e9 in get_tree_bdev (fc=0xffff888003053d80, fill_super=0xffffffff813232e5 <exfat_fill_super>) at fs/super.c:1344
  3. #2 0xffffffff813236eb in exfat_get_tree (fc=0xffff888003053d80) at fs/exfat/super.c:696
  4. #3 0xffffffff8120915c in vfs_get_tree (fc=fc@entry=0xffff888003053d80) at fs/super.c:1549
  5. #4 0xffffffff8122a997 in do_new_mount (data=0x0 <fixed_percpu_data>, name=0xffff8880032794a0 "/dev/loop0", mnt_flags=32, sb_flags=<optimized out>, fstype=0x20 <fixed_percpu_data+32> <error: Cannot access memory at address 0x20>, path=0xffffc90000183ec8) at fs/namespace.c:2875
  6. #5 path_mount (dev_name=dev_name@entry=0xffff8880032794a0 "/dev/loop0", path=path@entry=0xffffc90000183ec8, type_page=type_page@entry=0xffff8880032d1c78 "exfat", flags=<optimized out>, flags@entry=32768, data_page=data_page@entry=0x0 <fixed_percpu_data>) at fs/namespace.c:3205
  7. #6 0xffffffff8122ae10 in do_mount (dev_name=dev_name@entry=0xffff8880032794a0 "/dev/loop0", dir_name=dir_name@entry=0x7ffd4ff80f31 "/mnt", type_page=type_page@entry=0xffff8880032d1c78 "exfat", flags=flags@entry=32768, data_page=data_page@entry=0x0 <fixed_percpu_data>) at fs/namespace.c:3218
  8. #7 0xffffffff8122b246 in __do_sys_mount (data=<optimized out>, flags=32768, type=<optimized out>, dir_name=0x7ffd4ff80f31 "/mnt", dev_name=<optimized out>) at fs/namespace.c:3426
  9. #8 __se_sys_mount (data=<optimized out>, flags=32768, type=<optimized out>, dir_name=140725945110321, dev_name=<optimized out>) at fs/namespace.c:3403
  10. #9 __x64_sys_mount (regs=<optimized out>) at fs/namespace.c:3403
  11. #10 0xffffffff819bf903 in do_syscall_64 (nr=<optimized out>, regs=0xffffc90000183f58) at arch/x86/entry/common.c:46
  12. #11 0xffffffff81a0007c in entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:120
  13. #12 0x0000000000000000 in ?? ()

入口函数

如下是linux mount系统调用的定义, mount都会走到这个地方来, 主要调用do_mount 完成后续的工作。

  1. fs/namespace.c
  2. SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
  3. char __user *, type, unsigned long, flags, void __user *, data)
  4. {
  5. int ret;
  6. char *kernel_type;
  7. char *kernel_dev;
  8. void *options;
  9. kernel_type = copy_mount_string(type);
  10. ret = PTR_ERR(kernel_type);
  11. if (IS_ERR(kernel_type))
  12. goto out_type;
  13. kernel_dev = copy_mount_string(dev_name);
  14. ret = PTR_ERR(kernel_dev);
  15. if (IS_ERR(kernel_dev))
  16. goto out_dev;
  17. options = copy_mount_options(data);
  18. ret = PTR_ERR(options);
  19. if (IS_ERR(options))
  20. goto out_data;
  21. ret = do_mount(kernel_dev, dir_name, kernel_type, flags, options);
  22. kfree(options);
  23. out_data:
  24. kfree(kernel_dev);
  25. out_dev:
  26. kfree(kernel_type);
  27. out_type:
  28. return ret;
  29. }

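do_mount 先通过user_path_at 将用户态传入的dir_name 解析为struct path, 然后调用path_mount。 path_mount 中主要根据flags 设置sb_flags和mnt_flags, 再按flags 分发到对应的处理函数(remount、bind、修改传播类型、move 等); 对于普通的新挂载, 最终调用do_new_mount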

  1. fs/namespace.c
  2. int path_mount(const char *dev_name, struct path *path,
  3. const char *type_page, unsigned long flags, void *data_page)
  4. {
  5. unsigned int mnt_flags = 0, sb_flags;
  6. int ret;
  7. /* Discard magic */
  8. if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
  9. flags &= ~MS_MGC_MSK;
  10. /* Basic sanity checks */
  11. if (data_page)
  12. ((char *)data_page)[PAGE_SIZE - 1] = 0;
  13. if (flags & MS_NOUSER)
  14. return -EINVAL;
  15. ret = security_sb_mount(dev_name, path, type_page, flags, data_page);
  16. if (ret)
  17. return ret;
  18. if (!may_mount())
  19. return -EPERM;
  20. if ((flags & SB_MANDLOCK) && !may_mandlock())
  21. return -EPERM;
  22. /* Default to relatime unless overriden */
  23. if (!(flags & MS_NOATIME))
  24. mnt_flags |= MNT_RELATIME;
  25. /* Separate the per-mountpoint flags */
  26. if (flags & MS_NOSUID)
  27. mnt_flags |= MNT_NOSUID;
  28. if (flags & MS_NODEV)
  29. mnt_flags |= MNT_NODEV;
  30. if (flags & MS_NOEXEC)
  31. mnt_flags |= MNT_NOEXEC;
  32. if (flags & MS_NOATIME)
  33. mnt_flags |= MNT_NOATIME;
  34. if (flags & MS_NODIRATIME)
  35. mnt_flags |= MNT_NODIRATIME;
  36. if (flags & MS_STRICTATIME)
  37. mnt_flags &= ~(MNT_RELATIME | MNT_NOATIME);
  38. if (flags & MS_RDONLY)
  39. mnt_flags |= MNT_READONLY;
  40. if (flags & MS_NOSYMFOLLOW)
  41. mnt_flags |= MNT_NOSYMFOLLOW;
  42. /* The default atime for remount is preservation */
  43. if ((flags & MS_REMOUNT) &&
  44. ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
  45. MS_STRICTATIME)) == 0)) {
  46. mnt_flags &= ~MNT_ATIME_MASK;
  47. mnt_flags |= path->mnt->mnt_flags & MNT_ATIME_MASK;
  48. }
  49. sb_flags = flags & (SB_RDONLY |
  50. SB_SYNCHRONOUS |
  51. SB_MANDLOCK |
  52. SB_DIRSYNC |
  53. SB_SILENT |
  54. SB_POSIXACL |
  55. SB_LAZYTIME |
  56. SB_I_VERSION);
  57. if ((flags & (MS_REMOUNT | MS_BIND)) == (MS_REMOUNT | MS_BIND))
  58. return do_reconfigure_mnt(path, mnt_flags);
  59. if (flags & MS_REMOUNT)
  60. return do_remount(path, flags, sb_flags, mnt_flags, data_page);
  61. if (flags & MS_BIND)
  62. return do_loopback(path, dev_name, flags & MS_REC);
  63. if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
  64. return do_change_type(path, flags);
  65. if (flags & MS_MOVE)
  66. return do_move_mount_old(path, dev_name);
  67. return do_new_mount(path, type_page, sb_flags, mnt_flags, dev_name,
  68. data_page);
  69. }

分配fs_context

do_new_mount 是比较重要的函数, 这里面分配了fs_context结构体。

  1. fs/namespace.c
  2. /*
  3. * create a new mount for userspace and request it to be added into the
  4. * namespace's tree
  5. */
  6. static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
  7. int mnt_flags, const char *name, void *data)
  8. {
  9. struct file_system_type *type;
  10. struct fs_context *fc;
  11. const char *subtype = NULL;
  12. int err = 0;
  13. if (!fstype)
  14. return -EINVAL;
  15. type = get_fs_type(fstype); /* 1 */
  16. if (!type)
  17. return -ENODEV;
  18. if (type->fs_flags & FS_HAS_SUBTYPE) {
  19. subtype = strchr(fstype, '.');
  20. if (subtype) {
  21. subtype++;
  22. if (!*subtype) {
  23. put_filesystem(type);
  24. return -EINVAL;
  25. }
  26. }
  27. }
  28. fc = fs_context_for_mount(type, sb_flags); /* 2 */
  29. put_filesystem(type);
  30. if (IS_ERR(fc))
  31. return PTR_ERR(fc);
  32. if (subtype)
  33. err = vfs_parse_fs_string(fc, "subtype",
  34. subtype, strlen(subtype));
  35. if (!err && name)
  36. err = vfs_parse_fs_string(fc, "source", name, strlen(name));
  37. if (!err)
  38. err = parse_monolithic_mount_data(fc, data);
  39. if (!err && !mount_capable(fc))
  40. err = -EPERM;
  41. if (!err)
  42. err = vfs_get_tree(fc); /* 3 */
  43. if (!err)
  44. err = do_new_mount_fc(fc, path, mnt_flags);
  45. put_fs_context(fc);
  46. return err;
  47. }
  1. 根据fstype 找到对应的 file_system_type 结构体
  2. 初始化fc 结构体。 主要调用了alloc_fs_context 这个函数, 其中会调用文件系统自定义的init_fs_context回调; 如果文件系统没有定义fc->fs_type->init_fs_context, 则会调用legacy_init_fs_context进行初始化, 此时fc->ops = &legacy_fs_context_ops, 其中的legacy_get_tree会调用fc->fs_type->mount。
  3. 调用vfs_get_tree, 这里会调用fc->ops->get_tree。 对于exfat, 其实现为exfat_get_tree(见上面调用栈中的 #2), 它主要调用get_tree_bdev, 并传入exfat_fill_super作为回调来填充super_block

申请super_block

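get_tree_bdev 函数中会申请super_block结构体, 主要流程如下(注意: 下面的代码有删减, 原函数在设置SB_NOSEC 之前, 还会先通过blkdev_get_by_path(fc->source, mode, fc->fs_type) 打开块设备得到bdev, 并在持有bdev->bd_fsfreeze_mutex 的情况下检查该设备是否处于冻结状态, 因此后面才会有对应的mutex_unlock):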

  1. /**
  2. * get_tree_bdev - Get a superblock based on a single block device
  3. * @fc: The filesystem context holding the parameters
  4. * @fill_super: Helper to initialise a new superblock
  5. */
  6. int get_tree_bdev(struct fs_context *fc,
  7. int (*fill_super)(struct super_block *,
  8. struct fs_context *))
  9. {
  10. struct block_device *bdev;
  11. struct super_block *s;
  12. fmode_t mode = FMODE_READ | FMODE_EXCL;
  13. int error = 0;
  14. if (!(fc->sb_flags & SB_RDONLY))
  15. mode |= FMODE_WRITE;
  16. if (!fc->source)
  17. return invalf(fc, "No source specified");
  18. fc->sb_flags |= SB_NOSEC;
  19. fc->sget_key = bdev;
  20. s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc); /* 1 */
  21. mutex_unlock(&bdev->bd_fsfreeze_mutex);
  22. if (IS_ERR(s)) {
  23. blkdev_put(bdev, mode);
  24. return PTR_ERR(s);
  25. }
  26. if (s->s_root) {
  27. /* Don't summarily change the RO/RW state. */
  28. if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
  29. warnf(fc, "%pg: Can't mount, would change RO state", bdev);
  30. deactivate_locked_super(s);
  31. blkdev_put(bdev, mode);
  32. return -EBUSY;
  33. }
  34. /*
  35. * s_umount nests inside bd_mutex during
  36. * __invalidate_device(). blkdev_put() acquires
  37. * bd_mutex and can't be called under s_umount. Drop
  38. * s_umount temporarily. This is safe as we're
  39. * holding an active reference.
  40. */
  41. up_write(&s->s_umount);
  42. blkdev_put(bdev, mode);
  43. down_write(&s->s_umount);
  44. } else {
  45. s->s_mode = mode;
  46. snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
  47. sb_set_blocksize(s, block_size(bdev));
  48. error = fill_super(s, fc); /* 2 */
  49. if (error) {
  50. deactivate_locked_super(s);
  51. return error;
  52. }
  53. s->s_flags |= SB_ACTIVE;
  54. bdev->bd_super = s;
  55. }
  56. BUG_ON(fc->root);
  57. fc->root = dget(s->s_root);
  58. return 0;
  59. }

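(1) 调用sget_fc 查找该块设备上已存在的super_block, 如果没有找到则分配一个新的super_block 结构体。 返回后如果s->s_root 非空, 说明该设备已经被挂载过, 直接复用已有的super_block; 否则走else 分支对新的super_block 进行填充。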

(2) 调用传入的fill_super函数, 执行文件系统自定义的操作。 这里一般做的是去解析文件系统的元数据, 并填充到文件系统的私有结构体中。

调用回调函数填充super_block

exfat fill_super函数的实现如下:

  1. fs/exfat/super.c
  2. static int exfat_fill_super(struct super_block *sb, struct fs_context *fc)
  3. {
  4. struct exfat_sb_info *sbi = sb->s_fs_info;
  5. struct exfat_mount_options *opts = &sbi->options;
  6. struct inode *root_inode;
  7. int err;
  8. if (opts->allow_utime == (unsigned short)-1)
  9. opts->allow_utime = ~opts->fs_dmask & 0022;
  10. if (opts->discard) {
  11. struct request_queue *q = bdev_get_queue(sb->s_bdev);
  12. if (!blk_queue_discard(q)) {
  13. exfat_warn(sb, "mounting with \"discard\" option, but the device does not support discard");
  14. opts->discard = 0;
  15. }
  16. }
  17. sb->s_flags |= SB_NODIRATIME;
  18. sb->s_magic = EXFAT_SUPER_MAGIC;
  19. sb->s_op = &exfat_sops;
  20. sb->s_time_gran = 10 * NSEC_PER_MSEC;
  21. sb->s_time_min = EXFAT_MIN_TIMESTAMP_SECS;
  22. sb->s_time_max = EXFAT_MAX_TIMESTAMP_SECS;
  23. err = __exfat_fill_super(sb); /* 1 */
  24. if (err) {
  25. exfat_err(sb, "failed to recognize exfat type");
  26. goto check_nls_io;
  27. }
  28. /* set up enough so that it can read an inode */
  29. exfat_hash_init(sb);
  30. if (!strcmp(sbi->options.iocharset, "utf8"))
  31. opts->utf8 = 1;
  32. else {
  33. sbi->nls_io = load_nls(sbi->options.iocharset);
  34. if (!sbi->nls_io) {
  35. exfat_err(sb, "IO charset %s not found",
  36. sbi->options.iocharset);
  37. err = -EINVAL;
  38. goto free_table;
  39. }
  40. }
  41. if (sbi->options.utf8)
  42. sb->s_d_op = &exfat_utf8_dentry_ops;
  43. else
  44. sb->s_d_op = &exfat_dentry_ops;
  45. root_inode = new_inode(sb);
  46. if (!root_inode) {
  47. exfat_err(sb, "failed to allocate root inode");
  48. err = -ENOMEM;
  49. goto free_table;
  50. }
  51. root_inode->i_ino = EXFAT_ROOT_INO;
  52. inode_set_iversion(root_inode, 1);
  53. err = exfat_read_root(root_inode);
  54. if (err) {
  55. exfat_err(sb, "failed to initialize root inode");
  56. goto put_inode;
  57. }
  58. exfat_hash_inode(root_inode, EXFAT_I(root_inode)->i_pos);
  59. insert_inode_hash(root_inode);
  60. sb->s_root = d_make_root(root_inode);
  61. if (!sb->s_root) {
  62. exfat_err(sb, "failed to get the root dentry");
  63. err = -ENOMEM;
  64. goto put_inode;
  65. }
  66. return 0;
  67. put_inode:
  68. iput(root_inode);
  69. sb->s_root = NULL;
  70. free_table:
  71. exfat_free_upcase_table(sbi);
  72. exfat_free_bitmap(sbi);
  73. brelse(sbi->boot_bh);
  74. check_nls_io:
  75. unload_nls(sbi->nls_io);
  76. exfat_free_iocharset(sbi);
  77. sb->s_fs_info = NULL;
  78. kfree(sbi);
  79. return err;
  80. }

主要分为两部分:

(1) 读取exfat 的文件系统信息, 解析后保存在 exfat_sb_info这个结构体中

(2) 设置super_block 的一些重要field, 例如s_op, s_root, s_d_op

装载到全局文件系统树

vfs_get_tree 成功返回后(此时super_block 已创建完成, fc->root 也已设置), do_new_mount 会调用do_new_mount_fc将新的挂载实例添加到系统中, 即下面代码中 /* 4 */ 标注的位置。

  1. fs/namespace.c
  2. /*
  3. * create a new mount for userspace and request it to be added into the
  4. * namespace's tree
  5. */
  6. static int do_new_mount(struct path *path, const char *fstype, int sb_flags,
  7. int mnt_flags, const char *name, void *data)
  8. {
  9. struct file_system_type *type;
  10. struct fs_context *fc;
  11. const char *subtype = NULL;
  12. int err = 0;
  13. if (!fstype)
  14. return -EINVAL;
  15. type = get_fs_type(fstype); /* 1 */
  16. if (!type)
  17. return -ENODEV;
  18. if (type->fs_flags & FS_HAS_SUBTYPE) {
  19. subtype = strchr(fstype, '.');
  20. if (subtype) {
  21. subtype++;
  22. if (!*subtype) {
  23. put_filesystem(type);
  24. return -EINVAL;
  25. }
  26. }
  27. }
  28. fc = fs_context_for_mount(type, sb_flags); /* 2 */
  29. put_filesystem(type);
  30. if (IS_ERR(fc))
  31. return PTR_ERR(fc);
  32. if (subtype)
  33. err = vfs_parse_fs_string(fc, "subtype",
  34. subtype, strlen(subtype));
  35. if (!err && name)
  36. err = vfs_parse_fs_string(fc, "source", name, strlen(name));
  37. if (!err)
  38. err = parse_monolithic_mount_data(fc, data);
  39. if (!err && !mount_capable(fc))
  40. err = -EPERM;
  41. if (!err)
  42. err = vfs_get_tree(fc); /* 3 */
  43. if (!err)
  44. err = do_new_mount_fc(fc, path, mnt_flags); /* 4 */
  45. put_fs_context(fc);
  46. return err;
  47. }

这里主要调用了 do_new_mount_fc: 它通过vfs_create_mount 为fc->root 创建新的挂载实例(struct mount), 再通过do_add_mount 将其关联到挂载点所在的挂载树中。 这里面涉及的数据结构很多, 关系也比较复杂, 其中的细节本文暂时没有梳理清楚。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/我家自动化/article/detail/217428
推荐阅读
相关标签
  

闽ICP备14008679号