赞
踩
近几天调研了一下fuse编写文件系统的方法,先尝试拿fuse写一套类似tmpfs的简易文件系统,文件信息都保留在内存中。文件系统需要一个数据结构来管理文件节点 inode,正好《c语言实现map-使用内核红黑树》一文将rbtree结构拿出来了可以用上。
目标:支持文件读写操作:echo、cat;支持目录操作ls、mkdir、cd。
FUSE(Filesystem in Userspace)为Linux下用户态的文件系统接口,通常情况文件系统的操作在内核态处理,存在调试不方便,开发效率低的情况,使用FUSE可以在用户空间进行方便地开发、调试。
如图所示,用户层的 list 操作,通过内核 VFS/FUSE 中转,在用户层通过libfuse到自定义程序hello中进行处理、返回。这种操作是非常灵活的,即list操作的结果是由你的应用来决定的,也就是说你能实现list展示你的自定义列表、自定义数据项等信息。当然,灵活性所需要付出的代价:用户态实现的文件系统会引入“内核态/用户态切换”额外的开销,进而影响性能。
fuse安装比较简单:
1、内核需要开启fuse的支持(默认带)
2、准备安装包 fuse-2.9.7.tar.gz
3、源码安装./configure --prefix=/usr && make -j4 && make install(编译过程提示我缺库,util-linux-ng-2.17.1.tar.gz)
编译过程中需要指定库文件:-lfuse -pthread
首先需要定义文件系统支持的操作函数,填在结构体 struct fuse_operations 中,其他的可以详见[附录]:
- static struct fuse_operations memfs_oper = {
-
- .getattr = memfs_getattr,
- .access = memfs_access,
- .readdir = memfs_readdir,
-
- .open = memfs_open,
- .read = memfs_read,
- .write = memfs_write,
- .release = memfs_release,
-
- .mknod = memfs_mknod,
- .unlink = memfs_unlink,
-
- .mkdir = memfs_mkdir,
- .rmdir = memfs_rmdir,
-
- .statfs = memfs_statfs,
- };
主要包含了一些基础操作:
1、新建目录:mkdir、getattr;删除目录:rmdir;遍历目录:readdir;进入目录:access;
2、新建文件:getattr、mknod、open、write、read、release;删除文件:unlink;
3、状态查看:statfs;
然后看下数据结构,memfs为全局变量(多个终端操作为多线程访问该变量),并定义、初始化了statvfs结构来维护系统状态信息,定义了文件块BlockSize大小为4096,块上限MaxBlocks为1048576个,文件数MaxInode为1048576个:
- struct memfs {
- struct rb_root root;
- struct statvfs statvfs;
- pthread_mutex_t lock;
- pthread_mutex_t lock_write;
- };
-
/* Magic number for the filesystem type (not referenced by the code shown here). */
#define FUSE_SUPER_MAGIC 0x65735546

/* Allocation granularity of file data: 4 KiB per block. */
#define BLOCKSIZE (4UL * 1024)

/* Limits advertised through statvfs. */
#define MAX_NAME 255              /* longest file name */
#define MAX_INODE (1UL << 20)     /* 1048576 file nodes */
#define MAX_BLOCKS (1UL << 20)    /* 1048576 data blocks */
-
- /* Set global instance */
- static struct memfs memfs = {
- .root = RB_ROOT,
- .statvfs = {
-
- .f_bsize = BLOCKSIZE, /* Filesystem block size */
- .f_frsize = BLOCKSIZE, /* Fragment size */
-
- .f_blocks = MAX_BLOCKS, /* Size of fs in f_frsize units */
- .f_bfree = MAX_BLOCKS, /* Number of free blocks */
- .f_bavail = MAX_BLOCKS, /* Number of free blocks for unprivileged users */
-
- .f_files = MAX_INODE, /* Number of inodes */
- .f_ffree = MAX_INODE, /* Number of free inodes */
- .f_favail = MAX_INODE, /* Number of free inodes for unprivileged users */
-
- .f_fsid = 0x0123456701234567, /* Filesystem ID */
- // .f_flags = 0, /* Mount flags */
- .f_namemax = MAX_NAME, /* Maximum filename length */
- },
- .lock = PTHREAD_MUTEX_INITIALIZER,
- .lock_write = PTHREAD_MUTEX_INITIALIZER,
- };
-
- /* File inodes store in rbtree */
- struct memfs_file {
- char *path; /* File path */
- void *data; /* File content */
- u8 free_on_delete;
-
- struct stat vstat; /* File stat */
- pthread_mutex_t lock;
-
- struct rb_node node;
- };
所以外部执行df,df -i的时候,将调用.statfs进行状态查询:
- static int memfs_statfs(const char *path, struct statvfs *stbuf)
- {
- printf("%s: %s\n", __FUNCTION__, path);
- *stbuf = memfs.statvfs;
- return 0;
- }
文件、目录节点均使用红黑树进行维护,相关的操作请看《c语言实现map-使用内核红黑树》;
由于数据结构将被多线程使用,所以使用mutex互斥锁对其进行保护;
getattr为非常常用的方法,用于查询节点是否存在、查询节点属性等动作:
- static int memfs_getattr(const char *path, struct stat *stbuf)
- {
- int res = 0;
- printf("%s: %s\n", __FUNCTION__, path);
- memset(stbuf, 0, sizeof(struct stat));
-
- pthread_mutex_lock(&memfs.lock);
- struct memfs_file *pf = __search(&memfs.root, path);
- if (!pf) {
- res = -ENOENT;
- }
- else {
- *stbuf = pf->vstat;
- }
- pthread_mutex_unlock(&memfs.lock);
-
- return res;
- }
进入目录、创建目录、删除目录:
- static int memfs_access(const char *path, int mask)
- {
- int res = 0;
- printf("%s: %s\n", __FUNCTION__, path);
-
- pthread_mutex_lock(&memfs.lock);
- struct memfs_file *pf = __search(&memfs.root, path);
- if (!pf) {
- res = -ENOENT;
- }
- pthread_mutex_unlock(&memfs.lock);
-
- return res;
- }
-
- static int memfs_mkdir(const char *path, mode_t mode)
- {
- int res = 0;
- struct memfs_file *pf = NULL;
- printf("%s: %s\n", __FUNCTION__, path);
-
- pf = __new(path, S_IFDIR | mode);
- if (!pf) {
- return -ENOMEM;
- }
-
- pthread_mutex_lock(&memfs.lock);
- res = __insert(&memfs.root, pf);
- if (res != SUCCESS) {
- __free(pf);
- res = -EEXIST;
- }
- pthread_mutex_unlock(&memfs.lock);
-
- __do_update_times(pf, U_ALL);
- return res;
- }
-
- static int memfs_rmdir(const char *path)
- {
- int res = 0;
- printf("%s: %s\n", __FUNCTION__, path);
-
- pthread_mutex_lock(&memfs.lock);
- if (__delete(&memfs.root, path) < 0) {
- res = -ENOENT;
- }
- pthread_mutex_unlock(&memfs.lock);
- return res;
- }
试验1:cd /mnt/fuse && mkdir 1 2 3 && rmdir 1 2 3
memfs_getattr: / memfs_access: / memfs_getattr: /1 memfs_mkdir: /1 memfs_getattr: /1 memfs_getattr: /2 memfs_mkdir: /2 memfs_getattr: /2 memfs_getattr: /3 memfs_mkdir: /3 memfs_getattr: /3 memfs_getattr: / memfs_getattr: /1 memfs_rmdir: /1 memfs_getattr: /2 memfs_rmdir: /2 memfs_getattr: /3 memfs_rmdir: /3
文件操作:创建文件mknod、打开文件open、关闭文件release、删除文件unlink;
注意mknod、unlink的时候需要更新statvfs中的inode计数器。
- static int memfs_mknod(const char *path, mode_t mode, dev_t rdev)
- {
- int res = 0;
- struct memfs_file *pf = NULL;
- printf("%s: %s\n", __FUNCTION__, path);
-
- pf = __new(path, mode);
- if (!pf) {
- return -ENOMEM;
- }
-
- pthread_mutex_lock(&memfs.lock);
- res = __insert(&memfs.root, pf);
- if (res != SUCCESS) {
- __free(pf);
- res = -EEXIST;
- }
-
- memfs.statvfs.f_favail = --memfs.statvfs.f_ffree;
-
- pthread_mutex_unlock(&memfs.lock);
- return res;
- }
-
- static int memfs_open(const char *path, struct fuse_file_info *fi)
- {
- int res = 0;
- struct memfs_file *pf = NULL;
- printf("%s: %s\n", __FUNCTION__, path);
-
- pthread_mutex_lock(&memfs.lock);
- pf = __search(&memfs.root, path);
- if (!pf) {
- if ((fi->flags & O_ACCMODE) == O_RDONLY ||
- !(fi->flags & O_CREAT)) {
- res = -ENOENT;
- goto unlock;
- }
- pf = __new(path, S_IFREG | 0755);
- __insert(&memfs.root, pf);
- }
- else {
- if (S_ISDIR(pf->vstat.st_mode)) {
- res = -EISDIR;
- goto unlock;
- }
- }
-
- fi->fh = (unsigned long)pf;
- unlock:
- pthread_mutex_unlock(&memfs.lock);
-
- return res;
- }
-
/*
 * release handler: nothing to tear down for an open file, so this only
 * logs the call. Always returns 0.
 */
static int memfs_release(const char *path, struct fuse_file_info *fi)
{
    (void)fi;   /* no per-open state to release */

    printf("%s: %s\n", __FUNCTION__, path);
    return 0;
}
-
- static int memfs_unlink(const char *path)
- {
- int res = 0, blocks = 0;
- printf("%s: %s\n", __FUNCTION__, path);
-
- pthread_mutex_lock(&memfs.lock);
-
- blocks = __delete(&memfs.root, path);
- if (blocks < 0) {
- res = -ENOENT;
- goto unlock;
- }
-
- memfs.statvfs.f_bfree = memfs.statvfs.f_bavail += blocks;
- memfs.statvfs.f_favail = ++memfs.statvfs.f_ffree;
-
- unlock:
- pthread_mutex_unlock(&memfs.lock);
-
- return res;
- }
文件读写操作:read、write;
注意write过程中需要对statvfs的blocks计数器进行更新,并调用__do_update_times对文件时间戳更新;
思路是open获取文件节点后,将节点挂在struct fuse_file_info结构的fh成员内,文件内容写在了memfs_file::data中;
该例子仅对单次写入进行加锁保护,但并没有加入文件级别的锁,没解决同时多人打开文件写的问题。
/* Bit flags selecting which timestamps __do_update_times() refreshes. */
#define U_ATIME 0x1     /* last access */
#define U_CTIME 0x2     /* last status change */
#define U_MTIME 0x4     /* last modification */
#define U_ALL (U_ATIME | U_CTIME | U_MTIME)
-
- static inline void __do_update_times(struct memfs_file *pf, int which)
- {
- time_t now = time(0);
- if (which & U_ATIME) {
- pf->vstat.st_atime = now;
- }
- if (which & U_CTIME) {
- pf->vstat.st_ctime = now;
- }
- if (which & U_MTIME) {
- pf->vstat.st_mtime = now;
- }
- }
-
- static int memfs_write(const char *path,
- const char *buf, size_t size, off_t offset,
- struct fuse_file_info *fi)
- {
- struct memfs_file *pf = (struct memfs_file *)fi->fh;
- printf("%s: %s, size: %zd\n", __FUNCTION__, path, size);
-
- // TODO Check whether the file was opened for reading
-
- blkcnt_t req_blocks = (offset + size + BLOCKSIZE - 1) / BLOCKSIZE;
-
- pthread_mutex_lock(&pf->lock);
- if (pf->vstat.st_blocks < req_blocks) {
- void *newdata = realloc(pf->data, req_blocks * BLOCKSIZE);
- if (!newdata) {
- return -ENOMEM;
- }
-
- memfs.statvfs.f_bfree = memfs.statvfs.f_bavail -= req_blocks - pf->vstat.st_blocks;
- pf->data = newdata;
- pf->vstat.st_blocks = req_blocks;
- }
- memcpy(pf->data + offset, buf, size);
-
- // Update file size if necessary
- off_t minsize = offset + size;
- if (minsize > pf->vstat.st_size) {
- pf->vstat.st_size = minsize;
- }
- pthread_mutex_unlock(&pf->lock);
-
- __do_update_times(pf, U_ALL);
- return size;
- }
-
- static int memfs_read(const char *path,
- char *buf, size_t size, off_t offset,
- struct fuse_file_info *fi)
- {
- struct memfs_file *pf = (struct memfs_file *)fi->fh;
- printf("%s: %s\n", __FUNCTION__, path);
-
- // TODO Check whether the file was opened for reading
-
- off_t filesize = pf->vstat.st_size;
- if (offset > filesize) {
- return 0;
- }
-
- size_t avail = filesize - offset;
- size_t rsize = (size < avail) ? size : avail;
- memcpy(buf, pf->data + offset, rsize);
-
- __do_update_times(pf, U_ATIME);
- return rsize;
- }
试验2:cd /mnt/fuse && echo "Helloworld" >test.txt && cat test.txt && rm -rf test.txt
memfs_getattr: / memfs_access: / memfs_getattr: /test.txt memfs_mknod: /test.txt memfs_getattr: /test.txt memfs_open: /test.txt memfs_write: /test.txt, size: 11 memfs_release: /test.txt memfs_getattr: / memfs_getattr: /test.txt memfs_open: /test.txt memfs_read: /test.txt memfs_getattr: /test.txt memfs_release: /test.txt memfs_getattr: /test.txt memfs_unlink: /test.txt
最后是遍历目录readdir的实现,basename、dirname字符串处理麻烦一点点,基本思路是先找到父目录节点,然后用rb_next按键的顺序向后遍历(即中序遍历),直到离开父目录(rbtree能够实现范围查找,hashmap则不行)。
文件名处理也可以用#include <libgen.h>里面的方法,为了深入理解处理,本文使用__is_parent来代替basename()函数;
filler函数中填写的文件名为basename,不能带'/';
/*
 * Test whether @path lies below directory @parent.
 *
 * @parent - "/tmp"
 * @path   - "/tmp/1.txt"
 *
 * Returns a pointer into @path at the '/' that follows the @parent
 * prefix ("/1.txt" for the example above), or NULL when @path is not
 * below @parent. For the root directory "/" any absolute @path
 * qualifies and @path itself is returned. The match is purely textual:
 * the result may still contain further '/' (deeper descendants).
 */
static inline const char *__is_parent(const char *parent, const char *path)
{
    const char delim = '/';

    /*
     * Root directory. parent[0] is tested before parent[1] so that an
     * empty @parent never has the byte after its terminator read.
     */
    if (parent[0] == delim && parent[1] == '\0' && path[0] == delim) {
        return path;
    }

    /* Walk over the common prefix. */
    while (*parent != '\0' && *parent == *path) {
        ++parent, ++path;
    }

    /* All of @parent must be consumed and @path must continue with '/'. */
    return (*parent == '\0' && *path == delim) ? path : NULL;
}
-
- static int __do_readdir(const char *dirname, void *buf, fuse_fill_dir_t filler)
- {
- struct rb_node *node = NULL;
- struct memfs_file *pentry = __search(&memfs.root, dirname);
- if (!pentry) {
- return -ENOENT;
- }
- else if (!S_ISDIR(pentry->vstat.st_mode)) {
- return -ENOTDIR;
- }
-
- for (node = rb_next(&pentry->node); node; node = rb_next(node)) {
- const struct memfs_file *pf = rb_entry(node, struct memfs_file, node);
- const char *basename = __is_parent(dirname, pf->path);
-
- if (!basename) {
- break;
- }
- else if (strchr(basename + 1, '/')) {
- continue;
- }
- filler(buf, basename + 1, &pf->vstat, 0);
- printf(" readdir: %10s, path: %10s\n", basename, pf->path);
- }
-
- return 0;
- }
-
- static int memfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
- off_t offset, struct fuse_file_info *fi)
- {
- int res = 0;
- printf("%s: %s\n", __FUNCTION__, path);
-
- filler(buf, ".", NULL, 0);
-
- if (strcmp(path, "/") != 0) {
- filler(buf, "..", NULL, 0);
- }
-
- pthread_mutex_lock(&memfs.lock);
- res = __do_readdir(path, buf, filler);
- pthread_mutex_unlock(&memfs.lock);
-
- return res;
- }
试验3:cd /mnt/fuse && mkdir 1 2 3 4 && ls -l
memfs_getattr: / memfs_access: / memfs_getattr: /1 memfs_mkdir: /1 memfs_getattr: /1 memfs_getattr: /2 memfs_mkdir: /2 memfs_getattr: /2 memfs_getattr: /3 memfs_mkdir: /3 memfs_getattr: /3 memfs_getattr: /4 memfs_mkdir: /4 memfs_getattr: /4 memfs_getattr: / memfs_readdir: / readdir: /1, path: /1 readdir: /2, path: /2 readdir: /3, path: /3 readdir: /4, path: /4
本文对fuse开发文件系统进行了探索,并简单实现了基于内存的文件系统,开发、调试过程是比较方便,遇到不会写的函数就参考一下fuse/example底下的案例,或者看下sshfs的源码。另外线程安全的问题也是需要在应用中重点考虑的部分。
然后尝试大批小文件写入发现速度达到10000ops,对比了一下tmpfs居然有40000ops的速度,果然多了两层内核态/用户态的切换性能影响还是挺大的。所以对于重扩展不重性能的应用,可以考虑fuse去实现(网络文件协议挂载到本地),但对于性能型应用还是考虑调用api比较合适。
附录:
- /* from fuse.h */
- struct fuse_operations
- {
- /** Get file attributes. */
- int (*getattr) (const char *, struct stat *);
- /** Read the target of a symbolic link */
- int (*readlink) (const char *, char *, size_t);
- /** Create a file node */
- int (*mknod) (const char *, mode_t, dev_t);
- /** Create a directory */
- int (*mkdir) (const char *, mode_t);
- /** Remove a file */
- int (*unlink) (const char *);
- /** Remove a directory */
- int (*rmdir) (const char *);
- /** Create a symbolic link */
- int (*symlink) (const char *, const char *);
- /** Rename a file */
- int (*rename) (const char *, const char *);
- /** Create a hard link to a file */
- int (*link) (const char *, const char *);
- /** Change the permission bits of a file */
- int (*chmod) (const char *, mode_t);
- /** Change the owner and group of a file */
- int (*chown) (const char *, uid_t, gid_t);
- /** Change the size of a file */
- int (*truncate) (const char *, off_t);
- /** Change the access and/or modification times of a file */
- int (*utime) (const char *, struct utimbuf *);
- /** File open operation */
- int (*open) (const char *, struct fuse_file_info *);
- /** Read data from an open file */
- int (*read) (const char *, char *, size_t, off_t,
- struct fuse_file_info *);
- /** Write data to an open file */
- int (*write) (const char *, const char *, size_t, off_t,
- struct fuse_file_info *);
- /** Get file system statistics */
- int (*statfs) (const char *, struct statvfs *);
- /** Possibly flush cached data */
- int (*flush) (const char *, struct fuse_file_info *);
- /** Release an open file */
- int (*release) (const char *, struct fuse_file_info *);
- /** Synchronize file contents */
- int (*fsync) (const char *, int, struct fuse_file_info *);
- /** Set extended attributes */
- int (*setxattr) (const char *, const char *, const char *, size_t, int);
- /** Get extended attributes */
- int (*getxattr) (const char *, const char *, char *, size_t);
- /** List extended attributes */
- int (*listxattr) (const char *, char *, size_t);
- /** Remove extended attributes */
- int (*removexattr) (const char *, const char *);
- /** Open directory */
- int (*opendir) (const char *, struct fuse_file_info *);
- /** Read directory */
- int (*readdir) (const char *, void *, fuse_fill_dir_t, off_t,
- struct fuse_file_info *);
- /** Release directory */
- int (*releasedir) (const char *, struct fuse_file_info *);
- /** Synchronize directory contents */
- int (*fsyncdir) (const char *, int, struct fuse_file_info *);
- /** Initialize filesystem */
- void *(*init) (struct fuse_conn_info *conn);
- /** Clean up filesystem */
- void (*destroy) (void *);
- /** Check file access permissions */
- int (*access) (const char *, int);
- /** Create and open a file */
- int (*create) (const char *, mode_t, struct fuse_file_info *);
- /** Change the size of an open file */
- int (*ftruncate) (const char *, off_t, struct fuse_file_info *);
- /** Get attributes from an open file */
- int (*fgetattr) (const char *, struct stat *, struct fuse_file_info *);
- /** Perform POSIX file locking operation */
- int (*lock) (const char *, struct fuse_file_info *, int cmd,
- struct flock *);
- /**
- * Change the access and modification times of a file with
- * nanosecond resolution
- */
- int (*utimens) (const char *, const struct timespec tv[2]);
- /** Map block index within file to block index within device */
- int (*bmap) (const char *, size_t blocksize, uint64_t *idx);
- /** Ioctl */
- int (*ioctl) (const char *, int cmd, void *arg,
- struct fuse_file_info *, unsigned int flags, void *data);
- /** Poll for IO readiness events */
- int (*poll) (const char *, struct fuse_file_info *,
- struct fuse_pollhandle *ph, unsigned *reventsp);
- /** Write contents of buffer to an open file */
- int (*write_buf) (const char *, struct fuse_bufvec *buf, off_t off,
- struct fuse_file_info *);
- /** Store data from an open file in a buffer */
- int (*read_buf) (const char *, struct fuse_bufvec **bufp,
- size_t size, off_t off, struct fuse_file_info *);
- /** Perform BSD file locking operation */
- int (*flock) (const char *, struct fuse_file_info *, int op);
- /** Allocates space for an open file */
- int (*fallocate) (const char *, int, off_t, off_t,
- struct fuse_file_info *);
- };
参考文章:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。