在这里插入图片描述

系列文章目录

Linux 内核设计与实现
深入理解 Linux 内核
Linux 设备驱动程序
Linux设备驱动开发详解
深入理解Linux虚拟内存管理（一）
深入理解Linux虚拟内存管理（二）
深入理解Linux虚拟内存管理（三）
深入理解Linux虚拟内存管理（四）
深入理解Linux虚拟内存管理（五）
深入理解Linux虚拟内存管理（六）
深入理解Linux虚拟内存管理（七）
深入理解Linux虚拟内存管理（八）
深入理解Linux虚拟内存管理（九）

文章目录

系列文章目录
一、共享内存虚拟文件系统
- 1、初始化 shmfs
- - （1）init_tmpfs
  - - ① ⇐ DECLARE_FSTYPE
    - ② ⟺ register_filesystem
    - ③ ⟺ kern_mount
    - ④ ⇒ shmem_set_size
  - （2）shmem_read_super
  - - ① ⇐ SHMEM_SB
    - ② ⇒ shmem_get_inode
    - ③ ⇔ d_alloc_root
  - （3）shmem_set_size
- 2、在 tmpfs 中创建文件
- - （1）shmem_create
  - （2）shmem_mknod
  - - ① ⇒ shmem_get_inode
    - ② ⇔ d_instantiate
  - （3）shmem_get_inode
  - - ① ⇔ new_inode
    - ② ⇔ init_special_inode
    - ③ ⇐ shmem_aops、shmem_file_operations
    - ④ ⇐ dcache_dir_ops
- 3、tmpfs 中的文件操作
- - （1）内存映射
  - - ① shmem_mmap
    - - ⑴ ⇒ shmem_nopage
  - （2）读取文件
  - - ① shmem_file_read
    - - ⑴ ⇐ read_descriptor_t
      - ⑵ ⇒ do_shmem_file_read
    - ② do_shmem_file_read
    - - ⑴ ⇒ file_read_actor
      - ⑵ ⇒ shmem_getpage
    - ③ file_read_actor
  - （3）写入文件
  - - ① shmem_file_write
  - （4）符号链接
  - - ① shmem_symlink
    - ② shmem_readlink_inline
    - ③ shmem_follow_link_inline
    - ④ shmem_readlink
  - （5）同步文件
  - - ① shmem_sync_file
- 4、tmpfs 中的索引节点操作
- - （1）截取
  - - ① shmem_truncate
    - ② shmem_truncate_indirect
    - ③ shmem_truncate_direct
    - ④ shmem_free_swp
  - （2）链接
  - - ① shmem_link
  - （3）解除链接
  - - ① shmem_unlink
  - （4）创建目录
  - - ① shmem_mkdir
  - （5）移除目录
  - - ① shmem_rmdir
    - ② shmem_empty
    - ③ shmem_positive
- 5、虚拟文件中的缺页中断
- - （1）缺页中断时读取页面
  - - ① shmem_nopage
    - ② shmem_getpage
  - （2）定位交换页面
  - - ① shmem_alloc_entry
    - ② shmem_swp_entry
- 6、交换空间交互
- - （1）shmem_writepage
  - （2）shmem_unuse
  - （3）shmem_unuse_inode
  - （4）shmem_find_swp
- 7、建立共享区
- - （1）shmem_zero_setup
  - （2）shmem_file_setup
- 8、System V IPC
- - （1）创建一个 SYSV 共享区
  - - ① sys_shmget
    - ② newseg
  - （2）附属一个 SYSV 共享区
  - - ① sys_shmat
符号

一、共享内存虚拟文件系统

1、初始化 shmfs

（1）init_tmpfs

这个函数用于注册和挂载 tmpfs 及 shmemfs 文件系统。

// mm/shmem.c
/*
 * Filesystem type descriptors for the shared-memory filesystem.
 *
 * With CONFIG_TMPFS two types are declared, "shm" and "tmpfs", both using
 * shmem_read_super() as their read_super callback and the FS_LITTER flag.
 * Without CONFIG_TMPFS only "tmpfs" is declared, and it additionally
 * carries FS_NOMOUNT (presumably so that only the kernel-internal mount
 * created by init_tmpfs() exists -- TODO confirm against <linux/fs.h>).
 *
 * DECLARE_FSTYPE() fills in four fields of struct file_system_type:
 * name, read_super, fs_flags and owner (set to THIS_MODULE).
 */
#ifdef CONFIG_TMPFS
/* type "shm" will be tagged obsolete in 2.5 */
static DECLARE_FSTYPE(shmem_fs_type, "shm", shmem_read_super, FS_LITTER);
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER);
#else
static DECLARE_FSTYPE(tmpfs_fs_type, "tmpfs", shmem_read_super, FS_LITTER|FS_NOMOUNT);
#endif
/* Kernel-internal tmpfs mount; assigned by init_tmpfs(). */
static struct vfsmount *shm_mnt;

/*
 * init_tmpfs - register the tmpfs (and, with CONFIG_TMPFS, the "shm")
 * filesystem type and create the kernel-internal tmpfs mount.
 *
 * Returns 0 on success. On failure every registration performed so far is
 * undone, shm_mnt is set to ERR_PTR(error) and the error code is returned.
 *
 * __init marks the function as initialisation-only code.
 */
static int __init init_tmpfs(void)
{
	int error;
	/* Register tmpfs_fs_type; on failure there is nothing to undo yet. */
	error = register_filesystem(&tmpfs_fs_type);
	if (error) {
		printk(KERN_ERR "Could not register tmpfs\n");
		goto out3;
	}
/*
 * With CONFIG_TMPFS also register the "shm" type. If that fails, jump to
 * out2 so that only tmpfs_fs_type is unregistered.
 */
#ifdef CONFIG_TMPFS
	error = register_filesystem(&shmem_fs_type);
	if (error) {
		printk(KERN_ERR "Could not register shm fs\n");
		goto out2;
	}
	/* Ask devfs to create a "shm" directory. */
	devfs_mk_dir(NULL, "shm", NULL);
#endif
	/*
	 * Mount tmpfs internally and remember the mount in the file-scope
	 * variable shm_mnt.
	 */
	shm_mnt = kern_mount(&tmpfs_fs_type);
	/*
	 * Check that the mount succeeded. If it did not, extract the error
	 * code and jump to out1, which unregisters both filesystem types.
	 */
	if (IS_ERR(shm_mnt)) {
		error = PTR_ERR(shm_mnt);
		printk(KERN_ERR "Could not kern_mount tmpfs\n");
		goto out1;
	}

	/* The internal instance should not do size checking */
	/* Hence both the block and the inode limit are set to ULONG_MAX. */
	shmem_set_size(SHMEM_SB(shm_mnt->mnt_sb), ULONG_MAX, ULONG_MAX);
	return 0;

out1:
#ifdef CONFIG_TMPFS
	unregister_filesystem(&shmem_fs_type);
out2:
#endif
	unregister_filesystem(&tmpfs_fs_type);
out3:
	/* Leave an error-encoded pointer in shm_mnt. */
	shm_mnt = ERR_PTR(error);
	return error;
}
/* module_init() names init_tmpfs() as the initialisation entry point. */
module_init(init_tmpfs)

① ⇐ DECLARE_FSTYPE

// include/linux/fs.h
/*
 * Descriptor of one filesystem type known to the kernel.
 */
struct file_system_type {
	const char *name;		/* name used to look the type up on registration */
	int fs_flags;			/* FS_* flags, e.g. FS_LITTER, FS_NOMOUNT */
	/* callback that fills in a super_block for a new instance */
	struct super_block *(*read_super) (struct super_block *, void *, int);
	struct module *owner;		/* set to THIS_MODULE by DECLARE_FSTYPE() */
	/* non-NULL is treated as "already registered" by register_filesystem() */
	struct file_system_type * next;
	struct list_head fs_supers;	/* list head initialised on registration */
};

/*
 * DECLARE_FSTYPE - define a struct file_system_type object.
 * @var:   name of the variable to define
 * @type:  filesystem name string
 * @read:  read_super callback
 * @flags: value for fs_flags
 *
 * The owner field is always set to THIS_MODULE; fields not listed here are
 * zero-initialised.
 */
#define DECLARE_FSTYPE(var,type,read,flags) \
struct file_system_type var = { \
	.name		= type, \
	.read_super	= read, \
	.fs_flags	= flags, \
	.owner		= THIS_MODULE, \
}

② ⟺ register_filesystem

// fs/super.c
/**
 *	register_filesystem - register a new filesystem
 *	@fs: the file system structure
 *
 *	Adds the file system passed to the list of file systems the kernel
 *	is aware of for mount and other syscalls. Returns 0 on success,
 *	or a negative errno code on an error.
 *
 *	The &struct file_system_type that is passed is linked into the kernel 
 *	structures and must not be freed until the file system has been
 *	unregistered.
 */
 
int register_filesystem(struct file_system_type * fs)
{
	int res = 0;
	struct file_system_type ** p;

	if (!fs)
		return -EINVAL;
	if (fs->next)
		return -EBUSY;
	INIT_LIST_HEAD(&fs->fs_supers);
	write_lock(&file_systems_lock);
	p = find_filesystem(fs->name);
	if (*p)
		res = -EBUSY;
	else
		*p = fs;
	write_unlock(&file_systems_lock);
	return res;
}

③ ⟺ kern_mount

// fs/super.c
/*
 * kern_mount - mount a filesystem type from inside the kernel.
 * @type: filesystem type to mount
 *
 * Forwards to do_kern_mount(), passing the type's name both as the
 * filesystem name and as the third (name string) argument, with flags 0
 * and no mount data. Returns whatever do_kern_mount() returns.
 */
struct vfsmount *kern_mount(struct file_system_type *type)
{
	const char *fsname = type->name;

	return do_kern_mount(fsname, 0, (char *)fsname, NULL);
}

④ ⇒ shmem_set_size

shmem_set_size 函数

（2）shmem_read_super

这是文件系统提供的回调函数，用于读取超级块。普通的文件系统会从磁盘读取这些细节信息；但由于这个文件系统基于 RAM ，它改为直接在内存中填充一个 struct super_block 。

// mm/shmem.c
// 参数如下所示:
//	sb 是产生的超级块。
//	data 包括了一些参数。
//  silent 在这个函数中未使用。
static struct super_block *shmem_read_super(struct super_block *sb, void *data, int silent)
{
	struct inode *inode;
	struct dentry *root;
	unsigned long blocks, inodes;
	// 设置缺省模式，uid和gid。这些可能覆盖挂载选项中的参数。
	int mode   = S_IRWXUGO | S_ISVTX;
	uid_t uid = current->fsuid;
	gid_t gid = current->fsgid;
	// 每个super_block 都允许具有一个特定文件系统的结构，该结构包括一个称为
	// super_block->u 的联合结构。 宏SHMEM_SB()返回联合结构中所包含的
	// struct shmem_sb_info。
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	struct sysinfo si;

	/*
	 * Per default we only allow half of the physical ram per
	 * tmpfs instance
	 */
	// si_meminfo()产生struct sysinfo,包括了全部内存，可用内存和已用内存的统计数
	// 据。这个函数在arch/i386/mm/init.c文件中进行了定义，它是架构相关的。
	si_meminfo(&si);
	// 缺省情况下，只允许文件系统消耗物理内存的一半。
	blocks = inodes = si.totalram / 2;

// 如果tmpfs可用，这将解析挂载选项，并允许覆盖缺省值。
#ifdef CONFIG_TMPFS
	if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, &inodes))
		return NULL;
#endif
	// 获取锁保护的 sbinfo, 它是 super_block 中的 struct shmem_sb_info。
	spin_lock_init(&sbinfo->stat_lock);
	sbinfo->max_blocks = blocks;
	sbinfo->free_blocks = blocks;
	sbinfo->max_inodes = inodes;
	sbinfo->free_inodes = inodes;
	sb->s_maxbytes = SHMEM_MAX_BYTES;
	sb->s_blocksize = PAGE_CACHE_SIZE;
	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	// 产生sb和sbinfo字段。
	sb->s_magic = TMPFS_MAGIC;
	// shmem_ops是超级块结构的函数指针,用于重新挂载文件系统和删除索引节点。
	sb->s_op = &shmem_ops;
	// 这个块分配特定的索引节点，用于表示文件系统的根节点。
	inode = shmem_get_inode(sb, S_IFDIR | mode, 0);
	if (!inode)
		return NULL;
	// 设置新文件系统的根部 uid和 gid。
	inode->i_uid = uid;
	inode->i_gid = gid;
	// 实现在 fs/dcache.c 中
	root = d_alloc_root(inode);
	if (!root) {
		iput(inode);
		return NULL;
	}
	// 设置根索引节点至 super_block 中。
	sb->s_root = root;
	// 返回产生的超级块。
	return sb;
}

① ⇐ SHMEM_SB

// mm/shmem.c
/*
 * SHMEM_SB - get the shmem-specific info embedded in a super_block.
 * @sb: pointer to a struct super_block
 *
 * Evaluates to the address of the shmem_sb member of the super_block's
 * union u. The argument is parenthesised so that expressions such as
 * "base + 1" expand correctly.
 */
#define SHMEM_SB(sb) (&(sb)->u.shmem_sb)

// include/linux/fs.h
/*
 * In-memory description of one mounted filesystem instance. The union u
 * holds the filesystem-specific part; shmem uses u.shmem_sb, which is what
 * the SHMEM_SB() macro returns the address of.
 */
struct super_block {
	struct list_head	s_list;		/* Keep this first */
	kdev_t			s_dev;
	unsigned long		s_blocksize;
	unsigned char		s_blocksize_bits;
	unsigned char		s_dirt;
	unsigned long long	s_maxbytes;	/* Max file size */
	struct file_system_type	*s_type;
	struct super_operations	*s_op;
	struct dquot_operations	*dq_op;
	struct quotactl_ops	*s_qcop;
	unsigned long		s_flags;
	unsigned long		s_magic;
	struct dentry		*s_root;
	struct rw_semaphore	s_umount;
	struct semaphore	s_lock;
	int			s_count;
	atomic_t		s_active;

	struct list_head	s_dirty;	/* dirty inodes */
	struct list_head	s_locked_inodes;/* inodes being synced */
	struct list_head	s_files;

	struct block_device	*s_bdev;
	struct list_head	s_instances;
	struct quota_info	s_dquot;	/* Diskquota specific options */

	/* Per-filesystem private data; exactly one member is in use. */
	union {
		struct minix_sb_info	minix_sb;
		struct ext2_sb_info	ext2_sb;
		struct ext3_sb_info	ext3_sb;
		struct hpfs_sb_info	hpfs_sb;
		struct ntfs_sb_info	ntfs_sb;
		struct msdos_sb_info	msdos_sb;
		struct isofs_sb_info	isofs_sb;
		struct nfs_sb_info	nfs_sb;
		struct sysv_sb_info	sysv_sb;
		struct affs_sb_info	affs_sb;
		struct ufs_sb_info	ufs_sb;
		struct efs_sb_info	efs_sb;
		struct shmem_sb_info	shmem_sb;
		struct romfs_sb_info	romfs_sb;
		struct smb_sb_info	smbfs_sb;
		struct hfs_sb_info	hfs_sb;
		struct adfs_sb_info	adfs_sb;
		struct qnx4_sb_info	qnx4_sb;
		struct reiserfs_sb_info	reiserfs_sb;
		struct bfs_sb_info	bfs_sb;
		struct udf_sb_info	udf_sb;
		struct ncp_sb_info	ncpfs_sb;
		struct usbdev_sb_info   usbdevfs_sb;
		struct jffs2_sb_info	jffs2_sb;
		struct cramfs_sb_info	cramfs_sb;
		void			*generic_sbp;
	} u;
	/*
	 * The next field is for VFS *only*. No filesystems have any business
	 * even looking at it. You had been warned.
	 */
	struct semaphore s_vfs_rename_sem;	/* Kludge */

	/* The next field is used by knfsd when converting a (inode number based)
	 * file handle into a dentry. As it builds a path in the dcache tree from
	 * the bottom up, there may for a time be a subpath of dentrys which is not
	 * connected to the main tree.  This semaphore ensures that there is only ever
	 * one such free path per filesystem.  Note that unconnected files (or other
	 * non-directories) are allowed, but not unconnected directories.
	 */
	struct semaphore s_nfsd_free_path_sem;
};

② ⇒ shmem_get_inode

shmem_get_inode 函数

③ ⇔ d_alloc_root

// fs/dcache.c
/**
 * d_alloc_root - create the root dentry for an inode
 * @root_inode: inode that the root dentry will refer to
 *
 * Allocates a dentry named "/", makes it its own parent, ties it to the
 * superblock of @root_inode and instantiates it with that inode.
 * Returns the dentry, or %NULL when @root_inode is %NULL or the dentry
 * cannot be allocated.
 */

struct dentry * d_alloc_root(struct inode * root_inode)
{
	struct dentry *root;

	if (!root_inode)
		return NULL;

	root = d_alloc(NULL, &(const struct qstr) { "/", 1, 0 });
	if (!root)
		return NULL;

	root->d_sb = root_inode->i_sb;
	/* The root has no parent of its own, so it points back at itself. */
	root->d_parent = root;
	d_instantiate(root, root_inode);
	return root;
}

（3）shmem_set_size

这个函数更新文件系统中可用块和索引节点的数量。在文件系统挂载或重新挂载时进行设置。

// mm/shmem.c
// 这些参数描述了文件系统超级块的信息，块的最大数量(max_blocks)和索引节点的
// 最大数量(max_inodes)。
static int shmem_set_size(struct shmem_sb_info *info,
			  unsigned long max_blocks, unsigned long max_inodes)
{
	int error;
	unsigned long blocks, inodes;
	// 锁定超级块信息自旋锁。
	spin_lock(&info->stat_lock);
	// 计算文件系统中当前使用的块数。在初始挂载时，这并不重要，然而，如果重新挂载
	// 文件系统,该函数必须保证新的文件系统不会太小。
	blocks = info->max_blocks - info->free_blocks;
	// 计算当前使用的索引节点数。
	inodes = info->max_inodes - info->free_inodes;
	error = -EINVAL;
	// 如果重新挂载的文件系统没有足够的块存放当前信息，则跳转到out,并返
	// 回-EINVAL。
	if (max_blocks < blocks)
		goto out;
	// 同样地，确保是否有足够的索引节点，否则返回-EINVAL	
	if (max_inodes < inodes)
		goto out;
	// 可以安全地挂载文件系统，因此这里设置error为0表示操作成功。
	error = 0;
	// 设置最大索引节点数和可用节点数。
	info->max_blocks  = max_blocks;
	info->free_blocks = max_blocks - blocks;
	info->max_inodes  = max_inodes;
	info->free_inodes = max_inodes - inodes;
out:
	// 为文件系统超级块信息结构解锁。
	spin_unlock(&info->stat_lock);
	// 成功则返回0,否则返回-EINVAL。
	return error;
}

2、在 tmpfs 中创建文件

（1）shmem_create

这是创建新文件时位于最顶层的函数。

// mm/shmem.c
/*
 * shmem_create - create a regular file.
 * @dir:    inode of the directory the file is created in
 * @dentry: dentry of the new file
 * @mode:   mode bits for the new file
 *
 * Adds S_IFREG to @mode and lets shmem_mknod() do the work with a device
 * number of 0. Returns shmem_mknod()'s result.
 */
static int shmem_create(struct inode *dir, struct dentry *dentry, int mode)
{
	int file_mode = mode | S_IFREG;

	return shmem_mknod(dir, dentry, file_mode, 0);
}

（2）shmem_mknod

// mm/shmem.c
/*
 * shmem_mknod - create a new file of any type in a directory.
 * @dir:    inode of the parent directory
 * @dentry: dentry that will refer to the new inode
 * @mode:   file type and permission bits
 * @dev:    device number, forwarded to shmem_get_inode()
 *
 * Returns 0 on success, or -ENOSPC when no inode could be obtained.
 */
static int shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, int dev)
{
	struct inode *inode = shmem_get_inode(dir->i_sb, mode, dev);

	if (!inode)
		return -ENOSPC;

	/* Account for the new entry in the parent directory. */
	dir->i_size += BOGO_DIRENT_SIZE;
	dir->i_ctime = dir->i_mtime = CURRENT_TIME;

	d_instantiate(dentry, inode);
	/* Extra count - pin the dentry in core */
	dget(dentry);
	return 0;
}

① ⇒ shmem_get_inode

shmem_get_inode 函数

② ⇔ d_instantiate

// fs/dcache.c
/**
 * d_instantiate - fill in inode information for a dentry
 * @entry: dentry to complete
 * @inode: inode to attach to this dentry
 *
 * Fill in inode information in the entry.
 *
 * This turns negative dentries into productive full members
 * of society.
 *
 * NOTE! This assumes that the inode count has been incremented
 * (or otherwise set) by the caller to indicate that it is now
 * in use by the dcache.
 */
 
void d_instantiate(struct dentry *entry, struct inode * inode)
{
	if (!list_empty(&entry->d_alias)) BUG();
	spin_lock(&dcache_lock);
	if (inode)
		list_add(&entry->d_alias, &inode->i_dentry);
	entry->d_inode = inode;
	spin_unlock(&dcache_lock);
}

（3）shmem_get_inode

// mm/shmem.c
/*
 * shmem_get_inode - allocate and initialise an inode on a shmem superblock.
 * @sb:   superblock the inode belongs to
 * @mode: file type and permission bits stored in i_mode
 * @dev:  device number, only passed on to init_special_inode()
 *
 * Reserves one inode from the superblock's free_inodes budget, allocates
 * the inode with new_inode() and fills in its fields according to the file
 * type in @mode.
 *
 * Returns the new inode, or NULL when the budget is exhausted or
 * new_inode() fails. In the latter case the reserved inode is handed back
 * to the budget so that a failed allocation does not shrink it for good.
 */
static struct inode *shmem_get_inode(struct super_block *sb, int mode, int dev)
{
	struct inode *inode;
	struct shmem_inode_info *info;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);

	/* Reserve one inode from the budget under the accounting lock. */
	spin_lock(&sbinfo->stat_lock);
	if (!sbinfo->free_inodes) {
		spin_unlock(&sbinfo->stat_lock);
		return NULL;
	}
	sbinfo->free_inodes--;
	spin_unlock(&sbinfo->stat_lock);

	inode = new_inode(sb);
	if (!inode) {
		/* Allocation failed: give the reservation back. */
		spin_lock(&sbinfo->stat_lock);
		sbinfo->free_inodes++;
		spin_unlock(&sbinfo->stat_lock);
		return NULL;
	}

	/* Basic inode fields. */
	inode->i_mode = mode;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_blksize = PAGE_CACHE_SIZE;
	inode->i_blocks = 0;
	inode->i_rdev = NODEV;
	/* Page-level operations of the mapping come from shmem_aops. */
	inode->i_mapping->a_ops = &shmem_aops;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;

	/* shmem-private part of the inode. */
	info = SHMEM_I(inode);
	info->inode = inode;
	spin_lock_init(&info->lock);

	/* The remaining setup depends on the file type. */
	switch (mode & S_IFMT) {
	default:
		/* Anything that is not a regular file, directory or symlink. */
		init_special_inode(inode, mode, dev);
		break;
	case S_IFREG:
		/* Regular file: install the file ops and track the inode. */
		inode->i_op = &shmem_inode_operations;
		inode->i_fop = &shmem_file_operations;
		spin_lock(&shmem_ilock);
		list_add_tail(&info->list, &shmem_inodes);
		spin_unlock(&shmem_ilock);
		break;
	case S_IFDIR:
		/* Directory: one extra link, and a non-zero size. */
		inode->i_nlink++;
		/* Some things misbehave if size == 0 on a directory */
		inode->i_size = 2 * BOGO_DIRENT_SIZE;
		inode->i_op = &shmem_dir_inode_operations;
		inode->i_fop = &dcache_dir_ops;
		break;
	case S_IFLNK:
		/*
		 * Nothing to do here; presumably the symlink creation path
		 * finishes the setup -- TODO confirm against the caller.
		 */
		break;
	}

	return inode;
}

① ⇔ new_inode

// fs/inode.c
/**
 *	new_inode 	- obtain an inode
 *	@sb: superblock
 *
 *	Allocates a new inode for given superblock.
 */
 
struct inode * new_inode(struct super_block *sb)
{
	static unsigned long last_ino;
	struct inode * inode;

	spin_lock_prefetch(&inode_lock);
	
	inode = alloc_inode(sb);
	if (inode) {
		spin_lock(&inode_lock);
		inodes_stat.nr_inodes++;
		list_add(&inode->i_list, &inode_in_use);
		inode->i_ino = ++last_ino;
		inode->i_state = 0;
		spin_unlock(&inode_lock);
	}
	return inode;
}

② ⇔ init_special_inode

// fs/devices.c
/*
 * init_special_inode - set up an inode for a device, FIFO or socket.
 * @inode: inode to initialise
 * @mode:  file mode; its type selects the file operations
 * @rdev:  device number, used for character and block devices only
 *
 * Stores @mode in the inode and installs the default file operations for
 * the file type. Any other type is reported with a debug message and gets
 * no file operations from this function.
 */
void init_special_inode(struct inode *inode, umode_t mode, int rdev)
{
	inode->i_mode = mode;

	if (S_ISCHR(mode)) {
		inode->i_fop = &def_chr_fops;
		inode->i_rdev = to_kdev_t(rdev);
		inode->i_cdev = cdget(rdev);
		return;
	}
	if (S_ISBLK(mode)) {
		inode->i_fop = &def_blk_fops;
		inode->i_rdev = to_kdev_t(rdev);
		return;
	}
	if (S_ISFIFO(mode)) {
		inode->i_fop = &def_fifo_fops;
		return;
	}
	if (S_ISSOCK(mode)) {
		inode->i_fop = &bad_sock_fops;
		return;
	}

	printk(KERN_DEBUG "init_special_inode: bogus imode (%o)\n", mode);
}

③ ⇐ shmem_aops、shmem_file_operations

// mm/shmem.c
/*
 * Address-space operations installed on every shmem inode's mapping.
 * The read/prepare/commit hooks exist only with CONFIG_TMPFS.
 */
static struct address_space_operations shmem_aops = {
	.removepage	= shmem_removepage,
	.writepage	= shmem_writepage,
#ifdef CONFIG_TMPFS
	.readpage	= shmem_readpage,
	.prepare_write	= shmem_prepare_write,
	.commit_write	= shmem_commit_write,
#endif
};

/*
 * File operations for regular shmem files. mmap is always available;
 * read, write and fsync exist only with CONFIG_TMPFS.
 */
static struct file_operations shmem_file_operations = {
	.mmap		= shmem_mmap,
#ifdef CONFIG_TMPFS
	.read		= shmem_file_read,
	.write		= shmem_file_write,
	.fsync		= shmem_sync_file,
#endif
};

/* Inode operations for regular shmem files. */
static struct inode_operations shmem_inode_operations = {
	.truncate	= shmem_truncate,
	.setattr	= shmem_notify_change,
};

/*
 * Inode operations for shmem directories. Without CONFIG_TMPFS the table
 * is empty.
 */
static struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	.create		= shmem_create,
	.lookup		= shmem_lookup,
	.link		= shmem_link,
	.unlink		= shmem_unlink,
	.symlink	= shmem_symlink,
	.mkdir		= shmem_mkdir,
	.rmdir		= shmem_rmdir,
	.mknod		= shmem_mknod,
	.rename		= shmem_rename,
#endif
};

④ ⇐ dcache_dir_ops

// fs/readdir.c
/* File operations that shmem_get_inode() installs on directory inodes. */
struct file_operations dcache_dir_ops = {
	.open		= dcache_dir_open,
	.release	= dcache_dir_close,
	.llseek		= dcache_dir_lseek,
	.read		= generic_read_dir,
	.readdir	= dcache_readdir,
	.fsync		= dcache_dir_fsync,
};

3、tmpfs 中的文件操作

（1）内存映射

用于映射虚拟文件到内存。唯一需要做的改动是更新 VMA 的 vm_operations_struct 字段（vma->vm_ops），使其使用 shmfs 对应的缺页处理操作。

① shmem_mmap

// mm/shmem.c
/*
 * shmem_mmap - mmap handler for shmem files.
 * @file: file being mapped
 * @vma:  memory area describing the new mapping
 *
 * Only regular files may be mapped. On success the inode's access time is
 * updated and the area's vm_ops are pointed at shmem_vm_ops.
 * Returns 0 on success, or -EACCES when the inode is not a regular file.
 */
static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_dentry->d_inode;

	if (!S_ISREG(inode->i_mode))
		return -EACCES;

	UPDATE_ATIME(inode);
	vma->vm_ops = &shmem_vm_ops;
	return 0;
}

⑴ ⇒ shmem_nopage

shmem_nopage 函数

（2）读取文件

① shmem_file_read

这是读取 tmpfs 文件时所调用的位于最顶层的函数。

// mm/shmem.c
/*
 * shmem_file_read - read() handler for tmpfs files.
 * @filp:  file being read
 * @buf:   user-space buffer to fill
 * @count: number of bytes requested
 * @ppos:  file position, advanced by do_shmem_file_read()
 *
 * Returns the number of bytes written to @buf if any were written,
 * otherwise the error recorded in the read descriptor (0 when there was
 * none). Returns -EINVAL when @count is negative as a ssize_t, -EFAULT
 * when @buf fails access_ok(), and 0 for a zero-length read.
 */
static ssize_t shmem_file_read(struct file *filp, char *buf, size_t count, loff_t *ppos)
{
	read_descriptor_t desc;

	if ((ssize_t) count < 0)
		return -EINVAL;
	if (!access_ok(VERIFY_WRITE, buf, count))
		return -EFAULT;
	if (count == 0)
		return 0;

	/* Describe the request for the page-by-page reader. */
	desc.buf = buf;
	desc.count = count;
	desc.written = 0;
	desc.error = 0;

	do_shmem_file_read(filp, ppos, &desc);

	/* A partial read wins over an error that happened later. */
	if (!desc.written)
		return desc.error;
	return desc.written;
}

⑴ ⇐ read_descriptor_t

// include/linux/fs.h
/*
 * "descriptor" for what we're up to with a read for sendfile().
 * This allows us to use the same read code yet
 * have multiple different users of the data that
 * we read from a file.
 *
 * The simplest case just copies the data to user
 * mode.
 */
typedef struct {
	size_t written;	/* bytes delivered so far */
	size_t count;	/* bytes still requested */
	char * buf;	/* destination buffer */
	int error;	/* error recorded during the read, 0 if none */
} read_descriptor_t;

⑵ ⇒ do_shmem_file_read

do_shmem_file_read 函数

② do_shmem_file_read

这个函数通过 shmem_getpage() 取得读取文件所需的各个页面，并调用 file_read_actor() 将数据复制到用户空间。

// mm/shmem.c
/*
 * do_shmem_file_read - copy file data page by page through a read actor.
 * @filp: file being read
 * @ppos: file position; updated to the position reached
 * @desc: read descriptor; desc->error receives any shmem_getpage() error
 *        other than -EINVAL, which is cleared to 0
 *
 * Each page is obtained with shmem_getpage() and handed to
 * file_read_actor(). The loop ends at end of file, on error, on a short
 * copy, or when desc->count reaches zero.
 */
static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_t *desc)
{
	/* Inode and mapping behind the struct file. */
	struct inode *inode = filp->f_dentry->d_inode;
	struct address_space *mapping = inode->i_mapping;
	unsigned long index, offset;
	/* index: page of the file that contains the current position. */
	index = *ppos >> PAGE_CACHE_SHIFT;
	/* offset: position inside that page. */
	offset = *ppos & ~PAGE_CACHE_MASK;
	/*
	 * Loop until the request is satisfied. nr is the number of bytes to
	 * read from the current page.
	 */
	for (;;) {
		struct page *page = NULL;
		unsigned long end_index, nr, ret;
		/* end_index: index of the last page of the file. */
		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
		/*
		 * Stop when positioned beyond the last page, or on the last
		 * page at or past the end of its valid data.
		 */
		if (index > end_index)
			break;
		if (index == end_index) {
			nr = inode->i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset)
				break;
		}
		/*
		 * Get the page. An error ends the loop; -EINVAL is not
		 * reported to the caller (presumably it means the page lies
		 * beyond end of file -- TODO confirm in shmem_getpage()).
		 */
		desc->error = shmem_getpage(inode, index, &page, SGP_READ);
		if (desc->error) {
			if (desc->error == -EINVAL)
				desc->error = 0;
			break;
		}

		/*
		 * We must evaluate after, since reads (unlike writes)
		 * are called without i_sem protection against truncate
		 */
		/* Start by assuming the whole page can be read. */
		nr = PAGE_CACHE_SIZE;
		/* Re-read the file size: it may have changed meanwhile. */
		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
		/*
		 * On the last page only the valid part may be read. If the
		 * position is now at or past it, release the page and stop.
		 */
		if (index == end_index) {
			nr = inode->i_size & ~PAGE_CACHE_MASK;
			if (nr <= offset) {
				page_cache_release(page);
				break;
			}
		}
		/* Bytes left in this page from the current offset. */
		nr -= offset;
		/*
		 * The cache flush and accessed marking are skipped for the
		 * global zero page.
		 */
		if (page != ZERO_PAGE(0)) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping->i_mmap_shared != NULL)
				flush_dcache_page(page);
			/*
			 * Mark the page accessed if we read the
			 * beginning or we just did an lseek.
			 */
			if (!offset || !filp->f_reada)
				mark_page_accessed(page);
		}

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
		ret = file_read_actor(desc, page, offset, nr);
		/* Advance within the page ... */
		offset += ret;
		/* ... carry into the page index when the page is exhausted ... */
		index += offset >> PAGE_CACHE_SHIFT;
		/* ... and reduce offset to an in-page offset again. */
		offset &= ~PAGE_CACHE_MASK;
		/* Drop the page reference (presumably taken by shmem_getpage()). */
		page_cache_release(page);
		/* Stop on a short copy or when nothing more is requested. */
		if (ret != nr || !desc->count)
			break;
	}
	/* Store the position reached back into the file position. */
	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	/* Record that a read has taken place (tested by the next call). */
	filp->f_reada = 1;
	/* Update the inode's access time. */
	UPDATE_ATIME(inode);
}

⑴ ⇒ file_read_actor

file_read_actor 函数

⑵ ⇒ shmem_getpage

shmem_getpage 函数

⇐ ⇒ ⇔ ⇆ ⇒ ⟺
①②③④⑤⑥⑦
⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽⑿⒀⒁⒂⒃⒄⒅⒆⒇

③ file_read_actor

（3）写入文件

① shmem_file_write

（4）符号链接

① shmem_symlink

② shmem_readlink_inline

③ shmem_follow_link_inline

④ shmem_readlink

（5）同步文件

① shmem_sync_file

4、tmpfs 中的索引节点操作

（1）截取

① shmem_truncate

② shmem_truncate_indirect

③ shmem_truncate_direct

①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮⑯⑰⑱⑲⑳

④ shmem_free_swp

（2）链接

① shmem_link

（3）解除链接

① shmem_unlink

（4）创建目录

① shmem_mkdir

（5）移除目录

① shmem_rmdir

② shmem_empty

③ shmem_positive

5、虚拟文件中的缺页中断

（1）缺页中断时读取页面

① shmem_nopage

② shmem_getpage

（2）定位交换页面

① shmem_alloc_entry

② shmem_swp_entry

6、交换空间交互

（1）shmem_writepage

（2）shmem_unuse

（3）shmem_unuse_inode

（4）shmem_find_swp

7、建立共享区

（1）shmem_zero_setup

（2）shmem_file_setup

8、System V IPC

（1）创建一个 SYSV 共享区

① sys_shmget

② newseg

（2）附属一个 SYSV 共享区

① sys_shmat

符号

⇐ ⇒ ⇔ ⇆ ⇒ ⟺
①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮⑯⑰⑱⑲⑳㉑㉒㉓㉔㉕㉖㉗㉘㉙㉚㉛㉜㉝㉞㉟㊱㊲㊳㊴㊵㊶㊷㊸㊹㊺㊻㊼㊽㊾㊿
⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽⑿⒀⒁⒂⒃⒄⒅⒆⒇
➊➋➌➍➎➏➐➑➒➓⓫⓬⓭⓮⓯⓰⓱⓲⓳⓴
⒜⒝⒞⒟⒠⒡⒢⒣⒤⒥⒦⒧⒨⒩⒪⒫⒬⒭⒮⒯⒰⒱⒲⒳⒴⒵
ⓐⓑⓒⓓⓔⓕⓖⓗⓘⓙⓚⓛⓜⓝⓞⓟⓠⓡⓢⓣⓤⓥⓦⓧⓨⓩ
ⒶⒷⒸⒹⒺⒻⒼⒽⒾⒿⓀⓁⓂⓃⓄⓅⓆⓇⓈⓉⓊⓋⓌⓍⓎⓏ
🅐🅑🅒🅓🅔🅕🅖🅗🅘🅙🅚🅛🅜🅝🅞🅟🅠🅡🅢🅣🅤🅥🅦🅧🅨🅩

123

$y = x^2 + z_3$

$x^2 + z_3 + \frac {a}{b} + \sqrt[a]{b}$

$x^2 + z^3 \tag{1}$