diff options
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r-- | Documentation/filesystems/Locking | 30 | ||||
-rw-r--r-- | Documentation/filesystems/porting | 45 | ||||
-rw-r--r-- | Documentation/filesystems/squashfs.txt | 2 | ||||
-rw-r--r-- | Documentation/filesystems/vfs.txt | 6 |
4 files changed, 60 insertions, 23 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 96d4293607ec..2db4283efa8d 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -92,8 +92,8 @@ prototypes: void (*destroy_inode)(struct inode *); void (*dirty_inode) (struct inode *); int (*write_inode) (struct inode *, int); - void (*drop_inode) (struct inode *); - void (*delete_inode) (struct inode *); + int (*drop_inode) (struct inode *); + void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); @@ -101,14 +101,13 @@ prototypes: int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); - void (*clear_inode) (struct inode *); void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); locking rules: - All may block. + All may block [not true, see below] None have BKL s_umount alloc_inode: @@ -116,22 +115,25 @@ destroy_inode: dirty_inode: (must not sleep) write_inode: drop_inode: !!!inode_lock!!! -delete_inode: +evict_inode: put_super: write write_super: read sync_fs: read freeze_fs: read unfreeze_fs: read -statfs: no -remount_fs: maybe (see below) -clear_inode: +statfs: maybe(read) (see below) +remount_fs: write umount_begin: no show_options: no (namespace_sem) quota_read: no (see below) quota_write: no (see below) -->remount_fs() will have the s_umount exclusive lock if it's already mounted. -When called from get_sb_single, it does NOT have the s_umount lock. +->statfs() has s_umount (shared) when called by ustat(2) (native or +compat), but that's an accident of bad API; s_umount is used to pin +the superblock down when we only have dev_t given us by userland to +identify the superblock. Everything else (statfs(), fstatfs(), etc.) +doesn't hold it when calling ->statfs() - superblock is pinned down +by resolving the pathname passed to syscall. ->quota_read() and ->quota_write() functions are both guaranteed to be the only ones operating on the quota file by the quota code (via dqio_sem) (unless an admin really wants to screw up something and @@ -372,8 +374,6 @@ prototypes: ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); - int (*ioctl) (struct inode *, struct file *, unsigned int, - unsigned long); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -407,8 +407,7 @@ write: no aio_write: no readdir: no poll: no -ioctl: yes (see below) -unlocked_ioctl: no (see below) +unlocked_ioctl: no compat_ioctl: no mmap: no open: no @@ -451,9 +450,6 @@ move ->readdir() to inode_operations and use a separate method for directory anything that resembles union-mount we won't have a struct file for all components. And there are other reasons why the current interface is a mess... -->ioctl() on regular files is superceded by the ->unlocked_ioctl() that -doesn't take the BKL. - ->read on directories probably must go away - we should just enforce -EISDIR in sys_read() and friends. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index a7e9746ee7ea..b12c89538680 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -273,3 +273,48 @@ it's safe to remove it. If you don't need it, remove it. deliberate; as soon as struct block_device * is propagated in a reasonable way by that code fixing will become trivial; until then nothing can be done. + +[mandatory] + + block truncatation on error exit from ->write_begin, and ->direct_IO +moved from generic methods (block_write_begin, cont_write_begin, +nobh_write_begin, blockdev_direct_IO*) to callers. Take a look at +ext2_write_failed and callers for an example. + +[mandatory] + + ->truncate is going away. The whole truncate sequence needs to be +implemented in ->setattr, which is now mandatory for filesystems +implementing on-disk size changes. Start with a copy of the old inode_setattr +and vmtruncate, and the reorder the vmtruncate + foofs_vmtruncate sequence to +be in order of zeroing blocks using block_truncate_page or similar helpers, +size update and on finally on-disk truncation which should not fail. +inode_change_ok now includes the size checks for ATTR_SIZE and must be called +in the beginning of ->setattr unconditionally. + +[mandatory] + + ->clear_inode() and ->delete_inode() are gone; ->evict_inode() should +be used instead. It gets called whenever the inode is evicted, whether it has +remaining links or not. Caller does *not* evict the pagecache or inode-associated +metadata buffers; getting rid of those is responsibility of method, as it had +been for ->delete_inode(). + ->drop_inode() returns int now; it's called on final iput() with inode_lock +held and it returns true if filesystems wants the inode to be dropped. As before, +generic_drop_inode() is still the default and it's been updated appropriately. +generic_delete_inode() is also alive and it consists simply of return 1. Note that +all actual eviction work is done by caller after ->drop_inode() returns. + clear_inode() is gone; use end_writeback() instead. As before, it must +be called exactly once on each call of ->evict_inode() (as it used to be for +each call of ->delete_inode()). Unlike before, if you are using inode-associated +metadata buffers (i.e. mark_buffer_dirty_inode()), it's your responsibility to +call invalidate_inode_buffers() before end_writeback(). + No async writeback (and thus no calls of ->write_inode()) will happen +after end_writeback() returns, so actions that should not overlap with ->write_inode() +(e.g. freeing on-disk inode if i_nlink is 0) ought to be done after that call. + + NOTE: checking i_nlink in the beginning of ->write_inode() and bailing out +if it's zero is not *and* *never* *had* *been* enough. Final unlink() and iput() +may happen while the inode is in the middle of ->write_inode(); e.g. if you blindly +free the on-disk inode, you may end up doing that while ->write_inode() is writing +to it. diff --git a/Documentation/filesystems/squashfs.txt b/Documentation/filesystems/squashfs.txt index 203f7202cc9e..66699afd66ca 100644 --- a/Documentation/filesystems/squashfs.txt +++ b/Documentation/filesystems/squashfs.txt @@ -2,7 +2,7 @@ SQUASHFS 4.0 FILESYSTEM ======================= Squashfs is a compressed read-only filesystem for Linux. -It uses zlib compression to compress files, inodes and directories. +It uses zlib/lzo compression to compress files, inodes and directories. Inodes in the system are very small and all blocks are packed to minimise data overhead. Block sizes greater than 4K are supported up to a maximum of 1Mbytes (default block size 128K). diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 94677e7dcb13..ed7e5efc06d8 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -727,7 +727,6 @@ struct file_operations { ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); - int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); @@ -768,10 +767,7 @@ otherwise noted. activity on this file and (optionally) go to sleep until there is activity. Called by the select(2) and poll(2) system calls - ioctl: called by the ioctl(2) system call - - unlocked_ioctl: called by the ioctl(2) system call. Filesystems that do not - require the BKL should use this method instead of the ioctl() above. + unlocked_ioctl: called by the ioctl(2) system call. compat_ioctl: called by the ioctl(2) system call when 32 bit system calls are used on 64 bit kernels. |