summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/fid.c3
-rw-r--r--fs/9p/mux.c5
-rw-r--r--fs/9p/v9fs.c9
-rw-r--r--fs/9p/v9fs.h9
-rw-r--r--fs/9p/v9fs_vfs.h2
-rw-r--r--fs/9p/vfs_addr.c2
-rw-r--r--fs/9p/vfs_dentry.c26
-rw-r--r--fs/9p/vfs_file.c22
-rw-r--r--fs/9p/vfs_inode.c43
-rw-r--r--fs/9p/vfs_super.c4
-rw-r--r--fs/Kconfig64
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/adfs.h4
-rw-r--r--fs/adfs/dir.c2
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/affs/affs.h6
-rw-r--r--fs/affs/dir.c2
-rw-r--r--fs/affs/file.c2
-rw-r--r--fs/affs/inode.c2
-rw-r--r--fs/affs/super.c2
-rw-r--r--fs/affs/symlink.c2
-rw-r--r--fs/afs/cell.c1
-rw-r--r--fs/afs/dir.c3
-rw-r--r--fs/afs/file.c3
-rw-r--r--fs/afs/inode.c1
-rw-r--r--fs/afs/internal.h6
-rw-r--r--fs/afs/main.c1
-rw-r--r--fs/afs/mntpt.c3
-rw-r--r--fs/afs/proc.c1
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/aio.c5
-rw-r--r--fs/autofs/autofs_i.h4
-rw-r--r--fs/autofs/inode.c2
-rw-r--r--fs/autofs/root.c2
-rw-r--r--fs/autofs/symlink.c2
-rw-r--r--fs/autofs4/autofs_i.h10
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/root.c6
-rw-r--r--fs/autofs4/symlink.c2
-rw-r--r--fs/bad_inode.c2
-rw-r--r--fs/befs/linuxvfs.c4
-rw-r--r--fs/bfs/bfs.h4
-rw-r--r--fs/bfs/dir.c2
-rw-r--r--fs/bfs/file.c2
-rw-r--r--fs/bfs/inode.c2
-rw-r--r--fs/binfmt_elf.c3
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/binfmt_flat.c31
-rw-r--r--fs/binfmt_misc.c2
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/buffer.c19
-rw-r--r--fs/char_dev.c7
-rw-r--r--fs/cifs/CHANGES9
-rw-r--r--fs/cifs/README2
-rw-r--r--fs/cifs/TODO8
-rw-r--r--fs/cifs/cifsfs.c20
-rw-r--r--fs/cifs/cifsfs.h10
-rw-r--r--fs/cifs/cifspdu.h84
-rw-r--r--fs/cifs/cifsproto.h5
-rw-r--r--fs/cifs/cifssmb.c20
-rw-r--r--fs/cifs/connect.c130
-rw-r--r--fs/cifs/file.c17
-rw-r--r--fs/cifs/inode.c6
-rw-r--r--fs/cifs/readdir.c12
-rw-r--r--fs/cifs/smbdes.c10
-rw-r--r--fs/coda/cnode.c2
-rw-r--r--fs/coda/dir.c2
-rw-r--r--fs/coda/inode.c4
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/coda/sysctl.c136
-rw-r--r--fs/configfs/configfs_internal.h4
-rw-r--r--fs/configfs/dir.c5
-rw-r--r--fs/configfs/file.c9
-rw-r--r--fs/configfs/inode.c2
-rw-r--r--fs/configfs/mount.c2
-rw-r--r--fs/configfs/symlink.c2
-rw-r--r--fs/cramfs/inode.c8
-rw-r--r--fs/debugfs/file.c14
-rw-r--r--fs/debugfs/inode.c82
-rw-r--r--fs/devpts/inode.c2
-rw-r--r--fs/dlm/Kconfig18
-rw-r--r--fs/dlm/config.c154
-rw-r--r--fs/dlm/config.h17
-rw-r--r--fs/dlm/debug_fs.c4
-rw-r--r--fs/dlm/dlm_internal.h20
-rw-r--r--fs/dlm/lock.c87
-rw-r--r--fs/dlm/lockspace.c10
-rw-r--r--fs/dlm/lowcomms-sctp.c151
-rw-r--r--fs/dlm/lowcomms-tcp.c384
-rw-r--r--fs/dlm/memory.c4
-rw-r--r--fs/dlm/midcomms.c4
-rw-r--r--fs/dlm/rcom.c85
-rw-r--r--fs/dlm/recover.c8
-rw-r--r--fs/dlm/recoverd.c22
-rw-r--r--fs/dlm/user.c15
-rw-r--r--fs/dlm/util.c4
-rw-r--r--fs/dquot.c61
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c337
-rw-r--r--fs/ecryptfs/debug.c6
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h162
-rw-r--r--fs/ecryptfs/file.c52
-rw-r--r--fs/ecryptfs/inode.c93
-rw-r--r--fs/ecryptfs/keystore.c825
-rw-r--r--fs/ecryptfs/main.c87
-rw-r--r--fs/ecryptfs/messaging.c516
-rw-r--r--fs/ecryptfs/mmap.c378
-rw-r--r--fs/ecryptfs/netlink.c255
-rw-r--r--fs/ecryptfs/super.c2
-rw-r--r--fs/efs/dir.c2
-rw-r--r--fs/efs/super.c2
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext2/dir.c8
-rw-r--r--fs/ext2/ext2.h10
-rw-r--r--fs/ext2/file.c2
-rw-r--r--fs/ext2/namei.c4
-rw-r--r--fs/ext2/super.c6
-rw-r--r--fs/ext2/symlink.c4
-rw-r--r--fs/ext3/file.c2
-rw-r--r--fs/ext3/hash.c1
-rw-r--r--fs/ext3/inode.c4
-rw-r--r--fs/ext3/namei.c31
-rw-r--r--fs/ext3/resize.c1
-rw-r--r--fs/ext3/super.c22
-rw-r--r--fs/ext3/symlink.c4
-rw-r--r--fs/ext4/extents.c14
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/hash.c1
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/namei.c31
-rw-r--r--fs/ext4/resize.c1
-rw-r--r--fs/ext4/super.c22
-rw-r--r--fs/ext4/symlink.c4
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/filesystems.c1
-rw-r--r--fs/freevxfs/vxfs_extern.h2
-rw-r--r--fs/freevxfs/vxfs_immed.c2
-rw-r--r--fs/freevxfs/vxfs_inode.c2
-rw-r--r--fs/freevxfs/vxfs_lookup.c2
-rw-r--r--fs/freevxfs/vxfs_super.c2
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/dir.c6
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/fuse/inode.c4
-rw-r--r--fs/gfs2/Kconfig47
-rw-r--r--fs/gfs2/bmap.c11
-rw-r--r--fs/gfs2/dir.c26
-rw-r--r--fs/gfs2/dir.h21
-rw-r--r--fs/gfs2/eaops.c1
-rw-r--r--fs/gfs2/eattr.c9
-rw-r--r--fs/gfs2/glock.c316
-rw-r--r--fs/gfs2/glock.h11
-rw-r--r--fs/gfs2/glops.c137
-rw-r--r--fs/gfs2/incore.h18
-rw-r--r--fs/gfs2/inode.c61
-rw-r--r--fs/gfs2/lm.c9
-rw-r--r--fs/gfs2/locking/dlm/lock_dlm.h2
-rw-r--r--fs/gfs2/locking/dlm/main.c6
-rw-r--r--fs/gfs2/locking/dlm/mount.c6
-rw-r--r--fs/gfs2/locking/dlm/plock.c2
-rw-r--r--fs/gfs2/locking/dlm/sysfs.c13
-rw-r--r--fs/gfs2/lops.c14
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/meta_io.c3
-rw-r--r--fs/gfs2/mount.c1
-rw-r--r--fs/gfs2/ondisk.c1
-rw-r--r--fs/gfs2/ops_address.c134
-rw-r--r--fs/gfs2/ops_dentry.c17
-rw-r--r--fs/gfs2/ops_export.c16
-rw-r--r--fs/gfs2/ops_file.c53
-rw-r--r--fs/gfs2/ops_inode.c64
-rw-r--r--fs/gfs2/ops_inode.h8
-rw-r--r--fs/gfs2/ops_super.c13
-rw-r--r--fs/gfs2/ops_super.h2
-rw-r--r--fs/gfs2/ops_vm.c25
-rw-r--r--fs/gfs2/recovery.c1
-rw-r--r--fs/gfs2/rgrp.c1
-rw-r--r--fs/gfs2/super.c16
-rw-r--r--fs/gfs2/sys.c10
-rw-r--r--fs/gfs2/util.c1
-rw-r--r--fs/hfs/dir.c2
-rw-r--r--fs/hfs/hfs.h2
-rw-r--r--fs/hfs/hfs_fs.h2
-rw-r--r--fs/hfs/inode.c4
-rw-r--r--fs/hfs/super.c2
-rw-r--r--fs/hfsplus/catalog.c1
-rw-r--r--fs/hfsplus/dir.c3
-rw-r--r--fs/hfsplus/hfsplus_raw.h2
-rw-r--r--fs/hfsplus/inode.c4
-rw-r--r--fs/hfsplus/super.c3
-rw-r--r--fs/hostfs/hostfs_kern.c10
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hpfs/hpfs_fn.h4
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hppfs/hppfs_kern.c10
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/inode.c42
-rw-r--r--fs/inotify_user.c10
-rw-r--r--fs/ioprio.c18
-rw-r--r--fs/isofs/dir.c2
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/isofs/isofs.h2
-rw-r--r--fs/jffs/Makefile11
-rw-r--r--fs/jffs/inode-v23.c1847
-rw-r--r--fs/jffs/intrep.c3449
-rw-r--r--fs/jffs/intrep.h58
-rw-r--r--fs/jffs/jffs_fm.c798
-rw-r--r--fs/jffs/jffs_fm.h149
-rw-r--r--fs/jffs/jffs_proc.c261
-rw-r--r--fs/jffs/jffs_proc.h28
-rw-r--r--fs/jffs2/build.c22
-rw-r--r--fs/jffs2/compr_zlib.c1
-rw-r--r--fs/jffs2/dir.c3
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/jffs2_fs_sb.h12
-rw-r--r--fs/jffs2/os-linux.h6
-rw-r--r--fs/jffs2/scan.c10
-rw-r--r--fs/jffs2/summary.c1
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jffs2/symlink.c2
-rw-r--r--fs/jffs2/wbuf.c203
-rw-r--r--fs/jfs/file.c2
-rw-r--r--fs/jfs/inode.c6
-rw-r--r--fs/jfs/jfs_debug.h5
-rw-r--r--fs/jfs/jfs_dmap.c16
-rw-r--r--fs/jfs/jfs_imap.c16
-rw-r--r--fs/jfs/jfs_incore.h29
-rw-r--r--fs/jfs/jfs_inode.h6
-rw-r--r--fs/jfs/jfs_lock.h2
-rw-r--r--fs/jfs/jfs_metapage.c2
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/jfs_xtree.c15
-rw-r--r--fs/jfs/namei.c50
-rw-r--r--fs/jfs/super.c4
-rw-r--r--fs/jfs/symlink.c2
-rw-r--r--fs/libfs.c6
-rw-r--r--fs/lockd/clntproc.c9
-rw-r--r--fs/lockd/host.c3
-rw-r--r--fs/lockd/svc.c36
-rw-r--r--fs/lockd/svc4proc.c13
-rw-r--r--fs/lockd/svclock.c4
-rw-r--r--fs/lockd/svcproc.c13
-rw-r--r--fs/minix/bitmap.c69
-rw-r--r--fs/minix/dir.c162
-rw-r--r--fs/minix/file.c2
-rw-r--r--fs/minix/inode.c53
-rw-r--r--fs/minix/itree_common.c16
-rw-r--r--fs/minix/itree_v1.c4
-rw-r--r--fs/minix/itree_v2.c7
-rw-r--r--fs/minix/minix.h16
-rw-r--r--fs/minix/namei.c2
-rw-r--r--fs/msdos/namei.c2
-rw-r--r--fs/namei.c5
-rw-r--r--fs/namespace.c3
-rw-r--r--fs/ncpfs/dir.c2
-rw-r--r--fs/ncpfs/file.c2
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/nfs/callback.c34
-rw-r--r--fs/nfs/callback_xdr.c4
-rw-r--r--fs/nfs/client.c22
-rw-r--r--fs/nfs/dir.c43
-rw-r--r--fs/nfs/direct.c8
-rw-r--r--fs/nfs/file.c4
-rw-r--r--fs/nfs/getroot.c11
-rw-r--r--fs/nfs/inode.c50
-rw-r--r--fs/nfs/internal.h4
-rw-r--r--fs/nfs/namespace.c4
-rw-r--r--fs/nfs/nfs3proc.c24
-rw-r--r--fs/nfs/nfs4_fs.h4
-rw-r--r--fs/nfs/nfs4namespace.c16
-rw-r--r--fs/nfs/nfs4proc.c62
-rw-r--r--fs/nfs/nfs4renewd.c1
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/proc.c30
-rw-r--r--fs/nfs/read.c109
-rw-r--r--fs/nfs/super.c7
-rw-r--r--fs/nfs/symlink.c2
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/write.c91
-rw-r--r--fs/nfsd/export.c107
-rw-r--r--fs/nfsd/nfs2acl.c17
-rw-r--r--fs/nfsd/nfs3xdr.c31
-rw-r--r--fs/nfsd/nfs4acl.c491
-rw-r--r--fs/nfsd/nfs4callback.c7
-rw-r--r--fs/nfsd/nfs4idmap.c1
-rw-r--r--fs/nfsd/nfs4state.c18
-rw-r--r--fs/nfsd/nfs4xdr.c65
-rw-r--r--fs/nfsd/nfscache.c2
-rw-r--r--fs/nfsd/nfsctl.c2
-rw-r--r--fs/nfsd/nfsfh.c152
-rw-r--r--fs/nfsd/nfsproc.c7
-rw-r--r--fs/nfsd/nfssvc.c6
-rw-r--r--fs/nfsd/nfsxdr.c19
-rw-r--r--fs/nfsd/vfs.c5
-rw-r--r--fs/ntfs/attrib.c2
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ntfs/namei.c2
-rw-r--r--fs/ntfs/ntfs.h6
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ntfs/sysctl.c34
-rw-r--r--fs/ocfs2/cluster/heartbeat.c158
-rw-r--r--fs/ocfs2/cluster/nodemanager.c6
-rw-r--r--fs/ocfs2/cluster/nodemanager.h3
-rw-r--r--fs/ocfs2/cluster/tcp.c35
-rw-r--r--fs/ocfs2/cluster/tcp.h6
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h12
-rw-r--r--fs/ocfs2/dlm/dlmast.c14
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h130
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c40
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c30
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c253
-rw-r--r--fs/ocfs2/dlm/dlmfs.c20
-rw-r--r--fs/ocfs2/dlm/dlmlock.c7
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c579
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c182
-rw-r--r--fs/ocfs2/dlm/dlmthread.c200
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c15
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/file.h4
-rw-r--r--fs/ocfs2/journal.h4
-rw-r--r--fs/ocfs2/namei.c4
-rw-r--r--fs/ocfs2/namei.h2
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/ocfs2/symlink.c4
-rw-r--r--fs/ocfs2/symlink.h4
-rw-r--r--fs/ocfs2/vote.c8
-rw-r--r--fs/openpromfs/inode.c4
-rw-r--r--fs/partitions/check.c24
-rw-r--r--fs/partitions/msdos.c22
-rw-r--r--fs/partitions/sgi.c2
-rw-r--r--fs/partitions/sun.c5
-rw-r--r--fs/pipe.c7
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c56
-rw-r--r--fs/proc/generic.c12
-rw-r--r--fs/proc/inode.c3
-rw-r--r--fs/proc/internal.h14
-rw-r--r--fs/proc/nommu.c2
-rw-r--r--fs/proc/proc_misc.c54
-rw-r--r--fs/proc/proc_sysctl.c479
-rw-r--r--fs/proc/proc_tty.c2
-rw-r--r--fs/proc/root.c18
-rw-r--r--fs/proc/task_mmu.c6
-rw-r--r--fs/proc/task_nommu.c2
-rw-r--r--fs/qnx4/dir.c2
-rw-r--r--fs/qnx4/file.c2
-rw-r--r--fs/qnx4/inode.c4
-rw-r--r--fs/ramfs/file-mmu.c4
-rw-r--r--fs/ramfs/file-nommu.c4
-rw-r--r--fs/ramfs/inode.c8
-rw-r--r--fs/ramfs/internal.h2
-rw-r--r--fs/read_write.c26
-rw-r--r--fs/reiserfs/do_balan.c5
-rw-r--r--fs/reiserfs/file.c2
-rw-r--r--fs/reiserfs/namei.c6
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/romfs/inode.c6
-rw-r--r--fs/smbfs/dir.c4
-rw-r--r--fs/smbfs/file.c2
-rw-r--r--fs/smbfs/inode.c2
-rw-r--r--fs/smbfs/proto.h8
-rw-r--r--fs/smbfs/request.c3
-rw-r--r--fs/smbfs/symlink.c3
-rw-r--r--fs/stack.c14
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysfs/bin.c6
-rw-r--r--fs/sysfs/dir.c219
-rw-r--r--fs/sysfs/file.c82
-rw-r--r--fs/sysfs/group.c2
-rw-r--r--fs/sysfs/inode.c38
-rw-r--r--fs/sysfs/mount.c13
-rw-r--r--fs/sysfs/symlink.c3
-rw-r--r--fs/sysfs/sysfs.h25
-rw-r--r--fs/sysv/file.c2
-rw-r--r--fs/sysv/inode.c4
-rw-r--r--fs/sysv/namei.c2
-rw-r--r--fs/sysv/symlink.c2
-rw-r--r--fs/sysv/sysv.h8
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/namei.c2
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/udf/udfdecl.h4
-rw-r--r--fs/ufs/balloc.c198
-rw-r--r--fs/ufs/dir.c22
-rw-r--r--fs/ufs/ialloc.c116
-rw-r--r--fs/ufs/inode.c193
-rw-r--r--fs/ufs/namei.c2
-rw-r--r--fs/ufs/super.c54
-rw-r--r--fs/ufs/symlink.c2
-rw-r--r--fs/ufs/truncate.c141
-rw-r--r--fs/ufs/util.h57
-rw-r--r--fs/vfat/namei.c2
-rw-r--r--fs/xattr_acl.c1
-rw-r--r--fs/xfs/linux-2.6/kmem.c1
-rw-r--r--fs/xfs/linux-2.6/mrlock.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c18
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c142
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c51
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c12
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h10
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c48
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c33
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c266
-rw-r--r--fs/xfs/linux-2.6/xfs_vfs.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h4
-rw-r--r--fs/xfs/quota/xfs_dquot.c4
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c8
-rw-r--r--fs/xfs/quota/xfs_qm.c8
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c4
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c2
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c4
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c2
-rw-r--r--fs/xfs/support/debug.c23
-rw-r--r--fs/xfs/support/debug.h30
-rw-r--r--fs/xfs/support/move.h4
-rw-r--r--fs/xfs/xfs_acl.c1
-rw-r--r--fs/xfs/xfs_alloc_btree.h10
-rw-r--r--fs/xfs/xfs_attr.c49
-rw-r--r--fs/xfs/xfs_attr_leaf.c54
-rw-r--r--fs/xfs/xfs_bit.c2
-rw-r--r--fs/xfs/xfs_bmap.c101
-rw-r--r--fs/xfs/xfs_bmap.h1
-rw-r--r--fs/xfs/xfs_bmap_btree.c86
-rw-r--r--fs/xfs/xfs_bmap_btree.h59
-rw-r--r--fs/xfs/xfs_btree.h6
-rw-r--r--fs/xfs/xfs_buf_item.c2
-rw-r--r--fs/xfs/xfs_buf_item.h18
-rw-r--r--fs/xfs/xfs_cap.h70
-rw-r--r--fs/xfs/xfs_da_btree.c18
-rw-r--r--fs/xfs/xfs_da_btree.h1
-rw-r--r--fs/xfs/xfs_dfrag.c1
-rw-r--r--fs/xfs/xfs_error.c26
-rw-r--r--fs/xfs/xfs_error.h3
-rw-r--r--fs/xfs/xfs_extfree_item.c4
-rw-r--r--fs/xfs/xfs_fsops.c60
-rw-r--r--fs/xfs/xfs_ialloc.c2
-rw-r--r--fs/xfs/xfs_ialloc_btree.h10
-rw-r--r--fs/xfs/xfs_inode.c30
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_iomap.c10
-rw-r--r--fs/xfs/xfs_log_recover.c61
-rw-r--r--fs/xfs/xfs_mac.h106
-rw-r--r--fs/xfs/xfs_mount.c288
-rw-r--r--fs/xfs/xfs_mount.h39
-rw-r--r--fs/xfs/xfs_rename.c2
-rw-r--r--fs/xfs/xfs_rtalloc.c110
-rw-r--r--fs/xfs/xfs_rtalloc.h18
-rw-r--r--fs/xfs/xfs_rw.c1
-rw-r--r--fs/xfs/xfs_trans.c32
-rw-r--r--fs/xfs/xfs_trans.h46
-rw-r--r--fs/xfs/xfs_trans_ail.c2
-rw-r--r--fs/xfs/xfs_vfsops.c45
-rw-r--r--fs/xfs/xfs_vnodeops.c22
464 files changed, 8784 insertions, 12212 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index a9b6301a04fc..90419715c7e9 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -136,7 +136,8 @@ struct v9fs_fid *v9fs_fid_lookup(struct dentry *dentry)
}
/**
- * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and release it
+ * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and
+ * release it
* @dentry: dentry to look for fid in
*
* find a fid in the dentry and then clone to a new private fid
diff --git a/fs/9p/mux.c b/fs/9p/mux.c
index 147ceef8e537..c783874a9caf 100644
--- a/fs/9p/mux.c
+++ b/fs/9p/mux.c
@@ -256,7 +256,7 @@ static void v9fs_mux_poll_stop(struct v9fs_mux_data *m)
vpt->muxnum--;
if (!vpt->muxnum) {
dprintk(DEBUG_MUX, "destroy proc %p\n", vpt);
- send_sig(SIGKILL, vpt->task, 1);
+ kthread_stop(vpt->task);
vpt->task = NULL;
v9fs_mux_poll_task_num--;
}
@@ -438,11 +438,8 @@ static int v9fs_poll_proc(void *a)
vpt = a;
dprintk(DEBUG_MUX, "start %p %p\n", current, vpt);
- allow_signal(SIGKILL);
while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
- if (signal_pending(current))
- break;
list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) {
v9fs_poll_mux(m);
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index d9b561ba5e58..6ad6f192b6e4 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -53,6 +53,8 @@ enum {
Opt_uname, Opt_remotename,
/* Options that take no arguments */
Opt_legacy, Opt_nodevmap, Opt_unix, Opt_tcp, Opt_fd,
+ /* Cache options */
+ Opt_cache_loose,
/* Error token */
Opt_err
};
@@ -76,6 +78,8 @@ static match_table_t tokens = {
{Opt_fd, "fd"},
{Opt_legacy, "noextend"},
{Opt_nodevmap, "nodevmap"},
+ {Opt_cache_loose, "cache=loose"},
+ {Opt_cache_loose, "loose"},
{Opt_err, NULL}
};
@@ -106,6 +110,7 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
v9ses->debug = 0;
v9ses->rfdno = ~0;
v9ses->wfdno = ~0;
+ v9ses->cache = 0;
if (!options)
return;
@@ -121,7 +126,6 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
"integer field, but no integer?\n");
continue;
}
-
}
switch (token) {
case Opt_port:
@@ -169,6 +173,9 @@ static void v9fs_parse_options(char *options, struct v9fs_session_info *v9ses)
case Opt_nodevmap:
v9ses->nodev = 1;
break;
+ case Opt_cache_loose:
+ v9ses->cache = CACHE_LOOSE;
+ break;
default:
continue;
}
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index c134d104cb28..820bf5ca35d8 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -47,7 +47,7 @@ struct v9fs_session_info {
unsigned int afid; /* authentication fid */
unsigned int rfdno; /* read file descriptor number */
unsigned int wfdno; /* write file descriptor number */
-
+ unsigned int cache; /* cache mode */
char *name; /* user name to mount as */
char *remotename; /* name of remote hierarchy being mounted */
@@ -73,6 +73,13 @@ enum {
PROTO_FD,
};
+/* possible values of ->cache */
+/* eventually support loose, tight, time, session, default always none */
+enum {
+ CACHE_NONE, /* default */
+ CACHE_LOOSE, /* no consistency */
+};
+
extern struct dentry *v9fs_debugfs_root;
int v9fs_session_init(struct v9fs_session_info *, const char *, char *);
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 450b0c1b385e..8ada4c5c5d70 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -40,8 +40,10 @@
extern struct file_system_type v9fs_fs_type;
extern const struct address_space_operations v9fs_addr_operations;
extern const struct file_operations v9fs_file_operations;
+extern const struct file_operations v9fs_cached_file_operations;
extern const struct file_operations v9fs_dir_operations;
extern struct dentry_operations v9fs_dentry_operations;
+extern struct dentry_operations v9fs_cached_dentry_operations;
struct inode *v9fs_get_inode(struct super_block *sb, int mode);
ino_t v9fs_qid2ino(struct v9fs_qid *qid);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index cc24abf232d5..bed48fa96521 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -63,6 +63,8 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
int total = 0;
int result = 0;
+ dprintk(DEBUG_VFS, "\n");
+
buffer = kmap(page);
do {
if (count < rsize)
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 062daa6000ab..ddffd8aa902d 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -53,10 +53,31 @@
static int v9fs_dentry_delete(struct dentry *dentry)
{
dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
return 1;
}
/**
+ * v9fs_cached_dentry_delete - called when dentry refcount equals 0
+ * @dentry: dentry in question
+ *
+ * Only return 1 if our inode is invalid. Only non-synthetic files
+ * (ones without mtime == 0) should be calling this function.
+ *
+ */
+
+static int v9fs_cached_dentry_delete(struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry);
+
+ if(!inode)
+ return 1;
+
+ return 0;
+}
+
+/**
* v9fs_dentry_release - called when dentry is going to be freed
* @dentry: dentry that is being release
*
@@ -87,6 +108,11 @@ void v9fs_dentry_release(struct dentry *dentry)
}
}
+struct dentry_operations v9fs_cached_dentry_operations = {
+ .d_delete = v9fs_cached_dentry_delete,
+ .d_release = v9fs_dentry_release,
+};
+
struct dentry_operations v9fs_dentry_operations = {
.d_delete = v9fs_dentry_delete,
.d_release = v9fs_dentry_release,
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 9f17b0cacdd0..653dfa5b2531 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -79,6 +79,13 @@ int v9fs_file_open(struct inode *inode, struct file *file)
vfid->filp = file;
kfree(fcall);
+ if((vfid->qid.version) && (v9ses->cache)) {
+ dprintk(DEBUG_VFS, "cached");
+ /* enable cached file options */
+ if(file->f_op == &v9fs_file_operations)
+ file->f_op = &v9fs_cached_file_operations;
+ }
+
return 0;
Clunk_Fid:
@@ -110,7 +117,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
filemap_write_and_wait(inode->i_mapping);
- invalidate_inode_pages(&inode->i_data);
+ invalidate_mapping_pages(&inode->i_data, 0, -1);
}
return res;
@@ -234,10 +241,21 @@ v9fs_file_write(struct file *filp, const char __user * data,
total += result;
} while (count);
- invalidate_inode_pages2(inode->i_mapping);
+ invalidate_inode_pages2(inode->i_mapping);
return total;
}
+const struct file_operations v9fs_cached_file_operations = {
+ .llseek = generic_file_llseek,
+ .read = do_sync_read,
+ .aio_read = generic_file_aio_read,
+ .write = v9fs_file_write,
+ .open = v9fs_file_open,
+ .release = v9fs_dir_release,
+ .lock = v9fs_file_lock,
+ .mmap = generic_file_mmap,
+};
+
const struct file_operations v9fs_file_operations = {
.llseek = generic_file_llseek,
.read = v9fs_file_read,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9109ba1d6969..124a085d1f2e 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -41,10 +41,10 @@
#include "v9fs_vfs.h"
#include "fid.h"
-static struct inode_operations v9fs_dir_inode_operations;
-static struct inode_operations v9fs_dir_inode_operations_ext;
-static struct inode_operations v9fs_file_inode_operations;
-static struct inode_operations v9fs_symlink_inode_operations;
+static const struct inode_operations v9fs_dir_inode_operations;
+static const struct inode_operations v9fs_dir_inode_operations_ext;
+static const struct inode_operations v9fs_file_inode_operations;
+static const struct inode_operations v9fs_symlink_inode_operations;
/**
* unixmode2p9mode - convert unix mode bits to plan 9
@@ -504,7 +504,10 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
goto error;
}
- dentry->d_op = &v9fs_dentry_operations;
+ if(v9ses->cache)
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ else
+ dentry->d_op = &v9fs_dentry_operations;
d_instantiate(dentry, inode);
if (nd && nd->flags & LOOKUP_OPEN) {
@@ -585,17 +588,17 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
inode = NULL;
- goto clean_up_fids;
+ v9fs_fid_destroy(vfid);
+ goto error;
}
- dentry->d_op = &v9fs_dentry_operations;
+ if(v9ses->cache)
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ else
+ dentry->d_op = &v9fs_dentry_operations;
d_instantiate(dentry, inode);
return 0;
-clean_up_fids:
- if (vfid)
- v9fs_fid_destroy(vfid);
-
clean_up_dfid:
v9fs_fid_clunk(v9ses, dfid);
@@ -629,7 +632,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
sb = dir->i_sb;
v9ses = v9fs_inode2v9ses(dir);
- dentry->d_op = &v9fs_dentry_operations;
dirfid = v9fs_fid_lookup(dentry->d_parent);
if(IS_ERR(dirfid))
@@ -700,6 +702,10 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
fid->qid = fcall->params.rstat.stat.qid;
v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
+ if((fid->qid.version)&&(v9ses->cache))
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ else
+ dentry->d_op = &v9fs_dentry_operations;
d_add(dentry, inode);
kfree(fcall);
@@ -1187,7 +1193,10 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
goto free_vfid;
}
- dentry->d_op = &v9fs_dentry_operations;
+ if(v9ses->cache)
+ dentry->d_op = &v9fs_cached_dentry_operations;
+ else
+ dentry->d_op = &v9fs_dentry_operations;
d_instantiate(dentry, inode);
return 0;
@@ -1306,7 +1315,7 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
return retval;
}
-static struct inode_operations v9fs_dir_inode_operations_ext = {
+static const struct inode_operations v9fs_dir_inode_operations_ext = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
.symlink = v9fs_vfs_symlink,
@@ -1321,7 +1330,7 @@ static struct inode_operations v9fs_dir_inode_operations_ext = {
.setattr = v9fs_vfs_setattr,
};
-static struct inode_operations v9fs_dir_inode_operations = {
+static const struct inode_operations v9fs_dir_inode_operations = {
.create = v9fs_vfs_create,
.lookup = v9fs_vfs_lookup,
.unlink = v9fs_vfs_unlink,
@@ -1333,12 +1342,12 @@ static struct inode_operations v9fs_dir_inode_operations = {
.setattr = v9fs_vfs_setattr,
};
-static struct inode_operations v9fs_file_inode_operations = {
+static const struct inode_operations v9fs_file_inode_operations = {
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
};
-static struct inode_operations v9fs_symlink_inode_operations = {
+static const struct inode_operations v9fs_symlink_inode_operations = {
.readlink = v9fs_vfs_readlink,
.follow_link = v9fs_vfs_follow_link,
.put_link = v9fs_vfs_put_link,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 63320d4e15d2..0ec42f665457 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -45,7 +45,7 @@
#include "fid.h"
static void v9fs_clear_inode(struct inode *);
-static struct super_operations v9fs_super_ops;
+static const struct super_operations v9fs_super_ops;
/**
* v9fs_clear_inode - release an inode
@@ -263,7 +263,7 @@ v9fs_umount_begin(struct vfsmount *vfsmnt, int flags)
v9fs_session_cancel(v9ses);
}
-static struct super_operations v9fs_super_ops = {
+static const struct super_operations v9fs_super_ops = {
.statfs = simple_statfs,
.clear_inode = v9fs_clear_inode,
.show_options = v9fs_show_options,
diff --git a/fs/Kconfig b/fs/Kconfig
index 8cd2417a14db..3c4886b849f5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -426,7 +426,6 @@ config OCFS2_FS
select CONFIGFS_FS
select JBD
select CRC32
- select INET
help
OCFS2 is a general purpose extent based shared disk cluster file
system with many similarities to ext3. It supports 64 bit inode
@@ -675,12 +674,6 @@ config ZISOFS
necessary to create such a filesystem. Say Y here if you want to be
able to read such compressed CD-ROMs.
-config ZISOFS_FS
-# for fs/nls/Config.in
- tristate
- depends on ZISOFS
- default ISO9660_FS
-
config UDF_FS
tristate "UDF file system support"
help
@@ -1095,7 +1088,7 @@ config AFFS_FS
config ECRYPT_FS
tristate "eCrypt filesystem layer support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && KEYS && CRYPTO
+ depends on EXPERIMENTAL && KEYS && CRYPTO && NET
help
Encrypted filesystem that operates on the VFS layer. See
<file:Documentation/ecryptfs.txt> to learn more about
@@ -1196,32 +1189,6 @@ config EFS_FS
To compile the EFS file system support as a module, choose M here: the
module will be called efs.
-config JFFS_FS
- tristate "Journalling Flash File System (JFFS) support"
- depends on MTD && BLOCK && BROKEN
- help
- JFFS is the Journalling Flash File System developed by Axis
- Communications in Sweden, aimed at providing a crash/powerdown-safe
- file system for disk-less embedded devices. Further information is
- available at (<http://developer.axis.com/software/jffs/>).
-
- NOTE: This filesystem is deprecated and is scheduled for removal in
- 2.6.21. See Documentation/feature-removal-schedule.txt
-
-config JFFS_FS_VERBOSE
- int "JFFS debugging verbosity (0 = quiet, 3 = noisy)"
- depends on JFFS_FS
- default "0"
- help
- Determines the verbosity level of the JFFS debugging messages.
-
-config JFFS_PROC_FS
- bool "JFFS stats available in /proc filesystem"
- depends on JFFS_FS && PROC_FS
- help
- Enabling this option will cause statistics from mounted JFFS file systems
- to be made available to the user in the /proc/fs/jffs/ directory.
-
config JFFS2_FS
tristate "Journalling Flash File System v2 (JFFS2) support"
select CRC32
@@ -1871,20 +1838,14 @@ config CIFS
file servers such as Windows 2000 (including Windows 2003, NT 4
and Windows XP) as well by Samba (which provides excellent CIFS
server support for Linux and many other operating systems). Limited
- support for Windows ME and similar servers is provided as well.
- You must use the smbfs client filesystem to access older SMB servers
- such as OS/2 and DOS.
+ support for OS/2 and Windows ME and similar servers is provided as well.
The intent of the cifs module is to provide an advanced
- network file system client for mounting to CIFS compliant servers,
+ network file system client for mounting to CIFS compliant servers,
including support for dfs (hierarchical name space), secure per-user
session establishment, safe distributed caching (oplock), optional
- packet signing, Unicode and other internationalization improvements,
- and optional Winbind (nsswitch) integration. You do not need to enable
- cifs if running only a (Samba) server. It is possible to enable both
- smbfs and cifs (e.g. if you are using CIFS for accessing Windows 2003
- and Samba 3 servers, and smbfs for accessing old servers). If you need
- to mount to Samba or Windows from this machine, say Y.
+ packet signing, Unicode and other internationalization improvements.
+ If you need to mount to Samba or Windows from this machine, say Y.
config CIFS_STATS
bool "CIFS statistics"
@@ -1977,14 +1938,13 @@ config CIFS_EXPERIMENTAL
depends on CIFS && EXPERIMENTAL
help
Enables cifs features under testing. These features are
- experimental and currently include support for writepages
- (multipage writebehind performance improvements) and directory
- change notification ie fcntl(F_DNOTIFY) as well as some security
- improvements. Some also depend on setting at runtime the
- pseudo-file /proc/fs/cifs/Experimental (which is disabled by
- default). See the file fs/cifs/README for more details.
-
- If unsure, say N.
+ experimental and currently include DFS support and directory
+ change notification ie fcntl(F_DNOTIFY), as well as the upcall
+ mechanism which will be used for Kerberos session negotiation
+ and uid remapping. Some of these features also may depend on
+ setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
+ (which is disabled by default). See the file fs/cifs/README
+ for more details. If unsure, say N.
config CIFS_UPCALL
bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
diff --git a/fs/Makefile b/fs/Makefile
index b9ffa63f77fc..9edf4112bee0 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -94,7 +94,6 @@ obj-$(CONFIG_HPFS_FS) += hpfs/
obj-$(CONFIG_NTFS_FS) += ntfs/
obj-$(CONFIG_UFS_FS) += ufs/
obj-$(CONFIG_EFS_FS) += efs/
-obj-$(CONFIG_JFFS_FS) += jffs/
obj-$(CONFIG_JFFS2_FS) += jffs2/
obj-$(CONFIG_AFFS_FS) += affs/
obj-$(CONFIG_ROMFS_FS) += romfs/
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 29217ff36d44..936f2af39c43 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -84,7 +84,7 @@ void __adfs_error(struct super_block *sb, const char *function,
*/
/* dir_*.c */
-extern struct inode_operations adfs_dir_inode_operations;
+extern const struct inode_operations adfs_dir_inode_operations;
extern const struct file_operations adfs_dir_operations;
extern struct dentry_operations adfs_dentry_operations;
extern struct adfs_dir_ops adfs_f_dir_ops;
@@ -93,7 +93,7 @@ extern struct adfs_dir_ops adfs_fplus_dir_ops;
extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
/* file.c */
-extern struct inode_operations adfs_file_inode_operations;
+extern const struct inode_operations adfs_file_inode_operations;
extern const struct file_operations adfs_file_operations;
static inline __u32 signed_asl(__u32 val, signed int shift)
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index 2b8903893d3f..fc1a8dc64d78 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -295,7 +295,7 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
/*
* directories can handle most operations...
*/
-struct inode_operations adfs_dir_inode_operations = {
+const struct inode_operations adfs_dir_inode_operations = {
.lookup = adfs_lookup,
.setattr = adfs_notify_change,
};
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 6101ea679cb1..f544a2855923 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -36,6 +36,6 @@ const struct file_operations adfs_file_operations = {
.sendfile = generic_file_sendfile,
};
-struct inode_operations adfs_file_inode_operations = {
+const struct inode_operations adfs_file_inode_operations = {
.setattr = adfs_notify_change,
};
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 5023351a7afe..2e5f2c8371ee 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -254,7 +254,7 @@ static void destroy_inodecache(void)
kmem_cache_destroy(adfs_inode_cachep);
}
-static struct super_operations adfs_sops = {
+static const struct super_operations adfs_sops = {
.alloc_inode = adfs_alloc_inode,
.destroy_inode = adfs_destroy_inode,
.write_inode = adfs_write_inode,
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1dc8438ef389..7db2d287e9f3 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -188,9 +188,9 @@ extern void affs_dir_truncate(struct inode *);
/* jump tables */
-extern struct inode_operations affs_file_inode_operations;
-extern struct inode_operations affs_dir_inode_operations;
-extern struct inode_operations affs_symlink_inode_operations;
+extern const struct inode_operations affs_file_inode_operations;
+extern const struct inode_operations affs_dir_inode_operations;
+extern const struct inode_operations affs_symlink_inode_operations;
extern const struct file_operations affs_file_operations;
extern const struct file_operations affs_file_operations_ofs;
extern const struct file_operations affs_dir_operations;
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index cad3ee340063..6e3f282424b0 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -26,7 +26,7 @@ const struct file_operations affs_dir_operations = {
/*
* directories can handle most operations...
*/
-struct inode_operations affs_dir_inode_operations = {
+const struct inode_operations affs_dir_inode_operations = {
.create = affs_create,
.lookup = affs_lookup,
.link = affs_link,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 05b5e22de759..4aa8079e71be 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -38,7 +38,7 @@ const struct file_operations affs_file_operations = {
.sendfile = generic_file_sendfile,
};
-struct inode_operations affs_file_inode_operations = {
+const struct inode_operations affs_file_inode_operations = {
.truncate = affs_truncate,
.setattr = affs_notify_change,
};
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 44d439cb69f4..fce6848a4641 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -12,7 +12,7 @@
#include "affs.h"
-extern struct inode_operations affs_symlink_inode_operations;
+extern const struct inode_operations affs_symlink_inode_operations;
extern struct timezone sys_tz;
void
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 3de93e799949..a324045d8554 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -112,7 +112,7 @@ static void destroy_inodecache(void)
kmem_cache_destroy(affs_inode_cachep);
}
-static struct super_operations affs_sops = {
+static const struct super_operations affs_sops = {
.alloc_inode = affs_alloc_inode,
.destroy_inode = affs_destroy_inode,
.read_inode = affs_read_inode,
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index f802256a5933..41782539c907 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -70,7 +70,7 @@ const struct address_space_operations affs_symlink_aops = {
.readpage = affs_symlink_readpage,
};
-struct inode_operations affs_symlink_inode_operations = {
+const struct inode_operations affs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
diff --git a/fs/afs/cell.c b/fs/afs/cell.c
index bfc1fd22d5b1..1fc578372759 100644
--- a/fs/afs/cell.c
+++ b/fs/afs/cell.c
@@ -10,7 +10,6 @@
*/
#include <linux/module.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <rxrpc/peer.h>
#include <rxrpc/connection.h>
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 4acd04134055..b6dc2ebe47a8 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -12,7 +12,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -37,7 +36,7 @@ const struct file_operations afs_dir_file_operations = {
.readdir = afs_dir_readdir,
};
-struct inode_operations afs_dir_inode_operations = {
+const struct inode_operations afs_dir_inode_operations = {
.lookup = afs_dir_lookup,
.getattr = afs_inode_getattr,
#if 0 /* TODO */
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 2e8c42639eaa..b17634541f67 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -12,7 +12,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -30,7 +29,7 @@ static int afs_file_readpage(struct file *file, struct page *page);
static void afs_file_invalidatepage(struct page *page, unsigned long offset);
static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
-struct inode_operations afs_file_inode_operations = {
+const struct inode_operations afs_file_inode_operations = {
.getattr = afs_inode_getattr,
};
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 6f37754906c2..9d9bca6c28b5 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -16,7 +16,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index e88b3b65ae49..5151d5da2c2f 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -63,14 +63,14 @@ extern struct cachefs_index_def afs_cache_cell_index_def;
/*
* dir.c
*/
-extern struct inode_operations afs_dir_inode_operations;
+extern const struct inode_operations afs_dir_inode_operations;
extern const struct file_operations afs_dir_file_operations;
/*
* file.c
*/
extern const struct address_space_operations afs_fs_aops;
-extern struct inode_operations afs_file_inode_operations;
+extern const struct inode_operations afs_file_inode_operations;
#ifdef AFS_CACHING_SUPPORT
extern int afs_cache_get_page_cookie(struct page *page,
@@ -104,7 +104,7 @@ extern struct cachefs_netfs afs_cache_netfs;
/*
* mntpt.c
*/
-extern struct inode_operations afs_mntpt_inode_operations;
+extern const struct inode_operations afs_mntpt_inode_operations;
extern const struct file_operations afs_mntpt_file_operations;
extern struct afs_timer afs_mntpt_expiry_timer;
extern struct afs_timer_ops afs_mntpt_expiry_timer_ops;
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 913c689bdb35..f2704ba53857 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -12,7 +12,6 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
-#include <linux/sched.h>
#include <linux/completion.h>
#include <rxrpc/rxrpc.h>
#include <rxrpc/transport.h>
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 8f74e8450826..68495f0de7b3 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -12,7 +12,6 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
@@ -36,7 +35,7 @@ const struct file_operations afs_mntpt_file_operations = {
.open = afs_mntpt_open,
};
-struct inode_operations afs_mntpt_inode_operations = {
+const struct inode_operations afs_mntpt_inode_operations = {
.lookup = afs_mntpt_lookup,
.follow_link = afs_mntpt_follow_link,
.readlink = page_readlink,
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 86463ec9ccb4..ae6b85b1e484 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -9,7 +9,6 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 18d9b77ba40f..eb7e32349da3 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -56,7 +56,7 @@ struct file_system_type afs_fs_type = {
.fs_flags = FS_BINARY_MOUNTDATA,
};
-static struct super_operations afs_super_ops = {
+static const struct super_operations afs_super_ops = {
.statfs = simple_statfs,
.alloc_inode = afs_alloc_inode,
.drop_inode = generic_delete_inode,
diff --git a/fs/aio.c b/fs/aio.c
index 55991e4132a7..0b4ee0a5c83e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -132,7 +132,7 @@ static int aio_setup_ring(struct kioctx *ctx)
dprintk("attempting mmap of %lu bytes\n", info->mmap_size);
down_write(&ctx->mm->mmap_sem);
info->mmap_base = do_mmap(NULL, 0, info->mmap_size,
- PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE,
+ PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
0);
if (IS_ERR((void *)info->mmap_base)) {
up_write(&ctx->mm->mmap_sem);
@@ -211,11 +211,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
if ((unsigned long)nr_events > aio_max_nr)
return ERR_PTR(-EAGAIN);
- ctx = kmem_cache_alloc(kioctx_cachep, GFP_KERNEL);
+ ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
if (!ctx)
return ERR_PTR(-ENOMEM);
- memset(ctx, 0, sizeof(*ctx));
ctx->max_reqs = nr_events;
mm = ctx->mm = current->mm;
atomic_inc(&mm->mm_count);
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 906ba5ce2261..4ef544434b51 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -142,8 +142,8 @@ struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info
/* Operations structures */
-extern struct inode_operations autofs_root_inode_operations;
-extern struct inode_operations autofs_symlink_inode_operations;
+extern const struct inode_operations autofs_root_inode_operations;
+extern const struct inode_operations autofs_symlink_inode_operations;
extern const struct file_operations autofs_root_operations;
/* Initializing function */
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index f968d1342808..aa0b61ff8270 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -52,7 +52,7 @@ out_kill_sb:
static void autofs_read_inode(struct inode *inode);
-static struct super_operations autofs_sops = {
+static const struct super_operations autofs_sops = {
.read_inode = autofs_read_inode,
.statfs = simple_statfs,
};
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index e698c51d2b02..f2597205939d 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -32,7 +32,7 @@ const struct file_operations autofs_root_operations = {
.ioctl = autofs_root_ioctl,
};
-struct inode_operations autofs_root_inode_operations = {
+const struct inode_operations autofs_root_inode_operations = {
.lookup = autofs_root_lookup,
.unlink = autofs_root_unlink,
.symlink = autofs_root_symlink,
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
index c74f2eb65775..7ce9cb2c9ce2 100644
--- a/fs/autofs/symlink.c
+++ b/fs/autofs/symlink.c
@@ -20,7 +20,7 @@ static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-struct inode_operations autofs_symlink_inode_operations = {
+const struct inode_operations autofs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = autofs_follow_link
};
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 216b1a364ccb..6b4cec3f272f 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -168,11 +168,11 @@ int autofs4_expire_multi(struct super_block *, struct vfsmount *,
/* Operations structures */
-extern struct inode_operations autofs4_symlink_inode_operations;
-extern struct inode_operations autofs4_dir_inode_operations;
-extern struct inode_operations autofs4_root_inode_operations;
-extern struct inode_operations autofs4_indirect_root_inode_operations;
-extern struct inode_operations autofs4_direct_root_inode_operations;
+extern const struct inode_operations autofs4_symlink_inode_operations;
+extern const struct inode_operations autofs4_dir_inode_operations;
+extern const struct inode_operations autofs4_root_inode_operations;
+extern const struct inode_operations autofs4_indirect_root_inode_operations;
+extern const struct inode_operations autofs4_direct_root_inode_operations;
extern const struct file_operations autofs4_dir_operations;
extern const struct file_operations autofs4_root_operations;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index e8f6c5ad3e90..5e458e096ef6 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -196,7 +196,7 @@ static int autofs4_show_options(struct seq_file *m, struct vfsmount *mnt)
return 0;
}
-static struct super_operations autofs4_sops = {
+static const struct super_operations autofs4_sops = {
.statfs = simple_statfs,
.show_options = autofs4_show_options,
};
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 8d05b9f7578d..47fee96c2182 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -47,7 +47,7 @@ const struct file_operations autofs4_dir_operations = {
.readdir = autofs4_dir_readdir,
};
-struct inode_operations autofs4_indirect_root_inode_operations = {
+const struct inode_operations autofs4_indirect_root_inode_operations = {
.lookup = autofs4_lookup,
.unlink = autofs4_dir_unlink,
.symlink = autofs4_dir_symlink,
@@ -55,7 +55,7 @@ struct inode_operations autofs4_indirect_root_inode_operations = {
.rmdir = autofs4_dir_rmdir,
};
-struct inode_operations autofs4_direct_root_inode_operations = {
+const struct inode_operations autofs4_direct_root_inode_operations = {
.lookup = autofs4_lookup,
.unlink = autofs4_dir_unlink,
.mkdir = autofs4_dir_mkdir,
@@ -63,7 +63,7 @@ struct inode_operations autofs4_direct_root_inode_operations = {
.follow_link = autofs4_follow_link,
};
-struct inode_operations autofs4_dir_inode_operations = {
+const struct inode_operations autofs4_dir_inode_operations = {
.lookup = autofs4_lookup,
.unlink = autofs4_dir_unlink,
.symlink = autofs4_dir_symlink,
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index 2ea2c98fd84b..b4ea82934d2e 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -19,7 +19,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-struct inode_operations autofs4_symlink_inode_operations = {
+const struct inode_operations autofs4_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = autofs4_follow_link
};
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 869f5193ecc2..efeab2fab40b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -291,7 +291,7 @@ static int bad_inode_removexattr(struct dentry *dentry, const char *name)
return -EIO;
}
-static struct inode_operations bad_inode_ops =
+static const struct inode_operations bad_inode_ops =
{
.create = bad_inode_create,
.lookup = bad_inode_lookup,
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 481e59b9d91c..cc6cc8ed2e39 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -68,7 +68,7 @@ static const struct file_operations befs_dir_operations = {
.readdir = befs_readdir,
};
-static struct inode_operations befs_dir_inode_operations = {
+static const struct inode_operations befs_dir_inode_operations = {
.lookup = befs_lookup,
};
@@ -78,7 +78,7 @@ static const struct address_space_operations befs_aops = {
.bmap = befs_bmap,
};
-static struct inode_operations befs_symlink_inode_operations = {
+static const struct inode_operations befs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = befs_follow_link,
.put_link = befs_put_link,
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 31973bbbf057..130f6c66c5ba 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -48,12 +48,12 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
/* file.c */
-extern struct inode_operations bfs_file_inops;
+extern const struct inode_operations bfs_file_inops;
extern const struct file_operations bfs_file_operations;
extern const struct address_space_operations bfs_aops;
/* dir.c */
-extern struct inode_operations bfs_dir_inops;
+extern const struct inode_operations bfs_dir_inops;
extern const struct file_operations bfs_dir_operations;
#endif /* _FS_BFS_BFS_H */
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 2a746e688df5..097f1497f743 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -260,7 +260,7 @@ end_rename:
return error;
}
-struct inode_operations bfs_dir_inops = {
+const struct inode_operations bfs_dir_inops = {
.create = bfs_create,
.lookup = bfs_lookup,
.link = bfs_link,
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index a9164a87f8de..ef4d1fa04e65 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -164,4 +164,4 @@ const struct address_space_operations bfs_aops = {
.bmap = bfs_bmap,
};
-struct inode_operations bfs_file_inops;
+const struct inode_operations bfs_file_inops;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 134c99941a63..93d6219243ad 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -270,7 +270,7 @@ static void destroy_inodecache(void)
kmem_cache_destroy(bfs_inode_cachep);
}
-static struct super_operations bfs_sops = {
+static const struct super_operations bfs_sops = {
.alloc_inode = bfs_alloc_inode,
.destroy_inode = bfs_destroy_inode,
.read_inode = bfs_read_inode,
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 669dbe5b0317..51db1182b27e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -76,7 +76,8 @@ static struct linux_binfmt elf_format = {
.load_binary = load_elf_binary,
.load_shlib = load_elf_library,
.core_dump = elf_core_dump,
- .min_coredump = ELF_EXEC_PAGESIZE
+ .min_coredump = ELF_EXEC_PAGESIZE,
+ .hasvdso = 1
};
#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index a4d933a51208..5810aa1339fd 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -372,7 +372,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
down_write(&current->mm->mmap_sem);
current->mm->start_brk = do_mmap(NULL, 0, stack_size,
PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN,
0);
if (IS_ERR_VALUE(current->mm->start_brk)) {
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index ae8595d49856..7b0265d7f3a8 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -419,7 +419,7 @@ static int load_flat_file(struct linux_binprm * bprm,
unsigned long textpos = 0, datapos = 0, result;
unsigned long realdatastart = 0;
unsigned long text_len, data_len, bss_len, stack_len, flags;
- unsigned long memp = 0; /* for finding the brk area */
+ unsigned long len, reallen, memp = 0;
unsigned long extra, rlim;
unsigned long *reloc = 0, *rp;
struct inode *inode;
@@ -540,10 +540,18 @@ static int load_flat_file(struct linux_binprm * bprm,
goto err;
}
+ len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
down_write(&current->mm->mmap_sem);
- realdatastart = do_mmap(0, 0, data_len + extra +
- MAX_SHARED_LIBS * sizeof(unsigned long),
- PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
+ realdatastart = do_mmap(0, 0, len,
+ PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
+ /* Remap to use all available slack region space */
+ if (realdatastart && (realdatastart < (unsigned long)-4096)) {
+ reallen = ksize(realdatastart);
+ if (reallen > len) {
+ realdatastart = do_mremap(realdatastart, len,
+ reallen, MREMAP_FIXED, realdatastart);
+ }
+ }
up_write(&current->mm->mmap_sem);
if (realdatastart == 0 || realdatastart >= (unsigned long)-4096) {
@@ -584,11 +592,20 @@ static int load_flat_file(struct linux_binprm * bprm,
} else {
+ len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
down_write(&current->mm->mmap_sem);
- textpos = do_mmap(0, 0, text_len + data_len + extra +
- MAX_SHARED_LIBS * sizeof(unsigned long),
- PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
+ textpos = do_mmap(0, 0, len,
+ PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
+ /* Remap to use all available slack region space */
+ if (textpos && (textpos < (unsigned long) -4096)) {
+ reallen = ksize(textpos);
+ if (reallen > len) {
+ textpos = do_mremap(textpos, len, reallen,
+ MREMAP_FIXED, textpos);
+ }
+ }
up_write(&current->mm->mmap_sem);
+
if (!textpos || textpos >= (unsigned long) -4096) {
if (!textpos)
textpos = (unsigned long) -ENOMEM;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c2e08252af35..e6f57990b121 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -719,7 +719,7 @@ static const struct file_operations bm_status_operations = {
/* Superblock handling */
-static struct super_operations s_ops = {
+static const struct super_operations s_ops = {
.statfs = simple_statfs,
.clear_inode = bm_clear_inode,
};
diff --git a/fs/block_dev.c b/fs/block_dev.c
index fc7028b685f2..0c59b703e9d5 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -489,7 +489,7 @@ static void bdev_clear_inode(struct inode *inode)
spin_unlock(&bdev_lock);
}
-static struct super_operations bdev_sops = {
+static const struct super_operations bdev_sops = {
.statfs = simple_statfs,
.alloc_inode = bdev_alloc_inode,
.destroy_inode = bdev_destroy_inode,
diff --git a/fs/buffer.c b/fs/buffer.c
index 1ad674fd348c..f99c509697cd 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -78,6 +78,7 @@ EXPORT_SYMBOL(__lock_buffer);
void fastcall unlock_buffer(struct buffer_head *bh)
{
+ smp_mb__before_clear_bit();
clear_buffer_locked(bh);
smp_mb__after_clear_bit();
wake_up_bit(&bh->b_state, BH_Lock);
@@ -345,7 +346,7 @@ void invalidate_bdev(struct block_device *bdev, int destroy_dirty_buffers)
* We really want to use invalidate_inode_pages2() for
* that, but not until that's cleaned up.
*/
- invalidate_inode_pages(mapping);
+ invalidate_mapping_pages(mapping, 0, -1);
}
/*
@@ -1282,11 +1283,11 @@ static void bh_lru_install(struct buffer_head *bh)
* Look up the bh in this cpu's LRU. If it's there, move it to the head.
*/
static struct buffer_head *
-lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
+lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *ret = NULL;
struct bh_lru *lru;
- int i;
+ unsigned int i;
check_irqs_on();
bh_lru_lock();
@@ -1318,7 +1319,7 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, int size)
* NULL
*/
struct buffer_head *
-__find_get_block(struct block_device *bdev, sector_t block, int size)
+__find_get_block(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
@@ -1346,7 +1347,7 @@ EXPORT_SYMBOL(__find_get_block);
* attempt is failing. FIXME, perhaps?
*/
struct buffer_head *
-__getblk(struct block_device *bdev, sector_t block, int size)
+__getblk(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __find_get_block(bdev, block, size);
@@ -1360,7 +1361,7 @@ EXPORT_SYMBOL(__getblk);
/*
* Do async read-ahead on a buffer..
*/
-void __breadahead(struct block_device *bdev, sector_t block, int size)
+void __breadahead(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
if (likely(bh)) {
@@ -1380,7 +1381,7 @@ EXPORT_SYMBOL(__breadahead);
* It returns NULL if the block was unreadable.
*/
struct buffer_head *
-__bread(struct block_device *bdev, sector_t block, int size)
+__bread(struct block_device *bdev, sector_t block, unsigned size)
{
struct buffer_head *bh = __getblk(bdev, block, size);
@@ -1439,6 +1440,7 @@ static void discard_buffer(struct buffer_head * bh)
clear_buffer_req(bh);
clear_buffer_new(bh);
clear_buffer_delay(bh);
+ clear_buffer_unwritten(bh);
unlock_buffer(bh);
}
@@ -1822,6 +1824,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
continue;
}
if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
+ !buffer_unwritten(bh) &&
(block_start < from || block_end > to)) {
ll_rw_block(READ, 1, &bh);
*wait_bh++=bh;
@@ -2543,7 +2546,7 @@ int block_truncate_page(struct address_space *mapping,
if (PageUptodate(page))
set_buffer_uptodate(bh);
- if (!buffer_uptodate(bh) && !buffer_delay(bh)) {
+ if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) {
err = -EIO;
ll_rw_block(READ, 1, &bh);
wait_on_buffer(bh);
diff --git a/fs/char_dev.c b/fs/char_dev.c
index a885f46ca001..e6194e2b9bb9 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -108,6 +108,13 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
/* temporary */
if (major == 0) {
for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) {
+ /*
+ * Disallow the LANANA-assigned LOCAL/EXPERIMENTAL
+ * majors
+ */
+ if ((60 <= i && i <= 63) || (120 <= i && i <= 127) ||
+ (240 <= i && i <= 254))
+ continue;
if (chrdevs[i] == NULL)
break;
}
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index d04d2f7448d9..5fe13593b57f 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,6 +1,15 @@
Version 1.47
------------
Fix oops in list_del during mount caused by unaligned string.
+Fix file corruption which could occur on some large file
+copies caused by writepages page i/o completion bug.
+Seek to SEEK_END forces check for update of file size for non-cached
+files. Allow file size to be updated on remote extend of locally open,
+non-cached file. Fix reconnect to newer Samba servers (or other servers
+which support the CIFS Unix/POSIX extensions) so that we again tell the
+server the Unix/POSIX cifs capabilities which we support (SetFSInfo).
+Add experimental support for new POSIX Open/Mkdir (which returns
+stat information on the open, and allows setting the mode).
Version 1.46
------------
diff --git a/fs/cifs/README b/fs/cifs/README
index 432e515431c4..080c5eba112b 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -1,5 +1,5 @@
The CIFS VFS support for Linux supports many advanced network filesystem
-features such as heirarchical dfs like namespace, hardlinks, locking and more.
+features such as hierarchical dfs like namespace, hardlinks, locking and more.
It was designed to comply with the SNIA CIFS Technical Reference (which
supersedes the 1992 X/Open SMB Standard) as well as to perform best practice
practical interoperability with Windows 2000, Windows XP, Samba and equivalent
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index fc34c74ec4be..68372946dc92 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -128,3 +128,11 @@ negotiated size) and send larger write sizes to modern servers.
4) More exhaustively test against less common servers. More testing
against Windows 9x, Windows ME servers.
+
+DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too)
+
+mount check for unmatched uids - and uid override
+
+Add mount option for Linux extension disable per mount, and partial disable per mount (uid off, symlink/fifo/mknod on but what about posix acls?)
+
+Free threads at umount --force that are stuck on the sesSem
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 10c90294cd18..e8287c4c6eb3 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -64,7 +64,7 @@ extern struct task_struct * oplockThread; /* remove sparse warning */
struct task_struct * oplockThread = NULL;
extern struct task_struct * dnotifyThread; /* remove sparse warning */
struct task_struct * dnotifyThread = NULL;
-static struct super_operations cifs_super_ops;
+static const struct super_operations cifs_super_ops;
unsigned int CIFSMaxBufSize = CIFS_MAX_MSGSIZE;
module_param(CIFSMaxBufSize, int, 0);
MODULE_PARM_DESC(CIFSMaxBufSize,"Network buffer size (not including header). Default: 16384 Range: 8192 to 130048");
@@ -453,7 +453,7 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static struct super_operations cifs_super_ops = {
+static const struct super_operations cifs_super_ops = {
.read_inode = cifs_read_inode,
.put_super = cifs_put_super,
.statfs = cifs_statfs,
@@ -511,7 +511,15 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
{
/* origin == SEEK_END => we must revalidate the cached file length */
if (origin == SEEK_END) {
- int retval = cifs_revalidate(file->f_path.dentry);
+ int retval;
+
+ /* some applications poll for the file length in this strange
+ way so we must seek to end on non-oplocked files by
+ setting the revalidate time to zero */
+ if(file->f_path.dentry->d_inode)
+ CIFS_I(file->f_path.dentry->d_inode)->time = 0;
+
+ retval = cifs_revalidate(file->f_path.dentry);
if (retval < 0)
return (loff_t)retval;
}
@@ -525,7 +533,7 @@ static struct file_system_type cifs_fs_type = {
.kill_sb = kill_anon_super,
/* .fs_flags */
};
-struct inode_operations cifs_dir_inode_ops = {
+const struct inode_operations cifs_dir_inode_ops = {
.create = cifs_create,
.lookup = cifs_lookup,
.getattr = cifs_getattr,
@@ -547,7 +555,7 @@ struct inode_operations cifs_dir_inode_ops = {
#endif
};
-struct inode_operations cifs_file_inode_ops = {
+const struct inode_operations cifs_file_inode_ops = {
/* revalidate:cifs_revalidate, */
.setattr = cifs_setattr,
.getattr = cifs_getattr, /* do we need this anymore? */
@@ -561,7 +569,7 @@ struct inode_operations cifs_file_inode_ops = {
#endif
};
-struct inode_operations cifs_symlink_inode_ops = {
+const struct inode_operations cifs_symlink_inode_ops = {
.readlink = generic_readlink,
.follow_link = cifs_follow_link,
.put_link = cifs_put_link,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 8aa66dcf13bd..c97c08eb481a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -36,13 +36,13 @@ extern const struct address_space_operations cifs_addr_ops;
extern const struct address_space_operations cifs_addr_ops_smallbuf;
/* Functions related to super block operations */
-/* extern struct super_operations cifs_super_ops;*/
+/* extern const struct super_operations cifs_super_ops;*/
extern void cifs_read_inode(struct inode *);
extern void cifs_delete_inode(struct inode *);
/* extern void cifs_write_inode(struct inode *); *//* BB not needed yet */
/* Functions related to inodes */
-extern struct inode_operations cifs_dir_inode_ops;
+extern const struct inode_operations cifs_dir_inode_ops;
extern int cifs_create(struct inode *, struct dentry *, int,
struct nameidata *);
extern struct dentry * cifs_lookup(struct inode *, struct dentry *,
@@ -58,8 +58,8 @@ extern int cifs_revalidate(struct dentry *);
extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int cifs_setattr(struct dentry *, struct iattr *);
-extern struct inode_operations cifs_file_inode_ops;
-extern struct inode_operations cifs_symlink_inode_ops;
+extern const struct inode_operations cifs_file_inode_ops;
+extern const struct inode_operations cifs_symlink_inode_ops;
/* Functions related to files and directories */
extern const struct file_operations cifs_file_ops;
@@ -100,5 +100,5 @@ extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t);
extern ssize_t cifs_listxattr(struct dentry *, char *, size_t);
extern int cifs_ioctl (struct inode * inode, struct file * filep,
unsigned int command, unsigned long arg);
-#define CIFS_VERSION "1.47"
+#define CIFS_VERSION "1.48"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 068ef51edbf7..7d9505491b16 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1,7 +1,7 @@
/*
* fs/cifs/cifspdu.h
*
- * Copyright (c) International Business Machines Corp., 2002,2005
+ * Copyright (c) International Business Machines Corp., 2002,2007
* Author(s): Steve French (sfrench@us.ibm.com)
*
* This library is free software; you can redistribute it and/or modify
@@ -544,7 +544,8 @@ typedef union smb_com_session_setup_andx {
/* unsigned char * NativeOS; */
/* unsigned char * NativeLanMan; */
/* unsigned char * PrimaryDomain; */
- } __attribute__((packed)) resp; /* NTLM response with or without extended sec*/
+ } __attribute__((packed)) resp; /* NTLM response
+ (with or without extended sec) */
struct { /* request format */
struct smb_hdr hdr; /* wct = 10 */
@@ -795,6 +796,8 @@ typedef struct smb_com_openx_rsp {
__u16 ByteCount;
} __attribute__((packed)) OPENX_RSP;
+/* For encoding of POSIX Open Request - see trans2 function 0x209 data struct */
+
/* Legacy write request for older servers */
typedef struct smb_com_writex_req {
struct smb_hdr hdr; /* wct = 12 */
@@ -1352,11 +1355,13 @@ struct smb_t2_rsp {
#define SMB_QUERY_FILE_UNIX_BASIC 0x200
#define SMB_QUERY_FILE_UNIX_LINK 0x201
#define SMB_QUERY_POSIX_ACL 0x204
-#define SMB_QUERY_XATTR 0x205
+#define SMB_QUERY_XATTR 0x205 /* e.g. system EA name space */
#define SMB_QUERY_ATTR_FLAGS 0x206 /* append,immutable etc. */
#define SMB_QUERY_POSIX_PERMISSION 0x207
#define SMB_QUERY_POSIX_LOCK 0x208
-/* #define SMB_POSIX_OPEN 0x209 */
+/* #define SMB_POSIX_OPEN 0x209 */
+/* #define SMB_POSIX_UNLINK 0x20a */
+#define SMB_QUERY_FILE__UNIX_INFO2 0x20b
#define SMB_QUERY_FILE_INTERNAL_INFO 0x3ee
#define SMB_QUERY_FILE_ACCESS_INFO 0x3f0
#define SMB_QUERY_FILE_NAME_INFO2 0x3f1 /* 0x30 bytes */
@@ -1377,8 +1382,10 @@ struct smb_t2_rsp {
#define SMB_SET_ATTR_FLAGS 0x206 /* append, immutable etc. */
#define SMB_SET_POSIX_LOCK 0x208
#define SMB_POSIX_OPEN 0x209
+#define SMB_POSIX_UNLINK 0x20a
+#define SMB_SET_FILE_UNIX_INFO2
#define SMB_SET_FILE_BASIC_INFO2 0x3ec
-#define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo level too */
+#define SMB_SET_FILE_RENAME_INFORMATION 0x3f2 /* BB check if qpathinfo too */
#define SMB_FILE_ALL_INFO2 0x3fa
#define SMB_SET_FILE_ALLOCATION_INFO2 0x3fb
#define SMB_SET_FILE_END_OF_FILE_INFO2 0x3fc
@@ -1428,7 +1435,7 @@ typedef struct smb_com_transaction2_qpi_rsp {
struct smb_hdr hdr; /* wct = 10 + SetupCount */
struct trans2_resp t2;
__u16 ByteCount;
- __u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */
+ __u16 Reserved2; /* parameter word is present for infolevels > 100 */
} __attribute__((packed)) TRANSACTION2_QPI_RSP;
typedef struct smb_com_transaction2_spi_req {
@@ -1461,7 +1468,7 @@ typedef struct smb_com_transaction2_spi_rsp {
struct smb_hdr hdr; /* wct = 10 + SetupCount */
struct trans2_resp t2;
__u16 ByteCount;
- __u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */
+ __u16 Reserved2; /* parameter word is present for infolevels > 100 */
} __attribute__((packed)) TRANSACTION2_SPI_RSP;
struct set_file_rename {
@@ -1627,6 +1634,7 @@ typedef struct smb_com_transaction2_fnext_rsp_parms {
#define SMB_QUERY_FS_ATTRIBUTE_INFO 0x105
#define SMB_QUERY_CIFS_UNIX_INFO 0x200
#define SMB_QUERY_POSIX_FS_INFO 0x201
+#define SMB_QUERY_POSIX_WHO_AM_I 0x202
#define SMB_QUERY_LABEL_INFO 0x3ea
#define SMB_QUERY_FS_QUOTA_INFO 0x3ee
#define SMB_QUERY_FS_FULL_SIZE_INFO 0x3ef
@@ -1659,9 +1667,21 @@ typedef struct smb_com_transaction_qfsi_rsp {
struct smb_hdr hdr; /* wct = 10 + SetupCount */
struct trans2_resp t2;
__u16 ByteCount;
- __u8 Pad; /* may be three bytes *//* followed by data area */
+ __u8 Pad; /* may be three bytes? *//* followed by data area */
} __attribute__((packed)) TRANSACTION2_QFSI_RSP;
+typedef struct whoami_rsp_data { /* Query level 0x202 */
+ __u32 flags; /* 0 = Authenticated user 1 = GUEST */
+ __u32 mask; /* which flags bits server understands ie 0x0001 */
+ __u64 unix_user_id;
+ __u64 unix_user_gid;
+ __u32 number_of_supplementary_gids; /* may be zero */
+ __u32 number_of_sids; /* may be zero */
+ __u32 length_of_sid_array; /* in bytes - may be zero */
+ __u32 pad; /* reserved - MBZ */
+ /* __u64 gid_array[0]; */ /* may be empty */
+ /* __u8 * psid_list */ /* may be empty */
+} __attribute__((packed)) WHOAMI_RSP_DATA;
/* SETFSInfo Levels */
#define SMB_SET_CIFS_UNIX_INFO 0x200
@@ -1858,8 +1878,11 @@ typedef struct {
#define CIFS_UNIX_XATTR_CAP 0x00000004 /* support new namespace */
#define CIFS_UNIX_EXTATTR_CAP 0x00000008 /* support chattr/chflag */
#define CIFS_UNIX_POSIX_PATHNAMES_CAP 0x00000010 /* Allow POSIX path chars */
+#define CIFS_UNIX_POSIX_PATH_OPS_CAP 0x00000020 /* Allow new POSIX path based
+ calls including posix open
+ and posix unlink */
#ifdef CONFIG_CIFS_POSIX
-#define CIFS_UNIX_CAP_MASK 0x0000001b
+#define CIFS_UNIX_CAP_MASK 0x0000003b
#else
#define CIFS_UNIX_CAP_MASK 0x00000013
#endif /* CONFIG_CIFS_POSIX */
@@ -1946,7 +1969,7 @@ typedef struct { /* data block encoding of response to level 263 QPathInfo */
__le32 AlignmentRequirement;
__le32 FileNameLength;
char FileName[1];
-} __attribute__((packed)) FILE_ALL_INFO; /* level 0x107 QPathInfo */
+} __attribute__((packed)) FILE_ALL_INFO; /* level 0x107 QPathInfo */
/* defines for enumerating possible values of the Unix type field below */
#define UNIX_FILE 0
@@ -1970,11 +1993,11 @@ typedef struct {
__u64 UniqueId;
__le64 Permissions;
__le64 Nlinks;
-} __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */
+} __attribute__((packed)) FILE_UNIX_BASIC_INFO; /* level 0x200 QPathInfo */
typedef struct {
char LinkDest[1];
-} __attribute__((packed)) FILE_UNIX_LINK_INFO; /* level 0x201 QPathInfo */
+} __attribute__((packed)) FILE_UNIX_LINK_INFO; /* level 0x201 QPathInfo */
/* The following three structures are needed only for
setting time to NT4 and some older servers via
@@ -2011,7 +2034,7 @@ typedef struct {
__le64 ChangeTime;
__le32 Attributes;
__u32 Pad;
-} __attribute__((packed)) FILE_BASIC_INFO; /* size info, level 0x101 */
+} __attribute__((packed)) FILE_BASIC_INFO; /* size info, level 0x101 */
struct file_allocation_info {
__le64 AllocationSize; /* Note old Samba srvr rounds this up too much */
@@ -2020,7 +2043,7 @@ struct file_allocation_info {
struct file_end_of_file_info {
__le64 FileSize; /* offset to end of file */
-} __attribute__((packed)); /* size info, level 0x104 for set, 0x106 for query */
+} __attribute__((packed)); /* size info, level 0x104 for set, 0x106 for query */
struct file_alt_name_info {
__u8 alt_name[1];
@@ -2075,6 +2098,19 @@ struct cifs_posix_acl { /* access conrol list (ACL) */
/* end of POSIX ACL definitions */
+typedef struct {
+ __u32 OpenFlags; /* same as NT CreateX */
+ __u32 PosixOpenFlags;
+ __u32 Mode;
+ __u16 Level; /* reply level requested (see QPathInfo levels) */
+ __u16 Pad; /* reserved - MBZ */
+} __attribute__((packed)) OPEN_PSX_REQ; /* level 0x209 SetPathInfo data */
+
+typedef struct {
+ /* reply varies based on requested level */
+} __attribute__((packed)) OPEN_PSX_RSP; /* level 0x209 SetPathInfo data */
+
+
struct file_internal_info {
__u64 UniqueId; /* inode number */
} __attribute__((packed)); /* level 0x3ee */
@@ -2238,7 +2274,8 @@ struct data_blob {
1) PosixCreateX - to set and return the mode, inode#, device info and
perhaps add a CreateDevice - to create Pipes and other special .inodes
Also note POSIX open flags
- 2) Close - to return the last write time to do cache across close more safely
+ 2) Close - to return the last write time to do cache across close
+ more safely
3) FindFirst return unique inode number - what about resume key, two
forms short (matches readdir) and full (enough info to cache inodes)
4) Mkdir - set mode
@@ -2273,7 +2310,8 @@ struct data_blob {
TRANSACTION2 (18 cases)
SMB_SET_FILE_END_OF_FILE_INFO2 SMB_SET_PATH_END_OF_FILE_INFO2
(BB verify that never need to set allocation size)
- SMB_SET_FILE_BASIC_INFO2 (setting times - BB can it be done via Unix ext?)
+ SMB_SET_FILE_BASIC_INFO2 (setting times - BB can it be done via
+ Unix ext?)
COPY (note support for copy across directories) - FUTURE, OPTIONAL
setting/getting OS/2 EAs - FUTURE (BB can this handle
@@ -2293,13 +2331,13 @@ struct data_blob {
T2 QUERY_PATH_INFO (SMB_QUERY_FILE_UNIX_BASIC) - BB check for missing inode fields
Actually need QUERY_FILE_UNIX_INFO since has inode num
BB what about a) blksize/blkbits/blocks
- b) i_version
- c) i_rdev
- d) notify mask?
- e) generation
- f) size_seqcount
+ b) i_version
+ c) i_rdev
+ d) notify mask?
+ e) generation
+ f) size_seqcount
T2 FIND_FIRST/FIND_NEXT FIND_FILE_UNIX
- TRANS2_GET_DFS_REFERRAL - OPTIONAL but recommended
+ TRANS2_GET_DFS_REFERRAL - OPTIONAL but recommended
T2_QFS_INFO QueryDevice/AttributeInfo - OPTIONAL
@@ -2338,7 +2376,7 @@ typedef struct file_xattr_info {
__u32 xattr_value_len;
char xattr_name[0];
/* followed by xattr_value[xattr_value_len], no pad */
-} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute, info level 0x205 */
+} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute, info level 0x205 */
/* flags for chattr command */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f1f8225102f0..6148b82170c4 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -23,6 +23,7 @@
#include <linux/nls.h>
struct statfs;
+struct smb_vol;
/*
*****************************************************************
@@ -57,7 +58,7 @@ extern int SendReceiveBlockingLock(const unsigned int /* xid */ ,
int * /* bytes returned */);
extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length);
extern int is_valid_oplock_break(struct smb_hdr *smb, struct TCP_Server_Info *);
-extern int is_size_safe_to_change(struct cifsInodeInfo *);
+extern int is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *);
extern unsigned int smbCalcSize(struct smb_hdr *ptr);
extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
@@ -147,6 +148,8 @@ extern int get_dfs_path(int xid, struct cifsSesInfo *pSesInfo,
unsigned int *pnum_referrals,
unsigned char ** preferrals,
int remap);
+extern void reset_cifs_unix_caps(int xid, struct cifsTconInfo *tcon,
+ struct super_block * sb, struct smb_vol * vol);
extern int CIFSSMBQFSInfo(const int xid, struct cifsTconInfo *tcon,
struct kstatfs *FSData);
extern int SMBOldQFSInfo(const int xid, struct cifsTconInfo *tcon,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 472e33e0f3cf..24364106b8f9 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -158,9 +158,15 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
nls_codepage);
if(!rc && (tcon->tidStatus == CifsNeedReconnect)) {
mark_open_files_invalid(tcon);
- rc = CIFSTCon(0, tcon->ses, tcon->treeName, tcon
- , nls_codepage);
+ rc = CIFSTCon(0, tcon->ses, tcon->treeName,
+ tcon, nls_codepage);
up(&tcon->ses->sesSem);
+ /* tell server which Unix caps we support */
+ if (tcon->ses->capabilities & CAP_UNIX)
+ reset_cifs_unix_caps(0 /* no xid */,
+ tcon,
+ NULL /* we do not know sb */,
+ NULL /* no vol info */);
/* BB FIXME add code to check if wsize needs
update due to negotiated smb buffer size
shrinking */
@@ -298,6 +304,12 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon,
rc = CIFSTCon(0, tcon->ses, tcon->treeName,
tcon, nls_codepage);
up(&tcon->ses->sesSem);
+ /* tell server which Unix caps we support */
+ if (tcon->ses->capabilities & CAP_UNIX)
+ reset_cifs_unix_caps(0 /* no xid */,
+ tcon,
+ NULL /* do not know sb */,
+ NULL /* no vol info */);
/* BB FIXME add code to check if wsize needs
update due to negotiated smb buffer size
shrinking */
@@ -2812,10 +2824,10 @@ GetExtAttrOut:
/* security id for everyone */
-const static struct cifs_sid sid_everyone =
+static const struct cifs_sid sid_everyone =
{1, 1, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0}};
/* group users */
-const static struct cifs_sid sid_user =
+static const struct cifs_sid sid_user =
{1, 2 , {0, 0, 0, 0, 0, 5}, {32, 545, 0, 0}};
/* Convert CIFS ACL to POSIX form */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 2caca06b4bae..20ba7dcc9959 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1613,6 +1613,76 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
return rc;
}
+void reset_cifs_unix_caps(int xid, struct cifsTconInfo * tcon,
+ struct super_block * sb, struct smb_vol * vol_info)
+{
+ /* if we are reconnecting then should we check to see if
+ * any requested capabilities changed locally e.g. via
+ * remount? We can not do much about it here
+ * if they have (even if we could detect it by the following).
+ * Perhaps we could add a backpointer to an array of sb from tcon,
+ * or if we change so that all mounts of the same share use the
+ * same sb (as NFS does) then we only need one backpointer to sb.
+ * What if we wanted to mount the server share twice, once with
+ * and once without posixacls or posix paths? */
+ __u64 saved_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
+
+
+ if(!CIFSSMBQFSUnixInfo(xid, tcon)) {
+ __u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
+
+ /* check for reconnect case in which we do not
+ want to change the mount behavior if we can avoid it */
+ if(vol_info == NULL) {
+ /* turn off POSIX ACL and PATHNAMES if not set
+ originally at mount time */
+ if ((saved_cap & CIFS_UNIX_POSIX_ACL_CAP) == 0)
+ cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
+ if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0)
+ cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
+
+
+
+
+ }
+
+ cap &= CIFS_UNIX_CAP_MASK;
+ if(vol_info && vol_info->no_psx_acl)
+ cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
+ else if(CIFS_UNIX_POSIX_ACL_CAP & cap) {
+ cFYI(1,("negotiated posix acl support"));
+ if(sb)
+ sb->s_flags |= MS_POSIXACL;
+ }
+
+ if(vol_info && vol_info->posix_paths == 0)
+ cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
+ else if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
+ cFYI(1,("negotiate posix pathnames"));
+ if(sb)
+ CIFS_SB(sb)->mnt_cifs_flags |=
+ CIFS_MOUNT_POSIX_PATHS;
+ }
+
+ cFYI(1,("Negotiate caps 0x%x",(int)cap));
+#ifdef CONFIG_CIFS_DEBUG2
+ if(cap & CIFS_UNIX_FCNTL_CAP)
+ cFYI(1,("FCNTL cap"));
+ if(cap & CIFS_UNIX_EXTATTR_CAP)
+ cFYI(1,("EXTATTR cap"));
+ if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
+ cFYI(1,("POSIX path cap"));
+ if(cap & CIFS_UNIX_XATTR_CAP)
+ cFYI(1,("XATTR cap"));
+ if(cap & CIFS_UNIX_POSIX_ACL_CAP)
+ cFYI(1,("POSIX ACL cap"));
+#endif /* CIFS_DEBUG2 */
+ if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
+ cFYI(1,("setting capabilities failed"));
+ }
+ }
+}
+
int
cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
char *mount_data, const char *devname)
@@ -1928,20 +1998,25 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
if (tcon == NULL)
rc = -ENOMEM;
else {
- /* check for null share name ie connect to dfs root */
+ /* check for null share name ie connecting to
+ * dfs root */
- /* BB check if this works for exactly length three strings */
+ /* BB check if this works for exactly length
+ * three strings */
if ((strchr(volume_info.UNC + 3, '\\') == NULL)
&& (strchr(volume_info.UNC + 3, '/') ==
NULL)) {
rc = connect_to_dfs_path(xid, pSesInfo,
- "", cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ "", cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
kfree(volume_info.UNC);
FreeXid(xid);
return -ENODEV;
} else {
+ /* BB Do we need to wrap sesSem around
+ * this TCon call and Unix SetFS as
+ * we do on SessSetup and reconnect? */
rc = CIFSTCon(xid, pSesInfo,
volume_info.UNC,
tcon, cifs_sb->local_nls);
@@ -1962,6 +2037,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
sb->s_maxbytes = (u64) 1 << 31; /* 2 GB */
}
+ /* BB FIXME fix time_gran to be larger for LANMAN sessions */
sb->s_time_gran = 100;
/* on error free sesinfo and tcon struct if needed */
@@ -2006,45 +2082,11 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
/* do not care if following two calls succeed - informational */
CIFSSMBQFSDeviceInfo(xid, tcon);
CIFSSMBQFSAttributeInfo(xid, tcon);
-
- if (tcon->ses->capabilities & CAP_UNIX) {
- if(!CIFSSMBQFSUnixInfo(xid, tcon)) {
- __u64 cap =
- le64_to_cpu(tcon->fsUnixInfo.Capability);
- cap &= CIFS_UNIX_CAP_MASK;
- if(volume_info.no_psx_acl)
- cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
- else if(CIFS_UNIX_POSIX_ACL_CAP & cap) {
- cFYI(1,("negotiated posix acl support"));
- sb->s_flags |= MS_POSIXACL;
- }
-
- if(volume_info.posix_paths == 0)
- cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
- else if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
- cFYI(1,("negotiate posix pathnames"));
- cifs_sb->mnt_cifs_flags |=
- CIFS_MOUNT_POSIX_PATHS;
- }
-
- cFYI(1,("Negotiate caps 0x%x",(int)cap));
-#ifdef CONFIG_CIFS_DEBUG2
- if(cap & CIFS_UNIX_FCNTL_CAP)
- cFYI(1,("FCNTL cap"));
- if(cap & CIFS_UNIX_EXTATTR_CAP)
- cFYI(1,("EXTATTR cap"));
- if(cap & CIFS_UNIX_POSIX_PATHNAMES_CAP)
- cFYI(1,("POSIX path cap"));
- if(cap & CIFS_UNIX_XATTR_CAP)
- cFYI(1,("XATTR cap"));
- if(cap & CIFS_UNIX_POSIX_ACL_CAP)
- cFYI(1,("POSIX ACL cap"));
-#endif /* CIFS_DEBUG2 */
- if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
- cFYI(1,("setting capabilities failed"));
- }
- }
- }
+
+ /* tell server which Unix caps we support */
+ if (tcon->ses->capabilities & CAP_UNIX)
+ reset_cifs_unix_caps(xid, tcon, sb, &volume_info);
+
if (!(tcon->ses->capabilities & CAP_LARGE_WRITE_X))
cifs_sb->wsize = min(cifs_sb->wsize,
(tcon->ses->server->maxBuf -
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8a49b2e77d37..07ff9351e9ee 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1146,7 +1146,7 @@ static int cifs_writepages(struct address_space *mapping,
pgoff_t end;
pgoff_t index;
int range_whole = 0;
- struct kvec iov[32];
+ struct kvec * iov;
int len;
int n_iov = 0;
pgoff_t next;
@@ -1171,15 +1171,21 @@ static int cifs_writepages(struct address_space *mapping,
if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
if(cifs_sb->tcon->ses->server->secMode &
(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
- if(!experimEnabled)
+ if(!experimEnabled)
return generic_writepages(mapping, wbc);
+ iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
+ if(iov == NULL)
+ return generic_writepages(mapping, wbc);
+
+
/*
* BB: Is this meaningful for a non-block-device file system?
* If it is, we should test it again after we do I/O
*/
if (wbc->nonblocking && bdi_write_congested(bdi)) {
wbc->encountered_congestion = 1;
+ kfree(iov);
return 0;
}
@@ -1345,7 +1351,7 @@ retry:
mapping->writeback_index = index;
FreeXid(xid);
-
+ kfree(iov);
return rc;
}
@@ -1948,7 +1954,7 @@ static int cifs_readpage(struct file *file, struct page *page)
refreshing the inode only on increases in the file size
but this is tricky to do without racing with writebehind
page caching in the current Linux kernel design */
-int is_size_safe_to_change(struct cifsInodeInfo *cifsInode)
+int is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
{
struct cifsFileInfo *open_file = NULL;
@@ -1970,6 +1976,9 @@ int is_size_safe_to_change(struct cifsInodeInfo *cifsInode)
return 1;
}
+ if(i_size_read(&cifsInode->vfs_inode) < end_of_file)
+ return 1;
+
return 0;
} else
return 1;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index c4fa91b8b62f..3f5bc83dc3d1 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -140,7 +140,7 @@ int cifs_get_inode_info_unix(struct inode **pinode,
inode->i_gid = le64_to_cpu(findData.Gid);
inode->i_nlink = le64_to_cpu(findData.Nlinks);
- if (is_size_safe_to_change(cifsInfo)) {
+ if (is_size_safe_to_change(cifsInfo, end_of_file)) {
/* can not safely change the file size here if the
client is writing to it due to potential races */
@@ -491,8 +491,8 @@ int cifs_get_inode_info(struct inode **pinode,
/* BB add code here -
validate if device or weird share or device type? */
}
- if (is_size_safe_to_change(cifsInfo)) {
- /* can not safely change the file size here if the
+ if (is_size_safe_to_change(cifsInfo, le64_to_cpu(pfindData->EndOfFile))) {
+ /* can not safely shrink the file size here if the
client is writing to it due to potential races */
i_size_write(inode,le64_to_cpu(pfindData->EndOfFile));
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 99dfb5337e31..c6220bd27165 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -156,9 +156,9 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
tmp_inode->i_atime = cnvrtDosUnixTm(
le16_to_cpu(pfindData->LastAccessDate),
le16_to_cpu(pfindData->LastAccessTime));
- tmp_inode->i_ctime = cnvrtDosUnixTm(
- le16_to_cpu(pfindData->LastWriteDate),
- le16_to_cpu(pfindData->LastWriteTime));
+ tmp_inode->i_ctime = cnvrtDosUnixTm(
+ le16_to_cpu(pfindData->LastWriteDate),
+ le16_to_cpu(pfindData->LastWriteTime));
AdjustForTZ(cifs_sb->tcon, tmp_inode);
attr = le16_to_cpu(pfindData->Attributes);
allocation_size = le32_to_cpu(pfindData->AllocationSize);
@@ -222,7 +222,7 @@ static void fill_in_inode(struct inode *tmp_inode, int new_buf_type,
atomic_set(&cifsInfo->inUse, 1);
}
- if (is_size_safe_to_change(cifsInfo)) {
+ if (is_size_safe_to_change(cifsInfo, end_of_file)) {
/* can not safely change the file size here if the
client is writing to it due to potential races */
i_size_write(tmp_inode, end_of_file);
@@ -351,10 +351,10 @@ static void unix_fill_in_inode(struct inode *tmp_inode,
tmp_inode->i_gid = le64_to_cpu(pfindData->Gid);
tmp_inode->i_nlink = le64_to_cpu(pfindData->Nlinks);
- if (is_size_safe_to_change(cifsInfo)) {
+ if (is_size_safe_to_change(cifsInfo, end_of_file)) {
/* can not safely change the file size here if the
client is writing to it due to potential races */
- i_size_write(tmp_inode,end_of_file);
+ i_size_write(tmp_inode, end_of_file);
/* 512 bytes (2**9) is the fake blocksize that must be used */
/* for this calculation, not the real blocksize */
diff --git a/fs/cifs/smbdes.c b/fs/cifs/smbdes.c
index 7a1b2b961ec8..1b1daf63f062 100644
--- a/fs/cifs/smbdes.c
+++ b/fs/cifs/smbdes.c
@@ -196,7 +196,7 @@ dohash(char *out, char *in, char *key, int forw)
char c[28];
char d[28];
char *cd;
- char ki[16][48];
+ char (*ki)[48];
char *pd1;
char l[32], r[32];
char *rl;
@@ -206,6 +206,12 @@ dohash(char *out, char *in, char *key, int forw)
if(pk1 == NULL)
return;
+ ki = kmalloc(16*48, GFP_KERNEL);
+ if(ki == NULL) {
+ kfree(pk1);
+ return;
+ }
+
cd = pk1 + 56;
pd1= cd + 56;
rl = pd1 + 64;
@@ -243,6 +249,7 @@ dohash(char *out, char *in, char *key, int forw)
er = kmalloc(48+48+32+32+32, GFP_KERNEL);
if(er == NULL) {
kfree(pk1);
+ kfree(ki);
return;
}
erk = er+48;
@@ -290,6 +297,7 @@ dohash(char *out, char *in, char *key, int forw)
permute(out, rl, perm6, 64);
kfree(pk1);
+ kfree(ki);
}
static void
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c
index 4c9fecbfa91f..28c872747f81 100644
--- a/fs/coda/cnode.c
+++ b/fs/coda/cnode.c
@@ -16,7 +16,7 @@ static inline int coda_fideq(struct CodaFid *fid1, struct CodaFid *fid2)
return memcmp(fid1, fid2, sizeof(*fid1)) == 0;
}
-static struct inode_operations coda_symlink_inode_operations = {
+static const struct inode_operations coda_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 0c6f7f3b3dd7..9ddf5ed62162 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -66,7 +66,7 @@ static struct dentry_operations coda_dentry_operations =
.d_delete = coda_dentry_delete,
};
-struct inode_operations coda_dir_inode_operations =
+const struct inode_operations coda_dir_inode_operations =
{
.create = coda_create,
.lookup = coda_lookup,
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 01395defed85..614175a3b02e 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -90,7 +90,7 @@ static int coda_remount(struct super_block *sb, int *flags, char *data)
}
/* exported operations */
-static struct super_operations coda_super_operations =
+static const struct super_operations coda_super_operations =
{
.alloc_inode = coda_alloc_inode,
.destroy_inode = coda_destroy_inode,
@@ -271,7 +271,7 @@ int coda_setattr(struct dentry *de, struct iattr *iattr)
return error;
}
-struct inode_operations coda_file_inode_operations = {
+const struct inode_operations coda_file_inode_operations = {
.permission = coda_permission,
.getattr = coda_getattr,
.setattr = coda_setattr,
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index 214822be87bd..2bf3026adc80 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -30,7 +30,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
unsigned int cmd, unsigned long user_data);
/* exported from this file */
-struct inode_operations coda_ioctl_inode_operations =
+const struct inode_operations coda_ioctl_inode_operations =
{
.permission = coda_ioctl_permission,
.setattr = coda_setattr,
diff --git a/fs/coda/sysctl.c b/fs/coda/sysctl.c
index 1c82e9a7d7c8..c57a1fa7cf23 100644
--- a/fs/coda/sysctl.c
+++ b/fs/coda/sysctl.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/ctype.h>
@@ -32,8 +33,6 @@
static struct ctl_table_header *fs_table_header;
-#define FS_CODA 1 /* Coda file system */
-
#define CODA_TIMEOUT 3 /* timeout on upcalls to become intrble */
#define CODA_HARD 5 /* mount type "hard" or "soft" */
#define CODA_VFS 6 /* vfs statistics */
@@ -84,15 +83,11 @@ static int do_reset_coda_cache_inv_stats( ctl_table * table, int write,
return 0;
}
-static int coda_vfs_stats_get_info( char * buffer, char ** start,
- off_t offset, int length)
+static int proc_vfs_stats_show(struct seq_file *m, void *v)
{
- int len=0;
- off_t begin;
struct coda_vfs_stats * ps = & coda_vfs_stat;
- /* this works as long as we are below 1024 characters! */
- len += sprintf( buffer,
+ seq_printf(m,
"Coda VFS statistics\n"
"===================\n\n"
"File Operations:\n"
@@ -132,28 +127,14 @@ static int coda_vfs_stats_get_info( char * buffer, char ** start,
ps->rmdir,
ps->rename,
ps->permission);
-
- begin = offset;
- *start = buffer + begin;
- len -= begin;
-
- if ( len > length )
- len = length;
- if ( len < 0 )
- len = 0;
-
- return len;
+ return 0;
}
-static int coda_cache_inv_stats_get_info( char * buffer, char ** start,
- off_t offset, int length)
+static int proc_cache_inv_stats_show(struct seq_file *m, void *v)
{
- int len=0;
- off_t begin;
struct coda_cache_inv_stats * ps = & coda_cache_inv_stat;
- /* this works as long as we are below 1024 characters! */
- len += sprintf( buffer,
+ seq_printf(m,
"Coda cache invalidation statistics\n"
"==================================\n\n"
"flush\t\t%9d\n"
@@ -170,31 +151,87 @@ static int coda_cache_inv_stats_get_info( char * buffer, char ** start,
ps->zap_vnode,
ps->purge_fid,
ps->replace );
-
- begin = offset;
- *start = buffer + begin;
- len -= begin;
+ return 0;
+}
- if ( len > length )
- len = length;
- if ( len < 0 )
- len = 0;
+static int proc_vfs_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_vfs_stats_show, NULL);
+}
- return len;
+static int proc_cache_inv_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_cache_inv_stats_show, NULL);
}
+static const struct file_operations proc_vfs_stats_fops = {
+ .owner = THIS_MODULE,
+ .open = proc_vfs_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations proc_cache_inv_stats_fops = {
+ .owner = THIS_MODULE,
+ .open = proc_cache_inv_stats_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
static ctl_table coda_table[] = {
- {CODA_TIMEOUT, "timeout", &coda_timeout, sizeof(int), 0644, NULL, &proc_dointvec},
- {CODA_HARD, "hard", &coda_hard, sizeof(int), 0644, NULL, &proc_dointvec},
- {CODA_VFS, "vfs_stats", NULL, 0, 0644, NULL, &do_reset_coda_vfs_stats},
- {CODA_CACHE_INV, "cache_inv_stats", NULL, 0, 0644, NULL, &do_reset_coda_cache_inv_stats},
- {CODA_FAKE_STATFS, "fake_statfs", &coda_fake_statfs, sizeof(int), 0600, NULL, &proc_dointvec},
- { 0 }
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "timeout",
+ .data = &coda_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "hard",
+ .data = &coda_hard,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "vfs_stats",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0644,
+ .proc_handler = &do_reset_coda_vfs_stats
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "cache_inv_stats",
+ .data = NULL,
+ .maxlen = 0,
+ .mode = 0644,
+ .proc_handler = &do_reset_coda_cache_inv_stats
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "fake_statfs",
+ .data = &coda_fake_statfs,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = &proc_dointvec
+ },
+ {}
};
static ctl_table fs_table[] = {
- {FS_CODA, "coda", NULL, 0, 0555, coda_table},
- {0}
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "coda",
+ .mode = 0555,
+ .child = coda_table
+ },
+ {}
};
@@ -212,9 +249,6 @@ static struct proc_dir_entry* proc_fs_coda;
#endif
-#define coda_proc_create(name,get_info) \
- create_proc_info_entry(name, 0, proc_fs_coda, get_info)
-
void coda_sysctl_init(void)
{
reset_coda_vfs_stats();
@@ -223,15 +257,21 @@ void coda_sysctl_init(void)
#ifdef CONFIG_PROC_FS
proc_fs_coda = proc_mkdir("coda", proc_root_fs);
if (proc_fs_coda) {
+ struct proc_dir_entry *pde;
+
proc_fs_coda->owner = THIS_MODULE;
- coda_proc_create("vfs_stats", coda_vfs_stats_get_info);
- coda_proc_create("cache_inv_stats", coda_cache_inv_stats_get_info);
+ pde = create_proc_entry("vfs_stats", 0, proc_fs_coda);
+ if (pde)
+ pde->proc_fops = &proc_vfs_stats_fops;
+ pde = create_proc_entry("cache_inv_stats", 0, proc_fs_coda);
+ if (pde)
+ pde->proc_fops = &proc_cache_inv_stats_fops;
}
#endif
#ifdef CONFIG_SYSCTL
if ( !fs_table_header )
- fs_table_header = register_sysctl_table(fs_table, 0);
+ fs_table_header = register_sysctl_table(fs_table);
#endif
}
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index f92cd303d2c9..7b48c034b312 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -75,8 +75,8 @@ extern struct super_block * configfs_sb;
extern const struct file_operations configfs_dir_operations;
extern const struct file_operations configfs_file_operations;
extern const struct file_operations bin_fops;
-extern struct inode_operations configfs_dir_inode_operations;
-extern struct inode_operations configfs_symlink_inode_operations;
+extern const struct inode_operations configfs_dir_inode_operations;
+extern const struct inode_operations configfs_symlink_inode_operations;
extern int configfs_symlink(struct inode *dir, struct dentry *dentry,
const char *symname);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 1814ba446809..34750d5e4ff2 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -72,11 +72,10 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
{
struct configfs_dirent * sd;
- sd = kmem_cache_alloc(configfs_dir_cachep, GFP_KERNEL);
+ sd = kmem_cache_zalloc(configfs_dir_cachep, GFP_KERNEL);
if (!sd)
return NULL;
- memset(sd, 0, sizeof(*sd));
atomic_set(&sd->s_count, 1);
INIT_LIST_HEAD(&sd->s_links);
INIT_LIST_HEAD(&sd->s_children);
@@ -931,7 +930,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
return 0;
}
-struct inode_operations configfs_dir_inode_operations = {
+const struct inode_operations configfs_dir_inode_operations = {
.mkdir = configfs_mkdir,
.rmdir = configfs_rmdir,
.symlink = configfs_symlink,
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 2a7cb086e80c..d98be5e01328 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -162,14 +162,17 @@ fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size
int error;
if (!buffer->page)
- buffer->page = (char *)get_zeroed_page(GFP_KERNEL);
+ buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0);
if (!buffer->page)
return -ENOMEM;
- if (count > PAGE_SIZE)
- count = PAGE_SIZE;
+ if (count >= PAGE_SIZE)
+ count = PAGE_SIZE - 1;
error = copy_from_user(buffer->page,buf,count);
buffer->needs_read_fill = 1;
+ /* if buf is assumed to contain a string, terminate it with \0,
+ * so that e.g. sscanf() can scan the string easily */
+ buffer->page[count] = 0;
return error ? -EFAULT : count;
}
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index fb18917954a9..2ec9beac17cf 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -49,7 +49,7 @@ static struct backing_dev_info configfs_backing_dev_info = {
.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};
-static struct inode_operations configfs_inode_operations ={
+static const struct inode_operations configfs_inode_operations ={
.setattr = configfs_setattr,
};
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index ed678529ebb2..6f573004cd7d 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -41,7 +41,7 @@ struct super_block * configfs_sb = NULL;
struct kmem_cache *configfs_dir_cachep;
static int configfs_mnt_count = 0;
-static struct super_operations configfs_ops = {
+static const struct super_operations configfs_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
};
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index fb65e0800a86..22700d2857da 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -272,7 +272,7 @@ static void configfs_put_link(struct dentry *dentry, struct nameidata *nd,
}
}
-struct inode_operations configfs_symlink_inode_operations = {
+const struct inode_operations configfs_symlink_inode_operations = {
.follow_link = configfs_follow_link,
.readlink = generic_readlink,
.put_link = configfs_put_link,
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 6db03fb089dc..facd0c89be8f 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -27,8 +27,8 @@
#include <asm/uaccess.h>
-static struct super_operations cramfs_ops;
-static struct inode_operations cramfs_dir_inode_operations;
+static const struct super_operations cramfs_ops;
+static const struct inode_operations cramfs_dir_inode_operations;
static const struct file_operations cramfs_directory_operations;
static const struct address_space_operations cramfs_aops;
@@ -518,11 +518,11 @@ static const struct file_operations cramfs_directory_operations = {
.readdir = cramfs_readdir,
};
-static struct inode_operations cramfs_dir_inode_operations = {
+static const struct inode_operations cramfs_dir_inode_operations = {
.lookup = cramfs_lookup,
};
-static struct super_operations cramfs_ops = {
+static const struct super_operations cramfs_ops = {
.put_super = cramfs_put_super,
.remount_fs = cramfs_remount,
.statfs = cramfs_statfs,
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index bf3901ab1744..682f928b7f4d 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -16,6 +16,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
+#include <linux/namei.h>
#include <linux/debugfs.h>
static ssize_t default_read_file(struct file *file, char __user *buf,
@@ -44,6 +45,17 @@ const struct file_operations debugfs_file_operations = {
.open = default_open,
};
+static void *debugfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ nd_set_link(nd, dentry->d_inode->i_private);
+ return NULL;
+}
+
+const struct inode_operations debugfs_link_operations = {
+ .readlink = generic_readlink,
+ .follow_link = debugfs_follow_link,
+};
+
static void debugfs_u8_set(void *data, u64 val)
{
*(u8 *)data = val;
@@ -254,7 +266,7 @@ static ssize_t read_file_blob(struct file *file, char __user *user_buf,
blob->size);
}
-static struct file_operations fops_blob = {
+static const struct file_operations fops_blob = {
.read = read_file_blob,
.open = default_open,
};
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index c692487346ea..7b324cfebcb1 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -25,11 +25,13 @@
#include <linux/namei.h>
#include <linux/debugfs.h>
#include <linux/fsnotify.h>
+#include <linux/string.h>
#define DEBUGFS_MAGIC 0x64626720
/* declared over in file.c */
extern struct file_operations debugfs_file_operations;
+extern const struct inode_operations debugfs_link_operations;
static struct vfsmount *debugfs_mount;
static int debugfs_mount_count;
@@ -51,6 +53,9 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
case S_IFREG:
inode->i_fop = &debugfs_file_operations;
break;
+ case S_IFLNK:
+ inode->i_op = &debugfs_link_operations;
+ break;
case S_IFDIR:
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
@@ -96,6 +101,12 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return res;
}
+static int debugfs_link(struct inode *dir, struct dentry *dentry, int mode)
+{
+ mode = (mode & S_IALLUGO) | S_IFLNK;
+ return debugfs_mknod(dir, dentry, mode, 0);
+}
+
static int debugfs_create(struct inode *dir, struct dentry *dentry, int mode)
{
int res;
@@ -158,10 +169,17 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
mutex_lock(&parent->d_inode->i_mutex);
*dentry = lookup_one_len(name, parent, strlen(name));
if (!IS_ERR(*dentry)) {
- if ((mode & S_IFMT) == S_IFDIR)
+ switch (mode & S_IFMT) {
+ case S_IFDIR:
error = debugfs_mkdir(parent->d_inode, *dentry, mode);
- else
+ break;
+ case S_IFLNK:
+ error = debugfs_link(parent->d_inode, *dentry, mode);
+ break;
+ default:
error = debugfs_create(parent->d_inode, *dentry, mode);
+ break;
+ }
dput(*dentry);
} else
error = PTR_ERR(*dentry);
@@ -194,9 +212,7 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
* you are responsible here.) If an error occurs, %NULL will be returned.
*
* If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * returned.
*/
struct dentry *debugfs_create_file(const char *name, mode_t mode,
struct dentry *parent, void *data,
@@ -246,9 +262,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_file);
* you are responsible here.) If an error occurs, %NULL will be returned.
*
* If debugfs is not enabled in the kernel, the value -%ENODEV will be
- * returned. It is not wise to check for this value, but rather, check for
- * %NULL or !%NULL instead as to eliminate the need for #ifdef in the calling
- * code.
+ * returned.
*/
struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
{
@@ -259,6 +273,47 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent)
EXPORT_SYMBOL_GPL(debugfs_create_dir);
/**
+ * debugfs_create_symlink- create a symbolic link in the debugfs filesystem
+ * @name: a pointer to a string containing the name of the symbolic link to
+ * create.
+ * @parent: a pointer to the parent dentry for this symbolic link. This
+ * should be a directory dentry if set. If this parameter is NULL,
+ * then the symbolic link will be created in the root of the debugfs
+ * filesystem.
+ * @target: a pointer to a string containing the path to the target of the
+ * symbolic link.
+ *
+ * This function creates a symbolic link with the given name in debugfs that
+ * links to the given target path.
+ *
+ * This function will return a pointer to a dentry if it succeeds. This
+ * pointer must be passed to the debugfs_remove() function when the symbolic
+ * link is to be removed (no automatic cleanup happens if your module is
+ * unloaded, you are responsible here.) If an error occurs, %NULL will be
+ * returned.
+ *
+ * If debugfs is not enabled in the kernel, the value -%ENODEV will be
+ * returned.
+ */
+struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
+ const char *target)
+{
+ struct dentry *result;
+ char *link;
+
+ link = kstrdup(target, GFP_KERNEL);
+ if (!link)
+ return NULL;
+
+ result = debugfs_create_file(name, S_IFLNK | S_IRWXUGO, parent, link,
+ NULL);
+ if (!result)
+ kfree(link);
+ return result;
+}
+EXPORT_SYMBOL_GPL(debugfs_create_symlink);
+
+/**
* debugfs_remove - removes a file or directory from the debugfs filesystem
* @dentry: a pointer to a the dentry of the file or directory to be
* removed.
@@ -287,15 +342,22 @@ void debugfs_remove(struct dentry *dentry)
if (debugfs_positive(dentry)) {
if (dentry->d_inode) {
dget(dentry);
- if (S_ISDIR(dentry->d_inode->i_mode)) {
+ switch (dentry->d_inode->i_mode & S_IFMT) {
+ case S_IFDIR:
ret = simple_rmdir(parent->d_inode, dentry);
if (ret)
printk(KERN_ERR
"DebugFS rmdir on %s failed : "
"directory not empty.\n",
dentry->d_name.name);
- } else
+ break;
+ case S_IFLNK:
+ kfree(dentry->d_inode->i_private);
+ /* fall through */
+ default:
simple_unlink(parent->d_inode, dentry);
+ break;
+ }
if (!ret)
d_delete(dentry);
dput(dentry);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 5f7b5a6025bf..643e57b622bd 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -91,7 +91,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static struct super_operations devpts_sops = {
+static const struct super_operations devpts_sops = {
.statfs = simple_statfs,
.remount_fs = devpts_remount,
};
diff --git a/fs/dlm/Kconfig b/fs/dlm/Kconfig
index b5654a284fef..6fa7b0d5c043 100644
--- a/fs/dlm/Kconfig
+++ b/fs/dlm/Kconfig
@@ -3,21 +3,21 @@ menu "Distributed Lock Manager"
config DLM
tristate "Distributed Lock Manager (DLM)"
- depends on IPV6 || IPV6=n
+ depends on SYSFS && (IPV6 || IPV6=n)
select CONFIGFS_FS
select IP_SCTP if DLM_SCTP
help
- A general purpose distributed lock manager for kernel or userspace
- applications.
+ A general purpose distributed lock manager for kernel or userspace
+ applications.
choice
prompt "Select DLM communications protocol"
depends on DLM
default DLM_TCP
help
- The DLM Can use TCP or SCTP for it's network communications.
- SCTP supports multi-homed operations whereas TCP doesn't.
- However, SCTP seems to have stability problems at the moment.
+ The DLM can use TCP or SCTP for its network communications.
+ SCTP supports multi-homed operations whereas TCP doesn't.
+ However, SCTP seems to have stability problems at the moment.
config DLM_TCP
bool "TCP/IP"
@@ -31,8 +31,8 @@ config DLM_DEBUG
bool "DLM debugging"
depends on DLM
help
- Under the debugfs mount point, the name of each lockspace will
- appear as a file in the "dlm" directory. The output is the
- list of resource and locks the local node knows about.
+ Under the debugfs mount point, the name of each lockspace will
+ appear as a file in the "dlm" directory. The output is the
+ list of resource and locks the local node knows about.
endmenu
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 88553054bbfa..8665c88e5af2 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -54,6 +54,11 @@ static struct config_item *make_node(struct config_group *, const char *);
static void drop_node(struct config_group *, struct config_item *);
static void release_node(struct config_item *);
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+ char *buf);
+static ssize_t store_cluster(struct config_item *i,
+ struct configfs_attribute *a,
+ const char *buf, size_t len);
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf);
static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a,
@@ -73,6 +78,101 @@ static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len);
static ssize_t node_weight_read(struct node *nd, char *buf);
static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len);
+struct cluster {
+ struct config_group group;
+ unsigned int cl_tcp_port;
+ unsigned int cl_buffer_size;
+ unsigned int cl_rsbtbl_size;
+ unsigned int cl_lkbtbl_size;
+ unsigned int cl_dirtbl_size;
+ unsigned int cl_recover_timer;
+ unsigned int cl_toss_secs;
+ unsigned int cl_scan_secs;
+ unsigned int cl_log_debug;
+};
+
+enum {
+ CLUSTER_ATTR_TCP_PORT = 0,
+ CLUSTER_ATTR_BUFFER_SIZE,
+ CLUSTER_ATTR_RSBTBL_SIZE,
+ CLUSTER_ATTR_LKBTBL_SIZE,
+ CLUSTER_ATTR_DIRTBL_SIZE,
+ CLUSTER_ATTR_RECOVER_TIMER,
+ CLUSTER_ATTR_TOSS_SECS,
+ CLUSTER_ATTR_SCAN_SECS,
+ CLUSTER_ATTR_LOG_DEBUG,
+};
+
+struct cluster_attribute {
+ struct configfs_attribute attr;
+ ssize_t (*show)(struct cluster *, char *);
+ ssize_t (*store)(struct cluster *, const char *, size_t);
+};
+
+static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field,
+ int *info_field, int check_zero,
+ const char *buf, size_t len)
+{
+ unsigned int x;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ x = simple_strtoul(buf, NULL, 0);
+
+ if (check_zero && !x)
+ return -EINVAL;
+
+ *cl_field = x; /* per-cluster copy shown by the _read handler */
+ *info_field = x; /* matching dlm_config member (declared int) */
+
+ return len;
+}
+
+#define __CONFIGFS_ATTR(_name,_mode,_read,_write) { \
+ .attr = { .ca_name = __stringify(_name), \
+ .ca_mode = _mode, \
+ .ca_owner = THIS_MODULE }, \
+ .show = _read, \
+ .store = _write, \
+}
+
+#define CLUSTER_ATTR(name, check_zero) \
+static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \
+{ \
+ return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \
+ check_zero, buf, len); \
+} \
+static ssize_t name##_read(struct cluster *cl, char *buf) \
+{ \
+ return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \
+} \
+static struct cluster_attribute cluster_attr_##name = \
+__CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
+
+CLUSTER_ATTR(tcp_port, 1);
+CLUSTER_ATTR(buffer_size, 1);
+CLUSTER_ATTR(rsbtbl_size, 1);
+CLUSTER_ATTR(lkbtbl_size, 1);
+CLUSTER_ATTR(dirtbl_size, 1);
+CLUSTER_ATTR(recover_timer, 1);
+CLUSTER_ATTR(toss_secs, 1);
+CLUSTER_ATTR(scan_secs, 1);
+CLUSTER_ATTR(log_debug, 0);
+
+static struct configfs_attribute *cluster_attrs[] = {
+ [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
+ [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
+ [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
+ [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
+ [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
+ [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
+ [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
+ [CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
+ [CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
+ NULL,
+};
+
enum {
COMM_ATTR_NODEID = 0,
COMM_ATTR_LOCAL,
@@ -152,10 +252,6 @@ struct clusters {
struct configfs_subsystem subsys;
};
-struct cluster {
- struct config_group group;
-};
-
struct spaces {
struct config_group ss_group;
};
@@ -197,6 +293,8 @@ static struct configfs_group_operations clusters_ops = {
static struct configfs_item_operations cluster_ops = {
.release = release_cluster,
+ .show_attribute = show_cluster,
+ .store_attribute = store_cluster,
};
static struct configfs_group_operations spaces_ops = {
@@ -237,6 +335,7 @@ static struct config_item_type clusters_type = {
static struct config_item_type cluster_type = {
.ct_item_ops = &cluster_ops,
+ .ct_attrs = cluster_attrs,
.ct_owner = THIS_MODULE,
};
@@ -317,6 +416,16 @@ static struct config_group *make_cluster(struct config_group *g,
cl->group.default_groups[1] = &cms->cs_group;
cl->group.default_groups[2] = NULL;
+ cl->cl_tcp_port = dlm_config.ci_tcp_port;
+ cl->cl_buffer_size = dlm_config.ci_buffer_size;
+ cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
+ cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
+ cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
+ cl->cl_recover_timer = dlm_config.ci_recover_timer;
+ cl->cl_toss_secs = dlm_config.ci_toss_secs;
+ cl->cl_scan_secs = dlm_config.ci_scan_secs;
+ cl->cl_log_debug = dlm_config.ci_log_debug;
+
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
return &cl->group;
@@ -509,6 +618,25 @@ void dlm_config_exit(void)
* Functions for user space to read/write attributes
*/
+static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a,
+ char *buf)
+{
+ struct cluster *cl = to_cluster(i);
+ struct cluster_attribute *cla =
+ container_of(a, struct cluster_attribute, attr);
+ return cla->show ? cla->show(cl, buf) : 0;
+}
+
+static ssize_t store_cluster(struct config_item *i,
+ struct configfs_attribute *a,
+ const char *buf, size_t len)
+{
+ struct cluster *cl = to_cluster(i);
+ struct cluster_attribute *cla =
+ container_of(a, struct cluster_attribute, attr);
+ return cla->store ? cla->store(cl, buf, len) : -EINVAL;
+}
+
static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
@@ -775,15 +903,17 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
#define DEFAULT_SCAN_SECS 5
+#define DEFAULT_LOG_DEBUG 0
struct dlm_config_info dlm_config = {
- .tcp_port = DEFAULT_TCP_PORT,
- .buffer_size = DEFAULT_BUFFER_SIZE,
- .rsbtbl_size = DEFAULT_RSBTBL_SIZE,
- .lkbtbl_size = DEFAULT_LKBTBL_SIZE,
- .dirtbl_size = DEFAULT_DIRTBL_SIZE,
- .recover_timer = DEFAULT_RECOVER_TIMER,
- .toss_secs = DEFAULT_TOSS_SECS,
- .scan_secs = DEFAULT_SCAN_SECS
+ .ci_tcp_port = DEFAULT_TCP_PORT,
+ .ci_buffer_size = DEFAULT_BUFFER_SIZE,
+ .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
+ .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
+ .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
+ .ci_recover_timer = DEFAULT_RECOVER_TIMER,
+ .ci_toss_secs = DEFAULT_TOSS_SECS,
+ .ci_scan_secs = DEFAULT_SCAN_SECS,
+ .ci_log_debug = DEFAULT_LOG_DEBUG
};
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 9da7839958a9..1e978611a96e 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -17,14 +17,15 @@
#define DLM_MAX_ADDR_COUNT 3
struct dlm_config_info {
- int tcp_port;
- int buffer_size;
- int rsbtbl_size;
- int lkbtbl_size;
- int dirtbl_size;
- int recover_timer;
- int toss_secs;
- int scan_secs;
+ int ci_tcp_port;
+ int ci_buffer_size;
+ int ci_rsbtbl_size;
+ int ci_lkbtbl_size;
+ int ci_dirtbl_size;
+ int ci_recover_timer;
+ int ci_toss_secs;
+ int ci_scan_secs;
+ int ci_log_debug;
};
extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index ca94a837a5bb..61ba670b9e02 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -287,7 +287,7 @@ static int rsb_open(struct inode *inode, struct file *file)
return 0;
}
-static struct file_operations rsb_fops = {
+static const struct file_operations rsb_fops = {
.owner = THIS_MODULE,
.open = rsb_open,
.read = seq_read,
@@ -331,7 +331,7 @@ static ssize_t waiters_read(struct file *file, char __user *userbuf,
return rv;
}
-static struct file_operations waiters_fops = {
+static const struct file_operations waiters_fops = {
.owner = THIS_MODULE,
.open = waiters_open,
.read = waiters_read
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 1ee8195e6fc0..61d93201e1b2 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -41,6 +41,7 @@
#include <asm/uaccess.h>
#include <linux/dlm.h>
+#include "config.h"
#define DLM_LOCKSPACE_LEN 64
@@ -69,12 +70,12 @@ struct dlm_mhandle;
#define log_error(ls, fmt, args...) \
printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args)
-#define DLM_LOG_DEBUG
-#ifdef DLM_LOG_DEBUG
-#define log_debug(ls, fmt, args...) log_error(ls, fmt, ##args)
-#else
-#define log_debug(ls, fmt, args...)
-#endif
+#define log_debug(ls, fmt, args...) \
+do { \
+ if (dlm_config.ci_log_debug) \
+ printk(KERN_DEBUG "dlm: %s: " fmt "\n", \
+ (ls)->ls_name , ##args); \
+} while (0)
#define DLM_ASSERT(x, do) \
{ \
@@ -309,8 +310,8 @@ static inline int rsb_flag(struct dlm_rsb *r, enum rsb_flags flag)
/* dlm_header is first element of all structs sent between nodes */
-#define DLM_HEADER_MAJOR 0x00020000
-#define DLM_HEADER_MINOR 0x00000001
+#define DLM_HEADER_MAJOR 0x00030000
+#define DLM_HEADER_MINOR 0x00000000
#define DLM_MSG 1
#define DLM_RCOM 2
@@ -386,6 +387,8 @@ struct dlm_rcom {
uint32_t rc_type; /* DLM_RCOM_ */
int rc_result; /* multi-purpose */
uint64_t rc_id; /* match reply with request */
+ uint64_t rc_seq; /* sender's ls_recover_seq */
+ uint64_t rc_seq_reply; /* remote ls_recover_seq */
char rc_buf[0];
};
@@ -523,6 +526,7 @@ struct dlm_user_proc {
spinlock_t asts_spin;
struct list_head locks;
spinlock_t locks_spin;
+ struct list_head unlocking;
wait_queue_head_t wait;
};
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 30878defaeb6..e725005fafd0 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -754,6 +754,11 @@ static void add_to_waiters(struct dlm_lkb *lkb, int mstype)
mutex_unlock(&ls->ls_waiters_mutex);
}
+/* We clear the RESEND flag because we might be taking an lkb off the waiters
+ list as part of process_requestqueue (e.g. a lookup that has an optimized
+ request reply on the requestqueue) between dlm_recover_waiters_pre() which
+ set RESEND and dlm_recover_waiters_post() */
+
static int _remove_from_waiters(struct dlm_lkb *lkb)
{
int error = 0;
@@ -764,6 +769,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb)
goto out;
}
lkb->lkb_wait_type = 0;
+ lkb->lkb_flags &= ~DLM_IFL_RESEND;
list_del(&lkb->lkb_wait_reply);
unhold_lkb(lkb);
out:
@@ -810,7 +816,7 @@ static int shrink_bucket(struct dlm_ls *ls, int b)
list_for_each_entry_reverse(r, &ls->ls_rsbtbl[b].toss,
res_hashchain) {
if (!time_after_eq(jiffies, r->res_toss_time +
- dlm_config.toss_secs * HZ))
+ dlm_config.ci_toss_secs * HZ))
continue;
found = 1;
break;
@@ -2144,12 +2150,24 @@ static void send_args(struct dlm_rsb *r, struct dlm_lkb *lkb,
if (lkb->lkb_astaddr)
ms->m_asts |= AST_COMP;
- if (ms->m_type == DLM_MSG_REQUEST || ms->m_type == DLM_MSG_LOOKUP)
- memcpy(ms->m_extra, r->res_name, r->res_length);
+ /* compare with switch in create_message; send_remove() doesn't
+ use send_args() */
- else if (lkb->lkb_lvbptr)
+ switch (ms->m_type) {
+ case DLM_MSG_REQUEST:
+ case DLM_MSG_LOOKUP:
+ memcpy(ms->m_extra, r->res_name, r->res_length);
+ break;
+ case DLM_MSG_CONVERT:
+ case DLM_MSG_UNLOCK:
+ case DLM_MSG_REQUEST_REPLY:
+ case DLM_MSG_CONVERT_REPLY:
+ case DLM_MSG_GRANT:
+ if (!lkb->lkb_lvbptr)
+ break;
memcpy(ms->m_extra, lkb->lkb_lvbptr, r->res_ls->ls_lvblen);
-
+ break;
+ }
}
static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
@@ -2418,8 +2436,12 @@ static int receive_request_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
DLM_ASSERT(is_master_copy(lkb), dlm_print_lkb(lkb););
- if (receive_lvb(ls, lkb, ms))
- return -ENOMEM;
+ if (lkb->lkb_exflags & DLM_LKF_VALBLK) {
+ /* lkb was just created so there won't be an lvb yet */
+ lkb->lkb_lvbptr = allocate_lvb(ls);
+ if (!lkb->lkb_lvbptr)
+ return -ENOMEM;
+ }
return 0;
}
@@ -3002,7 +3024,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
{
struct dlm_message *ms = (struct dlm_message *) hd;
struct dlm_ls *ls;
- int error;
+ int error = 0;
if (!recovery)
dlm_message_in(ms);
@@ -3119,7 +3141,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
out:
dlm_put_lockspace(ls);
dlm_astd_wake();
- return 0;
+ return error;
}
@@ -3132,6 +3154,7 @@ static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
if (middle_conversion(lkb)) {
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -EINPROGRESS;
+ ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_convert_reply(lkb, &ls->ls_stub_ms);
@@ -3205,6 +3228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_UNLOCK:
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -DLM_EUNLOCK;
+ ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_unlock_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
@@ -3213,6 +3237,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
case DLM_MSG_CANCEL:
hold_lkb(lkb);
ls->ls_stub_ms.m_result = -DLM_ECANCEL;
+ ls->ls_stub_ms.m_flags = lkb->lkb_flags;
_remove_from_waiters(lkb);
_receive_cancel_reply(lkb, &ls->ls_stub_ms);
dlm_put_lkb(lkb);
@@ -3571,6 +3596,14 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
lock_rsb(r);
switch (error) {
+ case -EBADR:
+ /* There's a chance the new master received our lock before
+ dlm_recover_master_reply(), this wouldn't happen if we did
+ a barrier between recover_masters and recover_locks. */
+ log_debug(ls, "master copy not ready %x r %lx %s", lkb->lkb_id,
+ (unsigned long)r, r->res_name);
+ dlm_send_rcom_lock(r, lkb);
+ goto out;
case -EEXIST:
log_debug(ls, "master copy exists %x", lkb->lkb_id);
/* fall through */
@@ -3585,7 +3618,7 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
/* an ack for dlm_recover_locks() which waits for replies from
all the locks it sends to new masters */
dlm_recovered_lock(r);
-
+ out:
unlock_rsb(r);
put_rsb(r);
dlm_put_lkb(lkb);
@@ -3610,7 +3643,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
}
if (flags & DLM_LKF_VALBLK) {
- ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
if (!ua->lksb.sb_lvbptr) {
kfree(ua);
__put_lkb(ls, lkb);
@@ -3679,7 +3712,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
ua = (struct dlm_user_args *)lkb->lkb_astparam;
if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
- ua->lksb.sb_lvbptr = kmalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
+ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
if (!ua->lksb.sb_lvbptr) {
error = -ENOMEM;
goto out_put;
@@ -3745,12 +3778,10 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
goto out_put;
spin_lock(&ua->proc->locks_spin);
- list_del_init(&lkb->lkb_ownqueue);
+ /* dlm_user_add_ast() may have already taken lkb off the proc list */
+ if (!list_empty(&lkb->lkb_ownqueue))
+ list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
-
- /* this removes the reference for the proc->locks list added by
- dlm_user_request */
- unhold_lkb(lkb);
out_put:
dlm_put_lkb(lkb);
out:
@@ -3790,9 +3821,8 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
/* this lkb was removed from the WAITING queue */
if (lkb->lkb_grmode == DLM_LOCK_IV) {
spin_lock(&ua->proc->locks_spin);
- list_del_init(&lkb->lkb_ownqueue);
+ list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
- unhold_lkb(lkb);
}
out_put:
dlm_put_lkb(lkb);
@@ -3853,11 +3883,6 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
mutex_lock(&ls->ls_clear_proc_locks);
list_for_each_entry_safe(lkb, safe, &proc->locks, lkb_ownqueue) {
- if (lkb->lkb_ast_type) {
- list_del(&lkb->lkb_astqueue);
- unhold_lkb(lkb);
- }
-
list_del_init(&lkb->lkb_ownqueue);
if (lkb->lkb_exflags & DLM_LKF_PERSISTENT) {
@@ -3874,6 +3899,20 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
dlm_put_lkb(lkb);
}
+
+ /* in-progress unlocks */
+ list_for_each_entry_safe(lkb, safe, &proc->unlocking, lkb_ownqueue) {
+ list_del_init(&lkb->lkb_ownqueue);
+ lkb->lkb_flags |= DLM_IFL_DEAD;
+ dlm_put_lkb(lkb);
+ }
+
+ list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+ list_del(&lkb->lkb_astqueue);
+ dlm_put_lkb(lkb);
+ }
+
mutex_unlock(&ls->ls_clear_proc_locks);
unlock_recovery(ls);
}
+
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 59012b089e8d..f40817b53c6f 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -236,7 +236,7 @@ static int dlm_scand(void *data)
while (!kthread_should_stop()) {
list_for_each_entry(ls, &lslist, ls_list)
dlm_scan_rsbs(ls);
- schedule_timeout_interruptible(dlm_config.scan_secs * HZ);
+ schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
}
return 0;
}
@@ -422,7 +422,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_count = 0;
ls->ls_flags = 0;
- size = dlm_config.rsbtbl_size;
+ size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
@@ -434,7 +434,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
rwlock_init(&ls->ls_rsbtbl[i].lock);
}
- size = dlm_config.lkbtbl_size;
+ size = dlm_config.ci_lkbtbl_size;
ls->ls_lkbtbl_size = size;
ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
@@ -446,7 +446,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
ls->ls_lkbtbl[i].counter = 1;
}
- size = dlm_config.dirtbl_size;
+ size = dlm_config.ci_dirtbl_size;
ls->ls_dirtbl_size = size;
ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
@@ -489,7 +489,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
mutex_init(&ls->ls_requestqueue_mutex);
mutex_init(&ls->ls_clear_proc_locks);
- ls->ls_recover_buf = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
+ ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (!ls->ls_recover_buf)
goto out_dirfree;
diff --git a/fs/dlm/lowcomms-sctp.c b/fs/dlm/lowcomms-sctp.c
index fe158d7a9285..dc83a9d979b5 100644
--- a/fs/dlm/lowcomms-sctp.c
+++ b/fs/dlm/lowcomms-sctp.c
@@ -72,6 +72,8 @@ struct nodeinfo {
struct list_head writequeue; /* outgoing writequeue_entries */
spinlock_t writequeue_lock;
int nodeid;
+ struct work_struct swork; /* Send workqueue */
+ struct work_struct lwork; /* Locking workqueue */
};
static DEFINE_IDR(nodeinfo_idr);
@@ -96,6 +98,7 @@ struct connection {
atomic_t waiting_requests;
struct cbuf cb;
int eagain_flag;
+ struct work_struct work; /* Send workqueue */
};
/* An entry waiting to be sent */
@@ -137,19 +140,23 @@ static void cbuf_eat(struct cbuf *cb, int n)
static LIST_HEAD(write_nodes);
static DEFINE_SPINLOCK(write_nodes_lock);
+
/* Maximum number of incoming messages to process before
* doing a schedule()
*/
#define MAX_RX_MSG_COUNT 25
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_wait);
+/* Work queues */
+static struct workqueue_struct *recv_workqueue;
+static struct workqueue_struct *send_workqueue;
+static struct workqueue_struct *lock_workqueue;
/* The SCTP connection */
static struct connection sctp_con;
+static void process_send_sockets(struct work_struct *work);
+static void process_recv_sockets(struct work_struct *work);
+static void process_lock_request(struct work_struct *work);
static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
{
@@ -222,6 +229,8 @@ static struct nodeinfo *nodeid2nodeinfo(int nodeid, gfp_t alloc)
spin_lock_init(&ni->lock);
INIT_LIST_HEAD(&ni->writequeue);
spin_lock_init(&ni->writequeue_lock);
+ INIT_WORK(&ni->lwork, process_lock_request);
+ INIT_WORK(&ni->swork, process_send_sockets);
ni->nodeid = nodeid;
if (nodeid > max_nodeid)
@@ -249,11 +258,8 @@ static struct nodeinfo *assoc2nodeinfo(sctp_assoc_t assoc)
/* Data or notification available on socket */
static void lowcomms_data_ready(struct sock *sk, int count_unused)
{
- atomic_inc(&sctp_con.waiting_requests);
if (test_and_set_bit(CF_READ_PENDING, &sctp_con.flags))
- return;
-
- wake_up_interruptible(&lowcomms_recv_wait);
+ queue_work(recv_workqueue, &sctp_con.work);
}
@@ -361,10 +367,10 @@ static void init_failed(void)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
+ queue_work(send_workqueue, &ni->swork);
}
}
}
- wake_up_process(send_task);
}
/* Something happened to an association */
@@ -446,8 +452,8 @@ static void process_sctp_notification(struct msghdr *msg, char *buf)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
+ queue_work(send_workqueue, &ni->swork);
}
- wake_up_process(send_task);
}
break;
@@ -580,8 +586,8 @@ static int receive_from_sock(void)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
+ queue_work(send_workqueue, &ni->swork);
}
- wake_up_process(send_task);
}
}
@@ -590,6 +596,7 @@ static int receive_from_sock(void)
return 0;
cbuf_add(&sctp_con.cb, ret);
+ // PJC: TODO: Add to node's workqueue....can we ??
ret = dlm_process_incoming_buffer(cpu_to_le32(sinfo->sinfo_ppid),
page_address(sctp_con.rx_page),
sctp_con.cb.base, sctp_con.cb.len,
@@ -635,7 +642,7 @@ static int add_bind_addr(struct sockaddr_storage *addr, int addr_len, int num)
if (result < 0)
log_print("Can't bind to port %d addr number %d",
- dlm_config.tcp_port, num);
+ dlm_config.ci_tcp_port, num);
return result;
}
@@ -711,7 +718,7 @@ static int init_sock(void)
/* Bind to all interfaces. */
for (i = 0; i < dlm_local_count; i++) {
memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr));
- make_sockaddr(&localaddr, dlm_config.tcp_port, &addr_len);
+ make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len);
result = add_bind_addr(&localaddr, addr_len, num);
if (result)
@@ -820,7 +827,8 @@ void dlm_lowcomms_commit_buffer(void *arg)
spin_lock_bh(&write_nodes_lock);
list_add_tail(&ni->write_list, &write_nodes);
spin_unlock_bh(&write_nodes_lock);
- wake_up_process(send_task);
+
+ queue_work(send_workqueue, &ni->swork);
}
return;
@@ -863,7 +871,7 @@ static void initiate_association(int nodeid)
return;
}
- make_sockaddr(&rem_addr, dlm_config.tcp_port, &addrlen);
+ make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen);
outmessage.msg_name = &rem_addr;
outmessage.msg_namelen = addrlen;
@@ -1088,101 +1096,75 @@ int dlm_lowcomms_close(int nodeid)
return 0;
}
-static int write_list_empty(void)
+// PJC: The work queue function for receiving.
+static void process_recv_sockets(struct work_struct *work)
{
- int status;
-
- spin_lock_bh(&write_nodes_lock);
- status = list_empty(&write_nodes);
- spin_unlock_bh(&write_nodes_lock);
-
- return status;
-}
-
-static int dlm_recvd(void *data)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- while (!kthread_should_stop()) {
+ if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
+ int ret;
int count = 0;
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&lowcomms_recv_wait, &wait);
- if (!test_bit(CF_READ_PENDING, &sctp_con.flags))
- cond_resched();
- remove_wait_queue(&lowcomms_recv_wait, &wait);
- set_current_state(TASK_RUNNING);
-
- if (test_and_clear_bit(CF_READ_PENDING, &sctp_con.flags)) {
- int ret;
-
- do {
- ret = receive_from_sock();
+ do {
+ ret = receive_from_sock();
- /* Don't starve out everyone else */
- if (++count >= MAX_RX_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
- } while (!kthread_should_stop() && ret >=0);
- }
- cond_resched();
+ /* Don't starve out everyone else */
+ if (++count >= MAX_RX_MSG_COUNT) {
+ cond_resched();
+ count = 0;
+ }
+ } while (!kthread_should_stop() && ret >=0);
}
-
- return 0;
+ cond_resched();
}
-static int dlm_sendd(void *data)
+// PJC: the work queue function for sending
+static void process_send_sockets(struct work_struct *work)
{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (write_list_empty())
- cond_resched();
- set_current_state(TASK_RUNNING);
-
- if (sctp_con.eagain_flag) {
- sctp_con.eagain_flag = 0;
- refill_write_queue();
- }
- process_output_queue();
+ if (sctp_con.eagain_flag) {
+ sctp_con.eagain_flag = 0;
+ refill_write_queue();
}
+ process_output_queue();
+}
- remove_wait_queue(sctp_con.sock->sk->sk_sleep, &wait);
-
- return 0;
+// PJC: Process lock requests from a particular node.
+// TODO: can we optimise this out on UP ??
+static void process_lock_request(struct work_struct *work)
+{
}
static void daemons_stop(void)
{
- kthread_stop(recv_task);
- kthread_stop(send_task);
+ destroy_workqueue(recv_workqueue);
+ destroy_workqueue(send_workqueue);
+ destroy_workqueue(lock_workqueue);
}
static int daemons_start(void)
{
- struct task_struct *p;
int error;
+ recv_workqueue = create_workqueue("dlm_recv");
+ error = IS_ERR(recv_workqueue);
+ if (error) {
+ log_print("can't start dlm_recv %d", error);
+ return error;
+ }
- p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
- error = IS_ERR(p);
+ send_workqueue = create_singlethread_workqueue("dlm_send");
+ error = IS_ERR(send_workqueue);
if (error) {
- log_print("can't start dlm_recvd %d", error);
+ log_print("can't start dlm_send %d", error);
+ destroy_workqueue(recv_workqueue);
return error;
}
- recv_task = p;
- p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
- error = IS_ERR(p);
+ lock_workqueue = create_workqueue("dlm_rlock");
+ error = IS_ERR(lock_workqueue);
if (error) {
- log_print("can't start dlm_sendd %d", error);
- kthread_stop(recv_task);
+ log_print("can't start dlm_rlock %d", error);
+ destroy_workqueue(send_workqueue);
+ destroy_workqueue(recv_workqueue);
return error;
}
- send_task = p;
return 0;
}
@@ -1194,6 +1176,8 @@ int dlm_lowcomms_start(void)
{
int error;
+ INIT_WORK(&sctp_con.work, process_recv_sockets);
+
error = init_sock();
if (error)
goto fail_sock;
@@ -1224,4 +1208,3 @@ void dlm_lowcomms_stop(void)
for (i = 0; i < dlm_local_count; i++)
kfree(dlm_local_addr[i]);
}
-
diff --git a/fs/dlm/lowcomms-tcp.c b/fs/dlm/lowcomms-tcp.c
index 9be3a440c42a..07e0a122c32f 100644
--- a/fs/dlm/lowcomms-tcp.c
+++ b/fs/dlm/lowcomms-tcp.c
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -96,10 +96,7 @@ static bool cbuf_empty(struct cbuf *cb)
struct connection {
struct socket *sock; /* NULL if not connected */
uint32_t nodeid; /* So we know who we are in the list */
- struct rw_semaphore sock_sem; /* Stop connect races */
- struct list_head read_list; /* On this list when ready for reading */
- struct list_head write_list; /* On this list when ready for writing */
- struct list_head state_list; /* On this list when ready to connect */
+ struct mutex sock_mutex;
unsigned long flags; /* bit 1,2 = We are on the read/write lists */
#define CF_READ_PENDING 1
#define CF_WRITE_PENDING 2
@@ -112,9 +109,10 @@ struct connection {
struct page *rx_page;
struct cbuf cb;
int retries;
- atomic_t waiting_requests;
#define MAX_CONNECT_RETRIES 3
struct connection *othercon;
+ struct work_struct rwork; /* Receive workqueue */
+ struct work_struct swork; /* Send workqueue */
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)
@@ -131,14 +129,9 @@ struct writequeue_entry {
static struct sockaddr_storage dlm_local_addr;
-/* Manage daemons */
-static struct task_struct *recv_task;
-static struct task_struct *send_task;
-
-static wait_queue_t lowcomms_send_waitq_head;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_send_waitq);
-static wait_queue_t lowcomms_recv_waitq_head;
-static DECLARE_WAIT_QUEUE_HEAD(lowcomms_recv_waitq);
+/* Work queues */
+static struct workqueue_struct *recv_workqueue;
+static struct workqueue_struct *send_workqueue;
/* An array of pointers to connections, indexed by NODEID */
static struct connection **connections;
@@ -146,17 +139,8 @@ static DECLARE_MUTEX(connections_lock);
static struct kmem_cache *con_cache;
static int conn_array_size;
-/* List of sockets that have reads pending */
-static LIST_HEAD(read_sockets);
-static DEFINE_SPINLOCK(read_sockets_lock);
-
-/* List of sockets which have writes pending */
-static LIST_HEAD(write_sockets);
-static DEFINE_SPINLOCK(write_sockets_lock);
-
-/* List of sockets which have connects pending */
-static LIST_HEAD(state_sockets);
-static DEFINE_SPINLOCK(state_sockets_lock);
+static void process_recv_sockets(struct work_struct *work);
+static void process_send_sockets(struct work_struct *work);
static struct connection *nodeid2con(int nodeid, gfp_t allocation)
{
@@ -186,9 +170,11 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation)
goto finish;
con->nodeid = nodeid;
- init_rwsem(&con->sock_sem);
+ mutex_init(&con->sock_mutex);
INIT_LIST_HEAD(&con->writequeue);
spin_lock_init(&con->writequeue_lock);
+ INIT_WORK(&con->swork, process_send_sockets);
+ INIT_WORK(&con->rwork, process_recv_sockets);
connections[nodeid] = con;
}
@@ -203,41 +189,22 @@ static void lowcomms_data_ready(struct sock *sk, int count_unused)
{
struct connection *con = sock2con(sk);
- atomic_inc(&con->waiting_requests);
- if (test_and_set_bit(CF_READ_PENDING, &con->flags))
- return;
-
- spin_lock_bh(&read_sockets_lock);
- list_add_tail(&con->read_list, &read_sockets);
- spin_unlock_bh(&read_sockets_lock);
-
- wake_up_interruptible(&lowcomms_recv_waitq);
+ if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+ queue_work(recv_workqueue, &con->rwork);
}
static void lowcomms_write_space(struct sock *sk)
{
struct connection *con = sock2con(sk);
- if (test_and_set_bit(CF_WRITE_PENDING, &con->flags))
- return;
-
- spin_lock_bh(&write_sockets_lock);
- list_add_tail(&con->write_list, &write_sockets);
- spin_unlock_bh(&write_sockets_lock);
-
- wake_up_interruptible(&lowcomms_send_waitq);
+ if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+ queue_work(send_workqueue, &con->swork);
}
static inline void lowcomms_connect_sock(struct connection *con)
{
- if (test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
- return;
-
- spin_lock_bh(&state_sockets_lock);
- list_add_tail(&con->state_list, &state_sockets);
- spin_unlock_bh(&state_sockets_lock);
-
- wake_up_interruptible(&lowcomms_send_waitq);
+ if (!test_and_set_bit(CF_CONNECT_PENDING, &con->flags))
+ queue_work(send_workqueue, &con->swork);
}
static void lowcomms_state_change(struct sock *sk)
@@ -279,7 +246,7 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
/* Close a remote connection and tidy up */
static void close_connection(struct connection *con, bool and_other)
{
- down_write(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
if (con->sock) {
sock_release(con->sock);
@@ -294,24 +261,27 @@ static void close_connection(struct connection *con, bool and_other)
con->rx_page = NULL;
}
con->retries = 0;
- up_write(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
}
/* Data received from remote end */
static int receive_from_sock(struct connection *con)
{
int ret = 0;
- struct msghdr msg;
- struct iovec iov[2];
- mm_segment_t fs;
+ struct msghdr msg = {};
+ struct kvec iov[2];
unsigned len;
int r;
int call_again_soon = 0;
+ int nvec;
- down_read(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
+
+ if (con->sock == NULL) {
+ ret = -EAGAIN;
+ goto out_close;
+ }
- if (con->sock == NULL)
- goto out;
if (con->rx_page == NULL) {
/*
* This doesn't need to be atomic, but I think it should
@@ -323,21 +293,13 @@ static int receive_from_sock(struct connection *con)
cbuf_init(&con->cb, PAGE_CACHE_SIZE);
}
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_iovlen = 1;
- msg.msg_iov = iov;
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- msg.msg_flags = 0;
-
/*
* iov[0] is the bit of the circular buffer between the current end
* point (cb.base + cb.len) and the end of the buffer.
*/
iov[0].iov_len = con->cb.base - cbuf_data(&con->cb);
iov[0].iov_base = page_address(con->rx_page) + cbuf_data(&con->cb);
- iov[1].iov_len = 0;
+ nvec = 1;
/*
* iov[1] is the bit of the circular buffer between the start of the
@@ -347,18 +309,18 @@ static int receive_from_sock(struct connection *con)
iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
iov[1].iov_len = con->cb.base;
iov[1].iov_base = page_address(con->rx_page);
- msg.msg_iovlen = 2;
+ nvec = 2;
}
len = iov[0].iov_len + iov[1].iov_len;
- fs = get_fs();
- set_fs(get_ds());
- r = ret = sock_recvmsg(con->sock, &msg, len,
+ r = ret = kernel_recvmsg(con->sock, &msg, iov, nvec, len,
MSG_DONTWAIT | MSG_NOSIGNAL);
- set_fs(fs);
if (ret <= 0)
goto out_close;
+ if (ret == -EAGAIN)
+ goto out_resched;
+
if (ret == len)
call_again_soon = 1;
cbuf_add(&con->cb, ret);
@@ -381,24 +343,26 @@ static int receive_from_sock(struct connection *con)
con->rx_page = NULL;
}
-out:
if (call_again_soon)
goto out_resched;
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return 0;
out_resched:
- lowcomms_data_ready(con->sock->sk, 0);
- up_read(&con->sock_sem);
- cond_resched();
- return 0;
+ if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+ queue_work(recv_workqueue, &con->rwork);
+ mutex_unlock(&con->sock_mutex);
+ return -EAGAIN;
out_close:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
if (ret != -EAGAIN && !test_bit(CF_IS_OTHERCON, &con->flags)) {
close_connection(con, false);
/* Reconnect when there is something to send */
}
+ /* Don't return success if we really got EOF */
+ if (ret == 0)
+ ret = -EAGAIN;
return ret;
}
@@ -412,6 +376,7 @@ static int accept_from_sock(struct connection *con)
int len;
int nodeid;
struct connection *newcon;
+ struct connection *addcon;
memset(&peeraddr, 0, sizeof(peeraddr));
result = sock_create_kern(dlm_local_addr.ss_family, SOCK_STREAM,
@@ -419,7 +384,7 @@ static int accept_from_sock(struct connection *con)
if (result < 0)
return -ENOMEM;
- down_read(&con->sock_sem);
+ mutex_lock_nested(&con->sock_mutex, 0);
result = -ENOTCONN;
if (con->sock == NULL)
@@ -445,7 +410,7 @@ static int accept_from_sock(struct connection *con)
if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
printk("dlm: connect from non cluster node\n");
sock_release(newsock);
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return -1;
}
@@ -462,7 +427,7 @@ static int accept_from_sock(struct connection *con)
result = -ENOMEM;
goto accept_err;
}
- down_write(&newcon->sock_sem);
+ mutex_lock_nested(&newcon->sock_mutex, 1);
if (newcon->sock) {
struct connection *othercon = newcon->othercon;
@@ -470,41 +435,45 @@ static int accept_from_sock(struct connection *con)
othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
if (!othercon) {
printk("dlm: failed to allocate incoming socket\n");
- up_write(&newcon->sock_sem);
+ mutex_unlock(&newcon->sock_mutex);
result = -ENOMEM;
goto accept_err;
}
othercon->nodeid = nodeid;
othercon->rx_action = receive_from_sock;
- init_rwsem(&othercon->sock_sem);
+ mutex_init(&othercon->sock_mutex);
+ INIT_WORK(&othercon->swork, process_send_sockets);
+ INIT_WORK(&othercon->rwork, process_recv_sockets);
set_bit(CF_IS_OTHERCON, &othercon->flags);
newcon->othercon = othercon;
}
othercon->sock = newsock;
newsock->sk->sk_user_data = othercon;
add_sock(newsock, othercon);
+ addcon = othercon;
}
else {
newsock->sk->sk_user_data = newcon;
newcon->rx_action = receive_from_sock;
add_sock(newsock, newcon);
-
+ addcon = newcon;
}
- up_write(&newcon->sock_sem);
+ mutex_unlock(&newcon->sock_mutex);
/*
* Add it to the active queue in case we got data
* beween processing the accept adding the socket
* to the read_sockets list
*/
- lowcomms_data_ready(newsock->sk, 0);
- up_read(&con->sock_sem);
+ if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
+ queue_work(recv_workqueue, &addcon->rwork);
+ mutex_unlock(&con->sock_mutex);
return 0;
accept_err:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
sock_release(newsock);
if (result != -EAGAIN)
@@ -525,7 +494,7 @@ static void connect_to_sock(struct connection *con)
return;
}
- down_write(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
if (con->retries++ > MAX_CONNECT_RETRIES)
goto out;
@@ -548,7 +517,7 @@ static void connect_to_sock(struct connection *con)
sock->sk->sk_user_data = con;
con->rx_action = receive_from_sock;
- make_sockaddr(&saddr, dlm_config.tcp_port, &addr_len);
+ make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len);
add_sock(sock, con);
@@ -577,7 +546,7 @@ out_err:
result = 0;
}
out:
- up_write(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return;
}
@@ -616,10 +585,10 @@ static struct socket *create_listen_sock(struct connection *con,
con->sock = sock;
/* Bind to our port */
- make_sockaddr(saddr, dlm_config.tcp_port, &addr_len);
+ make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len);
result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len);
if (result < 0) {
- printk("dlm: Can't bind to port %d\n", dlm_config.tcp_port);
+ printk("dlm: Can't bind to port %d\n", dlm_config.ci_tcp_port);
sock_release(sock);
sock = NULL;
con->sock = NULL;
@@ -638,7 +607,7 @@ static struct socket *create_listen_sock(struct connection *con,
result = sock->ops->listen(sock, 5);
if (result < 0) {
- printk("dlm: Can't listen on port %d\n", dlm_config.tcp_port);
+ printk("dlm: Can't listen on port %d\n", dlm_config.ci_tcp_port);
sock_release(sock);
sock = NULL;
goto create_out;
@@ -709,6 +678,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len,
if (!con)
return NULL;
+ spin_lock(&con->writequeue_lock);
e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
if ((&e->list == &con->writequeue) ||
(PAGE_CACHE_SIZE - e->end < len)) {
@@ -747,6 +717,7 @@ void dlm_lowcomms_commit_buffer(void *mh)
struct connection *con = e->con;
int users;
+ spin_lock(&con->writequeue_lock);
users = --e->users;
if (users)
goto out;
@@ -754,12 +725,8 @@ void dlm_lowcomms_commit_buffer(void *mh)
kunmap(e->page);
spin_unlock(&con->writequeue_lock);
- if (test_and_set_bit(CF_WRITE_PENDING, &con->flags) == 0) {
- spin_lock_bh(&write_sockets_lock);
- list_add_tail(&con->write_list, &write_sockets);
- spin_unlock_bh(&write_sockets_lock);
-
- wake_up_interruptible(&lowcomms_send_waitq);
+ if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) {
+ queue_work(send_workqueue, &con->swork);
}
return;
@@ -783,7 +750,7 @@ static void send_to_sock(struct connection *con)
struct writequeue_entry *e;
int len, offset;
- down_read(&con->sock_sem);
+ mutex_lock(&con->sock_mutex);
if (con->sock == NULL)
goto out_connect;
@@ -800,6 +767,7 @@ static void send_to_sock(struct connection *con)
offset = e->offset;
BUG_ON(len == 0 && e->users == 0);
spin_unlock(&con->writequeue_lock);
+ kmap(e->page);
ret = 0;
if (len) {
@@ -828,18 +796,18 @@ static void send_to_sock(struct connection *con)
}
spin_unlock(&con->writequeue_lock);
out:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
return;
send_error:
- up_read(&con->sock_sem);
+ mutex_unlock(&con->sock_mutex);
close_connection(con, false);
lowcomms_connect_sock(con);
return;
out_connect:
- up_read(&con->sock_sem);
- lowcomms_connect_sock(con);
+ mutex_unlock(&con->sock_mutex);
+ connect_to_sock(con);
return;
}
@@ -872,7 +840,6 @@ int dlm_lowcomms_close(int nodeid)
if (con) {
clean_one_writequeue(con);
close_connection(con, true);
- atomic_set(&con->waiting_requests, 0);
}
return 0;
@@ -880,102 +847,29 @@ out:
return -1;
}
-/* API send message call, may queue the request */
-/* N.B. This is the old interface - use the new one for new calls */
-int lowcomms_send_message(int nodeid, char *buf, int len, gfp_t allocation)
-{
- struct writequeue_entry *e;
- char *b;
-
- e = dlm_lowcomms_get_buffer(nodeid, len, allocation, &b);
- if (e) {
- memcpy(b, buf, len);
- dlm_lowcomms_commit_buffer(e);
- return 0;
- }
- return -ENOBUFS;
-}
-
/* Look for activity on active sockets */
-static void process_sockets(void)
+static void process_recv_sockets(struct work_struct *work)
{
- struct list_head *list;
- struct list_head *temp;
- int count = 0;
-
- spin_lock_bh(&read_sockets_lock);
- list_for_each_safe(list, temp, &read_sockets) {
-
- struct connection *con =
- list_entry(list, struct connection, read_list);
- list_del(&con->read_list);
- clear_bit(CF_READ_PENDING, &con->flags);
-
- spin_unlock_bh(&read_sockets_lock);
-
- /* This can reach zero if we are processing requests
- * as they come in.
- */
- if (atomic_read(&con->waiting_requests) == 0) {
- spin_lock_bh(&read_sockets_lock);
- continue;
- }
-
- do {
- con->rx_action(con);
-
- /* Don't starve out everyone else */
- if (++count >= MAX_RX_MSG_COUNT) {
- cond_resched();
- count = 0;
- }
+ struct connection *con = container_of(work, struct connection, rwork);
+ int err;
- } while (!atomic_dec_and_test(&con->waiting_requests) &&
- !kthread_should_stop());
-
- spin_lock_bh(&read_sockets_lock);
- }
- spin_unlock_bh(&read_sockets_lock);
+ clear_bit(CF_READ_PENDING, &con->flags);
+ do {
+ err = con->rx_action(con);
+ } while (!err);
}
-/* Try to send any messages that are pending
- */
-static void process_output_queue(void)
-{
- struct list_head *list;
- struct list_head *temp;
-
- spin_lock_bh(&write_sockets_lock);
- list_for_each_safe(list, temp, &write_sockets) {
- struct connection *con =
- list_entry(list, struct connection, write_list);
- clear_bit(CF_WRITE_PENDING, &con->flags);
- list_del(&con->write_list);
- spin_unlock_bh(&write_sockets_lock);
- send_to_sock(con);
- spin_lock_bh(&write_sockets_lock);
- }
- spin_unlock_bh(&write_sockets_lock);
-}
-
-static void process_state_queue(void)
+static void process_send_sockets(struct work_struct *work)
{
- struct list_head *list;
- struct list_head *temp;
-
- spin_lock_bh(&state_sockets_lock);
- list_for_each_safe(list, temp, &state_sockets) {
- struct connection *con =
- list_entry(list, struct connection, state_list);
- list_del(&con->state_list);
- clear_bit(CF_CONNECT_PENDING, &con->flags);
- spin_unlock_bh(&state_sockets_lock);
+ struct connection *con = container_of(work, struct connection, swork);
+ if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) {
connect_to_sock(con);
- spin_lock_bh(&state_sockets_lock);
}
- spin_unlock_bh(&state_sockets_lock);
+
+ clear_bit(CF_WRITE_PENDING, &con->flags);
+ send_to_sock(con);
}
@@ -992,109 +886,33 @@ static void clean_writequeues(void)
}
}
-static int read_list_empty(void)
-{
- int status;
-
- spin_lock_bh(&read_sockets_lock);
- status = list_empty(&read_sockets);
- spin_unlock_bh(&read_sockets_lock);
-
- return status;
-}
-
-/* DLM Transport comms receive daemon */
-static int dlm_recvd(void *data)
+static void work_stop(void)
{
- init_waitqueue_entry(&lowcomms_recv_waitq_head, current);
- add_wait_queue(&lowcomms_recv_waitq, &lowcomms_recv_waitq_head);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (read_list_empty())
- cond_resched();
- set_current_state(TASK_RUNNING);
-
- process_sockets();
- }
-
- return 0;
+ destroy_workqueue(recv_workqueue);
+ destroy_workqueue(send_workqueue);
}
-static int write_and_state_lists_empty(void)
+static int work_start(void)
{
- int status;
-
- spin_lock_bh(&write_sockets_lock);
- status = list_empty(&write_sockets);
- spin_unlock_bh(&write_sockets_lock);
-
- spin_lock_bh(&state_sockets_lock);
- if (list_empty(&state_sockets) == 0)
- status = 0;
- spin_unlock_bh(&state_sockets_lock);
-
- return status;
-}
-
-/* DLM Transport send daemon */
-static int dlm_sendd(void *data)
-{
- init_waitqueue_entry(&lowcomms_send_waitq_head, current);
- add_wait_queue(&lowcomms_send_waitq, &lowcomms_send_waitq_head);
-
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (write_and_state_lists_empty())
- cond_resched();
- set_current_state(TASK_RUNNING);
-
- process_state_queue();
- process_output_queue();
- }
-
- return 0;
-}
-
-static void daemons_stop(void)
-{
- kthread_stop(recv_task);
- kthread_stop(send_task);
-}
-
-static int daemons_start(void)
-{
- struct task_struct *p;
int error;
-
- p = kthread_run(dlm_recvd, NULL, "dlm_recvd");
- error = IS_ERR(p);
+ recv_workqueue = create_workqueue("dlm_recv");
+ error = IS_ERR(recv_workqueue);
if (error) {
- log_print("can't start dlm_recvd %d", error);
+ log_print("can't start dlm_recv %d", error);
return error;
}
- recv_task = p;
- p = kthread_run(dlm_sendd, NULL, "dlm_sendd");
- error = IS_ERR(p);
+ send_workqueue = create_singlethread_workqueue("dlm_send");
+ error = IS_ERR(send_workqueue);
if (error) {
- log_print("can't start dlm_sendd %d", error);
- kthread_stop(recv_task);
+ log_print("can't start dlm_send %d", error);
+ destroy_workqueue(recv_workqueue);
return error;
}
- send_task = p;
return 0;
}
-/*
- * Return the largest buffer size we can cope with.
- */
-int lowcomms_max_buffer_size(void)
-{
- return PAGE_CACHE_SIZE;
-}
-
void dlm_lowcomms_stop(void)
{
int i;
@@ -1107,7 +925,7 @@ void dlm_lowcomms_stop(void)
connections[i]->flags |= 0xFF;
}
- daemons_stop();
+ work_stop();
clean_writequeues();
for (i = 0; i < conn_array_size; i++) {
@@ -1159,7 +977,7 @@ int dlm_lowcomms_start(void)
if (error)
goto fail_unlisten;
- error = daemons_start();
+ error = work_start();
if (error)
goto fail_unlisten;
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 5352b03ff5aa..f858fef6e41c 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -76,9 +76,7 @@ struct dlm_lkb *allocate_lkb(struct dlm_ls *ls)
{
struct dlm_lkb *lkb;
- lkb = kmem_cache_alloc(lkb_cache, GFP_KERNEL);
- if (lkb)
- memset(lkb, 0, sizeof(*lkb));
+ lkb = kmem_cache_zalloc(lkb_cache, GFP_KERNEL);
return lkb;
}
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index c9b1c3d535f4..a5126e0c68a6 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -82,7 +82,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
if (msglen < sizeof(struct dlm_header))
break;
err = -E2BIG;
- if (msglen > dlm_config.buffer_size) {
+ if (msglen > dlm_config.ci_buffer_size) {
log_print("message size %d from %d too big, buf len %d",
msglen, nodeid, len);
break;
@@ -103,7 +103,7 @@ int dlm_process_incoming_buffer(int nodeid, const void *base,
if (msglen > sizeof(__tmp) &&
msg == (struct dlm_header *) __tmp) {
- msg = kmalloc(dlm_config.buffer_size, GFP_KERNEL);
+ msg = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
if (msg == NULL)
return ret;
}
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 4cc31be9cd9d..6bfbd6153809 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -56,6 +56,10 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
rc->rc_type = type;
+ spin_lock(&ls->ls_recover_lock);
+ rc->rc_seq = ls->ls_recover_seq;
+ spin_unlock(&ls->ls_recover_lock);
+
*mh_ret = mh;
*rc_ret = rc;
return 0;
@@ -78,8 +82,17 @@ static void make_config(struct dlm_ls *ls, struct rcom_config *rf)
rf->rf_lsflags = ls->ls_exflags;
}
-static int check_config(struct dlm_ls *ls, struct rcom_config *rf, int nodeid)
+static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
{
+ struct rcom_config *rf = (struct rcom_config *) rc->rc_buf;
+
+ if ((rc->rc_header.h_version & 0xFFFF0000) != DLM_HEADER_MAJOR) {
+ log_error(ls, "version mismatch: %x nodeid %d: %x",
+ DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
+ rc->rc_header.h_version);
+ return -EINVAL;
+ }
+
if (rf->rf_lvblen != ls->ls_lvblen ||
rf->rf_lsflags != ls->ls_exflags) {
log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
@@ -125,7 +138,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
goto out;
allow_sync_reply(ls, &rc->rc_id);
- memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
+ memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
@@ -141,8 +154,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid)
log_debug(ls, "remote node %d not ready", nodeid);
rc->rc_result = 0;
} else
- error = check_config(ls, (struct rcom_config *) rc->rc_buf,
- nodeid);
+ error = check_config(ls, rc, nodeid);
/* the caller looks at rc_result for the remote recovery status */
out:
return error;
@@ -159,6 +171,7 @@ static void receive_rcom_status(struct dlm_ls *ls, struct dlm_rcom *rc_in)
if (error)
return;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = dlm_recover_status(ls);
make_config(ls, (struct rcom_config *) rc->rc_buf);
@@ -200,7 +213,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
if (nodeid == dlm_our_nodeid()) {
dlm_copy_master_names(ls, last_name, last_len,
ls->ls_recover_buf + len,
- dlm_config.buffer_size - len, nodeid);
+ dlm_config.ci_buffer_size - len, nodeid);
goto out;
}
@@ -210,7 +223,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len)
memcpy(rc->rc_buf, last_name, last_len);
allow_sync_reply(ls, &rc->rc_id);
- memset(ls->ls_recover_buf, 0, dlm_config.buffer_size);
+ memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size);
send_rcom(ls, mh, rc);
@@ -224,30 +237,17 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
struct dlm_rcom *rc;
struct dlm_mhandle *mh;
- int error, inlen, outlen;
- int nodeid = rc_in->rc_header.h_nodeid;
- uint32_t status = dlm_recover_status(ls);
-
- /*
- * We can't run dlm_dir_rebuild_send (which uses ls_nodes) while
- * dlm_recoverd is running ls_nodes_reconfig (which changes ls_nodes).
- * It could only happen in rare cases where we get a late NAMES
- * message from a previous instance of recovery.
- */
-
- if (!(status & DLM_RS_NODES)) {
- log_debug(ls, "ignoring RCOM_NAMES from %u", nodeid);
- return;
- }
+ int error, inlen, outlen, nodeid;
nodeid = rc_in->rc_header.h_nodeid;
inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom);
- outlen = dlm_config.buffer_size - sizeof(struct dlm_rcom);
+ outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom);
error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh);
if (error)
return;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
dlm_copy_master_names(ls, rc_in->rc_buf, inlen, rc->rc_buf, outlen,
nodeid);
@@ -294,6 +294,7 @@ static void receive_rcom_lookup(struct dlm_ls *ls, struct dlm_rcom *rc_in)
ret_nodeid = error;
rc->rc_result = ret_nodeid;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
@@ -375,20 +376,13 @@ static void receive_rcom_lock(struct dlm_ls *ls, struct dlm_rcom *rc_in)
memcpy(rc->rc_buf, rc_in->rc_buf, sizeof(struct rcom_lock));
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
send_rcom(ls, mh, rc);
}
static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
{
- uint32_t status = dlm_recover_status(ls);
-
- if (!(status & DLM_RS_DIR)) {
- log_debug(ls, "ignoring RCOM_LOCK_REPLY from %u",
- rc_in->rc_header.h_nodeid);
- return;
- }
-
dlm_recover_process_copy(ls, rc_in);
}
@@ -415,6 +409,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
rc->rc_type = DLM_RCOM_STATUS_REPLY;
rc->rc_id = rc_in->rc_id;
+ rc->rc_seq_reply = rc_in->rc_seq;
rc->rc_result = -ESRCH;
rf = (struct rcom_config *) rc->rc_buf;
@@ -426,6 +421,31 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
return 0;
}
+static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
+{
+ uint64_t seq;
+ int rv = 0;
+
+ switch (rc->rc_type) {
+ case DLM_RCOM_STATUS_REPLY:
+ case DLM_RCOM_NAMES_REPLY:
+ case DLM_RCOM_LOOKUP_REPLY:
+ case DLM_RCOM_LOCK_REPLY:
+ spin_lock(&ls->ls_recover_lock);
+ seq = ls->ls_recover_seq;
+ spin_unlock(&ls->ls_recover_lock);
+ if (rc->rc_seq_reply != seq) {
+ log_debug(ls, "ignoring old reply %x from %d "
+ "seq_reply %llx expect %llx",
+ rc->rc_type, rc->rc_header.h_nodeid,
+ (unsigned long long)rc->rc_seq_reply,
+ (unsigned long long)seq);
+ rv = 1;
+ }
+ }
+ return rv;
+}
+
/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
recovery-only comms are sent through here. */
@@ -449,11 +469,14 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
}
if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
- log_error(ls, "ignoring recovery message %x from %d",
+ log_debug(ls, "ignoring recovery message %x from %d",
rc->rc_type, nodeid);
goto out;
}
+ if (is_old_reply(ls, rc))
+ goto out;
+
if (nodeid != rc->rc_header.h_nodeid) {
log_error(ls, "bad rcom nodeid %d from %d",
rc->rc_header.h_nodeid, nodeid);
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c
index cf9f6831bab5..c2cc7694cd16 100644
--- a/fs/dlm/recover.c
+++ b/fs/dlm/recover.c
@@ -44,7 +44,7 @@
static void dlm_wait_timer_fn(unsigned long data)
{
struct dlm_ls *ls = (struct dlm_ls *) data;
- mod_timer(&ls->ls_timer, jiffies + (dlm_config.recover_timer * HZ));
+ mod_timer(&ls->ls_timer, jiffies + (dlm_config.ci_recover_timer * HZ));
wake_up(&ls->ls_wait_general);
}
@@ -55,7 +55,7 @@ int dlm_wait_function(struct dlm_ls *ls, int (*testfn) (struct dlm_ls *ls))
init_timer(&ls->ls_timer);
ls->ls_timer.function = dlm_wait_timer_fn;
ls->ls_timer.data = (long) ls;
- ls->ls_timer.expires = jiffies + (dlm_config.recover_timer * HZ);
+ ls->ls_timer.expires = jiffies + (dlm_config.ci_recover_timer * HZ);
add_timer(&ls->ls_timer);
wait_event(ls->ls_wait_general, testfn(ls) || dlm_recovery_stopped(ls));
@@ -397,7 +397,9 @@ int dlm_recover_masters(struct dlm_ls *ls)
if (dlm_no_directory(ls))
count += recover_master_static(r);
- else if (!is_master(r) && dlm_is_removed(ls, r->res_nodeid)) {
+ else if (!is_master(r) &&
+ (dlm_is_removed(ls, r->res_nodeid) ||
+ rsb_flag(r, RSB_NEW_MASTER))) {
recover_master(r);
count++;
}
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 650536aa5139..3cb636d60249 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -77,7 +77,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_members(ls, rv, &neg);
if (error) {
- log_error(ls, "recover_members failed %d", error);
+ log_debug(ls, "recover_members failed %d", error);
goto fail;
}
start = jiffies;
@@ -89,7 +89,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_directory(ls);
if (error) {
- log_error(ls, "recover_directory failed %d", error);
+ log_debug(ls, "recover_directory failed %d", error);
goto fail;
}
@@ -99,7 +99,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_directory_wait(ls);
if (error) {
- log_error(ls, "recover_directory_wait failed %d", error);
+ log_debug(ls, "recover_directory_wait failed %d", error);
goto fail;
}
@@ -129,7 +129,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_masters(ls);
if (error) {
- log_error(ls, "recover_masters failed %d", error);
+ log_debug(ls, "recover_masters failed %d", error);
goto fail;
}
@@ -139,13 +139,13 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_locks(ls);
if (error) {
- log_error(ls, "recover_locks failed %d", error);
+ log_debug(ls, "recover_locks failed %d", error);
goto fail;
}
error = dlm_recover_locks_wait(ls);
if (error) {
- log_error(ls, "recover_locks_wait failed %d", error);
+ log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
@@ -166,7 +166,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = dlm_recover_locks_wait(ls);
if (error) {
- log_error(ls, "recover_locks_wait failed %d", error);
+ log_debug(ls, "recover_locks_wait failed %d", error);
goto fail;
}
}
@@ -184,7 +184,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
dlm_set_recover_status(ls, DLM_RS_DONE);
error = dlm_recover_done_wait(ls);
if (error) {
- log_error(ls, "recover_done_wait failed %d", error);
+ log_debug(ls, "recover_done_wait failed %d", error);
goto fail;
}
@@ -192,19 +192,19 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
error = enable_locking(ls, rv->seq);
if (error) {
- log_error(ls, "enable_locking failed %d", error);
+ log_debug(ls, "enable_locking failed %d", error);
goto fail;
}
error = dlm_process_requestqueue(ls);
if (error) {
- log_error(ls, "process_requestqueue failed %d", error);
+ log_debug(ls, "process_requestqueue failed %d", error);
goto fail;
}
error = dlm_recover_waiters_post(ls);
if (error) {
- log_error(ls, "recover_waiters_post failed %d", error);
+ log_debug(ls, "recover_waiters_post failed %d", error);
goto fail;
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index c37e93e4f2df..40db61dc95f2 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -25,7 +25,7 @@
static const char *name_prefix="dlm";
static struct miscdevice ctl_device;
-static struct file_operations device_fops;
+static const struct file_operations device_fops;
#ifdef CONFIG_COMPAT
@@ -180,6 +180,14 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
ua->lksb.sb_status == -EAGAIN && !list_empty(&lkb->lkb_ownqueue))
remove_ownqueue = 1;
+ /* unlocks or cancels of waiting requests need to be removed from the
+ proc's unlocking list, again there must be a better way... */
+
+ if (ua->lksb.sb_status == -DLM_EUNLOCK ||
+ (ua->lksb.sb_status == -DLM_ECANCEL &&
+ lkb->lkb_grmode == DLM_LOCK_IV))
+ remove_ownqueue = 1;
+
/* We want to copy the lvb to userspace when the completion
ast is read if the status is 0, the lock has an lvb and
lvb_ops says we should. We could probably have set_lvb_lock()
@@ -523,6 +531,7 @@ static int device_open(struct inode *inode, struct file *file)
proc->lockspace = ls->ls_local_handle;
INIT_LIST_HEAD(&proc->asts);
INIT_LIST_HEAD(&proc->locks);
+ INIT_LIST_HEAD(&proc->unlocking);
spin_lock_init(&proc->asts_spin);
spin_lock_init(&proc->locks_spin);
init_waitqueue_head(&proc->wait);
@@ -750,7 +759,7 @@ static int ctl_device_close(struct inode *inode, struct file *file)
return 0;
}
-static struct file_operations device_fops = {
+static const struct file_operations device_fops = {
.open = device_open,
.release = device_close,
.read = device_read,
@@ -759,7 +768,7 @@ static struct file_operations device_fops = {
.owner = THIS_MODULE,
};
-static struct file_operations ctl_device_fops = {
+static const struct file_operations ctl_device_fops = {
.open = ctl_device_open,
.release = ctl_device_close,
.write = device_write,
diff --git a/fs/dlm/util.c b/fs/dlm/util.c
index 767197db9944..963889cf6740 100644
--- a/fs/dlm/util.c
+++ b/fs/dlm/util.c
@@ -134,6 +134,8 @@ void dlm_rcom_out(struct dlm_rcom *rc)
rc->rc_type = cpu_to_le32(rc->rc_type);
rc->rc_result = cpu_to_le32(rc->rc_result);
rc->rc_id = cpu_to_le64(rc->rc_id);
+ rc->rc_seq = cpu_to_le64(rc->rc_seq);
+ rc->rc_seq_reply = cpu_to_le64(rc->rc_seq_reply);
if (type == DLM_RCOM_LOCK)
rcom_lock_out((struct rcom_lock *) rc->rc_buf);
@@ -151,6 +153,8 @@ void dlm_rcom_in(struct dlm_rcom *rc)
rc->rc_type = le32_to_cpu(rc->rc_type);
rc->rc_result = le32_to_cpu(rc->rc_result);
rc->rc_id = le64_to_cpu(rc->rc_id);
+ rc->rc_seq = le64_to_cpu(rc->rc_seq);
+ rc->rc_seq_reply = le64_to_cpu(rc->rc_seq_reply);
if (rc->rc_type == DLM_RCOM_LOCK)
rcom_lock_in((struct rcom_lock *) rc->rc_buf);
diff --git a/fs/dquot.c b/fs/dquot.c
index 0952cc474d9a..b16f991662c1 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -79,6 +79,7 @@
#include <linux/buffer_head.h>
#include <linux/capability.h>
#include <linux/quotaops.h>
+#include <linux/writeback.h> /* for inode_lock, oddly enough.. */
#include <asm/uaccess.h>
@@ -600,11 +601,10 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type)
{
struct dquot *dquot;
- dquot = kmem_cache_alloc(dquot_cachep, GFP_NOFS);
+ dquot = kmem_cache_zalloc(dquot_cachep, GFP_NOFS);
if(!dquot)
return NODQUOT;
- memset((caddr_t)dquot, 0, sizeof(struct dquot));
mutex_init(&dquot->dq_lock);
INIT_LIST_HEAD(&dquot->dq_free);
INIT_LIST_HEAD(&dquot->dq_inuse);
@@ -688,23 +688,27 @@ static int dqinit_needed(struct inode *inode, int type)
/* This routine is guarded by dqonoff_mutex mutex */
static void add_dquot_ref(struct super_block *sb, int type)
{
- struct list_head *p;
+ struct inode *inode;
restart:
- file_list_lock();
- list_for_each(p, &sb->s_files) {
- struct file *filp = list_entry(p, struct file, f_u.fu_list);
- struct inode *inode = filp->f_path.dentry->d_inode;
- if (filp->f_mode & FMODE_WRITE && dqinit_needed(inode, type)) {
- struct dentry *dentry = dget(filp->f_path.dentry);
- file_list_unlock();
- sb->dq_op->initialize(inode, type);
- dput(dentry);
- /* As we may have blocked we had better restart... */
- goto restart;
- }
+ spin_lock(&inode_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ if (!atomic_read(&inode->i_writecount))
+ continue;
+ if (!dqinit_needed(inode, type))
+ continue;
+ if (inode->i_state & (I_FREEING|I_WILL_FREE))
+ continue;
+
+ __iget(inode);
+ spin_unlock(&inode_lock);
+
+ sb->dq_op->initialize(inode, type);
+ iput(inode);
+ /* As we may have blocked we had better restart... */
+ goto restart;
}
- file_list_unlock();
+ spin_unlock(&inode_lock);
}
/* Return 0 if dqput() won't block (note that 1 doesn't necessarily mean blocking) */
@@ -756,15 +760,30 @@ static void put_dquot_list(struct list_head *tofree_head)
}
}
+static void remove_dquot_ref(struct super_block *sb, int type,
+ struct list_head *tofree_head)
+{
+ struct inode *inode;
+
+ spin_lock(&inode_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ if (!IS_NOQUOTA(inode))
+ remove_inode_dquot_ref(inode, type, tofree_head);
+ }
+ spin_unlock(&inode_lock);
+}
+
/* Gather all references from inodes and drop them */
static void drop_dquot_ref(struct super_block *sb, int type)
{
LIST_HEAD(tofree_head);
- down_write(&sb_dqopt(sb)->dqptr_sem);
- remove_dquot_ref(sb, type, &tofree_head);
- up_write(&sb_dqopt(sb)->dqptr_sem);
- put_dquot_list(&tofree_head);
+ if (sb->dq_op) {
+ down_write(&sb_dqopt(sb)->dqptr_sem);
+ remove_dquot_ref(sb, type, &tofree_head);
+ up_write(&sb_dqopt(sb)->dqptr_sem);
+ put_dquot_list(&tofree_head);
+ }
}
static inline void dquot_incr_inodes(struct dquot *dquot, unsigned long number)
@@ -1822,7 +1841,7 @@ static int __init dquot_init(void)
printk(KERN_NOTICE "VFS: Disk quotas %s\n", __DQUOT_VERSION__);
- register_sysctl_table(sys_table, 0);
+ register_sysctl_table(sys_table);
dquot_cachep = kmem_cache_create("dquot",
sizeof(struct dquot), sizeof(unsigned long) * 4,
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 4e4762389bdc..03ea7696fe39 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -20,7 +20,7 @@ static void drop_pagecache_sb(struct super_block *sb)
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
if (inode->i_state & (I_FREEING|I_WILL_FREE))
continue;
- invalidate_inode_pages(inode->i_mapping);
+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
}
spin_unlock(&inode_lock);
}
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index ca6562451eeb..1f1107237eab 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
-ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o debug.o
+ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o crypto.o keystore.o messaging.o netlink.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 7196f50fe152..6ac630625b70 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 1997-2004 Erez Zadok
* Copyright (C) 2001-2004 Stony Brook University
- * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Copyright (C) 2004-2007 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
* Michael C. Thompson <mcthomps@us.ibm.com>
*
@@ -207,7 +207,7 @@ ecryptfs_init_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
mutex_init(&crypt_stat->cs_mutex);
mutex_init(&crypt_stat->cs_tfm_mutex);
mutex_init(&crypt_stat->cs_hash_tfm_mutex);
- ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_STRUCT_INITIALIZED);
+ crypt_stat->flags |= ECRYPTFS_STRUCT_INITIALIZED;
}
/**
@@ -305,8 +305,7 @@ static int encrypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
int rc = 0;
BUG_ON(!crypt_stat || !crypt_stat->tfm
- || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
- ECRYPTFS_STRUCT_INITIALIZED));
+ || !(crypt_stat->flags & ECRYPTFS_STRUCT_INITIALIZED));
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "Key size [%d]; key:\n",
crypt_stat->key_size);
@@ -429,10 +428,10 @@ static int ecryptfs_read_in_page(struct ecryptfs_page_crypt_context *ctx,
goto out;
}
} else {
- rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL,
- lower_inode,
- lower_page_idx);
- if (rc) {
+ *lower_page = grab_cache_page(lower_inode->i_mapping,
+ lower_page_idx);
+ if (!(*lower_page)) {
+ rc = -EINVAL;
ecryptfs_printk(
KERN_ERR, "Error attempting to grab and map "
"lower page with index [0x%.16x]; rc = [%d]\n",
@@ -485,7 +484,7 @@ int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx)
lower_inode = ecryptfs_inode_to_lower(ctx->page->mapping->host);
inode_info = ecryptfs_inode_to_private(ctx->page->mapping->host);
crypt_stat = &inode_info->crypt_stat;
- if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) {
+ if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
rc = ecryptfs_copy_page_to_lower(ctx->page, lower_inode,
ctx->param.lower_file);
if (rc)
@@ -617,7 +616,7 @@ int ecryptfs_decrypt_page(struct file *file, struct page *page)
crypt_stat = &(ecryptfs_inode_to_private(
page->mapping->host)->crypt_stat);
lower_inode = ecryptfs_inode_to_lower(page->mapping->host);
- if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)) {
+ if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
rc = ecryptfs_do_readpage(file, page, page->index);
if (rc)
ecryptfs_printk(KERN_ERR, "Error attempting to copy "
@@ -828,9 +827,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
mutex_unlock(&crypt_stat->cs_tfm_mutex);
goto out;
}
- crypto_blkcipher_set_flags(crypt_stat->tfm,
- (ECRYPTFS_DEFAULT_CHAINING_MODE
- | CRYPTO_TFM_REQ_WEAK_KEY));
+ crypto_blkcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
mutex_unlock(&crypt_stat->cs_tfm_mutex);
rc = 0;
out:
@@ -865,7 +862,10 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
} else
crypt_stat->header_extent_size = PAGE_CACHE_SIZE;
- crypt_stat->num_header_extents_at_front = 1;
+ if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
+ crypt_stat->num_header_extents_at_front = 0;
+ else
+ crypt_stat->num_header_extents_at_front = 1;
}
/**
@@ -881,7 +881,7 @@ int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
BUG_ON(crypt_stat->iv_bytes > MD5_DIGEST_SIZE);
BUG_ON(crypt_stat->iv_bytes <= 0);
- if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID)) {
+ if (!(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
rc = -EINVAL;
ecryptfs_printk(KERN_WARNING, "Session key not valid; "
"cannot generate root IV\n");
@@ -898,8 +898,7 @@ int ecryptfs_compute_root_iv(struct ecryptfs_crypt_stat *crypt_stat)
out:
if (rc) {
memset(crypt_stat->root_iv, 0, crypt_stat->iv_bytes);
- ECRYPTFS_SET_FLAG(crypt_stat->flags,
- ECRYPTFS_SECURITY_WARNING);
+ crypt_stat->flags |= ECRYPTFS_SECURITY_WARNING;
}
return rc;
}
@@ -907,7 +906,7 @@ out:
static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
{
get_random_bytes(crypt_stat->key, crypt_stat->key_size);
- ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+ crypt_stat->flags |= ECRYPTFS_KEY_VALID;
ecryptfs_compute_root_iv(crypt_stat);
if (unlikely(ecryptfs_verbosity > 0)) {
ecryptfs_printk(KERN_DEBUG, "Generated new session key:\n");
@@ -917,6 +916,22 @@ static void ecryptfs_generate_new_key(struct ecryptfs_crypt_stat *crypt_stat)
}
/**
+ * ecryptfs_copy_mount_wide_flags_to_inode_flags
+ *
+ * This function propagates the mount-wide flags to individual inode
+ * flags.
+ */
+static void ecryptfs_copy_mount_wide_flags_to_inode_flags(
+ struct ecryptfs_crypt_stat *crypt_stat,
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
+{
+ if (mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED)
+ crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
+ if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+ crypt_stat->flags |= ECRYPTFS_VIEW_AS_ENCRYPTED;
+}
+
+/**
* ecryptfs_set_default_crypt_stat_vals
* @crypt_stat
*
@@ -926,10 +941,12 @@ static void ecryptfs_set_default_crypt_stat_vals(
struct ecryptfs_crypt_stat *crypt_stat,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
{
+ ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
+ mount_crypt_stat);
ecryptfs_set_default_sizes(crypt_stat);
strcpy(crypt_stat->cipher, ECRYPTFS_DEFAULT_CIPHER);
crypt_stat->key_size = ECRYPTFS_DEFAULT_KEY_BYTES;
- ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+ crypt_stat->flags &= ~(ECRYPTFS_KEY_VALID);
crypt_stat->file_version = ECRYPTFS_FILE_VERSION;
crypt_stat->mount_crypt_stat = mount_crypt_stat;
}
@@ -969,8 +986,10 @@ int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
if (mount_crypt_stat->global_auth_tok) {
ecryptfs_printk(KERN_DEBUG, "Initializing context for new "
"file using mount_crypt_stat\n");
- ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
- ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+ crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
+ crypt_stat->flags |= ECRYPTFS_KEY_VALID;
+ ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
+ mount_crypt_stat);
memcpy(crypt_stat->keysigs[crypt_stat->num_keysigs++],
mount_crypt_stat->global_auth_tok_sig,
ECRYPTFS_SIG_SIZE_HEX);
@@ -1003,7 +1022,7 @@ int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry)
*
* Returns one if marker found; zero if not found
*/
-int contains_ecryptfs_marker(char *data)
+static int contains_ecryptfs_marker(char *data)
{
u32 m_1, m_2;
@@ -1029,7 +1048,8 @@ struct ecryptfs_flag_map_elem {
/* Add support for additional flags by adding elements here. */
static struct ecryptfs_flag_map_elem ecryptfs_flag_map[] = {
{0x00000001, ECRYPTFS_ENABLE_HMAC},
- {0x00000002, ECRYPTFS_ENCRYPTED}
+ {0x00000002, ECRYPTFS_ENCRYPTED},
+ {0x00000004, ECRYPTFS_METADATA_IN_XATTR}
};
/**
@@ -1052,11 +1072,9 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
for (i = 0; i < ((sizeof(ecryptfs_flag_map)
/ sizeof(struct ecryptfs_flag_map_elem))); i++)
if (flags & ecryptfs_flag_map[i].file_flag) {
- ECRYPTFS_SET_FLAG(crypt_stat->flags,
- ecryptfs_flag_map[i].local_flag);
+ crypt_stat->flags |= ecryptfs_flag_map[i].local_flag;
} else
- ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
- ecryptfs_flag_map[i].local_flag);
+ crypt_stat->flags &= ~(ecryptfs_flag_map[i].local_flag);
/* Version is in top 8 bits of the 32-bit flag vector */
crypt_stat->file_version = ((flags >> 24) & 0xFF);
(*bytes_read) = 4;
@@ -1093,8 +1111,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
for (i = 0; i < ((sizeof(ecryptfs_flag_map)
/ sizeof(struct ecryptfs_flag_map_elem))); i++)
- if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
- ecryptfs_flag_map[i].local_flag))
+ if (crypt_stat->flags & ecryptfs_flag_map[i].local_flag)
flags |= ecryptfs_flag_map[i].file_flag;
/* Version is in top 8 bits of the 32-bit flag vector */
flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
@@ -1189,8 +1206,8 @@ int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code)
*
* Returns zero on success; non-zero otherwise
*/
-int ecryptfs_read_header_region(char *data, struct dentry *dentry,
- struct vfsmount *mnt)
+static int ecryptfs_read_header_region(char *data, struct dentry *dentry,
+ struct vfsmount *mnt)
{
struct file *lower_file;
mm_segment_t oldfs;
@@ -1219,9 +1236,25 @@ out:
return rc;
}
-static void
-write_header_metadata(char *virt, struct ecryptfs_crypt_stat *crypt_stat,
- size_t *written)
+int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry,
+ struct vfsmount *mnt)
+{
+ int rc;
+
+ rc = ecryptfs_read_header_region(data, dentry, mnt);
+ if (rc)
+ goto out;
+ if (!contains_ecryptfs_marker(data + ECRYPTFS_FILE_SIZE_BYTES))
+ rc = -EINVAL;
+out:
+ return rc;
+}
+
+
+void
+ecryptfs_write_header_metadata(char *virt,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ size_t *written)
{
u32 header_extent_size;
u16 num_header_extents_at_front;
@@ -1270,9 +1303,9 @@ struct kmem_cache *ecryptfs_header_cache_2;
*
* Returns zero on success
*/
-int ecryptfs_write_headers_virt(char *page_virt,
- struct ecryptfs_crypt_stat *crypt_stat,
- struct dentry *ecryptfs_dentry)
+static int ecryptfs_write_headers_virt(char *page_virt, size_t *size,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ struct dentry *ecryptfs_dentry)
{
int rc;
size_t written;
@@ -1283,7 +1316,8 @@ int ecryptfs_write_headers_virt(char *page_virt,
offset += written;
write_ecryptfs_flags((page_virt + offset), crypt_stat, &written);
offset += written;
- write_header_metadata((page_virt + offset), crypt_stat, &written);
+ ecryptfs_write_header_metadata((page_virt + offset), crypt_stat,
+ &written);
offset += written;
rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat,
ecryptfs_dentry, &written,
@@ -1291,11 +1325,70 @@ int ecryptfs_write_headers_virt(char *page_virt,
if (rc)
ecryptfs_printk(KERN_WARNING, "Error generating key packet "
"set; rc = [%d]\n", rc);
+ if (size) {
+ offset += written;
+ *size = offset;
+ }
+ return rc;
+}
+
+static int ecryptfs_write_metadata_to_contents(struct ecryptfs_crypt_stat *crypt_stat,
+ struct file *lower_file,
+ char *page_virt)
+{
+ mm_segment_t oldfs;
+ int current_header_page;
+ int header_pages;
+ ssize_t size;
+ int rc = 0;
+
+ lower_file->f_pos = 0;
+ oldfs = get_fs();
+ set_fs(get_ds());
+ size = vfs_write(lower_file, (char __user *)page_virt, PAGE_CACHE_SIZE,
+ &lower_file->f_pos);
+ if (size < 0) {
+ rc = (int)size;
+ printk(KERN_ERR "Error attempting to write lower page; "
+ "rc = [%d]\n", rc);
+ set_fs(oldfs);
+ goto out;
+ }
+ header_pages = ((crypt_stat->header_extent_size
+ * crypt_stat->num_header_extents_at_front)
+ / PAGE_CACHE_SIZE);
+ memset(page_virt, 0, PAGE_CACHE_SIZE);
+ current_header_page = 1;
+ while (current_header_page < header_pages) {
+ size = vfs_write(lower_file, (char __user *)page_virt,
+ PAGE_CACHE_SIZE, &lower_file->f_pos);
+ if (size < 0) {
+ rc = (int)size;
+ printk(KERN_ERR "Error attempting to write lower page; "
+ "rc = [%d]\n", rc);
+ set_fs(oldfs);
+ goto out;
+ }
+ current_header_page++;
+ }
+ set_fs(oldfs);
+out:
+ return rc;
+}
+
+static int ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ char *page_virt, size_t size)
+{
+ int rc;
+
+ rc = ecryptfs_setxattr(ecryptfs_dentry, ECRYPTFS_XATTR_NAME, page_virt,
+ size, 0);
return rc;
}
/**
- * ecryptfs_write_headers
+ * ecryptfs_write_metadata
* @lower_file: The lower file struct, which was returned from dentry_open
*
* Write the file headers out. This will likely involve a userspace
@@ -1306,22 +1399,18 @@ int ecryptfs_write_headers_virt(char *page_virt,
*
* Returns zero on success; non-zero on error
*/
-int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
- struct file *lower_file)
+int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
+ struct file *lower_file)
{
- mm_segment_t oldfs;
struct ecryptfs_crypt_stat *crypt_stat;
char *page_virt;
- int current_header_page;
- int header_pages;
+ size_t size;
int rc = 0;
crypt_stat = &ecryptfs_inode_to_private(
ecryptfs_dentry->d_inode)->crypt_stat;
- if (likely(ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
- ECRYPTFS_ENCRYPTED))) {
- if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
- ECRYPTFS_KEY_VALID)) {
+ if (likely(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
+ if (!(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
ecryptfs_printk(KERN_DEBUG, "Key is "
"invalid; bailing out\n");
rc = -EINVAL;
@@ -1334,54 +1423,42 @@ int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
goto out;
}
/* Released in this function */
- page_virt = kmem_cache_alloc(ecryptfs_header_cache_0, GFP_USER);
+ page_virt = kmem_cache_zalloc(ecryptfs_header_cache_0, GFP_USER);
if (!page_virt) {
ecryptfs_printk(KERN_ERR, "Out of memory\n");
rc = -ENOMEM;
goto out;
}
- memset(page_virt, 0, PAGE_CACHE_SIZE);
- rc = ecryptfs_write_headers_virt(page_virt, crypt_stat,
- ecryptfs_dentry);
+ rc = ecryptfs_write_headers_virt(page_virt, &size, crypt_stat,
+ ecryptfs_dentry);
if (unlikely(rc)) {
ecryptfs_printk(KERN_ERR, "Error whilst writing headers\n");
memset(page_virt, 0, PAGE_CACHE_SIZE);
goto out_free;
}
- ecryptfs_printk(KERN_DEBUG,
- "Writing key packet set to underlying file\n");
- lower_file->f_pos = 0;
- oldfs = get_fs();
- set_fs(get_ds());
- ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
- "write() w/ header page; lower_file->f_pos = "
- "[0x%.16x]\n", lower_file->f_pos);
- lower_file->f_op->write(lower_file, (char __user *)page_virt,
- PAGE_CACHE_SIZE, &lower_file->f_pos);
- header_pages = ((crypt_stat->header_extent_size
- * crypt_stat->num_header_extents_at_front)
- / PAGE_CACHE_SIZE);
- memset(page_virt, 0, PAGE_CACHE_SIZE);
- current_header_page = 1;
- while (current_header_page < header_pages) {
- ecryptfs_printk(KERN_DEBUG, "Calling lower_file->f_op->"
- "write() w/ zero'd page; lower_file->f_pos = "
- "[0x%.16x]\n", lower_file->f_pos);
- lower_file->f_op->write(lower_file, (char __user *)page_virt,
- PAGE_CACHE_SIZE, &lower_file->f_pos);
- current_header_page++;
+ if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
+ rc = ecryptfs_write_metadata_to_xattr(ecryptfs_dentry,
+ crypt_stat, page_virt,
+ size);
+ else
+ rc = ecryptfs_write_metadata_to_contents(crypt_stat, lower_file,
+ page_virt);
+ if (rc) {
+ printk(KERN_ERR "Error writing metadata out to lower file; "
+ "rc = [%d]\n", rc);
+ goto out_free;
}
- set_fs(oldfs);
- ecryptfs_printk(KERN_DEBUG,
- "Done writing key packet set to underlying file.\n");
out_free:
kmem_cache_free(ecryptfs_header_cache_0, page_virt);
out:
return rc;
}
+#define ECRYPTFS_DONT_VALIDATE_HEADER_SIZE 0
+#define ECRYPTFS_VALIDATE_HEADER_SIZE 1
static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
- char *virt, int *bytes_read)
+ char *virt, int *bytes_read,
+ int validate_header_size)
{
int rc = 0;
u32 header_extent_size;
@@ -1396,9 +1473,10 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
crypt_stat->num_header_extents_at_front =
(int)num_header_extents_at_front;
(*bytes_read) = 6;
- if ((crypt_stat->header_extent_size
- * crypt_stat->num_header_extents_at_front)
- < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
+ if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
+ && ((crypt_stat->header_extent_size
+ * crypt_stat->num_header_extents_at_front)
+ < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
rc = -EINVAL;
ecryptfs_printk(KERN_WARNING, "Invalid header extent size: "
"[%d]\n", crypt_stat->header_extent_size);
@@ -1429,7 +1507,8 @@ static void set_default_header_data(struct ecryptfs_crypt_stat *crypt_stat)
*/
static int ecryptfs_read_headers_virt(char *page_virt,
struct ecryptfs_crypt_stat *crypt_stat,
- struct dentry *ecryptfs_dentry)
+ struct dentry *ecryptfs_dentry,
+ int validate_header_size)
{
int rc = 0;
int offset;
@@ -1463,7 +1542,7 @@ static int ecryptfs_read_headers_virt(char *page_virt,
offset += bytes_read;
if (crypt_stat->file_version >= 1) {
rc = parse_header_metadata(crypt_stat, (page_virt + offset),
- &bytes_read);
+ &bytes_read, validate_header_size);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error reading header "
"metadata; rc = [%d]\n", rc);
@@ -1478,12 +1557,60 @@ out:
}
/**
- * ecryptfs_read_headers
+ * ecryptfs_read_xattr_region
+ *
+ * Attempts to read the crypto metadata from the extended attribute
+ * region of the lower file.
+ */
+int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry)
+{
+ ssize_t size;
+ int rc = 0;
+
+ size = ecryptfs_getxattr(ecryptfs_dentry, ECRYPTFS_XATTR_NAME,
+ page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE);
+ if (size < 0) {
+ printk(KERN_DEBUG "Error attempting to read the [%s] "
+ "xattr from the lower file; return value = [%zd]\n",
+ ECRYPTFS_XATTR_NAME, size);
+ rc = -EINVAL;
+ goto out;
+ }
+out:
+ return rc;
+}
+
+int ecryptfs_read_and_validate_xattr_region(char *page_virt,
+ struct dentry *ecryptfs_dentry)
+{
+ int rc;
+
+ rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_dentry);
+ if (rc)
+ goto out;
+ if (!contains_ecryptfs_marker(page_virt + ECRYPTFS_FILE_SIZE_BYTES)) {
+ printk(KERN_WARNING "Valid data found in [%s] xattr, but "
+ "the marker is invalid\n", ECRYPTFS_XATTR_NAME);
+ rc = -EINVAL;
+ }
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_read_metadata
+ *
+ * Common entry point for reading file metadata. From here, we could
+ * retrieve the header information from the header region of the file,
+ * the xattr region of the file, or some other repository that is
+ * stored separately from the file itself. The current implementation
+ * supports retrieving the metadata information from the file contents
+ * and from the xattr region.
*
* Returns zero if valid headers found and parsed; non-zero otherwise
*/
-int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
- struct file *lower_file)
+int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry,
+ struct file *lower_file)
{
int rc = 0;
char *page_virt = NULL;
@@ -1491,7 +1618,12 @@ int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
ssize_t bytes_read;
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->crypt_stat;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
+ &ecryptfs_superblock_to_private(
+ ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ ecryptfs_copy_mount_wide_flags_to_inode_flags(crypt_stat,
+ mount_crypt_stat);
/* Read the first page from the underlying file */
page_virt = kmem_cache_alloc(ecryptfs_header_cache_1, GFP_USER);
if (!page_virt) {
@@ -1512,11 +1644,36 @@ int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
goto out;
}
rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
- ecryptfs_dentry);
+ ecryptfs_dentry,
+ ECRYPTFS_VALIDATE_HEADER_SIZE);
if (rc) {
- ecryptfs_printk(KERN_DEBUG, "Valid eCryptfs headers not "
- "found\n");
- rc = -EINVAL;
+ rc = ecryptfs_read_xattr_region(page_virt,
+ ecryptfs_dentry);
+ if (rc) {
+ printk(KERN_DEBUG "Valid eCryptfs headers not found in "
+ "file header region or xattr region\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ rc = ecryptfs_read_headers_virt(page_virt, crypt_stat,
+ ecryptfs_dentry,
+ ECRYPTFS_DONT_VALIDATE_HEADER_SIZE);
+ if (rc) {
+ printk(KERN_DEBUG "Valid eCryptfs headers not found in "
+ "file xattr region either\n");
+ rc = -EINVAL;
+ }
+ if (crypt_stat->mount_crypt_stat->flags
+ & ECRYPTFS_XATTR_METADATA_ENABLED) {
+ crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
+ } else {
+ printk(KERN_WARNING "Attempt to access file with "
+ "crypto metadata only in the extended attribute "
+ "region, but eCryptfs was mounted without "
+ "xattr support enabled. eCryptfs will not treat "
+ "this like an encrypted file.\n");
+ rc = -EINVAL;
+ }
}
out:
if (page_virt) {
diff --git a/fs/ecryptfs/debug.c b/fs/ecryptfs/debug.c
index 61f8e894284f..434c7efd80f8 100644
--- a/fs/ecryptfs/debug.c
+++ b/fs/ecryptfs/debug.c
@@ -36,7 +36,7 @@ void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
ecryptfs_printk(KERN_DEBUG, "Auth tok at mem loc [%p]:\n",
auth_tok);
- if (ECRYPTFS_CHECK_FLAG(auth_tok->flags, ECRYPTFS_PRIVATE_KEY)) {
+ if (auth_tok->flags & ECRYPTFS_PRIVATE_KEY) {
ecryptfs_printk(KERN_DEBUG, " * private key type\n");
ecryptfs_printk(KERN_DEBUG, " * (NO PRIVATE KEY SUPPORT "
"IN ECRYPTFS VERSION 0.1)\n");
@@ -46,8 +46,8 @@ void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok)
ECRYPTFS_SALT_SIZE);
salt[ECRYPTFS_SALT_SIZE * 2] = '\0';
ecryptfs_printk(KERN_DEBUG, " * salt = [%s]\n", salt);
- if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
- ECRYPTFS_PERSISTENT_PASSWORD)) {
+ if (auth_tok->token.password.flags &
+ ECRYPTFS_PERSISTENT_PASSWORD) {
ecryptfs_printk(KERN_DEBUG, " * persistent\n");
}
memcpy(sig, auth_tok->token.password.signature,
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index afb64bdbe6ad..403e3bad1455 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -4,8 +4,10 @@
*
* Copyright (C) 1997-2003 Erez Zadok
* Copyright (C) 2001-2003 Stony Brook University
- * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Copyright (C) 2004-2007 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
+ * Trevor S. Highland <trevor.highland@gmail.com>
+ * Tyler Hicks <tyhicks@ou.edu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -31,22 +33,25 @@
#include <linux/fs_stack.h>
#include <linux/namei.h>
#include <linux/scatterlist.h>
+#include <linux/hash.h>
/* Version verification for shared data structures w/ userspace */
#define ECRYPTFS_VERSION_MAJOR 0x00
#define ECRYPTFS_VERSION_MINOR 0x04
-#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x01
+#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x02
/* These flags indicate which features are supported by the kernel
* module; userspace tools such as the mount helper read
* ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine
* how to behave. */
-#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
-#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
+#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001
+#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002
#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004
-#define ECRYPTFS_VERSIONING_POLICY 0x00000008
+#define ECRYPTFS_VERSIONING_POLICY 0x00000008
+#define ECRYPTFS_VERSIONING_XATTR 0x00000010
#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \
- | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH)
-
+ | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \
+ | ECRYPTFS_VERSIONING_PUBKEY \
+ | ECRYPTFS_VERSIONING_XATTR)
#define ECRYPTFS_MAX_PASSWORD_LENGTH 64
#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH
#define ECRYPTFS_SALT_SIZE 8
@@ -60,10 +65,25 @@
#define ECRYPTFS_MAX_KEY_BYTES 64
#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512
#define ECRYPTFS_DEFAULT_IV_BYTES 16
-#define ECRYPTFS_FILE_VERSION 0x01
+#define ECRYPTFS_FILE_VERSION 0x02
#define ECRYPTFS_DEFAULT_HEADER_EXTENT_SIZE 8192
#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
#define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192
+#define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32
+#define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ
+#define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3)
+#define ECRYPTFS_NLMSG_HELO 100
+#define ECRYPTFS_NLMSG_QUIT 101
+#define ECRYPTFS_NLMSG_REQUEST 102
+#define ECRYPTFS_NLMSG_RESPONSE 103
+#define ECRYPTFS_MAX_PKI_NAME_BYTES 16
+#define ECRYPTFS_DEFAULT_NUM_USERS 4
+#define ECRYPTFS_MAX_NUM_USERS 32768
+#define ECRYPTFS_TRANSPORT_NETLINK 0
+#define ECRYPTFS_TRANSPORT_CONNECTOR 1
+#define ECRYPTFS_TRANSPORT_RELAYFS 2
+#define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_NETLINK
+#define ECRYPTFS_XATTR_NAME "user.ecryptfs"
#define RFC2440_CIPHER_DES3_EDE 0x02
#define RFC2440_CIPHER_CAST_5 0x03
@@ -74,9 +94,7 @@
#define RFC2440_CIPHER_TWOFISH 0x0a
#define RFC2440_CIPHER_CAST_6 0x0b
-#define ECRYPTFS_SET_FLAG(flag_bit_vector, flag) (flag_bit_vector |= (flag))
-#define ECRYPTFS_CLEAR_FLAG(flag_bit_vector, flag) (flag_bit_vector &= ~(flag))
-#define ECRYPTFS_CHECK_FLAG(flag_bit_vector, flag) (flag_bit_vector & (flag))
+#define RFC2440_CIPHER_RSA 0x01
/**
* For convenience, we may need to pass around the encrypted session
@@ -114,6 +132,14 @@ struct ecryptfs_password {
enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY};
+struct ecryptfs_private_key {
+ u32 key_size;
+ u32 data_len;
+ u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1];
+ char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1];
+ u8 data[];
+};
+
/* May be a password or a private key */
struct ecryptfs_auth_tok {
u16 version; /* 8-bit major and 8-bit minor */
@@ -123,7 +149,7 @@ struct ecryptfs_auth_tok {
u8 reserved[32];
union {
struct ecryptfs_password password;
- /* Private key is in future eCryptfs releases */
+ struct ecryptfs_private_key private_key;
} token;
} __attribute__ ((packed));
@@ -176,10 +202,14 @@ ecryptfs_get_key_payload_data(struct key *key)
#define ECRYPTFS_FILE_SIZE_BYTES 8
#define ECRYPTFS_DEFAULT_CIPHER "aes"
#define ECRYPTFS_DEFAULT_KEY_BYTES 16
-#define ECRYPTFS_DEFAULT_CHAINING_MODE CRYPTO_TFM_MODE_CBC
#define ECRYPTFS_DEFAULT_HASH "md5"
+#define ECRYPTFS_TAG_1_PACKET_TYPE 0x01
#define ECRYPTFS_TAG_3_PACKET_TYPE 0x8C
#define ECRYPTFS_TAG_11_PACKET_TYPE 0xED
+#define ECRYPTFS_TAG_64_PACKET_TYPE 0x40
+#define ECRYPTFS_TAG_65_PACKET_TYPE 0x41
+#define ECRYPTFS_TAG_66_PACKET_TYPE 0x42
+#define ECRYPTFS_TAG_67_PACKET_TYPE 0x43
#define MD5_DIGEST_SIZE 16
/**
@@ -196,6 +226,8 @@ struct ecryptfs_crypt_stat {
#define ECRYPTFS_ENABLE_HMAC 0x00000020
#define ECRYPTFS_ENCRYPT_IV_PAGES 0x00000040
#define ECRYPTFS_KEY_VALID 0x00000080
+#define ECRYPTFS_METADATA_IN_XATTR 0x00000100
+#define ECRYPTFS_VIEW_AS_ENCRYPTED 0x00000200
u32 flags;
unsigned int file_version;
size_t iv_bytes;
@@ -242,6 +274,8 @@ struct ecryptfs_dentry_info {
struct ecryptfs_mount_crypt_stat {
/* Pointers to memory we do not own, do not free these */
#define ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED 0x00000001
+#define ECRYPTFS_XATTR_METADATA_ENABLED 0x00000002
+#define ECRYPTFS_ENCRYPTED_VIEW_ENABLED 0x00000004
u32 flags;
struct ecryptfs_auth_tok *global_auth_tok;
struct key *global_auth_tok_key;
@@ -272,6 +306,33 @@ struct ecryptfs_auth_tok_list_item {
struct ecryptfs_auth_tok auth_tok;
};
+struct ecryptfs_message {
+ u32 index;
+ u32 data_len;
+ u8 data[];
+};
+
+struct ecryptfs_msg_ctx {
+#define ECRYPTFS_MSG_CTX_STATE_FREE 0x0001
+#define ECRYPTFS_MSG_CTX_STATE_PENDING 0x0002
+#define ECRYPTFS_MSG_CTX_STATE_DONE 0x0003
+ u32 state;
+ unsigned int index;
+ unsigned int counter;
+ struct ecryptfs_message *msg;
+ struct task_struct *task;
+ struct list_head node;
+ struct mutex mux;
+};
+
+extern unsigned int ecryptfs_transport;
+
+struct ecryptfs_daemon_id {
+ pid_t pid;
+ uid_t uid;
+ struct hlist_node id_chain;
+};
+
static inline struct ecryptfs_file_info *
ecryptfs_file_to_private(struct file *file)
{
@@ -385,13 +446,16 @@ void __ecryptfs_printk(const char *fmt, ...);
extern const struct file_operations ecryptfs_main_fops;
extern const struct file_operations ecryptfs_dir_fops;
-extern struct inode_operations ecryptfs_main_iops;
-extern struct inode_operations ecryptfs_dir_iops;
-extern struct inode_operations ecryptfs_symlink_iops;
-extern struct super_operations ecryptfs_sops;
+extern const struct inode_operations ecryptfs_main_iops;
+extern const struct inode_operations ecryptfs_dir_iops;
+extern const struct inode_operations ecryptfs_symlink_iops;
+extern const struct super_operations ecryptfs_sops;
extern struct dentry_operations ecryptfs_dops;
extern struct address_space_operations ecryptfs_aops;
extern int ecryptfs_verbosity;
+extern unsigned int ecryptfs_message_buf_len;
+extern signed long ecryptfs_message_wait_timeout;
+extern unsigned int ecryptfs_number_of_users;
extern struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
extern struct kmem_cache *ecryptfs_file_info_cache;
@@ -401,7 +465,9 @@ extern struct kmem_cache *ecryptfs_sb_info_cache;
extern struct kmem_cache *ecryptfs_header_cache_0;
extern struct kmem_cache *ecryptfs_header_cache_1;
extern struct kmem_cache *ecryptfs_header_cache_2;
+extern struct kmem_cache *ecryptfs_xattr_cache;
extern struct kmem_cache *ecryptfs_lower_page_cache;
+extern struct kmem_cache *ecryptfs_key_record_cache;
int ecryptfs_interpose(struct dentry *hidden_dentry,
struct dentry *this_dentry, struct super_block *sb,
@@ -427,9 +493,13 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat);
int ecryptfs_crypto_api_algify_cipher_name(char **algified_name,
char *cipher_name,
char *chaining_modifier);
-int ecryptfs_write_inode_size_to_header(struct file *lower_file,
- struct inode *lower_inode,
- struct inode *inode);
+#define ECRYPTFS_LOWER_I_MUTEX_NOT_HELD 0
+#define ECRYPTFS_LOWER_I_MUTEX_HELD 1
+int ecryptfs_write_inode_size_to_metadata(struct file *lower_file,
+ struct inode *lower_inode,
+ struct inode *inode,
+ struct dentry *ecryptfs_dentry,
+ int lower_i_mutex_held);
int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
struct file *lower_file,
unsigned long lower_page_index, int byte_offset,
@@ -442,26 +512,20 @@ int ecryptfs_copy_page_to_lower(struct page *page, struct inode *lower_inode,
struct file *lower_file);
int ecryptfs_do_readpage(struct file *file, struct page *page,
pgoff_t lower_page_index);
-int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
- char **lower_virt,
- struct inode *lower_inode,
- unsigned long lower_page_index);
int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
struct inode *lower_inode,
struct writeback_control *wbc);
int ecryptfs_encrypt_page(struct ecryptfs_page_crypt_context *ctx);
int ecryptfs_decrypt_page(struct file *file, struct page *page);
-int ecryptfs_write_headers(struct dentry *ecryptfs_dentry,
+int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry,
+ struct file *lower_file);
+int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry,
struct file *lower_file);
-int ecryptfs_write_headers_virt(char *page_virt,
- struct ecryptfs_crypt_stat *crypt_stat,
- struct dentry *ecryptfs_dentry);
-int ecryptfs_read_headers(struct dentry *ecryptfs_dentry,
- struct file *lower_file);
int ecryptfs_new_file_context(struct dentry *ecryptfs_dentry);
-int contains_ecryptfs_marker(char *data);
-int ecryptfs_read_header_region(char *data, struct dentry *dentry,
- struct vfsmount *mnt);
+int ecryptfs_read_and_validate_header_region(char *data, struct dentry *dentry,
+ struct vfsmount *mnt);
+int ecryptfs_read_and_validate_xattr_region(char *page_virt,
+ struct dentry *ecryptfs_dentry);
u16 ecryptfs_code_for_cipher_string(struct ecryptfs_crypt_stat *crypt_stat);
int ecryptfs_cipher_code_to_string(char *str, u16 cipher_code);
void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat);
@@ -484,5 +548,37 @@ int ecryptfs_open_lower_file(struct file **lower_file,
struct dentry *lower_dentry,
struct vfsmount *lower_mnt, int flags);
int ecryptfs_close_lower_file(struct file *lower_file);
+ssize_t ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
+ size_t size);
+int
+ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags);
+int ecryptfs_read_xattr_region(char *page_virt, struct dentry *ecryptfs_dentry);
+int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid);
+int ecryptfs_process_quit(uid_t uid, pid_t pid);
+int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid,
+ pid_t pid, u32 seq);
+int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
+ struct ecryptfs_msg_ctx **msg_ctx);
+int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
+ struct ecryptfs_message **emsg);
+int ecryptfs_init_messaging(unsigned int transport);
+void ecryptfs_release_messaging(unsigned int transport);
+
+int ecryptfs_send_netlink(char *data, int data_len,
+ struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type,
+ u16 msg_flags, pid_t daemon_pid);
+int ecryptfs_init_netlink(void);
+void ecryptfs_release_netlink(void);
+
+int ecryptfs_send_connector(char *data, int data_len,
+ struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type,
+ u16 msg_flags, pid_t daemon_pid);
+int ecryptfs_init_connector(void);
+void ecryptfs_release_connector(void);
+void
+ecryptfs_write_header_metadata(char *virt,
+ struct ecryptfs_crypt_stat *crypt_stat,
+ size_t *written);
#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index c5a2e5298f15..bd969adf70d7 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 1997-2004 Erez Zadok
* Copyright (C) 2001-2004 Stony Brook University
- * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Copyright (C) 2004-2007 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
* Michael C. Thompson <mcthomps@us.ibm.com>
*
@@ -250,8 +250,19 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
struct ecryptfs_file_info *file_info;
int lower_flags;
+ mount_crypt_stat = &ecryptfs_superblock_to_private(
+ ecryptfs_dentry->d_sb)->mount_crypt_stat;
+ if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+ && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
+ || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
+ || (file->f_flags & O_APPEND))) {
+ printk(KERN_WARNING "Mount has encrypted view enabled; "
+ "files may only be read\n");
+ rc = -EPERM;
+ goto out;
+ }
/* Released in ecryptfs_release or end of function if failure */
- file_info = kmem_cache_alloc(ecryptfs_file_info_cache, GFP_KERNEL);
+ file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
ecryptfs_set_file_private(file, file_info);
if (!file_info) {
ecryptfs_printk(KERN_ERR,
@@ -259,17 +270,14 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
rc = -ENOMEM;
goto out;
}
- memset(file_info, 0, sizeof(*file_info));
lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
- mount_crypt_stat = &ecryptfs_superblock_to_private(
- ecryptfs_dentry->d_sb)->mount_crypt_stat;
mutex_lock(&crypt_stat->cs_mutex);
- if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED)) {
+ if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) {
ecryptfs_printk(KERN_DEBUG, "Setting flags for stat...\n");
/* Policy code enabled in future release */
- ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED);
- ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+ crypt_stat->flags |= ECRYPTFS_POLICY_APPLIED;
+ crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
}
mutex_unlock(&crypt_stat->cs_mutex);
lower_flags = file->f_flags;
@@ -289,31 +297,14 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
lower_inode = lower_dentry->d_inode;
if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
- ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+ crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
rc = 0;
goto out;
}
mutex_lock(&crypt_stat->cs_mutex);
- if (i_size_read(lower_inode) < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE) {
- if (!(mount_crypt_stat->flags
- & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
- rc = -EIO;
- printk(KERN_WARNING "Attempt to read file that is "
- "not in a valid eCryptfs format, and plaintext "
- "passthrough mode is not enabled; returning "
- "-EIO\n");
- mutex_unlock(&crypt_stat->cs_mutex);
- goto out_puts;
- }
- crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
- rc = 0;
- mutex_unlock(&crypt_stat->cs_mutex);
- goto out;
- } else if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
- ECRYPTFS_POLICY_APPLIED)
- || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags,
- ECRYPTFS_KEY_VALID)) {
- rc = ecryptfs_read_headers(ecryptfs_dentry, lower_file);
+ if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
+ || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
+ rc = ecryptfs_read_metadata(ecryptfs_dentry, lower_file);
if (rc) {
ecryptfs_printk(KERN_DEBUG,
"Valid headers not found\n");
@@ -327,9 +318,8 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
mutex_unlock(&crypt_stat->cs_mutex);
goto out_puts;
}
- ECRYPTFS_CLEAR_FLAG(crypt_stat->flags,
- ECRYPTFS_ENCRYPTED);
rc = 0;
+ crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
mutex_unlock(&crypt_stat->cs_mutex);
goto out;
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 11f5e5076aef..9fa7e0b27a96 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -3,7 +3,7 @@
*
* Copyright (C) 1997-2004 Erez Zadok
* Copyright (C) 2001-2004 Stony Brook University
- * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Copyright (C) 2004-2007 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
* Michael C. Thompsion <mcthomps@us.ibm.com>
*
@@ -161,17 +161,17 @@ static int grow_file(struct dentry *ecryptfs_dentry, struct file *lower_file,
ecryptfs_set_file_lower(&fake_file, lower_file);
rc = ecryptfs_fill_zeros(&fake_file, 1);
if (rc) {
- ECRYPTFS_SET_FLAG(
- ecryptfs_inode_to_private(inode)->crypt_stat.flags,
- ECRYPTFS_SECURITY_WARNING);
+ ecryptfs_inode_to_private(inode)->crypt_stat.flags |=
+ ECRYPTFS_SECURITY_WARNING;
ecryptfs_printk(KERN_WARNING, "Error attempting to fill zeros "
"in file; rc = [%d]\n", rc);
goto out;
}
i_size_write(inode, 0);
- ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
- ECRYPTFS_SET_FLAG(ecryptfs_inode_to_private(inode)->crypt_stat.flags,
- ECRYPTFS_NEW_FILE);
+ ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode, inode,
+ ecryptfs_dentry,
+ ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
+ ecryptfs_inode_to_private(inode)->crypt_stat.flags |= ECRYPTFS_NEW_FILE;
out:
return rc;
}
@@ -199,7 +199,7 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
lower_dentry->d_name.name);
inode = ecryptfs_dentry->d_inode;
crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
- lower_flags = ((O_CREAT | O_WRONLY | O_TRUNC) & O_ACCMODE) | O_RDWR;
+ lower_flags = ((O_CREAT | O_TRUNC) & O_ACCMODE) | O_RDWR;
#if BITS_PER_LONG != 32
lower_flags |= O_LARGEFILE;
#endif
@@ -214,10 +214,10 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
lower_inode = lower_dentry->d_inode;
if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
- ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED);
+ crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
goto out_fput;
}
- ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
+ crypt_stat->flags |= ECRYPTFS_NEW_FILE;
ecryptfs_printk(KERN_DEBUG, "Initializing crypto context\n");
rc = ecryptfs_new_file_context(ecryptfs_dentry);
if (rc) {
@@ -225,7 +225,7 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
"context\n");
goto out_fput;
}
- rc = ecryptfs_write_headers(ecryptfs_dentry, lower_file);
+ rc = ecryptfs_write_metadata(ecryptfs_dentry, lower_file);
if (rc) {
ecryptfs_printk(KERN_DEBUG, "Error writing headers\n");
goto out_fput;
@@ -287,6 +287,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
char *encoded_name;
unsigned int encoded_namelen;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
char *page_virt = NULL;
struct inode *lower_inode;
u64 file_size;
@@ -361,34 +362,44 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
/* Released in this function */
- page_virt =
- (char *)kmem_cache_alloc(ecryptfs_header_cache_2,
- GFP_USER);
+ page_virt = kmem_cache_zalloc(ecryptfs_header_cache_2,
+ GFP_USER);
if (!page_virt) {
rc = -ENOMEM;
ecryptfs_printk(KERN_ERR,
"Cannot ecryptfs_kmalloc a page\n");
goto out_dput;
}
- memset(page_virt, 0, PAGE_CACHE_SIZE);
- rc = ecryptfs_read_header_region(page_virt, lower_dentry, nd->mnt);
crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
- if (!ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_POLICY_APPLIED))
+ if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
ecryptfs_set_default_sizes(crypt_stat);
+ rc = ecryptfs_read_and_validate_header_region(page_virt, lower_dentry,
+ nd->mnt);
if (rc) {
- rc = 0;
- ecryptfs_printk(KERN_WARNING, "Error reading header region;"
- " assuming unencrypted\n");
- } else {
- if (!contains_ecryptfs_marker(page_virt
- + ECRYPTFS_FILE_SIZE_BYTES)) {
+ rc = ecryptfs_read_and_validate_xattr_region(page_virt, dentry);
+ if (rc) {
+ printk(KERN_DEBUG "Valid metadata not found in header "
+ "region or xattr region; treating file as "
+ "unencrypted\n");
+ rc = 0;
kmem_cache_free(ecryptfs_header_cache_2, page_virt);
goto out;
}
+ crypt_stat->flags |= ECRYPTFS_METADATA_IN_XATTR;
+ }
+ mount_crypt_stat = &ecryptfs_superblock_to_private(
+ dentry->d_sb)->mount_crypt_stat;
+ if (mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED) {
+ if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
+ file_size = (crypt_stat->header_extent_size
+ + i_size_read(lower_dentry->d_inode));
+ else
+ file_size = i_size_read(lower_dentry->d_inode);
+ } else {
memcpy(&file_size, page_virt, sizeof(file_size));
file_size = be64_to_cpu(file_size);
- i_size_write(dentry->d_inode, (loff_t)file_size);
}
+ i_size_write(dentry->d_inode, (loff_t)file_size);
kmem_cache_free(ecryptfs_header_cache_2, page_virt);
goto out;
@@ -782,20 +793,26 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
goto out_fput;
}
i_size_write(inode, new_length);
- rc = ecryptfs_write_inode_size_to_header(lower_file,
- lower_dentry->d_inode,
- inode);
+ rc = ecryptfs_write_inode_size_to_metadata(
+ lower_file, lower_dentry->d_inode, inode, dentry,
+ ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
if (rc) {
- ecryptfs_printk(KERN_ERR,
- "Problem with ecryptfs_write"
- "_inode_size\n");
+ printk(KERN_ERR "Problem with "
+ "ecryptfs_write_inode_size_to_metadata; "
+ "rc = [%d]\n", rc);
goto out_fput;
}
} else { /* new_length < i_size_read(inode) */
vmtruncate(inode, new_length);
- ecryptfs_write_inode_size_to_header(lower_file,
- lower_dentry->d_inode,
- inode);
+ rc = ecryptfs_write_inode_size_to_metadata(
+ lower_file, lower_dentry->d_inode, inode, dentry,
+ ECRYPTFS_LOWER_I_MUTEX_NOT_HELD);
+ if (rc) {
+ printk(KERN_ERR "Problem with "
+ "ecryptfs_write_inode_size_to_metadata; "
+ "rc = [%d]\n", rc);
+ goto out_fput;
+ }
/* We are reducing the size of the ecryptfs file, and need to
* know if we need to reduce the size of the lower file. */
lower_size_before_truncate =
@@ -882,7 +899,7 @@ out:
return rc;
}
-static int
+int
ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
@@ -902,7 +919,7 @@ out:
return rc;
}
-static ssize_t
+ssize_t
ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value,
size_t size)
{
@@ -972,7 +989,7 @@ int ecryptfs_inode_set(struct inode *inode, void *lower_inode)
return 0;
}
-struct inode_operations ecryptfs_symlink_iops = {
+const struct inode_operations ecryptfs_symlink_iops = {
.readlink = ecryptfs_readlink,
.follow_link = ecryptfs_follow_link,
.put_link = ecryptfs_put_link,
@@ -984,7 +1001,7 @@ struct inode_operations ecryptfs_symlink_iops = {
.removexattr = ecryptfs_removexattr
};
-struct inode_operations ecryptfs_dir_iops = {
+const struct inode_operations ecryptfs_dir_iops = {
.create = ecryptfs_create,
.lookup = ecryptfs_lookup,
.link = ecryptfs_link,
@@ -1002,7 +1019,7 @@ struct inode_operations ecryptfs_dir_iops = {
.removexattr = ecryptfs_removexattr
};
-struct inode_operations ecryptfs_main_iops = {
+const struct inode_operations ecryptfs_main_iops = {
.permission = ecryptfs_permission,
.setattr = ecryptfs_setattr,
.setxattr = ecryptfs_setxattr,
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 745c0f1bfbbd..b550dea8eee6 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -7,6 +7,7 @@
* Copyright (C) 2004-2006 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
* Michael C. Thompson <mcthomps@us.ibm.com>
+ * Trevor S. Highland <trevor.highland@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -25,7 +26,6 @@
*/
#include <linux/string.h>
-#include <linux/sched.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/key.h>
@@ -64,26 +64,6 @@ int process_request_key_err(long err_code)
return rc;
}
-static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
-{
- struct list_head *walker;
- struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
-
- walker = auth_tok_list_head->next;
- while (walker != auth_tok_list_head) {
- auth_tok_list_item =
- list_entry(walker, struct ecryptfs_auth_tok_list_item,
- list);
- walker = auth_tok_list_item->list.next;
- memset(auth_tok_list_item, 0,
- sizeof(struct ecryptfs_auth_tok_list_item));
- kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
- auth_tok_list_item);
- }
-}
-
-struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
-
/**
* parse_packet_length
* @data: Pointer to memory containing length at offset
@@ -102,12 +82,12 @@ static int parse_packet_length(unsigned char *data, size_t *size,
(*size) = 0;
if (data[0] < 192) {
/* One-byte length */
- (*size) = data[0];
+ (*size) = (unsigned char)data[0];
(*length_size) = 1;
} else if (data[0] < 224) {
/* Two-byte length */
- (*size) = ((data[0] - 192) * 256);
- (*size) += (data[1] + 192);
+ (*size) = (((unsigned char)(data[0]) - 192) * 256);
+ (*size) += ((unsigned char)(data[1]) + 192);
(*length_size) = 2;
} else if (data[0] == 255) {
/* Five-byte length; we're not supposed to see this */
@@ -154,6 +134,499 @@ static int write_packet_length(char *dest, size_t size,
return rc;
}
+static int
+write_tag_64_packet(char *signature, struct ecryptfs_session_key *session_key,
+ char **packet, size_t *packet_len)
+{
+ size_t i = 0;
+ size_t data_len;
+ size_t packet_size_len;
+ char *message;
+ int rc;
+
+ /*
+ * ***** TAG 64 Packet Format *****
+ * | Content Type | 1 byte |
+ * | Key Identifier Size | 1 or 2 bytes |
+ * | Key Identifier | arbitrary |
+ * | Encrypted File Encryption Key Size | 1 or 2 bytes |
+ * | Encrypted File Encryption Key | arbitrary |
+ *
+ * Builds a tag 64 packet carrying the key signature and the
+ * encrypted session key. The packet is written into a buffer
+ * allocated here and handed back through *packet; *packet_len is
+ * set to the number of bytes written, on success only.
+ *
+ * The constant 5 in data_len is the content type byte plus the
+ * worst case of two bytes for each of the two length fields.
+ *
+ * Returns zero on success, -ENOMEM if the buffer cannot be
+ * allocated, or the write_packet_length() error code.
+ *
+ * NOTE(review): when write_packet_length() fails, the buffer is
+ * still allocated and still referenced by *packet; presumably the
+ * caller is expected to free it -- confirm against the callers.
+ */
+ data_len = (5 + ECRYPTFS_SIG_SIZE_HEX
+ + session_key->encrypted_key_size);
+ *packet = kmalloc(data_len, GFP_KERNEL);
+ message = *packet;
+ if (!message) {
+ ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ message[i++] = ECRYPTFS_TAG_64_PACKET_TYPE;
+ rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
+ &packet_size_len);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
+ "header; cannot generate packet length\n");
+ goto out;
+ }
+ i += packet_size_len;
+ memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
+ i += ECRYPTFS_SIG_SIZE_HEX;
+ rc = write_packet_length(&message[i], session_key->encrypted_key_size,
+ &packet_size_len);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error generating tag 64 packet "
+ "header; cannot generate packet length\n");
+ goto out;
+ }
+ i += packet_size_len;
+ memcpy(&message[i], session_key->encrypted_key,
+ session_key->encrypted_key_size);
+ i += session_key->encrypted_key_size;
+ *packet_len = i;
+out:
+ return rc;
+}
+
+/**
+ * parse_tag_65_packet
+ * @session_key: Receives the decrypted key bytes and the decrypted key
+ * size extracted from the packet.
+ * @cipher_code: Receives the one-byte cipher code that precedes the key.
+ * @msg: Message whose data[] holds the tag 65 packet and whose data_len
+ * gives the number of valid bytes.
+ *
+ * Validates the packet type, status byte, lengths and the two-byte
+ * additive checksum before accepting the key.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+parse_tag_65_packet(struct ecryptfs_session_key *session_key, u16 *cipher_code,
+ struct ecryptfs_message *msg)
+{
+ size_t i = 0;
+ char *data;
+ size_t data_len;
+ size_t m_size;
+ size_t message_len;
+ u16 checksum = 0;
+ u16 expected_checksum = 0;
+ int rc;
+
+ /*
+ * ***** TAG 65 Packet Format *****
+ * | Content Type | 1 byte |
+ * | Status Indicator | 1 byte |
+ * | File Encryption Key Size | 1 or 2 bytes |
+ * | File Encryption Key | arbitrary |
+ *
+ * The "File Encryption Key" field is itself laid out as a one-byte
+ * cipher code, the key bytes, and a two-byte big-endian checksum.
+ */
+ message_len = msg->data_len;
+ data = msg->data;
+ /* Type, status and the longest (two-byte) length field must be present */
+ if (message_len < 4) {
+ rc = -EIO;
+ goto out;
+ }
+ if (data[i++] != ECRYPTFS_TAG_65_PACKET_TYPE) {
+ ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_65\n");
+ rc = -EIO;
+ goto out;
+ }
+ if (data[i++]) {
+ ecryptfs_printk(KERN_ERR, "Status indicator has non-zero value "
+ "[%d]\n", data[i-1]);
+ rc = -EIO;
+ goto out;
+ }
+ rc = parse_packet_length(&data[i], &m_size, &data_len);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
+ "rc = [%d]\n", rc);
+ goto out;
+ }
+ i += data_len;
+ if (message_len < (i + m_size)) {
+ ecryptfs_printk(KERN_ERR, "The received netlink message is "
+ "shorter than expected\n");
+ rc = -EIO;
+ goto out;
+ }
+ if (m_size < 3) {
+ ecryptfs_printk(KERN_ERR,
+ "The decrypted key is not long enough to "
+ "include a cipher code and checksum\n");
+ rc = -EIO;
+ goto out;
+ }
+ /* Read the byte as unsigned so a value >= 0x80 is not sign-extended */
+ *cipher_code = (unsigned char)data[i++];
+ /* The decrypted key includes 1 byte cipher code and 2 byte checksum */
+ session_key->decrypted_key_size = m_size - 3;
+ if (session_key->decrypted_key_size > ECRYPTFS_MAX_KEY_BYTES) {
+ /* Report the limit that was actually enforced above */
+ ecryptfs_printk(KERN_ERR, "key_size [%d] larger than "
+ "the maximum key size [%d]\n",
+ session_key->decrypted_key_size,
+ ECRYPTFS_MAX_KEY_BYTES);
+ rc = -EIO;
+ goto out;
+ }
+ memcpy(session_key->decrypted_key, &data[i],
+ session_key->decrypted_key_size);
+ i += session_key->decrypted_key_size;
+ expected_checksum += (unsigned char)(data[i++]) << 8;
+ expected_checksum += (unsigned char)(data[i++]);
+ /* i is reused as a plain index; the packet cursor is no longer needed */
+ for (i = 0; i < session_key->decrypted_key_size; i++)
+ checksum += session_key->decrypted_key[i];
+ if (expected_checksum != checksum) {
+ ecryptfs_printk(KERN_ERR, "Invalid checksum for file "
+ "encryption key; expected [%x]; calculated "
+ "[%x]\n", expected_checksum, checksum);
+ rc = -EIO;
+ }
+out:
+ return rc;
+}
+
+
+/**
+ * write_tag_66_packet
+ * @signature: Key identifier; ECRYPTFS_SIG_SIZE_HEX bytes are copied.
+ * @cipher_code: Cipher code stored in the byte preceding the key.
+ * @crypt_stat: Supplies the file encryption key and its size.
+ * @packet: Set to a buffer allocated here that holds the packet.
+ * @packet_len: Set to the number of bytes written, on success only.
+ *
+ * Returns zero on success, -ENOMEM if the buffer cannot be allocated,
+ * or the write_packet_length() error code.
+ */
+static int
+write_tag_66_packet(char *signature, size_t cipher_code,
+ struct ecryptfs_crypt_stat *crypt_stat, char **packet,
+ size_t *packet_len)
+{
+ size_t i = 0;
+ size_t j;
+ size_t data_len;
+ size_t checksum = 0;
+ size_t packet_size_len;
+ char *message;
+ int rc;
+
+ /*
+ * ***** TAG 66 Packet Format *****
+ * | Content Type | 1 byte |
+ * | Key Identifier Size | 1 or 2 bytes |
+ * | Key Identifier | arbitrary |
+ * | File Encryption Key Size | 1 or 2 bytes |
+ * | Cipher Code | 1 byte |
+ * | File Encryption Key | arbitrary |
+ * | Checksum | 2 bytes |
+ *
+ * Worst-case overhead is 1 + 2 + 2 + 1 + 2 = 8 bytes on top of the
+ * key identifier and the key. Sizing the buffer with 5 left no room
+ * for the cipher code and checksum and overran the allocation.
+ */
+ data_len = (8 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size);
+ *packet = kmalloc(data_len, GFP_KERNEL);
+ message = *packet;
+ if (!message) {
+ ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+ message[i++] = ECRYPTFS_TAG_66_PACKET_TYPE;
+ rc = write_packet_length(&message[i], ECRYPTFS_SIG_SIZE_HEX,
+ &packet_size_len);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
+ "header; cannot generate packet length\n");
+ goto out;
+ }
+ i += packet_size_len;
+ memcpy(&message[i], signature, ECRYPTFS_SIG_SIZE_HEX);
+ i += ECRYPTFS_SIG_SIZE_HEX;
+ /* The encrypted key includes 1 byte cipher code and 2 byte checksum */
+ rc = write_packet_length(&message[i], crypt_stat->key_size + 3,
+ &packet_size_len);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet "
+ "header; cannot generate packet length\n");
+ goto out;
+ }
+ i += packet_size_len;
+ message[i++] = cipher_code;
+ memcpy(&message[i], crypt_stat->key, crypt_stat->key_size);
+ i += crypt_stat->key_size;
+ /* Additive checksum over the key bytes, stored big-endian in 2 bytes */
+ for (j = 0; j < crypt_stat->key_size; j++)
+ checksum += crypt_stat->key[j];
+ message[i++] = (checksum / 256) % 256;
+ message[i++] = (checksum % 256);
+ *packet_len = i;
+out:
+ return rc;
+}
+
+/**
+ * parse_tag_67_packet
+ * @key_rec: Receives the encrypted key bytes and the encrypted key size.
+ * @msg: Message whose data[] holds the tag 67 packet and whose data_len
+ * gives the number of valid bytes.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+parse_tag_67_packet(struct ecryptfs_key_record *key_rec,
+ struct ecryptfs_message *msg)
+{
+ size_t i = 0;
+ char *data;
+ size_t data_len;
+ size_t message_len;
+ int rc;
+
+ /*
+ * ***** TAG 67 Packet Format *****
+ * | Content Type | 1 byte |
+ * | Status Indicator | 1 byte |
+ * | Encrypted File Encryption Key Size | 1 or 2 bytes |
+ * | Encrypted File Encryption Key | arbitrary |
+ */
+ message_len = msg->data_len;
+ data = msg->data;
+ /* verify that everything through the encrypted FEK size is present */
+ if (message_len < 4) {
+ rc = -EIO;
+ goto out;
+ }
+ if (data[i++] != ECRYPTFS_TAG_67_PACKET_TYPE) {
+ ecryptfs_printk(KERN_ERR, "Type should be ECRYPTFS_TAG_67\n");
+ rc = -EIO;
+ goto out;
+ }
+ if (data[i++]) {
+ ecryptfs_printk(KERN_ERR, "Status indicator has non zero value"
+ " [%d]\n", data[i-1]);
+ rc = -EIO;
+ goto out;
+ }
+ rc = parse_packet_length(&data[i], &key_rec->enc_key_size, &data_len);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
+ "rc = [%d]\n", rc);
+ goto out;
+ }
+ i += data_len;
+ if (message_len < (i + key_rec->enc_key_size)) {
+ /* Both values are size_t, so they need the z length modifier */
+ ecryptfs_printk(KERN_ERR, "message_len [%zu]; max len is [%zu]\n",
+ message_len, (i + key_rec->enc_key_size));
+ rc = -EIO;
+ goto out;
+ }
+ /* Bound the copy into key_rec->enc_key before touching it */
+ if (key_rec->enc_key_size > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
+ ecryptfs_printk(KERN_ERR, "Encrypted key_size [%zu] larger than "
+ "the maximum key size [%d]\n",
+ key_rec->enc_key_size,
+ ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES);
+ rc = -EIO;
+ goto out;
+ }
+ memcpy(key_rec->enc_key, &data[i], key_rec->enc_key_size);
+out:
+ return rc;
+}
+
+/**
+ * decrypt_pki_encrypted_session_key - Decrypt the session key with
+ * the given auth_tok.
+ * @mount_crypt_stat: Supplies the global auth tok signature placed in the
+ * tag 64 request.
+ * @auth_tok: Its session_key supplies the encrypted key for the request
+ * and receives the decrypted key from the tag 65 response.
+ * @crypt_stat: Receives the decrypted key, key size and cipher name, and
+ * is flagged ECRYPTFS_KEY_VALID on success.
+ *
+ * Sends a tag 64 packet over the configured transport, waits for the
+ * response and parses it as a tag 65 packet.
+ *
+ * Returns Zero on success; non-zero error otherwise.
+ */
+static int decrypt_pki_encrypted_session_key(
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+ struct ecryptfs_auth_tok *auth_tok,
+ struct ecryptfs_crypt_stat *crypt_stat)
+{
+ u16 cipher_code = 0;
+ struct ecryptfs_msg_ctx *msg_ctx;
+ struct ecryptfs_message *msg = NULL;
+ char *netlink_message = NULL;
+ size_t netlink_message_length;
+ int rc;
+
+ rc = write_tag_64_packet(mount_crypt_stat->global_auth_tok_sig,
+ &(auth_tok->session_key),
+ &netlink_message, &netlink_message_length);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n");
+ goto out;
+ }
+ rc = ecryptfs_send_message(ecryptfs_transport, netlink_message,
+ netlink_message_length, &msg_ctx);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error sending netlink message\n");
+ goto out;
+ }
+ rc = ecryptfs_wait_for_response(msg_ctx, &msg);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Failed to receive tag 65 packet "
+ "from the user space daemon\n");
+ rc = -EIO;
+ goto out;
+ }
+ rc = parse_tag_65_packet(&(auth_tok->session_key),
+ &cipher_code, msg);
+ if (rc) {
+ printk(KERN_ERR "Failed to parse tag 65 packet; rc = [%d]\n",
+ rc);
+ goto out;
+ }
+ auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
+ memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
+ auth_tok->session_key.decrypted_key_size);
+ crypt_stat->key_size = auth_tok->session_key.decrypted_key_size;
+ rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, cipher_code);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Cipher code [%d] is invalid\n",
+ cipher_code);
+ goto out;
+ }
+ crypt_stat->flags |= ECRYPTFS_KEY_VALID;
+ if (ecryptfs_verbosity > 0) {
+ ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
+ ecryptfs_dump_hex(crypt_stat->key,
+ crypt_stat->key_size);
+ }
+out:
+ /*
+ * kfree(NULL) is a no-op, so no guards are needed. The request
+ * buffer from write_tag_64_packet() is released here on every path.
+ * NOTE(review): assumes ecryptfs_send_message() copies the payload
+ * rather than keeping the pointer -- confirm against the transports.
+ */
+ kfree(msg);
+ kfree(netlink_message);
+ return rc;
+}
+
+static void wipe_auth_tok_list(struct list_head *auth_tok_list_head)
+{ /* Zero and free every ecryptfs_auth_tok_list_item linked from the given head. */
+ struct list_head *walker;
+ struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
+
+ walker = auth_tok_list_head->next;
+ while (walker != auth_tok_list_head) {
+ auth_tok_list_item =
+ list_entry(walker, struct ecryptfs_auth_tok_list_item,
+ list);
+ walker = auth_tok_list_item->list.next; /* advance before the item is cleared and freed */
+ memset(auth_tok_list_item, 0,
+ sizeof(struct ecryptfs_auth_tok_list_item)); /* clears the embedded auth_tok as well */
+ kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
+ auth_tok_list_item);
+ }
+ auth_tok_list_head->next = NULL; /* NOTE(review): head is left with a NULL next (prev untouched), not re-initialised as an empty list */
+}
+
+/* Slab cache from which struct ecryptfs_auth_tok_list_item objects are
+ * allocated by the packet parsers and to which wipe_auth_tok_list()
+ * returns them */
+struct kmem_cache *ecryptfs_auth_tok_list_item_cache;
+
+
+/**
+ * parse_tag_1_packet
+ * @crypt_stat: The cryptographic context; not referenced by this
+ *              function.
+ * @data: The raw bytes of the packet.
+ * @auth_tok_list: eCryptfs parses packets into authentication tokens;
+ *                 the token built from this packet is added to this
+ *                 list with list_add().
+ * @new_auth_tok: Pointer to a pointer to memory that this function
+ *                allocates; sets the memory address of the pointer to
+ *                NULL on error. This object is added to the
+ *                auth_tok_list.
+ * @packet_size: This function writes the size of the parsed packet
+ *               into this memory location; zero on error.
+ * @max_packet_size: The number of bytes of @data that may be consumed;
+ *                   a packet that would extend past it is rejected.
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+parse_tag_1_packet(struct ecryptfs_crypt_stat *crypt_stat,
+		   unsigned char *data, struct list_head *auth_tok_list,
+		   struct ecryptfs_auth_tok **new_auth_tok,
+		   size_t *packet_size, size_t max_packet_size)
+{
+	size_t body_size;
+	struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
+	size_t length_size;
+	int rc = 0;
+
+	(*packet_size) = 0;
+	(*new_auth_tok) = NULL;
+
+	/* we check that:
+	 *   one byte for the Tag 1 ID flag
+	 *   two bytes for the body size
+	 * do not exceed the maximum_packet_size
+	 */
+	if (unlikely((*packet_size) + 3 > max_packet_size)) {
+		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	/* check for Tag 1 identifier - one byte */
+	if (data[(*packet_size)++] != ECRYPTFS_TAG_1_PACKET_TYPE) {
+		ecryptfs_printk(KERN_ERR, "Enter w/ first byte != 0x%.2x\n",
+				ECRYPTFS_TAG_1_PACKET_TYPE);
+		rc = -EINVAL;
+		goto out;
+	}
+	/* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
+	 * at end of function upon failure */
+	auth_tok_list_item =
+		kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache,
+				  GFP_KERNEL);
+	if (!auth_tok_list_item) {
+		ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+	(*new_auth_tok) = &auth_tok_list_item->auth_tok;
+	/* check for body size - one to two bytes
+	 *
+	 *              ***** TAG 1 Packet Format *****
+	 *    | version number                     | 1 byte       |
+	 *    | key ID                             | 8 bytes      |
+	 *    | public key algorithm               | 1 byte       |
+	 *    | encrypted session key              | arbitrary    |
+	 */
+	rc = parse_packet_length(&data[(*packet_size)], &body_size,
+				 &length_size);
+	if (rc) {
+		ecryptfs_printk(KERN_WARNING, "Error parsing packet length; "
+				"rc = [%d]\n", rc);
+		goto out_free;
+	}
+	/* The body must at least hold version, signature and algorithm */
+	if (unlikely(body_size < (0x02 + ECRYPTFS_SIG_SIZE))) {
+		ecryptfs_printk(KERN_WARNING, "Invalid body size ([%zu])\n",
+				body_size);
+		rc = -EINVAL;
+		goto out_free;
+	}
+	(*packet_size) += length_size;
+	if (unlikely((*packet_size) + body_size > max_packet_size)) {
+		ecryptfs_printk(KERN_ERR, "Packet size exceeds max\n");
+		rc = -EINVAL;
+		goto out_free;
+	}
+	/* Version 3 (from RFC2440) - one byte */
+	if (unlikely(data[(*packet_size)++] != 0x03)) {
+		ecryptfs_printk(KERN_DEBUG, "Unknown version number "
+				"[%d]\n", data[(*packet_size) - 1]);
+		rc = -EINVAL;
+		goto out_free;
+	}
+	/* Read Signature */
+	ecryptfs_to_hex((*new_auth_tok)->token.private_key.signature,
+			&data[(*packet_size)], ECRYPTFS_SIG_SIZE);
+	*packet_size += ECRYPTFS_SIG_SIZE;
+	/* This byte is skipped because the kernel does not need to
+	 * know which public key encryption algorithm was used */
+	(*packet_size)++;
+	(*new_auth_tok)->session_key.encrypted_key_size =
+		body_size - (0x02 + ECRYPTFS_SIG_SIZE);
+	if ((*new_auth_tok)->session_key.encrypted_key_size
+	    > ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES) {
+		ecryptfs_printk(KERN_ERR, "Tag 1 packet contains key larger "
+				"than ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES\n");
+		rc = -EINVAL;
+		/* The item is not on the list yet, so it has to be freed
+		 * here and *new_auth_tok reset to NULL */
+		goto out_free;
+	}
+	ecryptfs_printk(KERN_DEBUG, "Encrypted key size = [%d]\n",
+			(*new_auth_tok)->session_key.encrypted_key_size);
+	memcpy((*new_auth_tok)->session_key.encrypted_key,
+	       &data[(*packet_size)], (body_size - 0x02 - ECRYPTFS_SIG_SIZE));
+	(*packet_size) += (*new_auth_tok)->session_key.encrypted_key_size;
+	(*new_auth_tok)->session_key.flags &=
+		~ECRYPTFS_CONTAINS_DECRYPTED_KEY;
+	(*new_auth_tok)->session_key.flags |=
+		ECRYPTFS_CONTAINS_ENCRYPTED_KEY;
+	(*new_auth_tok)->token_type = ECRYPTFS_PRIVATE_KEY;
+	(*new_auth_tok)->flags |= ECRYPTFS_PRIVATE_KEY;
+	/* TODO: Why are we setting this flag here? Don't we want the
+	 * userspace to decrypt the session key? */
+	(*new_auth_tok)->session_key.flags &=
+		~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
+	(*new_auth_tok)->session_key.flags &=
+		~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT);
+	list_add(&auth_tok_list_item->list, auth_tok_list);
+	goto out;
+out_free:
+	(*new_auth_tok) = NULL;
+	/* Scrub the partially filled token before giving it back */
+	memset(auth_tok_list_item, 0,
+	       sizeof(struct ecryptfs_auth_tok_list_item));
+	kmem_cache_free(ecryptfs_auth_tok_list_item_cache,
+			auth_tok_list_item);
+out:
+	if (rc)
+		(*packet_size) = 0;
+	return rc;
+}
+
/**
* parse_tag_3_packet
* @crypt_stat: The cryptographic context to modify based on packet
@@ -178,10 +651,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
struct ecryptfs_auth_tok **new_auth_tok,
size_t *packet_size, size_t max_packet_size)
{
- int rc = 0;
size_t body_size;
struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
size_t length_size;
+ int rc = 0;
(*packet_size) = 0;
(*new_auth_tok) = NULL;
@@ -207,14 +680,12 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
/* Released: wipe_auth_tok_list called in ecryptfs_parse_packet_set or
* at end of function upon failure */
auth_tok_list_item =
- kmem_cache_alloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
+ kmem_cache_zalloc(ecryptfs_auth_tok_list_item_cache, GFP_KERNEL);
if (!auth_tok_list_item) {
ecryptfs_printk(KERN_ERR, "Unable to allocate memory\n");
rc = -ENOMEM;
goto out;
}
- memset(auth_tok_list_item, 0,
- sizeof(struct ecryptfs_auth_tok_list_item));
(*new_auth_tok) = &auth_tok_list_item->auth_tok;
/* check for body size - one to two bytes */
@@ -321,10 +792,10 @@ parse_tag_3_packet(struct ecryptfs_crypt_stat *crypt_stat,
(*new_auth_tok)->token_type = ECRYPTFS_PASSWORD;
/* TODO: Parametarize; we might actually want userspace to
* decrypt the session key. */
- ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
- ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
- ECRYPTFS_CLEAR_FLAG((*new_auth_tok)->session_key.flags,
- ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT);
+ (*new_auth_tok)->session_key.flags &=
+ ~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT);
+ (*new_auth_tok)->session_key.flags &=
+ ~(ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT);
list_add(&auth_tok_list_item->list, auth_tok_list);
goto out;
out_free:
@@ -360,9 +831,9 @@ parse_tag_11_packet(unsigned char *data, unsigned char *contents,
size_t max_contents_bytes, size_t *tag_11_contents_size,
size_t *packet_size, size_t max_packet_size)
{
- int rc = 0;
size_t body_size;
size_t length_size;
+ int rc = 0;
(*packet_size) = 0;
(*tag_11_contents_size) = 0;
@@ -461,7 +932,6 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
struct ecryptfs_password *password_s_ptr;
struct scatterlist src_sg[2], dst_sg[2];
struct mutex *tfm_mutex = NULL;
- /* TODO: Use virt_to_scatterlist for these */
char *encrypted_session_key;
char *session_key;
struct blkcipher_desc desc = {
@@ -470,8 +940,7 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
int rc = 0;
password_s_ptr = &auth_tok->token.password;
- if (ECRYPTFS_CHECK_FLAG(password_s_ptr->flags,
- ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET))
+ if (password_s_ptr->flags & ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)
ecryptfs_printk(KERN_DEBUG, "Session key encryption key "
"set; skipping key generation\n");
ecryptfs_printk(KERN_DEBUG, "Session key encryption key (size [%d])"
@@ -553,7 +1022,7 @@ static int decrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
auth_tok->session_key.flags |= ECRYPTFS_CONTAINS_DECRYPTED_KEY;
memcpy(crypt_stat->key, auth_tok->session_key.decrypted_key,
auth_tok->session_key.decrypted_key_size);
- ECRYPTFS_SET_FLAG(crypt_stat->flags, ECRYPTFS_KEY_VALID);
+ crypt_stat->flags |= ECRYPTFS_KEY_VALID;
ecryptfs_printk(KERN_DEBUG, "Decrypted session key:\n");
if (ecryptfs_verbosity > 0)
ecryptfs_dump_hex(crypt_stat->key,
@@ -589,7 +1058,6 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
struct dentry *ecryptfs_dentry)
{
size_t i = 0;
- int rc = 0;
size_t found_auth_tok = 0;
size_t next_packet_is_auth_tok_packet;
char sig[ECRYPTFS_SIG_SIZE_HEX];
@@ -605,6 +1073,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
unsigned char sig_tmp_space[ECRYPTFS_SIG_SIZE];
size_t tag_11_contents_size;
size_t tag_11_packet_size;
+ int rc = 0;
INIT_LIST_HEAD(&auth_tok_list);
/* Parse the header to find as many packets as we can, these will be
@@ -656,8 +1125,21 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
sig_tmp_space, tag_11_contents_size);
new_auth_tok->token.password.signature[
ECRYPTFS_PASSWORD_SIG_SIZE] = '\0';
- ECRYPTFS_SET_FLAG(crypt_stat->flags,
- ECRYPTFS_ENCRYPTED);
+ crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
+ break;
+ case ECRYPTFS_TAG_1_PACKET_TYPE:
+ rc = parse_tag_1_packet(crypt_stat,
+ (unsigned char *)&src[i],
+ &auth_tok_list, &new_auth_tok,
+ &packet_size, max_packet_size);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error parsing "
+ "tag 1 packet\n");
+ rc = -EIO;
+ goto out_wipe_list;
+ }
+ i += packet_size;
+ crypt_stat->flags |= ECRYPTFS_ENCRYPTED;
break;
case ECRYPTFS_TAG_11_PACKET_TYPE:
ecryptfs_printk(KERN_WARNING, "Invalid packet set "
@@ -706,31 +1188,46 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
goto leave_list;
/* TODO: Transfer the common salt into the
* crypt_stat salt */
+ } else if ((candidate_auth_tok->token_type
+ == ECRYPTFS_PRIVATE_KEY)
+ && !strncmp(candidate_auth_tok->token.private_key.signature,
+ sig, ECRYPTFS_SIG_SIZE_HEX)) {
+ found_auth_tok = 1;
+ goto leave_list;
}
}
-leave_list:
if (!found_auth_tok) {
ecryptfs_printk(KERN_ERR, "Could not find authentication "
"token on temporary list for sig [%.*s]\n",
ECRYPTFS_SIG_SIZE_HEX, sig);
rc = -EIO;
goto out_wipe_list;
- } else {
+ }
+leave_list:
+ rc = -ENOTSUPP;
+ if (candidate_auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) {
+ memcpy(&(candidate_auth_tok->token.private_key),
+ &(chosen_auth_tok->token.private_key),
+ sizeof(struct ecryptfs_private_key));
+ rc = decrypt_pki_encrypted_session_key(mount_crypt_stat,
+ candidate_auth_tok,
+ crypt_stat);
+ } else if (candidate_auth_tok->token_type == ECRYPTFS_PASSWORD) {
memcpy(&(candidate_auth_tok->token.password),
&(chosen_auth_tok->token.password),
sizeof(struct ecryptfs_password));
rc = decrypt_session_key(candidate_auth_tok, crypt_stat);
- if (rc) {
- ecryptfs_printk(KERN_ERR, "Error decrypting the "
- "session key\n");
- goto out_wipe_list;
- }
- rc = ecryptfs_compute_root_iv(crypt_stat);
- if (rc) {
- ecryptfs_printk(KERN_ERR, "Error computing "
- "the root IV\n");
- goto out_wipe_list;
- }
+ }
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error decrypting the "
+ "session key; rc = [%d]\n", rc);
+ goto out_wipe_list;
+ }
+ rc = ecryptfs_compute_root_iv(crypt_stat);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Error computing "
+ "the root IV\n");
+ goto out_wipe_list;
}
rc = ecryptfs_init_crypt_ctx(crypt_stat);
if (rc) {
@@ -743,6 +1240,145 @@ out_wipe_list:
out:
return rc;
}
+/**
+ * pki_encrypt_session_key - Ask the userspace daemon to encrypt the
+ * session key
+ * @auth_tok: Authentication token whose private key signature is
+ *            placed in the tag 66 request
+ * @crypt_stat: Cryptographic context handed to write_tag_66_packet()
+ * @key_rec: Key record that parse_tag_67_packet() fills in from the
+ *           reply
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok,
+			struct ecryptfs_crypt_stat *crypt_stat,
+			struct ecryptfs_key_record *key_rec)
+{
+	struct ecryptfs_msg_ctx *msg_ctx = NULL;
+	/* Must start out NULL: the "out" path frees it even when
+	 * write_tag_66_packet() fails before producing a buffer */
+	char *netlink_payload = NULL;
+	size_t netlink_payload_length;
+	struct ecryptfs_message *msg = NULL;
+	int rc;
+
+	rc = write_tag_66_packet(auth_tok->token.private_key.signature,
+				 ecryptfs_code_for_cipher_string(crypt_stat),
+				 crypt_stat, &netlink_payload,
+				 &netlink_payload_length);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n");
+		goto out;
+	}
+	rc = ecryptfs_send_message(ecryptfs_transport, netlink_payload,
+				   netlink_payload_length, &msg_ctx);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error sending netlink message\n");
+		goto out;
+	}
+	rc = ecryptfs_wait_for_response(msg_ctx, &msg);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Failed to receive tag 67 packet "
+				"from the user space daemon\n");
+		/* Any wait failure is reported to the caller as -EIO */
+		rc = -EIO;
+		goto out;
+	}
+	rc = parse_tag_67_packet(key_rec, msg);
+	if (rc)
+		ecryptfs_printk(KERN_ERR, "Error parsing tag 67 packet\n");
+	kfree(msg);
+out:
+	/* kfree(NULL) is a no-op, so no guard is needed */
+	kfree(netlink_payload);
+	return rc;
+}
+/**
+ * write_tag_1_packet - Write an RFC2440-compatible tag 1 (public key) packet
+ * @dest: Buffer into which to write the packet
+ * @max: Maximum number of bytes that can be written
+ * @auth_tok: Authentication token supplying the private key signature
+ *            and, when one is already present, the encrypted session
+ *            key
+ * @crypt_stat: Cryptographic context; its key_size bounds the scan for
+ *              an existing encrypted session key
+ * @mount_crypt_stat: Mount-wide cryptographic context; not referenced
+ *                    by this function
+ * @key_rec: Key record that receives the signature and the encrypted
+ *           session key that are then serialized into @dest
+ * @packet_size: This function will write the number of bytes that end
+ *               up constituting the packet; set to zero on error
+ *
+ * Returns zero on success; non-zero on error.
+ */
+static int
+write_tag_1_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
+		   struct ecryptfs_crypt_stat *crypt_stat,
+		   struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
+		   struct ecryptfs_key_record *key_rec, size_t *packet_size)
+{
+	size_t i;
+	size_t encrypted_session_key_valid = 0;
+	size_t key_rec_size;
+	size_t packet_size_length;
+	int rc = 0;
+
+	(*packet_size) = 0;
+	ecryptfs_from_hex(key_rec->sig, auth_tok->token.private_key.signature,
+			  ECRYPTFS_SIG_SIZE);
+	/* Any non-zero byte means auth_tok already carries an encrypted
+	 * session key that can be reused as-is */
+	for (i = 0; i < crypt_stat->key_size; i++)
+		encrypted_session_key_valid |=
+			auth_tok->session_key.encrypted_key[i];
+	if (encrypted_session_key_valid) {
+		memcpy(key_rec->enc_key,
+		       auth_tok->session_key.encrypted_key,
+		       auth_tok->session_key.encrypted_key_size);
+		/* Record how many bytes were copied; the size checks and
+		 * the packet body below all read key_rec->enc_key_size,
+		 * which is otherwise never set on this path */
+		key_rec->enc_key_size =
+			auth_tok->session_key.encrypted_key_size;
+		goto encrypted_session_key_set;
+	}
+	if (auth_tok->session_key.encrypted_key_size == 0)
+		auth_tok->session_key.encrypted_key_size =
+			auth_tok->token.private_key.key_size;
+	rc = pki_encrypt_session_key(auth_tok, crypt_stat, key_rec);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Failed to encrypt session key "
+				"via a pki\n");
+		goto out;
+	}
+	if (ecryptfs_verbosity > 0) {
+		ecryptfs_printk(KERN_DEBUG, "Encrypted key:\n");
+		ecryptfs_dump_hex(key_rec->enc_key, key_rec->enc_key_size);
+	}
+encrypted_session_key_set:
+	/* Now we have a valid key_rec.  Append it to the
+	 * key_rec set. */
+	key_rec_size = (sizeof(struct ecryptfs_key_record)
+			- ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
+			+ (key_rec->enc_key_size));
+	/* TODO: Include a packet size limit as a parameter to this
+	 * function once we have multi-packet headers (for versions
+	 * later than 0.1 */
+	if (key_rec_size >= ECRYPTFS_MAX_KEYSET_SIZE) {
+		ecryptfs_printk(KERN_ERR, "Keyset too large\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	/*              ***** TAG 1 Packet Format *****
+	 *    | version number                     | 1 byte       |
+	 *    | key ID                             | 8 bytes      |
+	 *    | public key algorithm               | 1 byte       |
+	 *    | encrypted session key              | arbitrary    |
+	 */
+	/* NOTE(review): this compares only the packet body against max;
+	 * the tag byte and the length field written ahead of the body
+	 * are not counted - confirm that callers leave headroom */
+	if ((0x02 + ECRYPTFS_SIG_SIZE + key_rec->enc_key_size) >= max) {
+		ecryptfs_printk(KERN_ERR,
+				"Authentication token is too large\n");
+		rc = -EINVAL;
+		goto out;
+	}
+	dest[(*packet_size)++] = ECRYPTFS_TAG_1_PACKET_TYPE;
+	/* This format is inspired by OpenPGP; see RFC 2440
+	 * packet tag 1 */
+	rc = write_packet_length(&dest[(*packet_size)],
+				 (0x02 + ECRYPTFS_SIG_SIZE +
+				 key_rec->enc_key_size),
+				 &packet_size_length);
+	if (rc) {
+		ecryptfs_printk(KERN_ERR, "Error generating tag 1 packet "
+				"header; cannot generate packet length\n");
+		goto out;
+	}
+	(*packet_size) += packet_size_length;
+	dest[(*packet_size)++] = 0x03; /* version 3 */
+	memcpy(&dest[(*packet_size)], key_rec->sig, ECRYPTFS_SIG_SIZE);
+	(*packet_size) += ECRYPTFS_SIG_SIZE;
+	/* public key algorithm byte */
+	dest[(*packet_size)++] = RFC2440_CIPHER_RSA;
+	memcpy(&dest[(*packet_size)], key_rec->enc_key,
+	       key_rec->enc_key_size);
+	(*packet_size) += key_rec->enc_key_size;
+out:
+	if (rc)
+		(*packet_size) = 0;
+	return rc;
+}
/**
* write_tag_11_packet
@@ -758,8 +1394,8 @@ static int
write_tag_11_packet(char *dest, int max, char *contents, size_t contents_length,
size_t *packet_length)
{
- int rc = 0;
size_t packet_size_length;
+ int rc = 0;
(*packet_length) = 0;
if ((13 + contents_length) > max) {
@@ -817,7 +1453,6 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
struct ecryptfs_key_record *key_rec, size_t *packet_size)
{
size_t i;
- size_t signature_is_valid = 0;
size_t encrypted_session_key_valid = 0;
char session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES];
struct scatterlist dest_sg[2];
@@ -833,19 +1468,14 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
int rc = 0;
(*packet_size) = 0;
- /* Check for a valid signature on the auth_tok */
- for (i = 0; i < ECRYPTFS_SIG_SIZE_HEX; i++)
- signature_is_valid |= auth_tok->token.password.signature[i];
- if (!signature_is_valid)
- BUG();
- ecryptfs_from_hex((*key_rec).sig, auth_tok->token.password.signature,
+ ecryptfs_from_hex(key_rec->sig, auth_tok->token.password.signature,
ECRYPTFS_SIG_SIZE);
encrypted_session_key_valid = 0;
for (i = 0; i < crypt_stat->key_size; i++)
encrypted_session_key_valid |=
auth_tok->session_key.encrypted_key[i];
if (encrypted_session_key_valid) {
- memcpy((*key_rec).enc_key,
+ memcpy(key_rec->enc_key,
auth_tok->session_key.encrypted_key,
auth_tok->session_key.encrypted_key_size);
goto encrypted_session_key_set;
@@ -858,10 +1488,10 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
memset((crypt_stat->key + 24), 0, 8);
auth_tok->session_key.encrypted_key_size = 32;
}
- (*key_rec).enc_key_size =
+ key_rec->enc_key_size =
auth_tok->session_key.encrypted_key_size;
- if (ECRYPTFS_CHECK_FLAG(auth_tok->token.password.flags,
- ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET)) {
+ if (auth_tok->token.password.flags &
+ ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET) {
ecryptfs_printk(KERN_DEBUG, "Using previously generated "
"session key encryption key of size [%d]\n",
auth_tok->token.password.
@@ -879,15 +1509,15 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
ecryptfs_dump_hex(session_key_encryption_key, 16);
}
rc = virt_to_scatterlist(crypt_stat->key,
- (*key_rec).enc_key_size, src_sg, 2);
+ key_rec->enc_key_size, src_sg, 2);
if (!rc) {
ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
"for crypt_stat session key\n");
rc = -ENOMEM;
goto out;
}
- rc = virt_to_scatterlist((*key_rec).enc_key,
- (*key_rec).enc_key_size, dest_sg, 2);
+ rc = virt_to_scatterlist(key_rec->enc_key,
+ key_rec->enc_key_size, dest_sg, 2);
if (!rc) {
ecryptfs_printk(KERN_ERR, "Error generating scatterlist "
"for crypt_stat encrypted session key\n");
@@ -943,14 +1573,14 @@ write_tag_3_packet(char *dest, size_t max, struct ecryptfs_auth_tok *auth_tok,
mutex_unlock(tfm_mutex);
ecryptfs_printk(KERN_DEBUG, "This should be the encrypted key:\n");
if (ecryptfs_verbosity > 0)
- ecryptfs_dump_hex((*key_rec).enc_key,
- (*key_rec).enc_key_size);
+ ecryptfs_dump_hex(key_rec->enc_key,
+ key_rec->enc_key_size);
encrypted_session_key_set:
/* Now we have a valid key_rec. Append it to the
* key_rec set. */
key_rec_size = (sizeof(struct ecryptfs_key_record)
- ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES
- + ((*key_rec).enc_key_size));
+ + (key_rec->enc_key_size));
/* TODO: Include a packet size limit as a parameter to this
* function once we have multi-packet headers (for versions
* later than 0.1 */
@@ -962,7 +1592,7 @@ encrypted_session_key_set:
/* TODO: Packet size limit */
/* We have 5 bytes of surrounding packet data */
if ((0x05 + ECRYPTFS_SALT_SIZE
- + (*key_rec).enc_key_size) >= max) {
+ + key_rec->enc_key_size) >= max) {
ecryptfs_printk(KERN_ERR, "Authentication token is too "
"large\n");
rc = -EINVAL;
@@ -974,7 +1604,7 @@ encrypted_session_key_set:
/* ver+cipher+s2k+hash+salt+iter+enc_key */
rc = write_packet_length(&dest[(*packet_size)],
(0x05 + ECRYPTFS_SALT_SIZE
- + (*key_rec).enc_key_size),
+ + key_rec->enc_key_size),
&packet_size_length);
if (rc) {
ecryptfs_printk(KERN_ERR, "Error generating tag 3 packet "
@@ -997,9 +1627,9 @@ encrypted_session_key_set:
ECRYPTFS_SALT_SIZE);
(*packet_size) += ECRYPTFS_SALT_SIZE; /* salt */
dest[(*packet_size)++] = 0x60; /* hash iterations (65536) */
- memcpy(&dest[(*packet_size)], (*key_rec).enc_key,
- (*key_rec).enc_key_size);
- (*packet_size) += (*key_rec).enc_key_size;
+ memcpy(&dest[(*packet_size)], key_rec->enc_key,
+ key_rec->enc_key_size);
+ (*packet_size) += key_rec->enc_key_size;
out:
if (desc.tfm && !tfm_mutex)
crypto_free_blkcipher(desc.tfm);
@@ -1008,6 +1638,8 @@ out:
return rc;
}
+/* Slab cache backing the struct ecryptfs_key_record that
+ * ecryptfs_generate_key_packet_set() allocates and frees */
+struct kmem_cache *ecryptfs_key_record_cache;
+
/**
* ecryptfs_generate_key_packet_set
* @dest: Virtual address from which to write the key record set
@@ -1029,52 +1661,60 @@ ecryptfs_generate_key_packet_set(char *dest_base,
struct dentry *ecryptfs_dentry, size_t *len,
size_t max)
{
- int rc = 0;
struct ecryptfs_auth_tok *auth_tok;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
&ecryptfs_superblock_to_private(
ecryptfs_dentry->d_sb)->mount_crypt_stat;
size_t written;
- struct ecryptfs_key_record key_rec;
+ struct ecryptfs_key_record *key_rec;
+ int rc = 0;
(*len) = 0;
+ key_rec = kmem_cache_alloc(ecryptfs_key_record_cache, GFP_KERNEL);
+ if (!key_rec) {
+ rc = -ENOMEM;
+ goto out;
+ }
if (mount_crypt_stat->global_auth_tok) {
auth_tok = mount_crypt_stat->global_auth_tok;
if (auth_tok->token_type == ECRYPTFS_PASSWORD) {
rc = write_tag_3_packet((dest_base + (*len)),
max, auth_tok,
- crypt_stat, &key_rec,
+ crypt_stat, key_rec,
&written);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error "
"writing tag 3 packet\n");
- goto out;
+ goto out_free;
}
(*len) += written;
/* Write auth tok signature packet */
rc = write_tag_11_packet(
(dest_base + (*len)),
(max - (*len)),
- key_rec.sig, ECRYPTFS_SIG_SIZE, &written);
+ key_rec->sig, ECRYPTFS_SIG_SIZE, &written);
if (rc) {
ecryptfs_printk(KERN_ERR, "Error writing "
"auth tok signature packet\n");
- goto out;
+ goto out_free;
+ }
+ (*len) += written;
+ } else if (auth_tok->token_type == ECRYPTFS_PRIVATE_KEY) {
+ rc = write_tag_1_packet(dest_base + (*len),
+ max, auth_tok,
+ crypt_stat,mount_crypt_stat,
+ key_rec, &written);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error "
+ "writing tag 1 packet\n");
+ goto out_free;
}
(*len) += written;
} else {
ecryptfs_printk(KERN_WARNING, "Unsupported "
"authentication token type\n");
rc = -EINVAL;
- goto out;
- }
- if (rc) {
- ecryptfs_printk(KERN_WARNING, "Error writing "
- "authentication token packet with sig "
- "= [%s]\n",
- mount_crypt_stat->global_auth_tok_sig);
- rc = -EIO;
- goto out;
+ goto out_free;
}
} else
BUG();
@@ -1084,6 +1724,9 @@ ecryptfs_generate_key_packet_set(char *dest_base,
ecryptfs_printk(KERN_ERR, "Error writing boundary byte\n");
rc = -EIO;
}
+
+out_free:
+ kmem_cache_free(ecryptfs_key_record_cache, key_rec);
out:
if (rc)
(*len) = 0;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d0541ae8faba..80044d196fe0 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -3,9 +3,10 @@
*
* Copyright (C) 1997-2003 Erez Zadok
* Copyright (C) 2001-2003 Stony Brook University
- * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Copyright (C) 2004-2007 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
* Michael C. Thompson <mcthomps@us.ibm.com>
+ * Tyler Hicks <tyhicks@ou.edu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
@@ -48,6 +49,43 @@ MODULE_PARM_DESC(ecryptfs_verbosity,
"Initial verbosity level (0 or 1; defaults to "
"0, which is Quiet)");
+/**
+ * Module parameter that defines the number of netlink message buffer
+ * elements; defaults to ECRYPTFS_DEFAULT_MSG_CTX_ELEMS.
+ */
+unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS;
+
+module_param(ecryptfs_message_buf_len, uint, 0);
+MODULE_PARM_DESC(ecryptfs_message_buf_len,
+ "Number of message buffer elements");
+
+/**
+ * Module parameter that defines the maximum guaranteed amount of time
+ * to wait for a response through netlink; the parameter description
+ * gives the unit as seconds and the default is
+ * ECRYPTFS_MAX_MSG_CTX_TTL / HZ. The actual sleep time will most
+ * likely be slightly longer than this value, and will only be shorter
+ * if the netlink message successfully arrives.
+ */
+signed long ecryptfs_message_wait_timeout = ECRYPTFS_MAX_MSG_CTX_TTL / HZ;
+
+module_param(ecryptfs_message_wait_timeout, long, 0);
+MODULE_PARM_DESC(ecryptfs_message_wait_timeout,
+ "Maximum number of seconds that an operation will "
+ "sleep while waiting for a message response from "
+ "userspace");
+
+/**
+ * Module parameter that is an estimate of the maximum number of users
+ * that will be concurrently using eCryptfs. Set this to the right
+ * value to balance performance and memory use. Defaults to
+ * ECRYPTFS_DEFAULT_NUM_USERS.
+ */
+unsigned int ecryptfs_number_of_users = ECRYPTFS_DEFAULT_NUM_USERS;
+
+module_param(ecryptfs_number_of_users, uint, 0);
+MODULE_PARM_DESC(ecryptfs_number_of_users, "An estimate of the number of "
+ "concurrent users of eCryptfs");
+
+/* Transport selector passed to ecryptfs_init_messaging(),
+ * ecryptfs_release_messaging() and ecryptfs_send_message(). Unlike the
+ * variables above it has no module_param() declaration here, so it
+ * keeps the value ECRYPTFS_DEFAULT_TRANSPORT. */
+unsigned int ecryptfs_transport = ECRYPTFS_DEFAULT_TRANSPORT;
+
void __ecryptfs_printk(const char *fmt, ...)
{
va_list args;
@@ -124,7 +162,8 @@ out:
enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, ecryptfs_opt_debug,
ecryptfs_opt_ecryptfs_debug, ecryptfs_opt_cipher,
ecryptfs_opt_ecryptfs_cipher, ecryptfs_opt_ecryptfs_key_bytes,
- ecryptfs_opt_passthrough, ecryptfs_opt_err };
+ ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
+ ecryptfs_opt_encrypted_view, ecryptfs_opt_err };
static match_table_t tokens = {
{ecryptfs_opt_sig, "sig=%s"},
@@ -135,6 +174,8 @@ static match_table_t tokens = {
{ecryptfs_opt_ecryptfs_cipher, "ecryptfs_cipher=%s"},
{ecryptfs_opt_ecryptfs_key_bytes, "ecryptfs_key_bytes=%u"},
{ecryptfs_opt_passthrough, "ecryptfs_passthrough"},
+ {ecryptfs_opt_xattr_metadata, "ecryptfs_xattr_metadata"},
+ {ecryptfs_opt_encrypted_view, "ecryptfs_encrypted_view"},
{ecryptfs_opt_err, NULL}
};
@@ -275,6 +316,16 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
mount_crypt_stat->flags |=
ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED;
break;
+ case ecryptfs_opt_xattr_metadata:
+ mount_crypt_stat->flags |=
+ ECRYPTFS_XATTR_METADATA_ENABLED;
+ break;
+ case ecryptfs_opt_encrypted_view:
+ mount_crypt_stat->flags |=
+ ECRYPTFS_XATTR_METADATA_ENABLED;
+ mount_crypt_stat->flags |=
+ ECRYPTFS_ENCRYPTED_VIEW_ENABLED;
+ break;
case ecryptfs_opt_err:
default:
ecryptfs_printk(KERN_WARNING,
@@ -347,9 +398,10 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
rc = -EINVAL;
goto out;
}
- if (auth_tok->token_type != ECRYPTFS_PASSWORD) {
+ if (auth_tok->token_type != ECRYPTFS_PASSWORD
+ && auth_tok->token_type != ECRYPTFS_PRIVATE_KEY) {
ecryptfs_printk(KERN_ERR, "Invalid auth_tok structure "
- "returned from key\n");
+ "returned from key query\n");
rc = -EINVAL;
goto out;
}
@@ -378,15 +430,13 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
/* Released in ecryptfs_put_super() */
ecryptfs_set_superblock_private(sb,
- kmem_cache_alloc(ecryptfs_sb_info_cache,
+ kmem_cache_zalloc(ecryptfs_sb_info_cache,
GFP_KERNEL));
if (!ecryptfs_superblock_to_private(sb)) {
ecryptfs_printk(KERN_WARNING, "Out of memory\n");
rc = -ENOMEM;
goto out;
}
- memset(ecryptfs_superblock_to_private(sb), 0,
- sizeof(struct ecryptfs_sb_info));
sb->s_op = &ecryptfs_sops;
/* Released through deactivate_super(sb) from get_sb_nodev */
sb->s_root = d_alloc(NULL, &(const struct qstr) {
@@ -402,7 +452,7 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
/* Released in d_release when dput(sb->s_root) is called */
/* through deactivate_super(sb) from get_sb_nodev() */
ecryptfs_set_dentry_private(sb->s_root,
- kmem_cache_alloc(ecryptfs_dentry_info_cache,
+ kmem_cache_zalloc(ecryptfs_dentry_info_cache,
GFP_KERNEL));
if (!ecryptfs_dentry_to_private(sb->s_root)) {
ecryptfs_printk(KERN_ERR,
@@ -410,8 +460,6 @@ ecryptfs_fill_super(struct super_block *sb, void *raw_data, int silent)
rc = -ENOMEM;
goto out;
}
- memset(ecryptfs_dentry_to_private(sb->s_root), 0,
- sizeof(struct ecryptfs_dentry_info));
rc = 0;
out:
/* Should be able to rely on deactivate_super called from
@@ -594,10 +642,20 @@ static struct ecryptfs_cache_info {
.size = PAGE_CACHE_SIZE,
},
{
+ .cache = &ecryptfs_xattr_cache,
+ .name = "ecryptfs_xattr_cache",
+ .size = PAGE_CACHE_SIZE,
+ },
+ {
.cache = &ecryptfs_lower_page_cache,
.name = "ecryptfs_lower_page_cache",
.size = PAGE_CACHE_SIZE,
},
+ {
+ .cache = &ecryptfs_key_record_cache,
+ .name = "ecryptfs_key_record_cache",
+ .size = sizeof(struct ecryptfs_key_record),
+ },
};
static void ecryptfs_free_kmem_caches(void)
@@ -699,7 +757,8 @@ static struct ecryptfs_version_str_map_elem {
{ECRYPTFS_VERSIONING_PASSPHRASE, "passphrase"},
{ECRYPTFS_VERSIONING_PUBKEY, "pubkey"},
{ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH, "plaintext passthrough"},
- {ECRYPTFS_VERSIONING_POLICY, "policy"}
+ {ECRYPTFS_VERSIONING_POLICY, "policy"},
+ {ECRYPTFS_VERSIONING_XATTR, "metadata in extended attribute"}
};
static ssize_t version_str_show(struct ecryptfs_obj *obj, char *buff)
@@ -798,6 +857,11 @@ static int __init ecryptfs_init(void)
ecryptfs_free_kmem_caches();
goto out;
}
+ rc = ecryptfs_init_messaging(ecryptfs_transport);
+ if (rc) {
+ ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
+ "initialize the eCryptfs netlink socket\n");
+ }
out:
return rc;
}
@@ -809,6 +873,7 @@ static void __exit ecryptfs_exit(void)
sysfs_remove_file(&ecryptfs_subsys.kset.kobj,
&sysfs_attr_version_str.attr);
subsystem_unregister(&ecryptfs_subsys);
+ ecryptfs_release_messaging(ecryptfs_transport);
unregister_filesystem(&ecryptfs_fs_type);
ecryptfs_free_kmem_caches();
}
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
new file mode 100644
index 000000000000..3baf253be95a
--- /dev/null
+++ b/fs/ecryptfs/messaging.c
@@ -0,0 +1,516 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
+ * Tyler Hicks <tyhicks@ou.edu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include "ecryptfs_kernel.h"
+
+/* Context elements available to be claimed for a new request */
+static LIST_HEAD(ecryptfs_msg_ctx_free_list);
+/* Context elements currently assigned to an outstanding request */
+static LIST_HEAD(ecryptfs_msg_ctx_alloc_list);
+/* Protects membership of the two context lists above */
+static struct mutex ecryptfs_msg_ctx_lists_mux;
+
+/* Hash table mapping a user id to its registered daemon id */
+static struct hlist_head *ecryptfs_daemon_id_hash;
+/* Protects ecryptfs_daemon_id_hash and the entries chained from it */
+static struct mutex ecryptfs_daemon_id_hash_mux;
+/*
+ * Passed to hash_long() as its second argument, i.e. the number of
+ * bits in the resulting hash value.
+ */
+static int ecryptfs_hash_buckets;
+/* The argument is parenthesized so that the cast applies to all of it */
+#define ecryptfs_uid_hash(uid) \
+	hash_long((unsigned long)(uid), ecryptfs_hash_buckets)
+
+/* Incremented each time a context is handed out; stamps the context */
+static unsigned int ecryptfs_msg_counter;
+/* Array of ecryptfs_message_buf_len context elements */
+static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
+
+/**
+ * ecryptfs_acquire_free_msg_ctx
+ * @msg_ctx: Set to the context element claimed from the free list
+ *
+ * Walks the free list and claims the first context element whose
+ * mutex can be taken without blocking, recording the current task as
+ * its owner. On success the element's mutex is left locked. The
+ * caller must hold the list mutex.
+ *
+ * Returns zero on success; -ENOMEM if the free list is empty or if no
+ * element on it could be locked.
+ */
+static int ecryptfs_acquire_free_msg_ctx(struct ecryptfs_msg_ctx **msg_ctx)
+{
+	struct list_head *pos;
+
+	if (list_empty(&ecryptfs_msg_ctx_free_list)) {
+		ecryptfs_printk(KERN_WARNING, "The eCryptfs free "
+				"context list is empty. It may be helpful to "
+				"specify the ecryptfs_message_buf_len "
+				"parameter to be greater than the current "
+				"value of [%d]\n", ecryptfs_message_buf_len);
+		return -ENOMEM;
+	}
+	list_for_each(pos, &ecryptfs_msg_ctx_free_list) {
+		*msg_ctx = list_entry(pos, struct ecryptfs_msg_ctx, node);
+		/* Skip elements somebody else currently has locked */
+		if (!mutex_trylock(&(*msg_ctx)->mux))
+			continue;
+		(*msg_ctx)->task = current;
+		return 0;
+	}
+	return -ENOMEM;
+}
+
+/**
+ * ecryptfs_msg_ctx_free_to_alloc
+ * @msg_ctx: The context to mark as in use
+ *
+ * Marks the context as pending a response, stamps it with the next
+ * value of the global message counter and moves it onto the alloc
+ * list. The caller must hold both the list mutex and the context
+ * mutex.
+ */
+static void ecryptfs_msg_ctx_free_to_alloc(struct ecryptfs_msg_ctx *msg_ctx)
+{
+	ecryptfs_msg_counter++;
+	msg_ctx->counter = ecryptfs_msg_counter;
+	msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_PENDING;
+	list_move(&msg_ctx->node, &ecryptfs_msg_ctx_alloc_list);
+}
+
+/**
+ * ecryptfs_msg_ctx_alloc_to_free
+ * @msg_ctx: The context to move from the alloc list to the free list
+ *
+ * Releases any message still attached to the context, marks the
+ * context free and returns it to the free list. Be sure to lock the
+ * list mutex and the context mutex before calling.
+ */
+static void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
+{
+	list_move(&(msg_ctx->node), &ecryptfs_msg_ctx_free_list);
+	/*
+	 * kfree(NULL) is a no-op, so no guard is needed. Clear the
+	 * pointer so that a later kfree() of msg_ctx->msg (e.g. when
+	 * messaging is released) cannot free the same buffer twice.
+	 */
+	kfree(msg_ctx->msg);
+	msg_ctx->msg = NULL;
+	msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_FREE;
+}
+
+/**
+ * ecryptfs_find_daemon_id
+ * @uid: The user id to look up
+ * @id: Used as the search cursor; when zero is returned it points to
+ *      the daemon id registered for @uid
+ *
+ * Scans the hash chain selected by @uid for an entry with a matching
+ * user id. The caller should hold the daemon id hash mutex.
+ *
+ * Returns zero if an entry for @uid exists; -EINVAL otherwise.
+ */
+static int ecryptfs_find_daemon_id(uid_t uid, struct ecryptfs_daemon_id **id)
+{
+	struct hlist_node *pos;
+
+	hlist_for_each_entry(*id, pos,
+			     &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)],
+			     id_chain) {
+		if ((*id)->uid != uid)
+			continue;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/**
+ * ecryptfs_send_raw_message
+ * @transport: The underlying transport (netlink, etc.)
+ * @msg_type: The type of message to send
+ * @pid: The process ID of the daemon to send the message to
+ *
+ * Sends a message of the given type with no payload and no message
+ * context. Only the netlink transport is handled; every other
+ * transport value yields -ENOSYS. Otherwise returns whatever
+ * ecryptfs_send_netlink() returns.
+ */
+static int ecryptfs_send_raw_message(unsigned int transport, u16 msg_type,
+				     pid_t pid)
+{
+	if (transport == ECRYPTFS_TRANSPORT_NETLINK)
+		return ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, pid);
+	/* ECRYPTFS_TRANSPORT_CONNECTOR, ECRYPTFS_TRANSPORT_RELAYFS, other */
+	return -ENOSYS;
+}
+
+/**
+ * ecryptfs_process_helo
+ * @transport: The underlying transport (netlink, etc.)
+ * @uid: The user ID owner of the message
+ * @pid: The process ID for the userspace program that sent the
+ *       message
+ *
+ * Registers @pid as the daemon for @uid in the daemon id hash. When
+ * @uid already has a daemon registered, that entry is removed and
+ * the old daemon is sent a QUIT message first; a failure to deliver
+ * the QUIT is only logged and does not fail the registration.
+ *
+ * Returns zero once the new daemon id is in the hash list; -ENOMEM
+ * if the new entry could not be allocated.
+ */
+int ecryptfs_process_helo(unsigned int transport, uid_t uid, pid_t pid)
+{
+	struct ecryptfs_daemon_id *fresh;
+	struct ecryptfs_daemon_id *stale;
+	int rc;
+
+	mutex_lock(&ecryptfs_daemon_id_hash_mux);
+	fresh = kmalloc(sizeof(*fresh), GFP_KERNEL);
+	if (fresh) {
+		if (ecryptfs_find_daemon_id(uid, &stale) == 0) {
+			printk(KERN_WARNING "Received request from user [%d] "
+			       "to register daemon [%d]; unregistering daemon "
+			       "[%d]\n", uid, pid, stale->pid);
+			hlist_del(&stale->id_chain);
+			rc = ecryptfs_send_raw_message(transport,
+						       ECRYPTFS_NLMSG_QUIT,
+						       stale->pid);
+			if (rc)
+				printk(KERN_WARNING "Failed to send QUIT "
+				       "message to daemon [%d]; rc = [%d]\n",
+				       stale->pid, rc);
+			kfree(stale);
+		}
+		fresh->pid = pid;
+		fresh->uid = uid;
+		hlist_add_head(&fresh->id_chain,
+			       &ecryptfs_daemon_id_hash[ecryptfs_uid_hash(uid)]);
+		rc = 0;
+	} else {
+		rc = -ENOMEM;
+		ecryptfs_printk(KERN_ERR, "Failed to allocate memory; unable "
+				"to register daemon [%d] for user [%d]\n",
+				pid, uid);
+	}
+	mutex_unlock(&ecryptfs_daemon_id_hash_mux);
+	return rc;
+}
+
+/**
+ * ecryptfs_process_quit
+ * @uid: The user ID owner of the message
+ * @pid: The process ID for the userspace program that sent the
+ *       message
+ *
+ * Removes the daemon id registered for @uid, but only when @pid is
+ * the registered daemon that is requesting the removal.
+ *
+ * Returns zero after deleting the daemon id; -EINVAL if @uid has no
+ * registered daemon or if the registered daemon is not @pid.
+ */
+int ecryptfs_process_quit(uid_t uid, pid_t pid)
+{
+	struct ecryptfs_daemon_id *entry;
+	int rc;
+
+	mutex_lock(&ecryptfs_daemon_id_hash_mux);
+	if (ecryptfs_find_daemon_id(uid, &entry) != 0) {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_ERR, "Received request from user [%d] to "
+				"unregister unrecognized daemon [%d]\n", uid,
+				pid);
+	} else if (entry->pid != pid) {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_WARNING, "Received request from user [%d] "
+				"with pid [%d] to unregister daemon [%d]\n",
+				uid, pid, entry->pid);
+	} else {
+		hlist_del(&entry->id_chain);
+		kfree(entry);
+		rc = 0;
+	}
+	mutex_unlock(&ecryptfs_daemon_id_hash_mux);
+	return rc;
+}
+
+/**
+ * ecryptfs_process_response
+ * @msg: The ecryptfs message received; the caller should sanity check
+ *       msg->data_len
+ * @uid: The user ID of the sender of the message
+ * @pid: The process ID of the userspace application that sent the
+ *       message
+ * @seq: The sequence number of the message
+ *
+ * Processes a response message after sending an operation request to
+ * userspace. A copy of the message is attached to the context element
+ * named by msg->index and the task that owns the context is woken.
+ *
+ * Returns zero upon delivery to the desired context element;
+ * -EINVAL for an out-of-range index, a context that is not pending a
+ * response or a sequence mismatch; -EBADMSG when the sender does not
+ * match the registered daemon or the expected user; -ENOMEM if the
+ * message copy cannot be allocated.
+ */
+int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t uid,
+			      pid_t pid, u32 seq)
+{
+	struct ecryptfs_daemon_id *id;
+	struct ecryptfs_msg_ctx *msg_ctx;
+	pid_t daemon_pid = 0;
+	int msg_size;
+	int rc;
+
+	if (msg->index >= ecryptfs_message_buf_len) {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_ERR, "Attempt to reference "
+				"context buffer at index [%d]; maximum "
+				"allowable is [%d]\n", msg->index,
+				(ecryptfs_message_buf_len - 1));
+		goto out;
+	}
+	msg_ctx = &ecryptfs_msg_ctx_arr[msg->index];
+	mutex_lock(&msg_ctx->mux);
+	/*
+	 * Validate the context before touching msg_ctx->task: a
+	 * context that was never handed out has a NULL task, and the
+	 * task of a context that is no longer pending is not known to
+	 * be waiting on it any more.
+	 */
+	if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING
+	    || !msg_ctx->task) {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_WARNING, "Desired context element is not "
+				"pending a response\n");
+		goto unlock;
+	}
+	/*
+	 * The hash may only be searched with its mutex held; take a
+	 * snapshot of the daemon pid so the entry is not used after
+	 * the mutex is dropped.
+	 */
+	mutex_lock(&ecryptfs_daemon_id_hash_mux);
+	rc = ecryptfs_find_daemon_id(msg_ctx->task->euid, &id);
+	if (!rc)
+		daemon_pid = id->pid;
+	mutex_unlock(&ecryptfs_daemon_id_hash_mux);
+	if (rc) {
+		rc = -EBADMSG;
+		ecryptfs_printk(KERN_WARNING, "User [%d] received a "
+				"message response from process [%d] but does "
+				"not have a registered daemon\n",
+				msg_ctx->task->euid, pid);
+		goto wake_up;
+	}
+	if (msg_ctx->task->euid != uid) {
+		rc = -EBADMSG;
+		ecryptfs_printk(KERN_WARNING, "Received message from user "
+				"[%d]; expected message from user [%d]\n",
+				uid, msg_ctx->task->euid);
+		goto unlock;
+	}
+	if (daemon_pid != pid) {
+		rc = -EBADMSG;
+		ecryptfs_printk(KERN_ERR, "User [%d] received a "
+				"message response from an unrecognized "
+				"process [%d]\n", msg_ctx->task->euid, pid);
+		goto unlock;
+	}
+	if (msg_ctx->counter != seq) {
+		rc = -EINVAL;
+		ecryptfs_printk(KERN_WARNING, "Invalid message sequence; "
+				"expected [%d]; received [%d]\n",
+				msg_ctx->counter, seq);
+		goto unlock;
+	}
+	msg_size = sizeof(*msg) + msg->data_len;
+	msg_ctx->msg = kmalloc(msg_size, GFP_KERNEL);
+	if (!msg_ctx->msg) {
+		rc = -ENOMEM;
+		ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n");
+		goto unlock;
+	}
+	memcpy(msg_ctx->msg, msg, msg_size);
+	msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE;
+	rc = 0;
+wake_up:
+	wake_up_process(msg_ctx->task);
+unlock:
+	mutex_unlock(&msg_ctx->mux);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_send_message
+ * @transport: The transport over which to send the message (i.e.,
+ *             netlink)
+ * @data: The data to send
+ * @data_len: The length of data
+ * @msg_ctx: The message context allocated for the send
+ *
+ * Looks up the daemon registered for the current effective user id,
+ * claims a free context element for the request and sends @data to
+ * that daemon.
+ *
+ * Returns zero on success; -ENOTCONN if the user has no registered
+ * daemon; non-zero if no context element could be claimed; -ENOSYS
+ * for an unsupported transport; otherwise the error from the
+ * transport's send routine.
+ */
+int ecryptfs_send_message(unsigned int transport, char *data, int data_len,
+			  struct ecryptfs_msg_ctx **msg_ctx)
+{
+	struct ecryptfs_daemon_id *id;
+	pid_t daemon_pid;
+	int rc;
+
+	mutex_lock(&ecryptfs_daemon_id_hash_mux);
+	if (ecryptfs_find_daemon_id(current->euid, &id)) {
+		mutex_unlock(&ecryptfs_daemon_id_hash_mux);
+		rc = -ENOTCONN;
+		ecryptfs_printk(KERN_ERR, "User [%d] does not have a daemon "
+				"registered\n", current->euid);
+		goto out;
+	}
+	/*
+	 * Copy the pid while the hash mutex is held: once it is
+	 * dropped, a concurrent HELO or QUIT may free the entry.
+	 */
+	daemon_pid = id->pid;
+	mutex_unlock(&ecryptfs_daemon_id_hash_mux);
+	mutex_lock(&ecryptfs_msg_ctx_lists_mux);
+	rc = ecryptfs_acquire_free_msg_ctx(msg_ctx);
+	if (rc) {
+		mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
+		ecryptfs_printk(KERN_WARNING, "Could not claim a free "
+				"context element\n");
+		goto out;
+	}
+	ecryptfs_msg_ctx_free_to_alloc(*msg_ctx);
+	mutex_unlock(&(*msg_ctx)->mux);
+	mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
+	switch (transport) {
+	case ECRYPTFS_TRANSPORT_NETLINK:
+		rc = ecryptfs_send_netlink(data, data_len, *msg_ctx,
+					   ECRYPTFS_NLMSG_REQUEST, 0,
+					   daemon_pid);
+		break;
+	case ECRYPTFS_TRANSPORT_CONNECTOR:
+	case ECRYPTFS_TRANSPORT_RELAYFS:
+	default:
+		rc = -ENOSYS;
+	}
+	if (rc) {
+		printk(KERN_ERR "Error attempting to send message to userspace "
+		       "daemon; rc = [%d]\n", rc);
+	}
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_wait_for_response
+ * @msg_ctx: The context that was assigned when sending a message
+ * @msg: Set to the message from userspace; not set if rc != 0
+ *
+ * Sleeps interruptibly, re-checking the context each time the sleep
+ * ends, until either the context is marked done (the response handler
+ * wakes the owning task) or ecryptfs_message_wait_timeout seconds
+ * have been used up. In both cases the context is returned to the
+ * free list before this function returns.
+ *
+ * Returns zero with *msg pointing to the received message, which the
+ * context no longer references; -ENOMSG if no response arrived in
+ * time.
+ */
+int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
+			       struct ecryptfs_message **msg)
+{
+	signed long remaining = ecryptfs_message_wait_timeout * HZ;
+	int rc = 0;
+
+	for (;;) {
+		remaining = schedule_timeout_interruptible(remaining);
+		mutex_lock(&ecryptfs_msg_ctx_lists_mux);
+		mutex_lock(&msg_ctx->mux);
+		/* Leave the loop with both mutexes still held */
+		if (msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE
+		    || !remaining)
+			break;
+		mutex_unlock(&msg_ctx->mux);
+		mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
+	}
+	if (msg_ctx->state == ECRYPTFS_MSG_CTX_STATE_DONE) {
+		/* Hand the message over so that freeing the context keeps it */
+		*msg = msg_ctx->msg;
+		msg_ctx->msg = NULL;
+	} else {
+		rc = -ENOMSG;
+	}
+	ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
+	mutex_unlock(&msg_ctx->mux);
+	mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
+	return rc;
+}
+
+/**
+ * ecryptfs_init_messaging
+ * @transport: The transport to initialize (netlink, etc.)
+ *
+ * Allocates and initializes the daemon id hash table and the array of
+ * message context elements, places every context on the free list and
+ * then initializes the transport. Anything allocated here is released
+ * again on failure.
+ *
+ * Returns zero on success; -ENOMEM on allocation failure; -ENOSYS for
+ * an unsupported transport; otherwise the error from the transport's
+ * init routine.
+ */
+int ecryptfs_init_messaging(unsigned int transport)
+{
+	int i;
+	int num_buckets;
+	int rc = 0;
+
+	if (ecryptfs_number_of_users > ECRYPTFS_MAX_NUM_USERS) {
+		ecryptfs_number_of_users = ECRYPTFS_MAX_NUM_USERS;
+		ecryptfs_printk(KERN_WARNING, "Specified number of users is "
+				"too large, defaulting to [%d] users\n",
+				ecryptfs_number_of_users);
+	}
+	mutex_init(&ecryptfs_daemon_id_hash_mux);
+	mutex_lock(&ecryptfs_daemon_id_hash_mux);
+	ecryptfs_hash_buckets = 0;
+	while (ecryptfs_number_of_users >> ++ecryptfs_hash_buckets)
+		;
+	/*
+	 * ecryptfs_hash_buckets is handed to hash_long() as a bit
+	 * count, so hash values range over [0, 2^bits); the table
+	 * needs one head for each of them.
+	 */
+	num_buckets = 1 << ecryptfs_hash_buckets;
+	ecryptfs_daemon_id_hash = kmalloc(sizeof(struct hlist_head)
+					  * num_buckets, GFP_KERNEL);
+	if (!ecryptfs_daemon_id_hash) {
+		rc = -ENOMEM;
+		ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n");
+		mutex_unlock(&ecryptfs_daemon_id_hash_mux);
+		goto out;
+	}
+	for (i = 0; i < num_buckets; i++)
+		INIT_HLIST_HEAD(&ecryptfs_daemon_id_hash[i]);
+	mutex_unlock(&ecryptfs_daemon_id_hash_mux);
+
+	ecryptfs_msg_ctx_arr = kmalloc((sizeof(struct ecryptfs_msg_ctx)
+					* ecryptfs_message_buf_len), GFP_KERNEL);
+	if (!ecryptfs_msg_ctx_arr) {
+		rc = -ENOMEM;
+		ecryptfs_printk(KERN_ERR, "Failed to allocate memory\n");
+		/* Do not leak the (still empty) hash table */
+		kfree(ecryptfs_daemon_id_hash);
+		ecryptfs_daemon_id_hash = NULL;
+		goto out;
+	}
+	mutex_init(&ecryptfs_msg_ctx_lists_mux);
+	mutex_lock(&ecryptfs_msg_ctx_lists_mux);
+	ecryptfs_msg_counter = 0;
+	for (i = 0; i < ecryptfs_message_buf_len; i++) {
+		INIT_LIST_HEAD(&ecryptfs_msg_ctx_arr[i].node);
+		mutex_init(&ecryptfs_msg_ctx_arr[i].mux);
+		mutex_lock(&ecryptfs_msg_ctx_arr[i].mux);
+		ecryptfs_msg_ctx_arr[i].index = i;
+		ecryptfs_msg_ctx_arr[i].state = ECRYPTFS_MSG_CTX_STATE_FREE;
+		ecryptfs_msg_ctx_arr[i].counter = 0;
+		ecryptfs_msg_ctx_arr[i].task = NULL;
+		ecryptfs_msg_ctx_arr[i].msg = NULL;
+		list_add_tail(&ecryptfs_msg_ctx_arr[i].node,
+			      &ecryptfs_msg_ctx_free_list);
+		mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux);
+	}
+	mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
+	switch(transport) {
+	case ECRYPTFS_TRANSPORT_NETLINK:
+		rc = ecryptfs_init_netlink();
+		if (rc)
+			ecryptfs_release_messaging(transport);
+		break;
+	case ECRYPTFS_TRANSPORT_CONNECTOR:
+	case ECRYPTFS_TRANSPORT_RELAYFS:
+	default:
+		rc = -ENOSYS;
+		/* Nothing can use the tables without a transport */
+		ecryptfs_release_messaging(transport);
+	}
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_release_messaging
+ * @transport: The transport to shut down (netlink, etc.)
+ *
+ * Frees the message context array together with any messages still
+ * attached to it, frees every registered daemon id and the hash
+ * table, and releases the transport. The global pointers are cleared
+ * so that calling this function again is harmless for the tables.
+ */
+void ecryptfs_release_messaging(unsigned int transport)
+{
+	if (ecryptfs_msg_ctx_arr) {
+		int i;
+
+		mutex_lock(&ecryptfs_msg_ctx_lists_mux);
+		for (i = 0; i < ecryptfs_message_buf_len; i++) {
+			mutex_lock(&ecryptfs_msg_ctx_arr[i].mux);
+			kfree(ecryptfs_msg_ctx_arr[i].msg);
+			mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux);
+		}
+		kfree(ecryptfs_msg_ctx_arr);
+		ecryptfs_msg_ctx_arr = NULL;
+		/* The list nodes lived inside the array that was just freed */
+		INIT_LIST_HEAD(&ecryptfs_msg_ctx_free_list);
+		INIT_LIST_HEAD(&ecryptfs_msg_ctx_alloc_list);
+		mutex_unlock(&ecryptfs_msg_ctx_lists_mux);
+	}
+	if (ecryptfs_daemon_id_hash) {
+		struct hlist_node *elem;
+		struct hlist_node *next;
+		struct ecryptfs_daemon_id *id;
+		int i;
+
+		mutex_lock(&ecryptfs_daemon_id_hash_mux);
+		/* One head per possible hash value, i.e. 2^bits heads */
+		for (i = 0; i < (1 << ecryptfs_hash_buckets); i++) {
+			/*
+			 * Entries are freed while walking the chain,
+			 * so the next pointer must be read before the
+			 * current entry goes away.
+			 */
+			hlist_for_each_entry_safe(id, elem, next,
+						  &ecryptfs_daemon_id_hash[i],
+						  id_chain) {
+				hlist_del(elem);
+				kfree(id);
+			}
+		}
+		kfree(ecryptfs_daemon_id_hash);
+		ecryptfs_daemon_id_hash = NULL;
+		mutex_unlock(&ecryptfs_daemon_id_hash_mux);
+	}
+	switch(transport) {
+	case ECRYPTFS_TRANSPORT_NETLINK:
+		ecryptfs_release_netlink();
+		break;
+	case ECRYPTFS_TRANSPORT_CONNECTOR:
+	case ECRYPTFS_TRANSPORT_RELAYFS:
+	default:
+		break;
+	}
+	return;
+}
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 06843d24f239..3a6f65c3f14f 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -6,7 +6,7 @@
*
* Copyright (C) 1997-2003 Erez Zadok
* Copyright (C) 2001-2003 Stony Brook University
- * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Copyright (C) 2004-2007 International Business Machines Corp.
* Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
*
* This program is free software; you can redistribute it and/or
@@ -234,22 +234,13 @@ int ecryptfs_do_readpage(struct file *file, struct page *page,
goto out;
}
wait_on_page_locked(lower_page);
- page_data = (char *)kmap(page);
- if (!page_data) {
- rc = -ENOMEM;
- ecryptfs_printk(KERN_ERR, "Error mapping page\n");
- goto out;
- }
- lower_page_data = (char *)kmap(lower_page);
- if (!lower_page_data) {
- rc = -ENOMEM;
- ecryptfs_printk(KERN_ERR, "Error mapping page\n");
- kunmap(page);
- goto out;
- }
+ page_data = kmap_atomic(page, KM_USER0);
+ lower_page_data = kmap_atomic(lower_page, KM_USER1);
memcpy(page_data, lower_page_data, PAGE_CACHE_SIZE);
- kunmap(lower_page);
- kunmap(page);
+ kunmap_atomic(lower_page_data, KM_USER1);
+ flush_dcache_page(lower_page);
+ kunmap_atomic(page_data, KM_USER0);
+ flush_dcache_page(page);
rc = 0;
out:
if (likely(lower_page))
@@ -260,6 +251,33 @@ out:
ClearPageUptodate(page);
return rc;
}
+/**
+ * set_header_info
+ * @page_virt: Start of the buffer holding the header extent
+ * @crypt_stat: The crypt stat whose header metadata is written
+ *
+ * Writes the header metadata at octet 20 of @page_virt with the
+ * number of header extents temporarily forced to 1; the original
+ * value in @crypt_stat is restored before returning.
+ *
+ * Header Extent:
+ * Octets 0-7: Unencrypted file size (big-endian)
+ * Octets 8-15: eCryptfs special marker
+ * Octets 16-19: Flags
+ * Octet 16: File format version number (between 0 and 255)
+ * Octets 17-18: Reserved
+ * Octet 19: Bit 1 (lsb): Reserved
+ * Bit 2: Encrypted?
+ * Bits 3-8: Reserved
+ * Octets 20-23: Header extent size (big-endian)
+ * Octets 24-25: Number of header extents at front of file
+ * (big-endian)
+ * Octet 26: Begin RFC 2440 authentication token packet set
+ */
+static void set_header_info(char *page_virt,
+			    struct ecryptfs_crypt_stat *crypt_stat)
+{
+	int orig_num_header_extents = crypt_stat->num_header_extents_at_front;
+	size_t bytes_written;
+
+	crypt_stat->num_header_extents_at_front = 1;
+	/* bytes_written is required by the callee but not needed here */
+	ecryptfs_write_header_metadata(page_virt + 20, crypt_stat,
+				       &bytes_written);
+	crypt_stat->num_header_extents_at_front = orig_num_header_extents;
+}
/**
* ecryptfs_readpage
@@ -279,8 +297,8 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
->crypt_stat;
if (!crypt_stat
- || !ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_ENCRYPTED)
- || ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
+ || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)
+ || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) {
ecryptfs_printk(KERN_DEBUG,
"Passing through unencrypted page\n");
rc = ecryptfs_do_readpage(file, page, page->index);
@@ -289,10 +307,51 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
"[%d]\n", rc);
goto out;
}
+ } else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) {
+ if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
+ int num_pages_in_header_region =
+ (crypt_stat->header_extent_size
+ / PAGE_CACHE_SIZE);
+
+ if (page->index < num_pages_in_header_region) {
+ char *page_virt;
+
+ page_virt = kmap_atomic(page, KM_USER0);
+ memset(page_virt, 0, PAGE_CACHE_SIZE);
+ if (page->index == 0) {
+ rc = ecryptfs_read_xattr_region(
+ page_virt, file->f_path.dentry);
+ set_header_info(page_virt, crypt_stat);
+ }
+ kunmap_atomic(page_virt, KM_USER0);
+ flush_dcache_page(page);
+ if (rc) {
+ printk(KERN_ERR "Error reading xattr "
+ "region\n");
+ goto out;
+ }
+ } else {
+ rc = ecryptfs_do_readpage(
+ file, page,
+ (page->index
+ - num_pages_in_header_region));
+ if (rc) {
+ printk(KERN_ERR "Error reading page; "
+ "rc = [%d]\n", rc);
+ goto out;
+ }
+ }
+ } else {
+ rc = ecryptfs_do_readpage(file, page, page->index);
+ if (rc) {
+ printk(KERN_ERR "Error reading page; rc = "
+ "[%d]\n", rc);
+ goto out;
+ }
+ }
} else {
rc = ecryptfs_decrypt_page(file, page);
if (rc) {
-
ecryptfs_printk(KERN_ERR, "Error decrypting page; "
"rc = [%d]\n", rc);
goto out;
@@ -308,30 +367,27 @@ out:
return rc;
}
+/**
+ * Called with lower inode mutex held.
+ */
static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
{
struct inode *inode = page->mapping->host;
int end_byte_in_page;
- int rc = 0;
char *page_virt;
- if ((i_size_read(inode) / PAGE_CACHE_SIZE) == page->index) {
- end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
- if (to > end_byte_in_page)
- end_byte_in_page = to;
- page_virt = kmap(page);
- if (!page_virt) {
- rc = -ENOMEM;
- ecryptfs_printk(KERN_WARNING,
- "Could not map page\n");
- goto out;
- }
- memset((page_virt + end_byte_in_page), 0,
- (PAGE_CACHE_SIZE - end_byte_in_page));
- kunmap(page);
- }
+ if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index)
+ goto out;
+ end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
+ if (to > end_byte_in_page)
+ end_byte_in_page = to;
+ page_virt = kmap_atomic(page, KM_USER0);
+ memset((page_virt + end_byte_in_page), 0,
+ (PAGE_CACHE_SIZE - end_byte_in_page));
+ kunmap_atomic(page_virt, KM_USER0);
+ flush_dcache_page(page);
out:
- return rc;
+ return 0;
}
static int ecryptfs_prepare_write(struct file *file, struct page *page,
@@ -339,7 +395,6 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page,
{
int rc = 0;
- kmap(page);
if (from == 0 && to == PAGE_CACHE_SIZE)
goto out; /* If we are writing a full page, it will be
up to date. */
@@ -349,30 +404,6 @@ out:
return rc;
}
-int ecryptfs_grab_and_map_lower_page(struct page **lower_page,
- char **lower_virt,
- struct inode *lower_inode,
- unsigned long lower_page_index)
-{
- int rc = 0;
-
- (*lower_page) = grab_cache_page(lower_inode->i_mapping,
- lower_page_index);
- if (!(*lower_page)) {
- ecryptfs_printk(KERN_ERR, "grab_cache_page for "
- "lower_page_index = [0x%.16x] failed\n",
- lower_page_index);
- rc = -EINVAL;
- goto out;
- }
- if (lower_virt)
- (*lower_virt) = kmap((*lower_page));
- else
- kmap((*lower_page));
-out:
- return rc;
-}
-
int ecryptfs_writepage_and_release_lower_page(struct page *lower_page,
struct inode *lower_inode,
struct writeback_control *wbc)
@@ -391,11 +422,8 @@ out:
return rc;
}
-static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page)
+static void ecryptfs_release_lower_page(struct page *lower_page)
{
- kunmap(lower_page);
- ecryptfs_printk(KERN_DEBUG, "Unlocking lower page with index = "
- "[0x%.16x]\n", lower_page->index);
unlock_page(lower_page);
page_cache_release(lower_page);
}
@@ -407,10 +435,9 @@ static void ecryptfs_unmap_and_release_lower_page(struct page *lower_page)
*
* Returns zero on success; non-zero on error.
*/
-int
-ecryptfs_write_inode_size_to_header(struct file *lower_file,
- struct inode *lower_inode,
- struct inode *inode)
+static int ecryptfs_write_inode_size_to_header(struct file *lower_file,
+ struct inode *lower_inode,
+ struct inode *inode)
{
int rc = 0;
struct page *header_page;
@@ -418,11 +445,11 @@ ecryptfs_write_inode_size_to_header(struct file *lower_file,
const struct address_space_operations *lower_a_ops;
u64 file_size;
- rc = ecryptfs_grab_and_map_lower_page(&header_page, &header_virt,
- lower_inode, 0);
- if (rc) {
- ecryptfs_printk(KERN_ERR, "grab_cache_page for header page "
- "failed\n");
+ header_page = grab_cache_page(lower_inode->i_mapping, 0);
+ if (!header_page) {
+ ecryptfs_printk(KERN_ERR, "grab_cache_page for "
+ "lower_page_index 0 failed\n");
+ rc = -EINVAL;
goto out;
}
lower_a_ops = lower_inode->i_mapping->a_ops;
@@ -430,18 +457,95 @@ ecryptfs_write_inode_size_to_header(struct file *lower_file,
file_size = (u64)i_size_read(inode);
ecryptfs_printk(KERN_DEBUG, "Writing size: [0x%.16x]\n", file_size);
file_size = cpu_to_be64(file_size);
+ header_virt = kmap_atomic(header_page, KM_USER0);
memcpy(header_virt, &file_size, sizeof(u64));
+ kunmap_atomic(header_virt, KM_USER0);
+ flush_dcache_page(header_page);
rc = lower_a_ops->commit_write(lower_file, header_page, 0, 8);
if (rc < 0)
ecryptfs_printk(KERN_ERR, "Error commiting header page "
"write\n");
- ecryptfs_unmap_and_release_lower_page(header_page);
+ ecryptfs_release_lower_page(header_page);
lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty_sync(inode);
out:
return rc;
}
+/**
+ * ecryptfs_write_inode_size_to_xattr
+ * @lower_inode: The lower inode (not used by this function)
+ * @inode: The eCryptfs inode whose size is recorded
+ * @ecryptfs_dentry: The eCryptfs dentry; its lower dentry carries the
+ *                   extended attribute
+ * @lower_i_mutex_held: Non-zero if the caller already holds the lower
+ *                      inode's i_mutex, in which case it is not taken
+ *                      here
+ *
+ * Reads the current eCryptfs extended attribute of the lower file,
+ * overwrites its first 8 octets with the big-endian size of @inode
+ * and writes the attribute back. If the attribute cannot be read, an
+ * attribute of 8 octets holding only the size is written.
+ *
+ * Returns zero on success; -ENOMEM if no buffer could be allocated;
+ * -ENOSYS if the lower filesystem lacks getxattr or setxattr;
+ * otherwise the error returned by the lower setxattr.
+ */
+static int ecryptfs_write_inode_size_to_xattr(struct inode *lower_inode,
+					      struct inode *inode,
+					      struct dentry *ecryptfs_dentry,
+					      int lower_i_mutex_held)
+{
+	ssize_t size;
+	void *xattr_virt;
+	struct dentry *lower_dentry;
+	u64 file_size;
+	int rc;
+
+	xattr_virt = kmem_cache_alloc(ecryptfs_xattr_cache, GFP_KERNEL);
+	if (!xattr_virt) {
+		printk(KERN_ERR "Out of memory whilst attempting to write "
+		       "inode size to xattr\n");
+		rc = -ENOMEM;
+		goto out;
+	}
+	lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+	/* Both operations are called below, so both must be present */
+	if (!lower_dentry->d_inode->i_op->getxattr
+	    || !lower_dentry->d_inode->i_op->setxattr) {
+		printk(KERN_WARNING
+		       "No support for setting xattr in lower filesystem\n");
+		rc = -ENOSYS;
+		goto out_free;
+	}
+	if (!lower_i_mutex_held)
+		mutex_lock(&lower_dentry->d_inode->i_mutex);
+	size = lower_dentry->d_inode->i_op->getxattr(lower_dentry,
+						     ECRYPTFS_XATTR_NAME,
+						     xattr_virt,
+						     PAGE_CACHE_SIZE);
+	if (!lower_i_mutex_held)
+		mutex_unlock(&lower_dentry->d_inode->i_mutex);
+	/* No readable attribute: write just the 8-octet size field */
+	if (size < 0)
+		size = 8;
+	file_size = (u64)i_size_read(inode);
+	file_size = cpu_to_be64(file_size);
+	memcpy(xattr_virt, &file_size, sizeof(u64));
+	if (!lower_i_mutex_held)
+		mutex_lock(&lower_dentry->d_inode->i_mutex);
+	rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry,
+						   ECRYPTFS_XATTR_NAME,
+						   xattr_virt, size, 0);
+	if (!lower_i_mutex_held)
+		mutex_unlock(&lower_dentry->d_inode->i_mutex);
+	if (rc)
+		printk(KERN_ERR "Error whilst attempting to write inode size "
+		       "to lower file xattr; rc = [%d]\n", rc);
+out_free:
+	kmem_cache_free(ecryptfs_xattr_cache, xattr_virt);
+out:
+	return rc;
+}
+
+/**
+ * ecryptfs_write_inode_size_to_metadata
+ * @lower_file: The lower file; used when the size goes to the header
+ * @lower_inode: The lower inode
+ * @inode: The eCryptfs inode whose size is recorded
+ * @ecryptfs_dentry: The eCryptfs dentry; used when the size goes to
+ *                   the extended attribute
+ * @lower_i_mutex_held: Passed through to the xattr writer
+ *
+ * Records the size of @inode in the extended attribute when the
+ * ECRYPTFS_METADATA_IN_XATTR flag is set for the inode, and in the
+ * header of the lower file otherwise. Returns the result of the
+ * writer that was chosen.
+ */
+int
+ecryptfs_write_inode_size_to_metadata(struct file *lower_file,
+				      struct inode *lower_inode,
+				      struct inode *inode,
+				      struct dentry *ecryptfs_dentry,
+				      int lower_i_mutex_held)
+{
+	struct ecryptfs_crypt_stat *crypt_stat =
+		&ecryptfs_inode_to_private(inode)->crypt_stat;
+
+	if (!(crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR))
+		return ecryptfs_write_inode_size_to_header(lower_file,
+							   lower_inode,
+							   inode);
+	return ecryptfs_write_inode_size_to_xattr(lower_inode, inode,
+						  ecryptfs_dentry,
+						  lower_i_mutex_held);
+}
+
int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
struct file *lower_file,
unsigned long lower_page_index, int byte_offset,
@@ -449,10 +553,10 @@ int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
{
int rc = 0;
- rc = ecryptfs_grab_and_map_lower_page(lower_page, NULL, lower_inode,
- lower_page_index);
- if (rc) {
- ecryptfs_printk(KERN_ERR, "Error attempting to grab and map "
+ *lower_page = grab_cache_page(lower_inode->i_mapping, lower_page_index);
+ if (!(*lower_page)) {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_ERR, "Error attempting to grab "
"lower page with index [0x%.16x]\n",
lower_page_index);
goto out;
@@ -468,7 +572,7 @@ int ecryptfs_get_lower_page(struct page **lower_page, struct inode *lower_inode,
}
out:
if (rc && (*lower_page)) {
- ecryptfs_unmap_and_release_lower_page(*lower_page);
+ ecryptfs_release_lower_page(*lower_page);
(*lower_page) = NULL;
}
return rc;
@@ -493,7 +597,7 @@ ecryptfs_commit_lower_page(struct page *lower_page, struct inode *lower_inode,
"Error committing write; rc = [%d]\n", rc);
} else
rc = 0;
- ecryptfs_unmap_and_release_lower_page(lower_page);
+ ecryptfs_release_lower_page(lower_page);
return rc;
}
@@ -528,89 +632,7 @@ out:
return rc;
}
-static int
-process_new_file(struct ecryptfs_crypt_stat *crypt_stat,
- struct file *file, struct inode *inode)
-{
- struct page *header_page;
- const struct address_space_operations *lower_a_ops;
- struct inode *lower_inode;
- struct file *lower_file;
- char *header_virt;
- int rc = 0;
- int current_header_page = 0;
- int header_pages;
- int more_header_data_to_be_written = 1;
-
- lower_inode = ecryptfs_inode_to_lower(inode);
- lower_file = ecryptfs_file_to_lower(file);
- lower_a_ops = lower_inode->i_mapping->a_ops;
- header_pages = ((crypt_stat->header_extent_size
- * crypt_stat->num_header_extents_at_front)
- / PAGE_CACHE_SIZE);
- BUG_ON(header_pages < 1);
- while (current_header_page < header_pages) {
- rc = ecryptfs_grab_and_map_lower_page(&header_page,
- &header_virt,
- lower_inode,
- current_header_page);
- if (rc) {
- ecryptfs_printk(KERN_ERR, "grab_cache_page for "
- "header page [%d] failed; rc = [%d]\n",
- current_header_page, rc);
- goto out;
- }
- rc = lower_a_ops->prepare_write(lower_file, header_page, 0,
- PAGE_CACHE_SIZE);
- if (rc) {
- ecryptfs_printk(KERN_ERR, "Error preparing to write "
- "header page out; rc = [%d]\n", rc);
- goto out;
- }
- memset(header_virt, 0, PAGE_CACHE_SIZE);
- if (more_header_data_to_be_written) {
- rc = ecryptfs_write_headers_virt(header_virt,
- crypt_stat,
- file->f_dentry);
- if (rc) {
- ecryptfs_printk(KERN_WARNING, "Error "
- "generating header; rc = "
- "[%d]\n", rc);
- rc = -EIO;
- memset(header_virt, 0, PAGE_CACHE_SIZE);
- ecryptfs_unmap_and_release_lower_page(
- header_page);
- goto out;
- }
- if (current_header_page == 0)
- memset(header_virt, 0, 8);
- more_header_data_to_be_written = 0;
- }
- rc = lower_a_ops->commit_write(lower_file, header_page, 0,
- PAGE_CACHE_SIZE);
- ecryptfs_unmap_and_release_lower_page(header_page);
- if (rc < 0) {
- ecryptfs_printk(KERN_ERR,
- "Error commiting header page write; "
- "rc = [%d]\n", rc);
- break;
- }
- current_header_page++;
- }
- if (rc >= 0) {
- rc = 0;
- ecryptfs_printk(KERN_DEBUG, "lower_inode->i_blocks = "
- "[0x%.16x]\n", lower_inode->i_blocks);
- i_size_write(inode, 0);
- lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
- mark_inode_dirty_sync(inode);
- }
- ecryptfs_printk(KERN_DEBUG, "Clearing ECRYPTFS_NEW_FILE flag in "
- "crypt_stat at memory location [%p]\n", crypt_stat);
- ECRYPTFS_CLEAR_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE);
-out:
- return rc;
-}
+struct kmem_cache *ecryptfs_xattr_cache;
/**
* ecryptfs_commit_write
@@ -640,15 +662,10 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
mutex_lock(&lower_inode->i_mutex);
crypt_stat = &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)
->crypt_stat;
- if (ECRYPTFS_CHECK_FLAG(crypt_stat->flags, ECRYPTFS_NEW_FILE)) {
+ if (crypt_stat->flags & ECRYPTFS_NEW_FILE) {
ecryptfs_printk(KERN_DEBUG, "ECRYPTFS_NEW_FILE flag set in "
"crypt_stat at memory location [%p]\n", crypt_stat);
- rc = process_new_file(crypt_stat, file, inode);
- if (rc) {
- ecryptfs_printk(KERN_ERR, "Error processing new "
- "file; rc = [%d]\n", rc);
- goto out;
- }
+ crypt_stat->flags &= ~(ECRYPTFS_NEW_FILE);
} else
ecryptfs_printk(KERN_DEBUG, "Not a new file\n");
ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
@@ -670,7 +687,6 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
"index [0x%.16x])\n", page->index);
goto out;
}
- rc = 0;
inode->i_blocks = lower_inode->i_blocks;
pos = (page->index << PAGE_CACHE_SHIFT) + to;
if (pos > i_size_read(inode)) {
@@ -678,11 +694,15 @@ static int ecryptfs_commit_write(struct file *file, struct page *page,
ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
"[0x%.16x]\n", i_size_read(inode));
}
- ecryptfs_write_inode_size_to_header(lower_file, lower_inode, inode);
+ rc = ecryptfs_write_inode_size_to_metadata(lower_file, lower_inode,
+ inode, file->f_dentry,
+ ECRYPTFS_LOWER_I_MUTEX_HELD);
+ if (rc)
+ printk(KERN_ERR "Error writing inode size to metadata; "
+ "rc = [%d]\n", rc);
lower_inode->i_mtime = lower_inode->i_ctime = CURRENT_TIME;
mark_inode_dirty_sync(inode);
out:
- kunmap(page); /* mapped in prior call (prepare_write) */
if (rc < 0)
ClearPageUptodate(page);
else
@@ -707,6 +727,7 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
{
int rc = 0;
struct page *tmp_page;
+ char *tmp_page_virt;
tmp_page = ecryptfs_get1page(file, index);
if (IS_ERR(tmp_page)) {
@@ -715,28 +736,27 @@ int write_zeros(struct file *file, pgoff_t index, int start, int num_zeros)
rc = PTR_ERR(tmp_page);
goto out;
}
- kmap(tmp_page);
rc = ecryptfs_prepare_write(file, tmp_page, start, start + num_zeros);
if (rc) {
ecryptfs_printk(KERN_ERR, "Error preparing to write zero's "
"to remainder of page at index [0x%.16x]\n",
index);
- kunmap(tmp_page);
page_cache_release(tmp_page);
goto out;
}
- memset(((char *)page_address(tmp_page) + start), 0, num_zeros);
+ tmp_page_virt = kmap_atomic(tmp_page, KM_USER0);
+ memset(((char *)tmp_page_virt + start), 0, num_zeros);
+ kunmap_atomic(tmp_page_virt, KM_USER0);
+ flush_dcache_page(tmp_page);
rc = ecryptfs_commit_write(file, tmp_page, start, start + num_zeros);
if (rc < 0) {
ecryptfs_printk(KERN_ERR, "Error attempting to write zero's "
"to remainder of page at index [0x%.16x]\n",
index);
- kunmap(tmp_page);
page_cache_release(tmp_page);
goto out;
}
rc = 0;
- kunmap(tmp_page);
page_cache_release(tmp_page);
out:
return rc;
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c
new file mode 100644
index 000000000000..e3aa2253c850
--- /dev/null
+++ b/fs/ecryptfs/netlink.c
@@ -0,0 +1,255 @@
+/**
+ * eCryptfs: Linux filesystem encryption layer
+ *
+ * Copyright (C) 2004-2006 International Business Machines Corp.
+ * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com>
+ * Tyler Hicks <tyhicks@ou.edu>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+ * 02111-1307, USA.
+ */
+
+#include <net/sock.h>
+#include <linux/hash.h>
+#include <linux/random.h>
+#include "ecryptfs_kernel.h"
+
+static struct sock *ecryptfs_nl_sock;
+
+/**
+ * ecryptfs_send_netlink
+ * @data: The data to include as the payload
+ * @data_len: The byte count of the data
+ * @msg_ctx: The netlink context that will be used to handle the
+ * response message
+ * @msg_type: The type of netlink message to send
+ * @msg_flags: The flags to include in the netlink header
+ * @daemon_pid: The process id of the daemon to send the message to
+ *
+ * Sends the data to the specified daemon pid and uses the netlink
+ * context element to store the data needed for validation upon
+ * receiving the response. The data and the netlink context can be
+ * null if just sending a netlink header is sufficient. Returns zero
+ * upon sending the message; non-zero upon error.
+ */
+int ecryptfs_send_netlink(char *data, int data_len,
+ struct ecryptfs_msg_ctx *msg_ctx, u16 msg_type,
+ u16 msg_flags, pid_t daemon_pid)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ struct ecryptfs_message *msg;
+ size_t payload_len;
+ int rc;
+
+ payload_len = ((data && data_len) ? (sizeof(*msg) + data_len) : 0);
+ skb = alloc_skb(NLMSG_SPACE(payload_len), GFP_KERNEL);
+ if (!skb) {
+ rc = -ENOMEM;
+ ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n");
+ goto out;
+ }
+ nlh = NLMSG_PUT(skb, daemon_pid, msg_ctx ? msg_ctx->counter : 0,
+ msg_type, payload_len);
+ nlh->nlmsg_flags = msg_flags;
+ if (msg_ctx && payload_len) {
+ msg = (struct ecryptfs_message *)NLMSG_DATA(nlh);
+ msg->index = msg_ctx->index;
+ msg->data_len = data_len;
+ memcpy(msg->data, data, data_len);
+ }
+ rc = netlink_unicast(ecryptfs_nl_sock, skb, daemon_pid, 0);
+ if (rc < 0) {
+ ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink "
+ "message; rc = [%d]\n", rc);
+ goto out;
+ }
+ rc = 0;
+ goto out;
+nlmsg_failure:
+ rc = -EMSGSIZE;
+ kfree_skb(skb);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_process_nl_response
+ * @skb: The socket buffer containing the netlink message of state
+ * RESPONSE
+ *
+ * Processes a response message after sending an operation request to
+ * userspace. Attempts to assign the msg to a netlink context element
+ * at the index specified in the msg. The sk_buff and nlmsghdr must
+ * be validated before this function. Returns zero upon delivery to
+ * desired context element; non-zero upon delivery failure or error.
+ */
+static int ecryptfs_process_nl_response(struct sk_buff *skb)
+{
+ struct nlmsghdr *nlh = (struct nlmsghdr*)skb->data;
+ struct ecryptfs_message *msg = NLMSG_DATA(nlh);
+ int rc;
+
+ if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) {
+ rc = -EINVAL;
+ ecryptfs_printk(KERN_ERR, "Received netlink message with "
+ "incorrectly specified data length\n");
+ goto out;
+ }
+ rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid,
+ NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq);
+ if (rc)
+ printk(KERN_ERR
+ "Error processing response message; rc = [%d]\n", rc);
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_process_nl_helo
+ * @skb: The socket buffer containing the nlmsghdr in HELO state
+ *
+ * Gets uid and pid of the skb and adds the values to the daemon id
+ * hash. Returns zero after adding a new daemon id to the hash list;
+ * non-zero otherwise.
+ */
+static int ecryptfs_process_nl_helo(struct sk_buff *skb)
+{
+ int rc;
+
+ rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK,
+ NETLINK_CREDS(skb)->uid,
+ NETLINK_CREDS(skb)->pid);
+ if (rc)
+ printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
+ return rc;
+}
+
+/**
+ * ecryptfs_process_nl_quit
+ * @skb: The socket buffer containing the nlmsghdr in QUIT state
+ *
+ * Gets uid and pid of the skb and deletes the corresponding daemon
+ * id, if it is the registered daemon that is requesting the
+ * deletion. Returns zero after deleting the desired daemon id;
+ * non-zero otherwise.
+ */
+static int ecryptfs_process_nl_quit(struct sk_buff *skb)
+{
+ int rc;
+
+ rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid,
+ NETLINK_CREDS(skb)->pid);
+ if (rc)
+ printk(KERN_WARNING
+ "Error processing QUIT message; rc = [%d]\n", rc);
+ return rc;
+}
+
+/**
+ * ecryptfs_receive_nl_message
+ *
+ * Callback function called by netlink system when a message arrives.
+ * If the message looks to be valid, then an attempt is made to assign
+ * it to its desired netlink context element and wake up the process
+ * that is waiting for a response.
+ */
+static void ecryptfs_receive_nl_message(struct sock *sk, int len)
+{
+ struct sk_buff *skb;
+ struct nlmsghdr *nlh;
+ int rc = 0; /* skb_recv_datagram requires this */
+
+receive:
+ skb = skb_recv_datagram(sk, 0, 0, &rc);
+ if (rc == -EINTR)
+ goto receive;
+ else if (rc < 0) {
+ ecryptfs_printk(KERN_ERR, "Error occurred while "
+ "receiving eCryptfs netlink message; "
+ "rc = [%d]\n", rc);
+ return;
+ }
+ nlh = (struct nlmsghdr *)skb->data;
+ if (!NLMSG_OK(nlh, skb->len)) {
+ ecryptfs_printk(KERN_ERR, "Received corrupt netlink "
+ "message\n");
+ goto free;
+ }
+ switch (nlh->nlmsg_type) {
+ case ECRYPTFS_NLMSG_RESPONSE:
+ if (ecryptfs_process_nl_response(skb)) {
+ ecryptfs_printk(KERN_WARNING, "Failed to "
+ "deliver netlink response to "
+ "requesting operation\n");
+ }
+ break;
+ case ECRYPTFS_NLMSG_HELO:
+ if (ecryptfs_process_nl_helo(skb)) {
+ ecryptfs_printk(KERN_WARNING, "Failed to "
+ "fulfill HELO request\n");
+ }
+ break;
+ case ECRYPTFS_NLMSG_QUIT:
+ if (ecryptfs_process_nl_quit(skb)) {
+ ecryptfs_printk(KERN_WARNING, "Failed to "
+ "fulfill QUIT request\n");
+ }
+ break;
+ default:
+ ecryptfs_printk(KERN_WARNING, "Dropping netlink "
+ "message of unrecognized type [%d]\n",
+ nlh->nlmsg_type);
+ break;
+ }
+free:
+ kfree_skb(skb);
+}
+
+/**
+ * ecryptfs_init_netlink
+ *
+ * Creates the eCryptfs kernel netlink socket and sets its send
+ * timeout. Returns zero upon success; non-zero upon error.
+ */
+int ecryptfs_init_netlink(void)
+{
+ int rc;
+
+ ecryptfs_nl_sock = netlink_kernel_create(NETLINK_ECRYPTFS, 0,
+ ecryptfs_receive_nl_message,
+ THIS_MODULE);
+ if (!ecryptfs_nl_sock) {
+ rc = -EIO;
+ ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n");
+ goto out;
+ }
+ ecryptfs_nl_sock->sk_sndtimeo = ECRYPTFS_DEFAULT_SEND_TIMEOUT;
+ rc = 0;
+out:
+ return rc;
+}
+
+/**
+ * ecryptfs_release_netlink
+ *
+ * Releases the netlink socket, if one was created, and clears the
+ * module's pointer to it.
+ */
+void ecryptfs_release_netlink(void)
+{
+ if (ecryptfs_nl_sock && ecryptfs_nl_sock->sk_socket)
+ sock_release(ecryptfs_nl_sock->sk_socket);
+ ecryptfs_nl_sock = NULL;
+}
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index eaa5daaf106e..7b3f0cc09a6f 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -168,7 +168,7 @@ out:
return rc;
}
-struct super_operations ecryptfs_sops = {
+const struct super_operations ecryptfs_sops = {
.alloc_inode = ecryptfs_alloc_inode,
.destroy_inode = ecryptfs_destroy_inode,
.drop_inode = generic_delete_inode,
diff --git a/fs/efs/dir.c b/fs/efs/dir.c
index b46c488eefc8..dfb5cb400217 100644
--- a/fs/efs/dir.c
+++ b/fs/efs/dir.c
@@ -15,7 +15,7 @@ const struct file_operations efs_dir_operations = {
.readdir = efs_readdir,
};
-struct inode_operations efs_dir_inode_operations = {
+const struct inode_operations efs_dir_inode_operations = {
.lookup = efs_lookup,
};
diff --git a/fs/efs/super.c b/fs/efs/super.c
index dfebf21289f4..c2235e46edcd 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -105,7 +105,7 @@ static int efs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static struct super_operations efs_superblock_operations = {
+static const struct super_operations efs_superblock_operations = {
.alloc_inode = efs_alloc_inode,
.destroy_inode = efs_destroy_inode,
.read_inode = efs_read_inode,
diff --git a/fs/exec.c b/fs/exec.c
index 11fe93f7363c..7e36c6f6f538 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -405,12 +405,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
bprm->loader += stack_base;
bprm->exec += stack_base;
- mpnt = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
+ mpnt = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
if (!mpnt)
return -ENOMEM;
- memset(mpnt, 0, sizeof(*mpnt));
-
down_write(&mm->mmap_sem);
{
mpnt->vm_mm = mm;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 0b02ba9642d2..e89bfc8cf957 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -368,6 +368,14 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir,
}
if (++n >= npages)
n = 0;
+ /* next page is past the blocks we've got */
+ if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
+ ext2_error(dir->i_sb, __FUNCTION__,
+ "dir %lu size %lld exceeds block count %llu",
+ dir->i_ino, dir->i_size,
+ (unsigned long long)dir->i_blocks);
+ goto out;
+ }
} while (n != start);
out:
return NULL;
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index c19ac153f56b..e2a0ea50af1d 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -158,7 +158,7 @@ extern void ext2_write_super (struct super_block *);
extern const struct file_operations ext2_dir_operations;
/* file.c */
-extern struct inode_operations ext2_file_inode_operations;
+extern const struct inode_operations ext2_file_inode_operations;
extern const struct file_operations ext2_file_operations;
extern const struct file_operations ext2_xip_file_operations;
@@ -168,9 +168,9 @@ extern const struct address_space_operations ext2_aops_xip;
extern const struct address_space_operations ext2_nobh_aops;
/* namei.c */
-extern struct inode_operations ext2_dir_inode_operations;
-extern struct inode_operations ext2_special_inode_operations;
+extern const struct inode_operations ext2_dir_inode_operations;
+extern const struct inode_operations ext2_special_inode_operations;
/* symlink.c */
-extern struct inode_operations ext2_fast_symlink_inode_operations;
-extern struct inode_operations ext2_symlink_inode_operations;
+extern const struct inode_operations ext2_fast_symlink_inode_operations;
+extern const struct inode_operations ext2_symlink_inode_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2dba473c524a..566d4e2d3852 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -75,7 +75,7 @@ const struct file_operations ext2_xip_file_operations = {
};
#endif
-struct inode_operations ext2_file_inode_operations = {
+const struct inode_operations ext2_file_inode_operations = {
.truncate = ext2_truncate,
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e1af5b4cf80c..e69beed839ac 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -373,7 +373,7 @@ out:
return err;
}
-struct inode_operations ext2_dir_inode_operations = {
+const struct inode_operations ext2_dir_inode_operations = {
.create = ext2_create,
.lookup = ext2_lookup,
.link = ext2_link,
@@ -393,7 +393,7 @@ struct inode_operations ext2_dir_inode_operations = {
.permission = ext2_permission,
};
-struct inode_operations ext2_special_inode_operations = {
+const struct inode_operations ext2_special_inode_operations = {
#ifdef CONFIG_EXT2_FS_XATTR
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 6347c2dbdd81..a046a419d8af 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -231,7 +231,7 @@ static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, siz
static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off);
#endif
-static struct super_operations ext2_sops = {
+static const struct super_operations ext2_sops = {
.alloc_inode = ext2_alloc_inode,
.destroy_inode = ext2_destroy_inode,
.read_inode = ext2_read_inode,
@@ -708,10 +708,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, GRPID);
if (def_mount_opts & EXT2_DEFM_UID16)
set_opt(sbi->s_mount_opt, NO_UID32);
+#ifdef CONFIG_EXT2_FS_XATTR
if (def_mount_opts & EXT2_DEFM_XATTR_USER)
set_opt(sbi->s_mount_opt, XATTR_USER);
+#endif
+#ifdef CONFIG_EXT2_FS_POSIX_ACL
if (def_mount_opts & EXT2_DEFM_ACL)
set_opt(sbi->s_mount_opt, POSIX_ACL);
+#endif
if (le16_to_cpu(sbi->s_es->s_errors) == EXT2_ERRORS_PANIC)
set_opt(sbi->s_mount_opt, ERRORS_PANIC);
diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c
index 1e67d87cfa91..4e2426e22bbe 100644
--- a/fs/ext2/symlink.c
+++ b/fs/ext2/symlink.c
@@ -28,7 +28,7 @@ static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-struct inode_operations ext2_symlink_inode_operations = {
+const struct inode_operations ext2_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
@@ -40,7 +40,7 @@ struct inode_operations ext2_symlink_inode_operations = {
#endif
};
-struct inode_operations ext2_fast_symlink_inode_operations = {
+const struct inode_operations ext2_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ext2_follow_link,
#ifdef CONFIG_EXT2_FS_XATTR
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index 881f6365c41a..1e6f13864536 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -125,7 +125,7 @@ const struct file_operations ext3_file_operations = {
.splice_write = generic_file_splice_write,
};
-struct inode_operations ext3_file_inode_operations = {
+const struct inode_operations ext3_file_inode_operations = {
.truncate = ext3_truncate,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index deeb27b5ba83..c30e149fbd2e 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -11,7 +11,6 @@
#include <linux/fs.h>
#include <linux/jbd.h>
-#include <linux/sched.h>
#include <linux/ext3_fs.h>
#include <linux/cryptohash.h>
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index beaf25f5112f..8a824f4ce5c6 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -947,7 +947,7 @@ out:
static int ext3_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
- handle_t *handle = journal_current_handle();
+ handle_t *handle = ext3_journal_current_handle();
int ret = 0;
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
@@ -1717,7 +1717,7 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
/*
* Reacquire the handle: ext3_get_block() can restart the transaction
*/
- handle = journal_current_handle();
+ handle = ext3_journal_current_handle();
out_stop:
if (handle) {
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 4df39c4315e1..49159f13cc1f 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1618,21 +1618,6 @@ static int ext3_delete_entry (handle_t *handle,
return -ENOENT;
}
-/*
- * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we
- * do not perform it in these functions. We perform it at the call site,
- * if it is needed.
- */
-static inline void ext3_inc_count(handle_t *handle, struct inode *inode)
-{
- inc_nlink(inode);
-}
-
-static inline void ext3_dec_count(handle_t *handle, struct inode *inode)
-{
- drop_nlink(inode);
-}
-
static int ext3_add_nondir(handle_t *handle,
struct dentry *dentry, struct inode *inode)
{
@@ -1642,7 +1627,7 @@ static int ext3_add_nondir(handle_t *handle,
d_instantiate(dentry, inode);
return 0;
}
- ext3_dec_count(handle, inode);
+ drop_nlink(inode);
iput(inode);
return err;
}
@@ -2163,7 +2148,7 @@ retry:
err = __page_symlink(inode, symname, l,
mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
if (err) {
- ext3_dec_count(handle, inode);
+ drop_nlink(inode);
ext3_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
@@ -2191,6 +2176,12 @@ static int ext3_link (struct dentry * old_dentry,
if (inode->i_nlink >= EXT3_LINK_MAX)
return -EMLINK;
+ /*
+ * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
+ * otherwise has the potential to corrupt the orphan inode list.
+ */
+ if (inode->i_nlink == 0)
+ return -ENOENT;
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -2202,7 +2193,7 @@ retry:
handle->h_sync = 1;
inode->i_ctime = CURRENT_TIME_SEC;
- ext3_inc_count(handle, inode);
+ inc_nlink(inode);
atomic_inc(&inode->i_count);
err = ext3_add_nondir(handle, dentry, inode);
@@ -2374,7 +2365,7 @@ end_rename:
/*
* directories can handle most operations...
*/
-struct inode_operations ext3_dir_inode_operations = {
+const struct inode_operations ext3_dir_inode_operations = {
.create = ext3_create,
.lookup = ext3_lookup,
.link = ext3_link,
@@ -2394,7 +2385,7 @@ struct inode_operations ext3_dir_inode_operations = {
.permission = ext3_permission,
};
-struct inode_operations ext3_special_inode_operations = {
+const struct inode_operations ext3_special_inode_operations = {
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index b73cba12f79c..ecf89904c113 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -11,7 +11,6 @@
#define EXT3FS_DEBUG
-#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/ext3_jbd.h>
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index b34886734a44..4a4fcd6868c7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -639,7 +639,7 @@ static struct quotactl_ops ext3_qctl_operations = {
};
#endif
-static struct super_operations ext3_sops = {
+static const struct super_operations ext3_sops = {
.alloc_inode = ext3_alloc_inode,
.destroy_inode = ext3_destroy_inode,
.read_inode = ext3_read_inode,
@@ -1459,10 +1459,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, GRPID);
if (def_mount_opts & EXT3_DEFM_UID16)
set_opt(sbi->s_mount_opt, NO_UID32);
+#ifdef CONFIG_EXT3_FS_XATTR
if (def_mount_opts & EXT3_DEFM_XATTR_USER)
set_opt(sbi->s_mount_opt, XATTR_USER);
+#endif
+#ifdef CONFIG_EXT3_FS_POSIX_ACL
if (def_mount_opts & EXT3_DEFM_ACL)
set_opt(sbi->s_mount_opt, POSIX_ACL);
+#endif
if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA;
else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
@@ -2344,6 +2348,22 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
err = -EROFS;
goto restore_opts;
}
+
+ /*
+ * If we have an unprocessed orphan list hanging
+ * around from a previously readonly bdev mount,
+ * require a full umount/remount for now.
+ */
+ if (es->s_last_orphan) {
+ printk(KERN_WARNING "EXT3-fs: %s: couldn't "
+ "remount RDWR because of unprocessed "
+ "orphan inode list. Please "
+ "umount/remount instead.\n",
+ sb->s_id);
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
/*
* Mounting a RDONLY partition read-write, so reread
* and store the current valid flag. (It may have
diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c
index 4f79122cde67..ff7b4ccd8983 100644
--- a/fs/ext3/symlink.c
+++ b/fs/ext3/symlink.c
@@ -30,7 +30,7 @@ static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-struct inode_operations ext3_symlink_inode_operations = {
+const struct inode_operations ext3_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
@@ -42,7 +42,7 @@ struct inode_operations ext3_symlink_inode_operations = {
#endif
};
-struct inode_operations ext3_fast_symlink_inode_operations = {
+const struct inode_operations ext3_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ext3_follow_link,
#ifdef CONFIG_EXT3_FS_XATTR
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index dc2724fa7622..7916b50f9a13 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -222,7 +222,7 @@ static int ext4_ext_space_block(struct inode *inode)
size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
/ sizeof(struct ext4_extent);
-#ifdef AGRESSIVE_TEST
+#ifdef AGGRESSIVE_TEST
if (size > 6)
size = 6;
#endif
@@ -235,7 +235,7 @@ static int ext4_ext_space_block_idx(struct inode *inode)
size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
/ sizeof(struct ext4_extent_idx);
-#ifdef AGRESSIVE_TEST
+#ifdef AGGRESSIVE_TEST
if (size > 5)
size = 5;
#endif
@@ -249,7 +249,7 @@ static int ext4_ext_space_root(struct inode *inode)
size = sizeof(EXT4_I(inode)->i_data);
size -= sizeof(struct ext4_extent_header);
size /= sizeof(struct ext4_extent);
-#ifdef AGRESSIVE_TEST
+#ifdef AGGRESSIVE_TEST
if (size > 3)
size = 3;
#endif
@@ -263,7 +263,7 @@ static int ext4_ext_space_root_idx(struct inode *inode)
size = sizeof(EXT4_I(inode)->i_data);
size -= sizeof(struct ext4_extent_header);
size /= sizeof(struct ext4_extent_idx);
-#ifdef AGRESSIVE_TEST
+#ifdef AGGRESSIVE_TEST
if (size > 4)
size = 4;
#endif
@@ -1118,7 +1118,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
*/
if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN)
return 0;
-#ifdef AGRESSIVE_TEST
+#ifdef AGGRESSIVE_TEST
if (le16_to_cpu(ex1->ee_len) >= 4)
return 0;
#endif
@@ -1891,8 +1891,8 @@ void ext4_ext_init(struct super_block *sb)
if (test_opt(sb, EXTENTS)) {
printk("EXT4-fs: file extents enabled");
-#ifdef AGRESSIVE_TEST
- printk(", agressive tests");
+#ifdef AGGRESSIVE_TEST
+ printk(", aggressive tests");
#endif
#ifdef CHECK_BINSEARCH
printk(", check binsearch");
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3bbc24b58785..3c6c1fd2be90 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -125,7 +125,7 @@ const struct file_operations ext4_file_operations = {
.splice_write = generic_file_splice_write,
};
-struct inode_operations ext4_file_inode_operations = {
+const struct inode_operations ext4_file_inode_operations = {
.truncate = ext4_truncate,
.setattr = ext4_setattr,
#ifdef CONFIG_EXT4DEV_FS_XATTR
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index a67966385e06..1555024e3b36 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -11,7 +11,6 @@
#include <linux/fs.h>
#include <linux/jbd2.h>
-#include <linux/sched.h>
#include <linux/ext4_fs.h>
#include <linux/cryptohash.h>
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a127cc03c9fa..fbff4b9e122a 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -946,7 +946,7 @@ out:
static int ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
- handle_t *handle = journal_current_handle();
+ handle_t *handle = ext4_journal_current_handle();
int ret = 0;
unsigned max_blocks = bh_result->b_size >> inode->i_blkbits;
@@ -1716,7 +1716,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
/*
* Reacquire the handle: ext4_get_block() can restart the transaction
*/
- handle = journal_current_handle();
+ handle = ext4_journal_current_handle();
out_stop:
if (handle) {
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index e5a74a5ac261..e7e1d79a7d75 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1616,21 +1616,6 @@ static int ext4_delete_entry (handle_t *handle,
return -ENOENT;
}
-/*
- * ext4_mark_inode_dirty is somewhat expensive, so unlike ext2 we
- * do not perform it in these functions. We perform it at the call site,
- * if it is needed.
- */
-static inline void ext4_inc_count(handle_t *handle, struct inode *inode)
-{
- inc_nlink(inode);
-}
-
-static inline void ext4_dec_count(handle_t *handle, struct inode *inode)
-{
- drop_nlink(inode);
-}
-
static int ext4_add_nondir(handle_t *handle,
struct dentry *dentry, struct inode *inode)
{
@@ -1640,7 +1625,7 @@ static int ext4_add_nondir(handle_t *handle,
d_instantiate(dentry, inode);
return 0;
}
- ext4_dec_count(handle, inode);
+ drop_nlink(inode);
iput(inode);
return err;
}
@@ -2161,7 +2146,7 @@ retry:
err = __page_symlink(inode, symname, l,
mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS);
if (err) {
- ext4_dec_count(handle, inode);
+ drop_nlink(inode);
ext4_mark_inode_dirty(handle, inode);
iput (inode);
goto out_stop;
@@ -2189,6 +2174,12 @@ static int ext4_link (struct dentry * old_dentry,
if (inode->i_nlink >= EXT4_LINK_MAX)
return -EMLINK;
+ /*
+ * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
+ * otherwise has the potential to corrupt the orphan inode list.
+ */
+ if (inode->i_nlink == 0)
+ return -ENOENT;
retry:
handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
@@ -2200,7 +2191,7 @@ retry:
handle->h_sync = 1;
inode->i_ctime = CURRENT_TIME_SEC;
- ext4_inc_count(handle, inode);
+ inc_nlink(inode);
atomic_inc(&inode->i_count);
err = ext4_add_nondir(handle, dentry, inode);
@@ -2372,7 +2363,7 @@ end_rename:
/*
* directories can handle most operations...
*/
-struct inode_operations ext4_dir_inode_operations = {
+const struct inode_operations ext4_dir_inode_operations = {
.create = ext4_create,
.lookup = ext4_lookup,
.link = ext4_link,
@@ -2392,7 +2383,7 @@ struct inode_operations ext4_dir_inode_operations = {
.permission = ext4_permission,
};
-struct inode_operations ext4_special_inode_operations = {
+const struct inode_operations ext4_special_inode_operations = {
.setattr = ext4_setattr,
#ifdef CONFIG_EXT4DEV_FS_XATTR
.setxattr = generic_setxattr,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 4fe49c3661b2..ea99f6c97f56 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,7 +11,6 @@
#define EXT4FS_DEBUG
-#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/ext4_jbd2.h>
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 486a641ca71b..61c4718e4a53 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -690,7 +690,7 @@ static struct quotactl_ops ext4_qctl_operations = {
};
#endif
-static struct super_operations ext4_sops = {
+static const struct super_operations ext4_sops = {
.alloc_inode = ext4_alloc_inode,
.destroy_inode = ext4_destroy_inode,
.read_inode = ext4_read_inode,
@@ -1518,10 +1518,14 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
set_opt(sbi->s_mount_opt, GRPID);
if (def_mount_opts & EXT4_DEFM_UID16)
set_opt(sbi->s_mount_opt, NO_UID32);
+#ifdef CONFIG_EXT4DEV_FS_XATTR
if (def_mount_opts & EXT4_DEFM_XATTR_USER)
set_opt(sbi->s_mount_opt, XATTR_USER);
+#endif
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
if (def_mount_opts & EXT4_DEFM_ACL)
set_opt(sbi->s_mount_opt, POSIX_ACL);
+#endif
if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -2419,6 +2423,22 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
err = -EROFS;
goto restore_opts;
}
+
+ /*
+ * If we have an unprocessed orphan list hanging
+ * around from a previously readonly bdev mount,
+ * require a full umount/remount for now.
+ */
+ if (es->s_last_orphan) {
+ printk(KERN_WARNING "EXT4-fs: %s: couldn't "
+ "remount RDWR because of unprocessed "
+ "orphan inode list. Please "
+ "umount/remount instead.\n",
+ sb->s_id);
+ err = -EINVAL;
+ goto restore_opts;
+ }
+
/*
* Mounting a RDONLY partition read-write, so reread
* and store the current valid flag. (It may have
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index fcf527286d75..e6f9da4287c4 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -30,7 +30,7 @@ static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-struct inode_operations ext4_symlink_inode_operations = {
+const struct inode_operations ext4_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
@@ -42,7 +42,7 @@ struct inode_operations ext4_symlink_inode_operations = {
#endif
};
-struct inode_operations ext4_fast_symlink_inode_operations = {
+const struct inode_operations ext4_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ext4_follow_link,
#ifdef CONFIG_EXT4DEV_FS_XATTR
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c1237b70c1fe..55d3c7461c5b 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -312,7 +312,7 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
}
EXPORT_SYMBOL_GPL(fat_getattr);
-struct inode_operations fat_file_inode_operations = {
+const struct inode_operations fat_file_inode_operations = {
.truncate = fat_truncate,
.setattr = fat_notify_change,
.getattr = fat_getattr,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a9e4688582a2..761073544217 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -618,7 +618,7 @@ int fat_sync_inode(struct inode *inode)
EXPORT_SYMBOL_GPL(fat_sync_inode);
static int fat_show_options(struct seq_file *m, struct vfsmount *mnt);
-static struct super_operations fat_sops = {
+static const struct super_operations fat_sops = {
.alloc_inode = fat_alloc_inode,
.destroy_inode = fat_destroy_inode,
.write_inode = fat_write_inode,
@@ -1151,7 +1151,7 @@ static int fat_read_root(struct inode *inode)
* Read the super block of an MS-DOS FS.
*/
int fat_fill_super(struct super_block *sb, void *data, int silent,
- struct inode_operations *fs_dir_inode_ops, int isvfat)
+ const struct inode_operations *fs_dir_inode_ops, int isvfat)
{
struct inode *root_inode = NULL;
struct buffer_head *bh;
diff --git a/fs/filesystems.c b/fs/filesystems.c
index e3fa77c6ed56..7a4f61aa05f8 100644
--- a/fs/filesystems.c
+++ b/fs/filesystems.c
@@ -12,7 +12,6 @@
#include <linux/kmod.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/sched.h> /* for 'current' */
#include <asm/uaccess.h>
/*
diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h
index 1cf1fe8466a2..91ccee8723f7 100644
--- a/fs/freevxfs/vxfs_extern.h
+++ b/fs/freevxfs/vxfs_extern.h
@@ -62,7 +62,7 @@ extern void vxfs_read_inode(struct inode *);
extern void vxfs_clear_inode(struct inode *);
/* vxfs_lookup.c */
-extern struct inode_operations vxfs_dir_inode_ops;
+extern const struct inode_operations vxfs_dir_inode_ops;
extern const struct file_operations vxfs_dir_operations;
/* vxfs_olt.c */
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index 4e25f3fbed86..24b5a775ff96 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -48,7 +48,7 @@ static int vxfs_immed_readpage(struct file *, struct page *);
* Unliked all other operations we do not go through the pagecache,
* but do all work directly on the inode.
*/
-struct inode_operations vxfs_immed_symlink_iops = {
+const struct inode_operations vxfs_immed_symlink_iops = {
.readlink = generic_readlink,
.follow_link = vxfs_immed_follow_link,
};
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 0b7ae897cb78..098a915fd9a1 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -44,7 +44,7 @@
extern const struct address_space_operations vxfs_aops;
extern const struct address_space_operations vxfs_immed_aops;
-extern struct inode_operations vxfs_immed_symlink_iops;
+extern const struct inode_operations vxfs_immed_symlink_iops;
struct kmem_cache *vxfs_inode_cachep;
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 3995d7fbedab..bf86e5444ea6 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -52,7 +52,7 @@
static struct dentry * vxfs_lookup(struct inode *, struct dentry *, struct nameidata *);
static int vxfs_readdir(struct file *, void *, filldir_t);
-struct inode_operations vxfs_dir_inode_ops = {
+const struct inode_operations vxfs_dir_inode_ops = {
.lookup = vxfs_lookup,
};
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index ac28b0835ffc..647d600f0bc8 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -59,7 +59,7 @@ static void vxfs_put_super(struct super_block *);
static int vxfs_statfs(struct dentry *, struct kstatfs *);
static int vxfs_remount(struct super_block *, int *, char *);
-static struct super_operations vxfs_super_ops = {
+static const struct super_operations vxfs_super_ops = {
.read_inode = vxfs_read_inode,
.clear_inode = vxfs_clear_inode,
.put_super = vxfs_put_super,
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 1794305f9ed8..105d4a271e07 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -73,7 +73,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
struct fuse_conn *fc,
const char *name,
int mode, int nlink,
- struct inode_operations *iop,
+ const struct inode_operations *iop,
const struct file_operations *fop)
{
struct dentry *dentry;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 40080477ceb4..406bf61ed510 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1242,7 +1242,7 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
return err;
}
-static struct inode_operations fuse_dir_inode_operations = {
+static const struct inode_operations fuse_dir_inode_operations = {
.lookup = fuse_lookup,
.mkdir = fuse_mkdir,
.symlink = fuse_symlink,
@@ -1270,7 +1270,7 @@ static const struct file_operations fuse_dir_operations = {
.fsync = fuse_dir_fsync,
};
-static struct inode_operations fuse_common_inode_operations = {
+static const struct inode_operations fuse_common_inode_operations = {
.setattr = fuse_setattr,
.permission = fuse_permission,
.getattr = fuse_getattr,
@@ -1280,7 +1280,7 @@ static struct inode_operations fuse_common_inode_operations = {
.removexattr = fuse_removexattr,
};
-static struct inode_operations fuse_symlink_inode_operations = {
+static const struct inode_operations fuse_symlink_inode_operations = {
.setattr = fuse_setattr,
.follow_link = fuse_follow_link,
.put_link = fuse_put_link,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f63efe1337ec..2fd06927e851 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -69,7 +69,7 @@ void fuse_finish_open(struct inode *inode, struct file *file,
if (outarg->open_flags & FOPEN_DIRECT_IO)
file->f_op = &fuse_direct_io_file_operations;
if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
- invalidate_inode_pages(inode->i_mapping);
+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
ff->fh = outarg->fh;
file->private_data = ff;
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 12450d2b320e..5ab8e50e7808 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -112,7 +112,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr)
{
struct fuse_conn *fc = get_fuse_conn(inode);
if (S_ISREG(inode->i_mode) && i_size_read(inode) != attr->size)
- invalidate_inode_pages(inode->i_mapping);
+ invalidate_mapping_pages(inode->i_mapping, 0, -1);
inode->i_ino = attr->ino;
inode->i_mode = (inode->i_mode & S_IFMT) + (attr->mode & 07777);
@@ -446,7 +446,7 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
return fuse_iget(sb, 1, 0, &attr);
}
-static struct super_operations fuse_super_operations = {
+static const struct super_operations fuse_super_operations = {
.alloc_inode = fuse_alloc_inode,
.destroy_inode = fuse_destroy_inode,
.read_inode = fuse_read_inode,
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index 6a2ffa2db14f..de8e64c03f73 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -4,44 +4,43 @@ config GFS2_FS
select FS_POSIX_ACL
select CRC32
help
- A cluster filesystem.
+ A cluster filesystem.
- Allows a cluster of computers to simultaneously use a block device
- that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
- and writes to the block device like a local filesystem, but also uses
- a lock module to allow the computers coordinate their I/O so
- filesystem consistency is maintained. One of the nifty features of
- GFS is perfect consistency -- changes made to the filesystem on one
- machine show up immediately on all other machines in the cluster.
+ Allows a cluster of computers to simultaneously use a block device
+ that is shared between them (with FC, iSCSI, NBD, etc...). GFS reads
+ and writes to the block device like a local filesystem, but also uses
+ a lock module to allow the computers coordinate their I/O so
+ filesystem consistency is maintained. One of the nifty features of
+ GFS is perfect consistency -- changes made to the filesystem on one
+ machine show up immediately on all other machines in the cluster.
- To use the GFS2 filesystem, you will need to enable one or more of
- the below locking modules. Documentation and utilities for GFS2 can
- be found here: http://sources.redhat.com/cluster
+ To use the GFS2 filesystem, you will need to enable one or more of
+ the below locking modules. Documentation and utilities for GFS2 can
+ be found here: http://sources.redhat.com/cluster
config GFS2_FS_LOCKING_NOLOCK
tristate "GFS2 \"nolock\" locking module"
depends on GFS2_FS
help
- Single node locking module for GFS2.
+ Single node locking module for GFS2.
- Use this module if you want to use GFS2 on a single node without
- its clustering features. You can still take advantage of the
- large file support, and upgrade to running a full cluster later on
- if required.
+ Use this module if you want to use GFS2 on a single node without
+ its clustering features. You can still take advantage of the
+ large file support, and upgrade to running a full cluster later on
+ if required.
- If you will only be using GFS2 in cluster mode, you do not need this
- module.
+ If you will only be using GFS2 in cluster mode, you do not need this
+ module.
config GFS2_FS_LOCKING_DLM
tristate "GFS2 DLM locking module"
- depends on GFS2_FS && NET && INET && (IPV6 || IPV6=n)
+ depends on GFS2_FS && SYSFS && NET && INET && (IPV6 || IPV6=n)
select IP_SCTP if DLM_SCTP
select CONFIGFS_FS
select DLM
help
- Multiple node locking module for GFS2
-
- Most users of GFS2 will require this module. It provides the locking
- interface between GFS2 and the DLM, which is required to use GFS2
- in a cluster environment.
+ Multiple node locking module for GFS2
+ Most users of GFS2 will require this module. It provides the locking
+ interface between GFS2 and the DLM, which is required to use GFS2
+ in a cluster environment.
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8240c1ff94f4..c53a5d2d0590 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -773,7 +772,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
gfs2_free_data(ip, bstart, blen);
}
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(ip, dibh->b_data);
@@ -848,7 +847,7 @@ static int do_grow(struct gfs2_inode *ip, u64 size)
}
ip->i_di.di_size = size;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
@@ -963,7 +962,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (gfs2_is_stuffed(ip)) {
ip->i_di.di_size = size;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
@@ -975,7 +974,7 @@ static int trunc_start(struct gfs2_inode *ip, u64 size)
if (!error) {
ip->i_di.di_size = size;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1048,7 +1047,7 @@ static int trunc_end(struct gfs2_inode *ip)
ip->i_num.no_addr;
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
}
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 0fdcb7713cd9..82a1ac7895a2 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -53,7 +53,6 @@
* but never before the maximum hash table size has been reached.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/buffer_head.h>
@@ -131,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
if (ip->i_di.di_size < offset + size)
ip->i_di.di_size = offset + size;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -229,7 +228,7 @@ out:
if (ip->i_di.di_size < offset + copied)
ip->i_di.di_size = offset + copied;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1198,12 +1197,11 @@ static int compare_dents(const void *a, const void *b)
*/
static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
- void *opaque, gfs2_filldir_t filldir,
+ void *opaque, filldir_t filldir,
const struct gfs2_dirent **darr, u32 entries,
int *copied)
{
const struct gfs2_dirent *dent, *dent_next;
- struct gfs2_inum_host inum;
u64 off, off_next;
unsigned int x, y;
int run = 0;
@@ -1240,11 +1238,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
*offset = off;
}
- gfs2_inum_in(&inum, (char *)&dent->de_inum);
-
error = filldir(opaque, (const char *)(dent + 1),
be16_to_cpu(dent->de_name_len),
- off, &inum,
+ off, be64_to_cpu(dent->de_inum.no_addr),
be16_to_cpu(dent->de_type));
if (error)
return 1;
@@ -1262,8 +1258,8 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset,
}
static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque,
- gfs2_filldir_t filldir, int *copied,
- unsigned *depth, u64 leaf_no)
+ filldir_t filldir, int *copied, unsigned *depth,
+ u64 leaf_no)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *bh;
@@ -1343,7 +1339,7 @@ out:
*/
static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
- gfs2_filldir_t filldir)
+ filldir_t filldir)
{
struct gfs2_inode *dip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
@@ -1402,7 +1398,7 @@ out:
}
int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
- gfs2_filldir_t filldir)
+ filldir_t filldir)
{
struct gfs2_inode *dip = GFS2_I(inode);
struct dirent_gather g;
@@ -1568,7 +1564,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
break;
gfs2_trans_add_bh(ip->i_gl, bh, 1);
ip->i_di.di_entries++;
- ip->i_inode.i_mtime.tv_sec = ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
error = 0;
@@ -1654,7 +1650,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *name)
gfs2_consist_inode(dip);
gfs2_trans_add_bh(dip->i_gl, bh, 1);
dip->i_di.di_entries--;
- dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
+ dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
mark_inode_dirty(&dip->i_inode);
@@ -1702,7 +1698,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
gfs2_trans_add_bh(dip->i_gl, bh, 1);
}
- dip->i_inode.i_mtime.tv_sec = dip->i_inode.i_ctime.tv_sec = get_seconds();
+ dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_dinode_out(dip, bh->b_data);
brelse(bh);
return 0;
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index b21b33668a5b..48fe89046bba 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -16,30 +16,13 @@ struct inode;
struct gfs2_inode;
struct gfs2_inum;
-/**
- * gfs2_filldir_t - Report a directory entry to the caller of gfs2_dir_read()
- * @opaque: opaque data used by the function
- * @name: the name of the directory entry
- * @length: the length of the name
- * @offset: the entry's offset in the directory
- * @inum: the inode number the entry points to
- * @type: the type of inode the entry points to
- *
- * Returns: 0 on success, 1 if buffer full
- */
-
-typedef int (*gfs2_filldir_t) (void *opaque,
- const char *name, unsigned int length,
- u64 offset,
- struct gfs2_inum_host *inum, unsigned int type);
-
int gfs2_dir_search(struct inode *dir, const struct qstr *filename,
struct gfs2_inum_host *inum, unsigned int *type);
int gfs2_dir_add(struct inode *inode, const struct qstr *filename,
const struct gfs2_inum_host *inum, unsigned int type);
int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename);
-int gfs2_dir_read(struct inode *inode, u64 * offset, void *opaque,
- gfs2_filldir_t filldir);
+int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque,
+ filldir_t filldir);
int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
struct gfs2_inum_host *new_inum, unsigned int new_type);
diff --git a/fs/gfs2/eaops.c b/fs/gfs2/eaops.c
index cd747c00f670..c1f44009853f 100644
--- a/fs/gfs2/eaops.c
+++ b/fs/gfs2/eaops.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c
index ebebbdcd7057..5b83ca6acab1 100644
--- a/fs/gfs2/eattr.c
+++ b/fs/gfs2/eattr.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -301,7 +300,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -718,7 +717,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
(er->er_mode & S_IFMT));
ip->i_inode.i_mode = er->er_mode;
}
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -853,7 +852,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
(ip->i_inode.i_mode & S_IFMT) == (er->er_mode & S_IFMT));
ip->i_inode.i_mode = er->er_mode;
}
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -1134,7 +1133,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 438146904b58..6618c1190252 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -19,6 +19,8 @@
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/lm_interface.h>
+#include <linux/wait.h>
+#include <linux/rwsem.h>
#include <asm/uaccess.h>
#include "gfs2.h"
@@ -33,11 +35,6 @@
#include "super.h"
#include "util.h"
-struct greedy {
- struct gfs2_holder gr_gh;
- struct delayed_work gr_work;
-};
-
struct gfs2_gl_hash_bucket {
struct hlist_head hb_list;
};
@@ -47,6 +44,9 @@ typedef void (*glock_examiner) (struct gfs2_glock * gl);
static int gfs2_dump_lockstate(struct gfs2_sbd *sdp);
static int dump_glock(struct gfs2_glock *gl);
static int dump_inode(struct gfs2_inode *ip);
+static void gfs2_glock_xmote_th(struct gfs2_holder *gh);
+static void gfs2_glock_drop_th(struct gfs2_glock *gl);
+static DECLARE_RWSEM(gfs2_umount_flush_sem);
#define GFS2_GL_HASH_SHIFT 15
#define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT)
@@ -213,30 +213,6 @@ out:
}
/**
- * queue_empty - check to see if a glock's queue is empty
- * @gl: the glock
- * @head: the head of the queue to check
- *
- * This function protects the list in the event that a process already
- * has a holder on the list and is adding a second holder for itself.
- * The glmutex lock is what generally prevents processes from working
- * on the same glock at once, but the special case of adding a second
- * holder for yourself ("recursive" locking) doesn't involve locking
- * glmutex, making the spin lock necessary.
- *
- * Returns: 1 if the queue is empty
- */
-
-static inline int queue_empty(struct gfs2_glock *gl, struct list_head *head)
-{
- int empty;
- spin_lock(&gl->gl_spin);
- empty = list_empty(head);
- spin_unlock(&gl->gl_spin);
- return empty;
-}
-
-/**
* search_bucket() - Find struct gfs2_glock by lock number
* @bucket: the bucket to search
* @name: The lock name
@@ -395,11 +371,6 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
gh->gh_flags = flags;
gh->gh_error = 0;
gh->gh_iflags = 0;
- init_completion(&gh->gh_wait);
-
- if (gh->gh_state == LM_ST_EXCLUSIVE)
- gh->gh_flags |= GL_LOCAL_EXCL;
-
gfs2_glock_hold(gl);
}
@@ -417,9 +388,6 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder *
{
gh->gh_state = state;
gh->gh_flags = flags;
- if (gh->gh_state == LM_ST_EXCLUSIVE)
- gh->gh_flags |= GL_LOCAL_EXCL;
-
gh->gh_iflags &= 1 << HIF_ALLOCED;
gh->gh_ip = (unsigned long)__builtin_return_address(0);
}
@@ -479,6 +447,29 @@ static void gfs2_holder_put(struct gfs2_holder *gh)
kfree(gh);
}
+static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh)
+{
+ if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) {
+ gfs2_holder_put(gh);
+ return;
+ }
+ clear_bit(HIF_WAIT, &gh->gh_iflags);
+ smp_mb();
+ wake_up_bit(&gh->gh_iflags, HIF_WAIT);
+}
+
+static int holder_wait(void *word)
+{
+ schedule();
+ return 0;
+}
+
+static void wait_on_holder(struct gfs2_holder *gh)
+{
+ might_sleep();
+ wait_on_bit(&gh->gh_iflags, HIF_WAIT, holder_wait, TASK_UNINTERRUPTIBLE);
+}
+
/**
* rq_mutex - process a mutex request in the queue
* @gh: the glock holder
@@ -493,7 +484,9 @@ static int rq_mutex(struct gfs2_holder *gh)
list_del_init(&gh->gh_list);
/* gh->gh_error never examined. */
set_bit(GLF_LOCK, &gl->gl_flags);
- complete(&gh->gh_wait);
+ clear_bit(HIF_WAIT, &gh->gh_iflags);
+ smp_mb();
+ wake_up_bit(&gh->gh_iflags, HIF_WAIT);
return 1;
}
@@ -511,7 +504,6 @@ static int rq_promote(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_sbd;
- const struct gfs2_glock_operations *glops = gl->gl_ops;
if (!relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) {
if (list_empty(&gl->gl_holders)) {
@@ -526,7 +518,7 @@ static int rq_promote(struct gfs2_holder *gh)
gfs2_reclaim_glock(sdp);
}
- glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
+ gfs2_glock_xmote_th(gh);
spin_lock(&gl->gl_spin);
}
return 1;
@@ -537,11 +529,11 @@ static int rq_promote(struct gfs2_holder *gh)
set_bit(GLF_LOCK, &gl->gl_flags);
} else {
struct gfs2_holder *next_gh;
- if (gh->gh_flags & GL_LOCAL_EXCL)
+ if (gh->gh_state == LM_ST_EXCLUSIVE)
return 1;
next_gh = list_entry(gl->gl_holders.next, struct gfs2_holder,
gh_list);
- if (next_gh->gh_flags & GL_LOCAL_EXCL)
+ if (next_gh->gh_state == LM_ST_EXCLUSIVE)
return 1;
}
@@ -549,7 +541,7 @@ static int rq_promote(struct gfs2_holder *gh)
gh->gh_error = 0;
set_bit(HIF_HOLDER, &gh->gh_iflags);
- complete(&gh->gh_wait);
+ gfs2_holder_dispose_or_wake(gh);
return 0;
}
@@ -564,7 +556,6 @@ static int rq_promote(struct gfs2_holder *gh)
static int rq_demote(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
- const struct gfs2_glock_operations *glops = gl->gl_ops;
if (!list_empty(&gl->gl_holders))
return 1;
@@ -573,10 +564,7 @@ static int rq_demote(struct gfs2_holder *gh)
list_del_init(&gh->gh_list);
gh->gh_error = 0;
spin_unlock(&gl->gl_spin);
- if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
- gfs2_holder_put(gh);
- else
- complete(&gh->gh_wait);
+ gfs2_holder_dispose_or_wake(gh);
spin_lock(&gl->gl_spin);
} else {
gl->gl_req_gh = gh;
@@ -585,9 +573,9 @@ static int rq_demote(struct gfs2_holder *gh)
if (gh->gh_state == LM_ST_UNLOCKED ||
gl->gl_state != LM_ST_EXCLUSIVE)
- glops->go_drop_th(gl);
+ gfs2_glock_drop_th(gl);
else
- glops->go_xmote_th(gl, gh->gh_state, gh->gh_flags);
+ gfs2_glock_xmote_th(gh);
spin_lock(&gl->gl_spin);
}
@@ -596,30 +584,6 @@ static int rq_demote(struct gfs2_holder *gh)
}
/**
- * rq_greedy - process a queued request to drop greedy status
- * @gh: the glock holder
- *
- * Returns: 1 if the queue is blocked
- */
-
-static int rq_greedy(struct gfs2_holder *gh)
-{
- struct gfs2_glock *gl = gh->gh_gl;
-
- list_del_init(&gh->gh_list);
- /* gh->gh_error never examined. */
- clear_bit(GLF_GREEDY, &gl->gl_flags);
- spin_unlock(&gl->gl_spin);
-
- gfs2_holder_uninit(gh);
- kfree(container_of(gh, struct greedy, gr_gh));
-
- spin_lock(&gl->gl_spin);
-
- return 0;
-}
-
-/**
* run_queue - process holder structures on a glock
* @gl: the glock
*
@@ -649,8 +613,6 @@ static void run_queue(struct gfs2_glock *gl)
if (test_bit(HIF_DEMOTE, &gh->gh_iflags))
blocked = rq_demote(gh);
- else if (test_bit(HIF_GREEDY, &gh->gh_iflags))
- blocked = rq_greedy(gh);
else
gfs2_assert_warn(gl->gl_sbd, 0);
@@ -684,6 +646,8 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
gfs2_holder_init(gl, 0, 0, &gh);
set_bit(HIF_MUTEX, &gh.gh_iflags);
+ if (test_and_set_bit(HIF_WAIT, &gh.gh_iflags))
+ BUG();
spin_lock(&gl->gl_spin);
if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
@@ -691,11 +655,13 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl)
} else {
gl->gl_owner = current;
gl->gl_ip = (unsigned long)__builtin_return_address(0);
- complete(&gh.gh_wait);
+ clear_bit(HIF_WAIT, &gh.gh_iflags);
+ smp_mb();
+ wake_up_bit(&gh.gh_iflags, HIF_WAIT);
}
spin_unlock(&gl->gl_spin);
- wait_for_completion(&gh.gh_wait);
+ wait_on_holder(&gh);
gfs2_holder_uninit(&gh);
}
@@ -774,6 +740,7 @@ restart:
return;
set_bit(HIF_DEMOTE, &new_gh->gh_iflags);
set_bit(HIF_DEALLOC, &new_gh->gh_iflags);
+ set_bit(HIF_WAIT, &new_gh->gh_iflags);
goto restart;
}
@@ -825,7 +792,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
int op_done = 1;
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
- gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+ gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
gfs2_assert_warn(sdp, !(ret & LM_OUT_ASYNC));
state_change(gl, ret & LM_OUT_ST_MASK);
@@ -908,12 +875,8 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
gfs2_glock_put(gl);
- if (gh) {
- if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
- gfs2_holder_put(gh);
- else
- complete(&gh->gh_wait);
- }
+ if (gh)
+ gfs2_holder_dispose_or_wake(gh);
}
/**
@@ -924,23 +887,26 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret)
*
*/
-void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags)
+void gfs2_glock_xmote_th(struct gfs2_holder *gh)
{
+ struct gfs2_glock *gl = gh->gh_gl;
struct gfs2_sbd *sdp = gl->gl_sbd;
+ int flags = gh->gh_flags;
+ unsigned state = gh->gh_state;
const struct gfs2_glock_operations *glops = gl->gl_ops;
int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB |
LM_FLAG_NOEXP | LM_FLAG_ANY |
LM_FLAG_PRIORITY);
unsigned int lck_ret;
+ if (glops->go_xmote_th)
+ glops->go_xmote_th(gl);
+
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
- gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+ gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
gfs2_assert_warn(sdp, state != LM_ST_UNLOCKED);
gfs2_assert_warn(sdp, state != gl->gl_state);
- if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
- glops->go_sync(gl);
-
gfs2_glock_hold(gl);
gl->gl_req_bh = xmote_bh;
@@ -971,10 +937,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
const struct gfs2_glock_operations *glops = gl->gl_ops;
struct gfs2_holder *gh = gl->gl_req_gh;
- clear_bit(GLF_PREFETCH, &gl->gl_flags);
-
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
- gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+ gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
gfs2_assert_warn(sdp, !ret);
state_change(gl, LM_ST_UNLOCKED);
@@ -1001,12 +965,8 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
gfs2_glock_put(gl);
- if (gh) {
- if (test_bit(HIF_DEALLOC, &gh->gh_iflags))
- gfs2_holder_put(gh);
- else
- complete(&gh->gh_wait);
- }
+ if (gh)
+ gfs2_holder_dispose_or_wake(gh);
}
/**
@@ -1015,19 +975,19 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret)
*
*/
-void gfs2_glock_drop_th(struct gfs2_glock *gl)
+static void gfs2_glock_drop_th(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
const struct gfs2_glock_operations *glops = gl->gl_ops;
unsigned int ret;
+ if (glops->go_drop_th)
+ glops->go_drop_th(gl);
+
gfs2_assert_warn(sdp, test_bit(GLF_LOCK, &gl->gl_flags));
- gfs2_assert_warn(sdp, queue_empty(gl, &gl->gl_holders));
+ gfs2_assert_warn(sdp, list_empty(&gl->gl_holders));
gfs2_assert_warn(sdp, gl->gl_state != LM_ST_UNLOCKED);
- if (gl->gl_state == LM_ST_EXCLUSIVE && glops->go_sync)
- glops->go_sync(gl);
-
gfs2_glock_hold(gl);
gl->gl_req_bh = drop_bh;
@@ -1107,8 +1067,7 @@ static int glock_wait_internal(struct gfs2_holder *gh)
if (gh->gh_flags & LM_FLAG_PRIORITY)
do_cancels(gh);
- wait_for_completion(&gh->gh_wait);
-
+ wait_on_holder(gh);
if (gh->gh_error)
return gh->gh_error;
@@ -1164,6 +1123,8 @@ static void add_to_queue(struct gfs2_holder *gh)
struct gfs2_holder *existing;
BUG_ON(!gh->gh_owner);
+ if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
+ BUG();
existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner);
if (existing) {
@@ -1227,8 +1188,6 @@ restart:
}
}
- clear_bit(GLF_PREFETCH, &gl->gl_flags);
-
return error;
}
@@ -1321,98 +1280,6 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
}
/**
- * gfs2_glock_prefetch - Try to prefetch a glock
- * @gl: the glock
- * @state: the state to prefetch in
- * @flags: flags passed to go_xmote_th()
- *
- */
-
-static void gfs2_glock_prefetch(struct gfs2_glock *gl, unsigned int state,
- int flags)
-{
- const struct gfs2_glock_operations *glops = gl->gl_ops;
-
- spin_lock(&gl->gl_spin);
-
- if (test_bit(GLF_LOCK, &gl->gl_flags) || !list_empty(&gl->gl_holders) ||
- !list_empty(&gl->gl_waiters1) || !list_empty(&gl->gl_waiters2) ||
- !list_empty(&gl->gl_waiters3) ||
- relaxed_state_ok(gl->gl_state, state, flags)) {
- spin_unlock(&gl->gl_spin);
- return;
- }
-
- set_bit(GLF_PREFETCH, &gl->gl_flags);
- set_bit(GLF_LOCK, &gl->gl_flags);
- spin_unlock(&gl->gl_spin);
-
- glops->go_xmote_th(gl, state, flags);
-}
-
-static void greedy_work(struct work_struct *work)
-{
- struct greedy *gr = container_of(work, struct greedy, gr_work.work);
- struct gfs2_holder *gh = &gr->gr_gh;
- struct gfs2_glock *gl = gh->gh_gl;
- const struct gfs2_glock_operations *glops = gl->gl_ops;
-
- clear_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
-
- if (glops->go_greedy)
- glops->go_greedy(gl);
-
- spin_lock(&gl->gl_spin);
-
- if (list_empty(&gl->gl_waiters2)) {
- clear_bit(GLF_GREEDY, &gl->gl_flags);
- spin_unlock(&gl->gl_spin);
- gfs2_holder_uninit(gh);
- kfree(gr);
- } else {
- gfs2_glock_hold(gl);
- list_add_tail(&gh->gh_list, &gl->gl_waiters2);
- run_queue(gl);
- spin_unlock(&gl->gl_spin);
- gfs2_glock_put(gl);
- }
-}
-
-/**
- * gfs2_glock_be_greedy -
- * @gl:
- * @time:
- *
- * Returns: 0 if go_greedy will be called, 1 otherwise
- */
-
-int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time)
-{
- struct greedy *gr;
- struct gfs2_holder *gh;
-
- if (!time || gl->gl_sbd->sd_args.ar_localcaching ||
- test_and_set_bit(GLF_GREEDY, &gl->gl_flags))
- return 1;
-
- gr = kmalloc(sizeof(struct greedy), GFP_KERNEL);
- if (!gr) {
- clear_bit(GLF_GREEDY, &gl->gl_flags);
- return 1;
- }
- gh = &gr->gr_gh;
-
- gfs2_holder_init(gl, 0, 0, gh);
- set_bit(HIF_GREEDY, &gh->gh_iflags);
- INIT_DELAYED_WORK(&gr->gr_work, greedy_work);
-
- set_bit(GLF_SKIP_WAITERS2, &gl->gl_flags);
- schedule_delayed_work(&gr->gr_work, time);
-
- return 0;
-}
-
-/**
* gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
* @gh: the holder structure
*
@@ -1470,10 +1337,7 @@ static int glock_compare(const void *arg_a, const void *arg_b)
return 1;
if (a->ln_number < b->ln_number)
return -1;
- if (gh_a->gh_state == LM_ST_SHARED && gh_b->gh_state == LM_ST_EXCLUSIVE)
- return 1;
- if (!(gh_a->gh_flags & GL_LOCAL_EXCL) && (gh_b->gh_flags & GL_LOCAL_EXCL))
- return 1;
+ BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
return 0;
}
@@ -1618,34 +1482,6 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs)
}
/**
- * gfs2_glock_prefetch_num - prefetch a glock based on lock number
- * @sdp: the filesystem
- * @number: the lock number
- * @glops: the glock operations for the type of glock
- * @state: the state to acquire the glock in
- * @flags: modifier flags for the aquisition
- *
- * Returns: errno
- */
-
-void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
- const struct gfs2_glock_operations *glops,
- unsigned int state, int flags)
-{
- struct gfs2_glock *gl;
- int error;
-
- if (atomic_read(&sdp->sd_reclaim_count) <
- gfs2_tune_get(sdp, gt_reclaim_limit)) {
- error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
- if (!error) {
- gfs2_glock_prefetch(gl, state, flags);
- gfs2_glock_put(gl);
- }
- }
-}
-
-/**
* gfs2_lvb_hold - attach a LVB from a glock
* @gl: The glock in question
*
@@ -1703,8 +1539,6 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name,
if (!gl)
return;
- if (gl->gl_ops->go_callback)
- gl->gl_ops->go_callback(gl, state);
handle_callback(gl, state);
spin_lock(&gl->gl_spin);
@@ -1746,12 +1580,14 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
struct lm_async_cb *async = data;
struct gfs2_glock *gl;
+ down_read(&gfs2_umount_flush_sem);
gl = gfs2_glock_find(sdp, &async->lc_name);
if (gfs2_assert_warn(sdp, gl))
return;
if (!gfs2_assert_warn(sdp, gl->gl_req_bh))
gl->gl_req_bh(gl, async->lc_ret);
gfs2_glock_put(gl);
+ up_read(&gfs2_umount_flush_sem);
return;
}
@@ -1781,15 +1617,11 @@ void gfs2_glock_cb(void *cb_data, unsigned int type, void *data)
static int demote_ok(struct gfs2_glock *gl)
{
- struct gfs2_sbd *sdp = gl->gl_sbd;
const struct gfs2_glock_operations *glops = gl->gl_ops;
int demote = 1;
if (test_bit(GLF_STICKY, &gl->gl_flags))
demote = 0;
- else if (test_bit(GLF_PREFETCH, &gl->gl_flags))
- demote = time_after_eq(jiffies, gl->gl_stamp +
- gfs2_tune_get(sdp, gt_prefetch_secs) * HZ);
else if (glops->go_demote_ok)
demote = glops->go_demote_ok(gl);
@@ -1845,7 +1677,7 @@ void gfs2_reclaim_glock(struct gfs2_sbd *sdp)
atomic_inc(&sdp->sd_reclaimed);
if (gfs2_glmutex_trylock(gl)) {
- if (queue_empty(gl, &gl->gl_holders) &&
+ if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
handle_callback(gl, LM_ST_UNLOCKED);
gfs2_glmutex_unlock(gl);
@@ -1909,7 +1741,7 @@ static void scan_glock(struct gfs2_glock *gl)
return;
if (gfs2_glmutex_trylock(gl)) {
- if (queue_empty(gl, &gl->gl_holders) &&
+ if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED && demote_ok(gl))
goto out_schedule;
gfs2_glmutex_unlock(gl);
@@ -1958,7 +1790,7 @@ static void clear_glock(struct gfs2_glock *gl)
}
if (gfs2_glmutex_trylock(gl)) {
- if (queue_empty(gl, &gl->gl_holders) &&
+ if (list_empty(&gl->gl_holders) &&
gl->gl_state != LM_ST_UNLOCKED)
handle_callback(gl, LM_ST_UNLOCKED);
gfs2_glmutex_unlock(gl);
@@ -2000,7 +1832,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait)
t = jiffies;
}
+ down_write(&gfs2_umount_flush_sem);
invalidate_inodes(sdp->sd_vfs);
+ up_write(&gfs2_umount_flush_sem);
msleep(10);
}
}
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index fb39108fc05c..f50e40ceca43 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -20,7 +20,6 @@
#define LM_FLAG_ANY 0x00000008
#define LM_FLAG_PRIORITY 0x00000010 */
-#define GL_LOCAL_EXCL 0x00000020
#define GL_ASYNC 0x00000040
#define GL_EXACT 0x00000080
#define GL_SKIP 0x00000100
@@ -83,17 +82,11 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags,
void gfs2_holder_reinit(unsigned int state, unsigned flags,
struct gfs2_holder *gh);
void gfs2_holder_uninit(struct gfs2_holder *gh);
-
-void gfs2_glock_xmote_th(struct gfs2_glock *gl, unsigned int state, int flags);
-void gfs2_glock_drop_th(struct gfs2_glock *gl);
-
int gfs2_glock_nq(struct gfs2_holder *gh);
int gfs2_glock_poll(struct gfs2_holder *gh);
int gfs2_glock_wait(struct gfs2_holder *gh);
void gfs2_glock_dq(struct gfs2_holder *gh);
-int gfs2_glock_be_greedy(struct gfs2_glock *gl, unsigned int time);
-
void gfs2_glock_dq_uninit(struct gfs2_holder *gh);
int gfs2_glock_nq_num(struct gfs2_sbd *sdp,
u64 number, const struct gfs2_glock_operations *glops,
@@ -103,10 +96,6 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
-void gfs2_glock_prefetch_num(struct gfs2_sbd *sdp, u64 number,
- const struct gfs2_glock_operations *glops,
- unsigned int state, int flags);
-
/**
* gfs2_glock_nq_init - intialize a holder and enqueue it on a glock
* @gl: the glock
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index b068d10bcb6e..46af55355513 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -117,12 +116,14 @@ static void gfs2_pte_inval(struct gfs2_glock *gl)
static void meta_go_sync(struct gfs2_glock *gl)
{
+ if (gl->gl_state != LM_ST_EXCLUSIVE)
+ return;
+
if (test_and_clear_bit(GLF_DIRTY, &gl->gl_flags)) {
gfs2_log_flush(gl->gl_sbd, gl);
gfs2_meta_sync(gl);
gfs2_ail_empty_gl(gl);
}
-
}
/**
@@ -142,6 +143,37 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
}
/**
+ * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
+ * @gl: the glock protecting the inode
+ *
+ */
+
+static void inode_go_sync(struct gfs2_glock *gl)
+{
+ struct gfs2_inode *ip = gl->gl_object;
+
+ if (ip && !S_ISREG(ip->i_inode.i_mode))
+ ip = NULL;
+
+ if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
+ gfs2_log_flush(gl->gl_sbd, gl);
+ if (ip)
+ filemap_fdatawrite(ip->i_inode.i_mapping);
+ gfs2_meta_sync(gl);
+ if (ip) {
+ struct address_space *mapping = ip->i_inode.i_mapping;
+ int error = filemap_fdatawait(mapping);
+ if (error == -ENOSPC)
+ set_bit(AS_ENOSPC, &mapping->flags);
+ else if (error)
+ set_bit(AS_EIO, &mapping->flags);
+ }
+ clear_bit(GLF_DIRTY, &gl->gl_flags);
+ gfs2_ail_empty_gl(gl);
+ }
+}
+
+/**
* inode_go_xmote_th - promote/demote a glock
* @gl: the glock
* @state: the requested state
@@ -149,12 +181,12 @@ static void meta_go_inval(struct gfs2_glock *gl, int flags)
*
*/
-static void inode_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
- int flags)
+static void inode_go_xmote_th(struct gfs2_glock *gl)
{
if (gl->gl_state != LM_ST_UNLOCKED)
gfs2_pte_inval(gl);
- gfs2_glock_xmote_th(gl, state, flags);
+ if (gl->gl_state == LM_ST_EXCLUSIVE)
+ inode_go_sync(gl);
}
/**
@@ -189,38 +221,8 @@ static void inode_go_xmote_bh(struct gfs2_glock *gl)
static void inode_go_drop_th(struct gfs2_glock *gl)
{
gfs2_pte_inval(gl);
- gfs2_glock_drop_th(gl);
-}
-
-/**
- * inode_go_sync - Sync the dirty data and/or metadata for an inode glock
- * @gl: the glock protecting the inode
- *
- */
-
-static void inode_go_sync(struct gfs2_glock *gl)
-{
- struct gfs2_inode *ip = gl->gl_object;
-
- if (ip && !S_ISREG(ip->i_inode.i_mode))
- ip = NULL;
-
- if (test_bit(GLF_DIRTY, &gl->gl_flags)) {
- gfs2_log_flush(gl->gl_sbd, gl);
- if (ip)
- filemap_fdatawrite(ip->i_inode.i_mapping);
- gfs2_meta_sync(gl);
- if (ip) {
- struct address_space *mapping = ip->i_inode.i_mapping;
- int error = filemap_fdatawait(mapping);
- if (error == -ENOSPC)
- set_bit(AS_ENOSPC, &mapping->flags);
- else if (error)
- set_bit(AS_EIO, &mapping->flags);
- }
- clear_bit(GLF_DIRTY, &gl->gl_flags);
- gfs2_ail_empty_gl(gl);
- }
+ if (gl->gl_state == LM_ST_EXCLUSIVE)
+ inode_go_sync(gl);
}
/**
@@ -295,7 +297,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
if ((ip->i_di.di_flags & GFS2_DIF_TRUNC_IN_PROG) &&
(gl->gl_state == LM_ST_EXCLUSIVE) &&
- (gh->gh_flags & GL_LOCAL_EXCL))
+ (gh->gh_state == LM_ST_EXCLUSIVE))
error = gfs2_truncatei_resume(ip);
return error;
@@ -319,39 +321,6 @@ static void inode_go_unlock(struct gfs2_holder *gh)
}
/**
- * inode_greedy -
- * @gl: the glock
- *
- */
-
-static void inode_greedy(struct gfs2_glock *gl)
-{
- struct gfs2_sbd *sdp = gl->gl_sbd;
- struct gfs2_inode *ip = gl->gl_object;
- unsigned int quantum = gfs2_tune_get(sdp, gt_greedy_quantum);
- unsigned int max = gfs2_tune_get(sdp, gt_greedy_max);
- unsigned int new_time;
-
- spin_lock(&ip->i_spin);
-
- if (time_after(ip->i_last_pfault + quantum, jiffies)) {
- new_time = ip->i_greedy + quantum;
- if (new_time > max)
- new_time = max;
- } else {
- new_time = ip->i_greedy - quantum;
- if (!new_time || new_time > max)
- new_time = 1;
- }
-
- ip->i_greedy = new_time;
-
- spin_unlock(&ip->i_spin);
-
- iput(&ip->i_inode);
-}
-
-/**
* rgrp_go_demote_ok - Check to see if it's ok to unlock a RG's glock
* @gl: the glock
*
@@ -398,8 +367,7 @@ static void rgrp_go_unlock(struct gfs2_holder *gh)
*
*/
-static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
- int flags)
+static void trans_go_xmote_th(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
@@ -408,8 +376,6 @@ static void trans_go_xmote_th(struct gfs2_glock *gl, unsigned int state,
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
}
-
- gfs2_glock_xmote_th(gl, state, flags);
}
/**
@@ -461,8 +427,6 @@ static void trans_go_drop_th(struct gfs2_glock *gl)
gfs2_meta_syncfs(sdp);
gfs2_log_shutdown(sdp);
}
-
- gfs2_glock_drop_th(gl);
}
/**
@@ -478,8 +442,8 @@ static int quota_go_demote_ok(struct gfs2_glock *gl)
}
const struct gfs2_glock_operations gfs2_meta_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
+ .go_xmote_th = meta_go_sync,
+ .go_drop_th = meta_go_sync,
.go_type = LM_TYPE_META,
};
@@ -487,19 +451,14 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
.go_xmote_th = inode_go_xmote_th,
.go_xmote_bh = inode_go_xmote_bh,
.go_drop_th = inode_go_drop_th,
- .go_sync = inode_go_sync,
.go_inval = inode_go_inval,
.go_demote_ok = inode_go_demote_ok,
.go_lock = inode_go_lock,
.go_unlock = inode_go_unlock,
- .go_greedy = inode_greedy,
.go_type = LM_TYPE_INODE,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
- .go_sync = meta_go_sync,
.go_inval = meta_go_inval,
.go_demote_ok = rgrp_go_demote_ok,
.go_lock = rgrp_go_lock,
@@ -515,33 +474,23 @@ const struct gfs2_glock_operations gfs2_trans_glops = {
};
const struct gfs2_glock_operations gfs2_iopen_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_IOPEN,
};
const struct gfs2_glock_operations gfs2_flock_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_FLOCK,
};
const struct gfs2_glock_operations gfs2_nondisk_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_NONDISK,
};
const struct gfs2_glock_operations gfs2_quota_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_demote_ok = quota_go_demote_ok,
.go_type = LM_TYPE_QUOTA,
};
const struct gfs2_glock_operations gfs2_journal_glops = {
- .go_xmote_th = gfs2_glock_xmote_th,
- .go_drop_th = gfs2_glock_drop_th,
.go_type = LM_TYPE_JOURNAL,
};
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 734421edae85..12c80fd28db5 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -101,17 +101,14 @@ struct gfs2_bufdata {
};
struct gfs2_glock_operations {
- void (*go_xmote_th) (struct gfs2_glock *gl, unsigned int state, int flags);
+ void (*go_xmote_th) (struct gfs2_glock *gl);
void (*go_xmote_bh) (struct gfs2_glock *gl);
void (*go_drop_th) (struct gfs2_glock *gl);
void (*go_drop_bh) (struct gfs2_glock *gl);
- void (*go_sync) (struct gfs2_glock *gl);
void (*go_inval) (struct gfs2_glock *gl, int flags);
int (*go_demote_ok) (struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
void (*go_unlock) (struct gfs2_holder *gh);
- void (*go_callback) (struct gfs2_glock *gl, unsigned int state);
- void (*go_greedy) (struct gfs2_glock *gl);
const int go_type;
};
@@ -120,7 +117,6 @@ enum {
HIF_MUTEX = 0,
HIF_PROMOTE = 1,
HIF_DEMOTE = 2,
- HIF_GREEDY = 3,
/* States */
HIF_ALLOCED = 4,
@@ -128,6 +124,7 @@ enum {
HIF_HOLDER = 6,
HIF_FIRST = 7,
HIF_ABORTED = 9,
+ HIF_WAIT = 10,
};
struct gfs2_holder {
@@ -140,17 +137,14 @@ struct gfs2_holder {
int gh_error;
unsigned long gh_iflags;
- struct completion gh_wait;
unsigned long gh_ip;
};
enum {
GLF_LOCK = 1,
GLF_STICKY = 2,
- GLF_PREFETCH = 3,
GLF_DIRTY = 5,
GLF_SKIP_WAITERS2 = 6,
- GLF_GREEDY = 7,
};
struct gfs2_glock {
@@ -167,7 +161,7 @@ struct gfs2_glock {
unsigned long gl_ip;
struct list_head gl_holders;
struct list_head gl_waiters1; /* HIF_MUTEX */
- struct list_head gl_waiters2; /* HIF_DEMOTE, HIF_GREEDY */
+ struct list_head gl_waiters2; /* HIF_DEMOTE */
struct list_head gl_waiters3; /* HIF_PROMOTE */
const struct gfs2_glock_operations *gl_ops;
@@ -236,7 +230,6 @@ struct gfs2_inode {
spinlock_t i_spin;
struct rw_semaphore i_rw_mutex;
- unsigned int i_greedy;
unsigned long i_last_pfault;
struct buffer_head *i_cache[GFS2_MAX_META_HEIGHT];
@@ -418,17 +411,12 @@ struct gfs2_tune {
unsigned int gt_atime_quantum; /* Min secs between atime updates */
unsigned int gt_new_files_jdata;
unsigned int gt_new_files_directio;
- unsigned int gt_max_atomic_write; /* Split big writes into this size */
unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */
unsigned int gt_lockdump_size;
unsigned int gt_stall_secs; /* Detects trouble! */
unsigned int gt_complain_secs;
unsigned int gt_reclaim_limit; /* Max num of glocks in reclaim list */
unsigned int gt_entries_per_readdir;
- unsigned int gt_prefetch_secs; /* Usage window for prefetched glocks */
- unsigned int gt_greedy_default;
- unsigned int gt_greedy_quantum;
- unsigned int gt_greedy_max;
unsigned int gt_statfs_quantum;
unsigned int gt_statfs_slow;
};
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index d122074c45e1..0d6831a40565 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -287,10 +287,8 @@ out:
*
* Returns: errno
*/
-
int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
{
- struct gfs2_sbd *sdp = ip->i_inode.i_sb->s_fs_info;
struct buffer_head *dibh;
u32 nlink;
int error;
@@ -315,42 +313,34 @@ int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
else
drop_nlink(&ip->i_inode);
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
mark_inode_dirty(&ip->i_inode);
- if (ip->i_inode.i_nlink == 0) {
- struct gfs2_rgrpd *rgd;
- struct gfs2_holder ri_gh, rg_gh;
-
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- goto out;
- error = -EIO;
- rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
- if (!rgd)
- goto out_norgrp;
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
- if (error)
- goto out_norgrp;
-
+ if (ip->i_inode.i_nlink == 0)
gfs2_unlink_di(&ip->i_inode); /* mark inode unlinked */
- gfs2_glock_dq_uninit(&rg_gh);
-out_norgrp:
- gfs2_glock_dq_uninit(&ri_gh);
- }
-out:
+
return error;
}
struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
{
struct qstr qstr;
+ struct inode *inode;
gfs2_str2qstr(&qstr, name);
- return gfs2_lookupi(dip, &qstr, 1, NULL);
+ inode = gfs2_lookupi(dip, &qstr, 1, NULL);
+ /* gfs2_lookupi has inconsistent callers: vfs
+ * related routines expect NULL for no entry found,
+ * gfs2_lookup_simple callers expect ENOENT
+ * and do not check for NULL.
+ */
+ if (inode == NULL)
+ return ERR_PTR(-ENOENT);
+ else
+ return inode;
}
@@ -361,8 +351,10 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
* @is_root: If 1, ignore the caller's permissions
* @i_gh: An uninitialized holder for the new inode glock
*
- * There will always be a vnode (Linux VFS inode) for the d_gh inode unless
- * @is_root is true.
+ * This can be called via the VFS filldir function when NFS is doing
+ * a readdirplus and the inode which its intending to stat isn't
+ * already in cache. In this case we must not take the directory glock
+ * again, since the readdir call will have already taken that lock.
*
* Returns: errno
*/
@@ -375,8 +367,9 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
struct gfs2_holder d_gh;
struct gfs2_inum_host inum;
unsigned int type;
- int error = 0;
+ int error;
struct inode *inode = NULL;
+ int unlock = 0;
if (!name->len || name->len > GFS2_FNAMESIZE)
return ERR_PTR(-ENAMETOOLONG);
@@ -388,9 +381,12 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
return dir;
}
- error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
- if (error)
- return ERR_PTR(error);
+ if (gfs2_glock_is_locked_by_me(dip->i_gl) == 0) {
+ error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+ if (error)
+ return ERR_PTR(error);
+ unlock = 1;
+ }
if (!is_root) {
error = permission(dir, MAY_EXEC, NULL);
@@ -405,10 +401,11 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
inode = gfs2_inode_lookup(sb, &inum, type);
out:
- gfs2_glock_dq_uninit(&d_gh);
+ if (unlock)
+ gfs2_glock_dq_uninit(&d_gh);
if (error == -ENOENT)
return NULL;
- return inode;
+ return inode ? inode : ERR_PTR(error);
}
static int pick_formal_ino_1(struct gfs2_sbd *sdp, u64 *formal_ino)
diff --git a/fs/gfs2/lm.c b/fs/gfs2/lm.c
index effe4a337c1d..cfcc39b86a53 100644
--- a/fs/gfs2/lm.c
+++ b/fs/gfs2/lm.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -104,15 +103,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...)
vprintk(fmt, args);
va_end(args);
- fs_err(sdp, "about to withdraw from the cluster\n");
+ fs_err(sdp, "about to withdraw this file system\n");
BUG_ON(sdp->sd_args.ar_debug);
-
- fs_err(sdp, "waiting for outstanding I/O\n");
-
- /* FIXME: suspend dm device so oustanding bio's complete
- and all further io requests fail */
-
fs_err(sdp, "telling LM to withdraw\n");
gfs2_withdraw_lockproto(&sdp->sd_lockstruct);
fs_err(sdp, "withdrawn\n");
diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h
index 33af707a4d3f..a87c7bf3c568 100644
--- a/fs/gfs2/locking/dlm/lock_dlm.h
+++ b/fs/gfs2/locking/dlm/lock_dlm.h
@@ -36,7 +36,7 @@
#define GDLM_STRNAME_BYTES 24
#define GDLM_LVB_SIZE 32
-#define GDLM_DROP_COUNT 50000
+#define GDLM_DROP_COUNT 200000
#define GDLM_DROP_PERIOD 60
#define GDLM_NAME_LEN 128
diff --git a/fs/gfs2/locking/dlm/main.c b/fs/gfs2/locking/dlm/main.c
index 2194b1d5b5ec..a0e7eda643ed 100644
--- a/fs/gfs2/locking/dlm/main.c
+++ b/fs/gfs2/locking/dlm/main.c
@@ -11,9 +11,6 @@
#include "lock_dlm.h"
-extern int gdlm_drop_count;
-extern int gdlm_drop_period;
-
extern struct lm_lockops gdlm_ops;
static int __init init_lock_dlm(void)
@@ -40,9 +37,6 @@ static int __init init_lock_dlm(void)
return error;
}
- gdlm_drop_count = GDLM_DROP_COUNT;
- gdlm_drop_period = GDLM_DROP_PERIOD;
-
printk(KERN_INFO
"Lock_DLM (built %s %s) installed\n", __DATE__, __TIME__);
return 0;
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c
index cdd1694e889b..1d8faa3da8af 100644
--- a/fs/gfs2/locking/dlm/mount.c
+++ b/fs/gfs2/locking/dlm/mount.c
@@ -9,8 +9,6 @@
#include "lock_dlm.h"
-int gdlm_drop_count;
-int gdlm_drop_period;
const struct lm_lockops gdlm_ops;
@@ -24,8 +22,8 @@ static struct gdlm_ls *init_gdlm(lm_callback_t cb, struct gfs2_sbd *sdp,
if (!ls)
return NULL;
- ls->drop_locks_count = gdlm_drop_count;
- ls->drop_locks_period = gdlm_drop_period;
+ ls->drop_locks_count = GDLM_DROP_COUNT;
+ ls->drop_locks_period = GDLM_DROP_PERIOD;
ls->fscb = cb;
ls->sdp = sdp;
ls->fsflags = flags;
diff --git a/fs/gfs2/locking/dlm/plock.c b/fs/gfs2/locking/dlm/plock.c
index 3799f19b282f..1dd4215b83d0 100644
--- a/fs/gfs2/locking/dlm/plock.c
+++ b/fs/gfs2/locking/dlm/plock.c
@@ -264,7 +264,7 @@ static unsigned int dev_poll(struct file *file, poll_table *wait)
return 0;
}
-static struct file_operations dev_fops = {
+static const struct file_operations dev_fops = {
.read = dev_read,
.write = dev_write,
.poll = dev_poll,
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 29ae06f94944..4746b884662d 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -116,6 +116,17 @@ static ssize_t recover_status_show(struct gdlm_ls *ls, char *buf)
return sprintf(buf, "%d\n", ls->recover_jid_status);
}
+static ssize_t drop_count_show(struct gdlm_ls *ls, char *buf)
+{
+ return sprintf(buf, "%d\n", ls->drop_locks_count);
+}
+
+static ssize_t drop_count_store(struct gdlm_ls *ls, const char *buf, size_t len)
+{
+ ls->drop_locks_count = simple_strtol(buf, NULL, 0);
+ return len;
+}
+
struct gdlm_attr {
struct attribute attr;
ssize_t (*show)(struct gdlm_ls *, char *);
@@ -135,6 +146,7 @@ GDLM_ATTR(first_done, 0444, first_done_show, NULL);
GDLM_ATTR(recover, 0644, recover_show, recover_store);
GDLM_ATTR(recover_done, 0444, recover_done_show, NULL);
GDLM_ATTR(recover_status, 0444, recover_status_show, NULL);
+GDLM_ATTR(drop_count, 0644, drop_count_show, drop_count_store);
static struct attribute *gdlm_attrs[] = {
&gdlm_attr_proto_name.attr,
@@ -147,6 +159,7 @@ static struct attribute *gdlm_attrs[] = {
&gdlm_attr_recover.attr,
&gdlm_attr_recover_done.attr,
&gdlm_attr_recover_status.attr,
+ &gdlm_attr_drop_count.attr,
NULL,
};
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 4d7f94d8c7bd..16bb4b4561ae 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -69,13 +69,16 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
struct gfs2_trans *tr;
- if (!list_empty(&bd->bd_list_tr))
+ gfs2_log_lock(sdp);
+ if (!list_empty(&bd->bd_list_tr)) {
+ gfs2_log_unlock(sdp);
return;
-
+ }
tr = current->journal_info;
tr->tr_touched = 1;
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
+ gfs2_log_unlock(sdp);
if (!list_empty(&le->le_list))
return;
@@ -84,7 +87,6 @@ static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
gfs2_meta_check(sdp, bd->bd_bh);
gfs2_pin(sdp, bd->bd_bh);
-
gfs2_log_lock(sdp);
sdp->sd_log_num_buf++;
list_add(&le->le_list, &sdp->sd_log_le_buf);
@@ -98,11 +100,13 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
struct list_head *head = &tr->tr_list_buf;
struct gfs2_bufdata *bd;
+ gfs2_log_lock(sdp);
while (!list_empty(head)) {
bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
list_del_init(&bd->bd_list_tr);
tr->tr_num_buf--;
}
+ gfs2_log_unlock(sdp);
gfs2_assert_warn(sdp, !tr->tr_num_buf);
}
@@ -462,13 +466,17 @@ static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
struct address_space *mapping = bd->bd_bh->b_page->mapping;
struct gfs2_inode *ip = GFS2_I(mapping->host);
+ gfs2_log_lock(sdp);
tr->tr_touched = 1;
if (list_empty(&bd->bd_list_tr) &&
(ip->i_di.di_flags & GFS2_DIF_JDATA)) {
tr->tr_num_buf++;
list_add(&bd->bd_list_tr, &tr->tr_list_buf);
+ gfs2_log_unlock(sdp);
gfs2_pin(sdp, bd->bd_bh);
tr->tr_num_buf_new++;
+ } else {
+ gfs2_log_unlock(sdp);
}
gfs2_trans_add_gl(bd->bd_gl);
gfs2_log_lock(sdp);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index 7c1a9e22a526..6e8a59809abf 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0e34d9918973..e62d4f620c58 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -282,8 +282,7 @@ void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh,
return;
}
- bd = kmem_cache_alloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
- memset(bd, 0, sizeof(struct gfs2_bufdata));
+ bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL),
bd->bd_bh = bh;
bd->bd_gl = gl;
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c
index ef3092e29607..32caecd20300 100644
--- a/fs/gfs2/mount.c
+++ b/fs/gfs2/mount.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
diff --git a/fs/gfs2/ondisk.c b/fs/gfs2/ondisk.c
index f2495f1e21ad..d9ecfd23a49e 100644
--- a/fs/gfs2/ondisk.c
+++ b/fs/gfs2/ondisk.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index d8d69a72a10d..56e33590b656 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -16,6 +16,7 @@
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
+#include <linux/writeback.h>
#include <linux/gfs2_ondisk.h>
#include <linux/lm_interface.h>
@@ -157,6 +158,32 @@ out_ignore:
}
/**
+ * gfs2_writepages - Write a bunch of dirty pages back to disk
+ * @mapping: The mapping to write
+ * @wbc: Write-back control
+ *
+ * For journaled files and/or ordered writes this just falls back to the
+ * kernel's default writepages path for now. We will probably want to change
+ * that eventually (i.e. when we look at allocate on flush).
+ *
+ * For the data=writeback case though we can already ignore buffer heads
+ * and write whole extents at once. This is a big reduction in the
+ * number of I/O requests we send and the bmap calls we make in this case.
+ */
+static int gfs2_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct inode *inode = mapping->host;
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
+
+ if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip))
+ return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+
+ return generic_writepages(mapping, wbc);
+}
+
+/**
* stuffed_readpage - Fill in a Linux page with stuffed file data
* @ip: the inode
* @page: the page
@@ -256,7 +283,7 @@ out_unlock:
* the page lock and the glock) and return having done no I/O. Its
* obviously not something we'd want to do on too regular a basis.
* Any I/O we ignore at this time will be done via readpage later.
- * 2. We have to handle stuffed files here too.
+ * 2. We don't handle stuffed files here we let readpage do the honours.
* 3. mpage_readpages() does most of the heavy lifting in the common case.
* 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
* 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_holder gh;
- unsigned page_idx;
- int ret;
+ int ret = 0;
int do_unlock = 0;
if (likely(file != &gfs2_internal_file_sentinel)) {
@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
goto out_unlock;
}
skip_lock:
- if (gfs2_is_stuffed(ip)) {
- struct pagevec lru_pvec;
- pagevec_init(&lru_pvec, 0);
- for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = list_entry(pages->prev, struct page, lru);
- prefetchw(&page->flags);
- list_del(&page->lru);
- if (!add_to_page_cache(page, mapping,
- page->index, GFP_KERNEL)) {
- ret = stuffed_readpage(ip, page);
- unlock_page(page);
- if (!pagevec_add(&lru_pvec, page))
- __pagevec_lru_add(&lru_pvec);
- } else {
- page_cache_release(page);
- }
- }
- pagevec_lru_add(&lru_pvec);
- ret = 0;
- } else {
- /* What we really want to do .... */
+ if (!gfs2_is_stuffed(ip))
ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
- }
if (do_unlock) {
gfs2_glock_dq_m(1, &gh);
@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
error = gfs2_glock_nq_atime(&ip->i_gh);
if (unlikely(error)) {
- if (error == GLR_TRYFAILED)
+ if (error == GLR_TRYFAILED) {
+ unlock_page(page);
error = AOP_TRUNCATED_PAGE;
+ }
goto out_uninit;
}
@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
return;
}
+/**
+ * gfs2_ok_for_dio - check that dio is valid on this file
+ * @ip: The inode
+ * @rw: READ or WRITE
+ * @offset: The offset at which we are reading or writing
+ *
+ * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
+ * 1 (to accept the i/o request)
+ */
+static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
+{
+ /*
+ * Should we return an error here? I can't see that O_DIRECT for
+ * a journaled file makes any sense. For now we'll silently fall
+ * back to buffered I/O, likewise we do the same for stuffed
+ * files since they are (a) small and (b) unaligned.
+ */
+ if (gfs2_is_jdata(ip))
+ return 0;
+
+ if (gfs2_is_stuffed(ip))
+ return 0;
+
+ if (offset > i_size_read(&ip->i_inode))
+ return 0;
+ return 1;
+}
+
+
+
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
struct gfs2_holder gh;
int rv;
- if (rw == READ)
- mutex_lock(&inode->i_mutex);
/*
- * Shared lock, even if its a write, since we do no allocation
- * on this path. All we need change is atime.
+ * Deferred lock, even if its a write, since we do no allocation
+ * on this path. All we need change is atime, and this lock mode
+ * ensures that other nodes have flushed their buffered read caches
+ * (i.e. their page cache entries for this inode). We do not,
+ * unfortunately have the option of only flushing a range like
+ * the VFS does.
*/
- gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+ gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
rv = gfs2_glock_nq_atime(&gh);
if (rv)
- goto out;
-
- if (offset > i_size_read(inode))
- goto out;
-
- /*
- * Should we return an error here? I can't see that O_DIRECT for
- * a journaled file makes any sense. For now we'll silently fall
- * back to buffered I/O, likewise we do the same for stuffed
- * files since they are (a) small and (b) unaligned.
- */
- if (gfs2_is_jdata(ip))
- goto out;
-
- if (gfs2_is_stuffed(ip))
- goto out;
-
- rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
- inode->i_sb->s_bdev,
- iov, offset, nr_segs,
- gfs2_get_block_direct, NULL);
+ return rv;
+ rv = gfs2_ok_for_dio(ip, rw, offset);
+ if (rv != 1)
+ goto out; /* dio not valid, fall back to buffered i/o */
+
+ rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
+ iov, offset, nr_segs,
+ gfs2_get_block_direct, NULL);
out:
gfs2_glock_dq_m(1, &gh);
gfs2_holder_uninit(&gh);
- if (rw == READ)
- mutex_unlock(&inode->i_mutex);
-
return rv;
}
@@ -763,6 +786,7 @@ out:
const struct address_space_operations gfs2_file_aops = {
.writepage = gfs2_writepage,
+ .writepages = gfs2_writepages,
.readpage = gfs2_readpage,
.readpages = gfs2_readpages,
.sync_page = block_sync_page,
diff --git a/fs/gfs2/ops_dentry.c b/fs/gfs2/ops_dentry.c
index d355899585d8..c6bac6b69420 100644
--- a/fs/gfs2/ops_dentry.c
+++ b/fs/gfs2/ops_dentry.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -46,6 +45,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
struct gfs2_inum_host inum;
unsigned int type;
int error;
+ int had_lock=0;
if (inode && is_bad_inode(inode))
goto invalid;
@@ -53,9 +53,12 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
if (sdp->sd_args.ar_localcaching)
goto valid;
- error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
- if (error)
- goto fail;
+ had_lock = gfs2_glock_is_locked_by_me(dip->i_gl);
+ if (!had_lock) {
+ error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
+ if (error)
+ goto fail;
+ }
error = gfs2_dir_search(parent->d_inode, &dentry->d_name, &inum, &type);
switch (error) {
@@ -82,13 +85,15 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd)
}
valid_gunlock:
- gfs2_glock_dq_uninit(&d_gh);
+ if (!had_lock)
+ gfs2_glock_dq_uninit(&d_gh);
valid:
dput(parent);
return 1;
invalid_gunlock:
- gfs2_glock_dq_uninit(&d_gh);
+ if (!had_lock)
+ gfs2_glock_dq_uninit(&d_gh);
invalid:
if (inode && S_ISDIR(inode->i_mode)) {
if (have_submounts(dentry))
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index b4e7b8775315..1de05b63d43a 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -22,6 +21,7 @@
#include "glock.h"
#include "glops.h"
#include "inode.h"
+#include "ops_dentry.h"
#include "ops_export.h"
#include "rgrp.h"
#include "util.h"
@@ -112,13 +112,12 @@ struct get_name_filldir {
char *name;
};
-static int get_name_filldir(void *opaque, const char *name, unsigned int length,
- u64 offset, struct gfs2_inum_host *inum,
- unsigned int type)
+static int get_name_filldir(void *opaque, const char *name, int length,
+ loff_t offset, u64 inum, unsigned int type)
{
- struct get_name_filldir *gnfd = (struct get_name_filldir *)opaque;
+ struct get_name_filldir *gnfd = opaque;
- if (!gfs2_inum_equal(inum, &gnfd->inum))
+ if (inum != gnfd->inum.no_addr)
return 0;
memcpy(gnfd->name, name, length);
@@ -189,6 +188,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
return ERR_PTR(-ENOMEM);
}
+ dentry->d_op = &gfs2_dops;
return dentry;
}
@@ -215,8 +215,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, void *inum_obj)
}
error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
- LM_ST_SHARED, LM_FLAG_ANY | GL_LOCAL_EXCL,
- &i_gh);
+ LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
return ERR_PTR(error);
@@ -269,6 +268,7 @@ out_inode:
return ERR_PTR(-ENOMEM);
}
+ dentry->d_op = &gfs2_dops;
return dentry;
fail_rgd:
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index faa07e4b97d0..b50180e22779 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -43,15 +42,6 @@
#include "util.h"
#include "eaops.h"
-/* For regular, non-NFS */
-struct filldir_reg {
- struct gfs2_sbd *fdr_sbd;
- int fdr_prefetch;
-
- filldir_t fdr_filldir;
- void *fdr_opaque;
-};
-
/*
* Most fields left uninitialised to catch anybody who tries to
* use them. f_flags set to prevent file_accessed() from touching
@@ -128,41 +118,6 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
}
/**
- * filldir_func - Report a directory entry to the caller of gfs2_dir_read()
- * @opaque: opaque data used by the function
- * @name: the name of the directory entry
- * @length: the length of the name
- * @offset: the entry's offset in the directory
- * @inum: the inode number the entry points to
- * @type: the type of inode the entry points to
- *
- * Returns: 0 on success, 1 if buffer full
- */
-
-static int filldir_func(void *opaque, const char *name, unsigned int length,
- u64 offset, struct gfs2_inum_host *inum,
- unsigned int type)
-{
- struct filldir_reg *fdr = (struct filldir_reg *)opaque;
- struct gfs2_sbd *sdp = fdr->fdr_sbd;
- int error;
-
- error = fdr->fdr_filldir(fdr->fdr_opaque, name, length, offset,
- inum->no_addr, type);
- if (error)
- return 1;
-
- if (fdr->fdr_prefetch && !(length == 1 && *name == '.')) {
- gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_inode_glops,
- LM_ST_SHARED, LM_FLAG_TRY | LM_FLAG_ANY);
- gfs2_glock_prefetch_num(sdp, inum->no_addr, &gfs2_iopen_glops,
- LM_ST_SHARED, LM_FLAG_TRY);
- }
-
- return 0;
-}
-
-/**
* gfs2_readdir - Read directory entries from a directory
* @file: The directory to read from
* @dirent: Buffer for dirents
@@ -175,16 +130,10 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
{
struct inode *dir = file->f_mapping->host;
struct gfs2_inode *dip = GFS2_I(dir);
- struct filldir_reg fdr;
struct gfs2_holder d_gh;
u64 offset = file->f_pos;
int error;
- fdr.fdr_sbd = GFS2_SB(dir);
- fdr.fdr_prefetch = 1;
- fdr.fdr_filldir = filldir;
- fdr.fdr_opaque = dirent;
-
gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh);
error = gfs2_glock_nq_atime(&d_gh);
if (error) {
@@ -192,7 +141,7 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir)
return error;
}
- error = gfs2_dir_read(dir, &offset, &fdr, filldir_func);
+ error = gfs2_dir_read(dir, &offset, dirent, filldir);
gfs2_glock_dq_uninit(&d_gh);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 636dda4c7d38..d85f6e05cb95 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -264,13 +263,23 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_holder ghs[2];
+ struct gfs2_holder ghs[3];
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder ri_gh;
int error;
+ error = gfs2_rindex_hold(sdp, &ri_gh);
+ if (error)
+ return error;
+
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
- gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
+ gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- error = gfs2_glock_nq_m(2, ghs);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+ gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+
+ error = gfs2_glock_nq_m(3, ghs);
if (error)
goto out;
@@ -291,10 +300,12 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
out_end_trans:
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq_m(2, ghs);
+ gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
+ gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -449,13 +460,22 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
struct gfs2_inode *dip = GFS2_I(dir);
struct gfs2_sbd *sdp = GFS2_SB(dir);
struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
- struct gfs2_holder ghs[2];
+ struct gfs2_holder ghs[3];
+ struct gfs2_rgrpd *rgd;
+ struct gfs2_holder ri_gh;
int error;
+
+ error = gfs2_rindex_hold(sdp, &ri_gh);
+ if (error)
+ return error;
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
- error = gfs2_glock_nq_m(2, ghs);
+ rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
+ gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+
+ error = gfs2_glock_nq_m(3, ghs);
if (error)
goto out;
@@ -483,10 +503,12 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry)
gfs2_trans_end(sdp);
out_gunlock:
- gfs2_glock_dq_m(2, ghs);
+ gfs2_glock_dq_m(3, ghs);
out:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
+ gfs2_holder_uninit(ghs + 2);
+ gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -547,7 +569,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
struct gfs2_inode *nip = NULL;
struct gfs2_sbd *sdp = GFS2_SB(odir);
- struct gfs2_holder ghs[4], r_gh;
+ struct gfs2_holder ghs[5], r_gh;
+ struct gfs2_rgrpd *nrgd;
unsigned int num_gh;
int dir_rename = 0;
int alloc_required;
@@ -587,6 +610,13 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (nip) {
gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh);
num_gh++;
+ /* grab the resource lock for unlink flag twiddling
+ * this is the case of the target file already existing
+ * so we unlink before doing the rename
+ */
+ nrgd = gfs2_blk2rgrpd(sdp, nip->i_num.no_addr);
+ if (nrgd)
+ gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++);
}
error = gfs2_glock_nq_m(num_gh, ghs);
@@ -684,12 +714,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
al->al_rgd->rd_ri.ri_length +
4 * RES_DINODE + 4 * RES_LEAF +
- RES_STATFS + RES_QUOTA, 0);
+ RES_STATFS + RES_QUOTA + 4, 0);
if (error)
goto out_ipreserv;
} else {
error = gfs2_trans_begin(sdp, 4 * RES_DINODE +
- 5 * RES_LEAF, 0);
+ 5 * RES_LEAF + 4, 0);
if (error)
goto out_gunlock;
}
@@ -728,7 +758,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error)
goto out_end_trans;
- ip->i_inode.i_ctime.tv_sec = get_seconds();
+ ip->i_inode.i_ctime = CURRENT_TIME_SEC;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -1018,7 +1048,7 @@ static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
}
generic_fillattr(inode, stat);
- if (unlock);
+ if (unlock)
gfs2_glock_dq_uninit(&gh);
return 0;
@@ -1084,7 +1114,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
return gfs2_ea_remove(GFS2_I(dentry->d_inode), &er);
}
-struct inode_operations gfs2_file_iops = {
+const struct inode_operations gfs2_file_iops = {
.permission = gfs2_permission,
.setattr = gfs2_setattr,
.getattr = gfs2_getattr,
@@ -1094,7 +1124,7 @@ struct inode_operations gfs2_file_iops = {
.removexattr = gfs2_removexattr,
};
-struct inode_operations gfs2_dev_iops = {
+const struct inode_operations gfs2_dev_iops = {
.permission = gfs2_permission,
.setattr = gfs2_setattr,
.getattr = gfs2_getattr,
@@ -1104,7 +1134,7 @@ struct inode_operations gfs2_dev_iops = {
.removexattr = gfs2_removexattr,
};
-struct inode_operations gfs2_dir_iops = {
+const struct inode_operations gfs2_dir_iops = {
.create = gfs2_create,
.lookup = gfs2_lookup,
.link = gfs2_link,
@@ -1123,7 +1153,7 @@ struct inode_operations gfs2_dir_iops = {
.removexattr = gfs2_removexattr,
};
-struct inode_operations gfs2_symlink_iops = {
+const struct inode_operations gfs2_symlink_iops = {
.readlink = gfs2_readlink,
.follow_link = gfs2_follow_link,
.permission = gfs2_permission,
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
index b15acb4fd34c..34f0caac1a03 100644
--- a/fs/gfs2/ops_inode.h
+++ b/fs/gfs2/ops_inode.h
@@ -12,9 +12,9 @@
#include <linux/fs.h>
-extern struct inode_operations gfs2_file_iops;
-extern struct inode_operations gfs2_dir_iops;
-extern struct inode_operations gfs2_symlink_iops;
-extern struct inode_operations gfs2_dev_iops;
+extern const struct inode_operations gfs2_file_iops;
+extern const struct inode_operations gfs2_dir_iops;
+extern const struct inode_operations gfs2_symlink_iops;
+extern const struct inode_operations gfs2_dev_iops;
#endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c
index 7685b46f934b..b89999d3a767 100644
--- a/fs/gfs2/ops_super.c
+++ b/fs/gfs2/ops_super.c
@@ -173,6 +173,9 @@ static void gfs2_write_super_lockfs(struct super_block *sb)
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
+ if (test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
+ return;
+
for (;;) {
error = gfs2_freeze_fs(sdp);
if (!error)
@@ -426,6 +429,12 @@ static void gfs2_delete_inode(struct inode *inode)
}
error = gfs2_dinode_dealloc(ip);
+ /*
+ * Must do this before unlock to avoid trying to write back
+ * potentially dirty data now that inode no longer exists
+ * on disk.
+ */
+ truncate_inode_pages(&inode->i_data, 0);
out_unlock:
gfs2_glock_dq(&ip->i_iopen_gh);
@@ -443,14 +452,12 @@ out:
static struct inode *gfs2_alloc_inode(struct super_block *sb)
{
- struct gfs2_sbd *sdp = sb->s_fs_info;
struct gfs2_inode *ip;
ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
if (ip) {
ip->i_flags = 0;
ip->i_gl = NULL;
- ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
ip->i_last_pfault = jiffies;
}
return &ip->i_inode;
@@ -461,7 +468,7 @@ static void gfs2_destroy_inode(struct inode *inode)
kmem_cache_free(gfs2_inode_cachep, inode);
}
-struct super_operations gfs2_super_ops = {
+const struct super_operations gfs2_super_ops = {
.alloc_inode = gfs2_alloc_inode,
.destroy_inode = gfs2_destroy_inode,
.write_inode = gfs2_write_inode,
diff --git a/fs/gfs2/ops_super.h b/fs/gfs2/ops_super.h
index 9de73f042f78..442a274c6272 100644
--- a/fs/gfs2/ops_super.h
+++ b/fs/gfs2/ops_super.h
@@ -12,6 +12,6 @@
#include <linux/fs.h>
-extern struct super_operations gfs2_super_ops;
+extern const struct super_operations gfs2_super_ops;
#endif /* __OPS_SUPER_DOT_H__ */
diff --git a/fs/gfs2/ops_vm.c b/fs/gfs2/ops_vm.c
index 45a5f11fc39a..aa0dbd2aac1b 100644
--- a/fs/gfs2/ops_vm.c
+++ b/fs/gfs2/ops_vm.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -28,34 +27,13 @@
#include "trans.h"
#include "util.h"
-static void pfault_be_greedy(struct gfs2_inode *ip)
-{
- unsigned int time;
-
- spin_lock(&ip->i_spin);
- time = ip->i_greedy;
- ip->i_last_pfault = jiffies;
- spin_unlock(&ip->i_spin);
-
- igrab(&ip->i_inode);
- if (gfs2_glock_be_greedy(ip->i_gl, time))
- iput(&ip->i_inode);
-}
-
static struct page *gfs2_private_nopage(struct vm_area_struct *area,
unsigned long address, int *type)
{
struct gfs2_inode *ip = GFS2_I(area->vm_file->f_mapping->host);
- struct page *result;
set_bit(GIF_PAGED, &ip->i_flags);
-
- result = filemap_nopage(area, address, type);
-
- if (result && result != NOPAGE_OOM)
- pfault_be_greedy(ip);
-
- return result;
+ return filemap_nopage(area, address, type);
}
static int alloc_page_backing(struct gfs2_inode *ip, struct page *page)
@@ -167,7 +145,6 @@ static struct page *gfs2_sharewrite_nopage(struct vm_area_struct *area,
set_page_dirty(result);
}
- pfault_be_greedy(ip);
out:
gfs2_glock_dq_uninit(&i_gh);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index d0c806b85c86..8bc182c7e2ef 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ff0846528d54..8d9c08b5c4b6 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 43a24f2e5905..70f424fcf1cd 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -71,17 +71,12 @@ void gfs2_tune_init(struct gfs2_tune *gt)
gt->gt_atime_quantum = 3600;
gt->gt_new_files_jdata = 0;
gt->gt_new_files_directio = 0;
- gt->gt_max_atomic_write = 4 << 20;
gt->gt_max_readahead = 1 << 18;
gt->gt_lockdump_size = 131072;
gt->gt_stall_secs = 600;
gt->gt_complain_secs = 10;
gt->gt_reclaim_limit = 5000;
gt->gt_entries_per_readdir = 32;
- gt->gt_prefetch_secs = 10;
- gt->gt_greedy_default = HZ / 10;
- gt->gt_greedy_quantum = HZ / 40;
- gt->gt_greedy_max = HZ / 4;
gt->gt_statfs_quantum = 30;
gt->gt_statfs_slow = 0;
}
@@ -359,8 +354,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
mutex_lock(&sdp->sd_jindex_mutex);
for (;;) {
- error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED,
- GL_LOCAL_EXCL, ji_gh);
+ error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
if (error)
break;
@@ -529,8 +523,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
struct gfs2_log_header_host head;
int error;
- error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
- GL_LOCAL_EXCL, &t_gh);
+ error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &t_gh);
if (error)
return error;
@@ -583,9 +576,8 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
gfs2_quota_sync(sdp);
gfs2_statfs_sync(sdp);
- error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED,
- GL_LOCAL_EXCL | GL_NOCACHE,
- &t_gh);
+ error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE,
+ &t_gh);
if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags))
return error;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 983eaf1e06be..d01f9f0fda26 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -436,17 +436,12 @@ TUNE_ATTR(atime_quantum, 0);
TUNE_ATTR(max_readahead, 0);
TUNE_ATTR(complain_secs, 0);
TUNE_ATTR(reclaim_limit, 0);
-TUNE_ATTR(prefetch_secs, 0);
TUNE_ATTR(statfs_slow, 0);
TUNE_ATTR(new_files_jdata, 0);
TUNE_ATTR(new_files_directio, 0);
TUNE_ATTR(quota_simul_sync, 1);
TUNE_ATTR(quota_cache_secs, 1);
-TUNE_ATTR(max_atomic_write, 1);
TUNE_ATTR(stall_secs, 1);
-TUNE_ATTR(greedy_default, 1);
-TUNE_ATTR(greedy_quantum, 1);
-TUNE_ATTR(greedy_max, 1);
TUNE_ATTR(statfs_quantum, 1);
TUNE_ATTR_DAEMON(scand_secs, scand_process);
TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process);
@@ -465,15 +460,10 @@ static struct attribute *tune_attrs[] = {
&tune_attr_max_readahead.attr,
&tune_attr_complain_secs.attr,
&tune_attr_reclaim_limit.attr,
- &tune_attr_prefetch_secs.attr,
&tune_attr_statfs_slow.attr,
&tune_attr_quota_simul_sync.attr,
&tune_attr_quota_cache_secs.attr,
- &tune_attr_max_atomic_write.attr,
&tune_attr_stall_secs.attr,
- &tune_attr_greedy_default.attr,
- &tune_attr_greedy_quantum.attr,
- &tune_attr_greedy_max.attr,
&tune_attr_statfs_quantum.attr,
&tune_attr_scand_secs.attr,
&tune_attr_recoverd_secs.attr,
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index e5707a9f78c2..601eaa1b9ed6 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -7,7 +7,6 @@
* of the GNU General Public License version 2.
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index e2e0358da335..7c69b98a2e45 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -320,7 +320,7 @@ const struct file_operations hfs_dir_operations = {
.release = hfs_dir_release,
};
-struct inode_operations hfs_dir_inode_operations = {
+const struct inode_operations hfs_dir_inode_operations = {
.create = hfs_create,
.lookup = hfs_lookup,
.unlink = hfs_unlink,
diff --git a/fs/hfs/hfs.h b/fs/hfs/hfs.h
index 88099ab1a180..1445e3a56ed4 100644
--- a/fs/hfs/hfs.h
+++ b/fs/hfs/hfs.h
@@ -83,8 +83,6 @@
/*======== HFS structures as they appear on the disk ========*/
-#define __packed __attribute__ ((packed))
-
/* Pascal-style string of up to 31 characters */
struct hfs_name {
u8 len;
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 735332dfd1b8..147374b6f675 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -170,7 +170,7 @@ extern void hfs_cat_build_key(struct super_block *, btree_key *, u32, struct qst
/* dir.c */
extern const struct file_operations hfs_dir_operations;
-extern struct inode_operations hfs_dir_inode_operations;
+extern const struct inode_operations hfs_dir_inode_operations;
/* extent.c */
extern int hfs_ext_keycmp(const btree_key *, const btree_key *);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 5cb7f8fee8d6..fafcba593871 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -18,7 +18,7 @@
#include "btree.h"
static const struct file_operations hfs_file_operations;
-static struct inode_operations hfs_file_inode_operations;
+static const struct inode_operations hfs_file_inode_operations;
/*================ Variable-like macros ================*/
@@ -612,7 +612,7 @@ static const struct file_operations hfs_file_operations = {
.release = hfs_file_release,
};
-static struct inode_operations hfs_file_inode_operations = {
+static const struct inode_operations hfs_file_inode_operations = {
.lookup = hfs_file_lookup,
.truncate = hfs_file_truncate,
.setattr = hfs_inode_setattr,
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index a36987966004..623f509f1d47 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -154,7 +154,7 @@ static void hfs_destroy_inode(struct inode *inode)
kmem_cache_free(hfs_inode_cachep, HFS_I(inode));
}
-static struct super_operations hfs_super_operations = {
+static const struct super_operations hfs_super_operations = {
.alloc_inode = hfs_alloc_inode,
.destroy_inode = hfs_destroy_inode,
.write_inode = hfs_write_inode,
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index f2d7c49ce759..ba117c445e78 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -8,7 +8,6 @@
* Handling of catalog records
*/
-#include <linux/sched.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index e886ac8460d3..80b5682a2273 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -10,7 +10,6 @@
#include <linux/errno.h>
#include <linux/fs.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/random.h>
@@ -471,7 +470,7 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
return res;
}
-struct inode_operations hfsplus_dir_inode_operations = {
+const struct inode_operations hfsplus_dir_inode_operations = {
.lookup = hfsplus_lookup,
.create = hfsplus_create,
.link = hfsplus_link,
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 49205531a500..fe99fe8db61a 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -15,8 +15,6 @@
#include <linux/types.h>
-#define __packed __attribute__ ((packed))
-
/* Some constants */
#define HFSPLUS_SECTOR_SIZE 512
#define HFSPLUS_SECTOR_SHIFT 9
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 75e8c4d8aac3..642012ac3370 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -268,10 +268,10 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
return 0;
}
-extern struct inode_operations hfsplus_dir_inode_operations;
+extern const struct inode_operations hfsplus_dir_inode_operations;
extern struct file_operations hfsplus_dir_operations;
-static struct inode_operations hfsplus_file_inode_operations = {
+static const struct inode_operations hfsplus_file_inode_operations = {
.lookup = hfsplus_file_lookup,
.truncate = hfsplus_file_truncate,
.permission = hfsplus_permission,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 0f513c6bf843..1a97f9293447 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -11,7 +11,6 @@
#include <linux/init.h>
#include <linux/pagemap.h>
#include <linux/fs.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/vfs.h>
#include <linux/nls.h>
@@ -260,7 +259,7 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static struct super_operations hfsplus_sops = {
+static const struct super_operations hfsplus_sops = {
.alloc_inode = hfsplus_alloc_inode,
.destroy_inode = hfsplus_destroy_inode,
.read_inode = hfsplus_read_inode,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 69a376f35a68..e965eb11d76f 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -52,8 +52,8 @@ static int append = 0;
#define HOSTFS_SUPER_MAGIC 0x00c0ffee
-static struct inode_operations hostfs_iops;
-static struct inode_operations hostfs_dir_iops;
+static const struct inode_operations hostfs_iops;
+static const struct inode_operations hostfs_dir_iops;
static const struct address_space_operations hostfs_link_aops;
#ifndef MODULE
@@ -309,7 +309,7 @@ static void hostfs_read_inode(struct inode *inode)
read_inode(inode);
}
-static struct super_operations hostfs_sbops = {
+static const struct super_operations hostfs_sbops = {
.alloc_inode = hostfs_alloc_inode,
.drop_inode = generic_delete_inode,
.delete_inode = hostfs_delete_inode,
@@ -880,7 +880,7 @@ int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
return(0);
}
-static struct inode_operations hostfs_iops = {
+static const struct inode_operations hostfs_iops = {
.create = hostfs_create,
.link = hostfs_link,
.unlink = hostfs_unlink,
@@ -894,7 +894,7 @@ static struct inode_operations hostfs_iops = {
.getattr = hostfs_getattr,
};
-static struct inode_operations hostfs_dir_iops = {
+static const struct inode_operations hostfs_dir_iops = {
.create = hostfs_create,
.lookup = hostfs_lookup,
.link = hostfs_link,
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index fb4c8915010a..b4eafc0f1e54 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -132,7 +132,7 @@ const struct file_operations hpfs_file_ops =
.sendfile = generic_file_sendfile,
};
-struct inode_operations hpfs_file_iops =
+const struct inode_operations hpfs_file_iops =
{
.truncate = hpfs_truncate,
.setattr = hpfs_notify_change,
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 1c07aa82d327..42ff60ccf2a9 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -266,7 +266,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, char *, char *, int);
int hpfs_file_fsync(struct file *, struct dentry *, int);
extern const struct file_operations hpfs_file_ops;
-extern struct inode_operations hpfs_file_iops;
+extern const struct inode_operations hpfs_file_iops;
extern const struct address_space_operations hpfs_aops;
/* inode.c */
@@ -302,7 +302,7 @@ void hpfs_decide_conv(struct inode *, unsigned char *, unsigned);
/* namei.c */
-extern struct inode_operations hpfs_dir_iops;
+extern const struct inode_operations hpfs_dir_iops;
extern const struct address_space_operations hpfs_symlink_aops;
static inline struct hpfs_inode_info *hpfs_i(struct inode *inode)
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 2507e7393f3c..9953cf9a2f16 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -659,7 +659,7 @@ end1:
return err;
}
-struct inode_operations hpfs_dir_iops =
+const struct inode_operations hpfs_dir_iops =
{
.create = hpfs_create,
.lookup = hpfs_lookup,
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index d4abc1a1d566..e0174e338526 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -426,7 +426,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
/* Super operations */
-static struct super_operations hpfs_sops =
+static const struct super_operations hpfs_sops =
{
.alloc_inode = hpfs_alloc_inode,
.destroy_inode = hpfs_destroy_inode,
diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
index afd340a45da4..affb7412125e 100644
--- a/fs/hppfs/hppfs_kern.c
+++ b/fs/hppfs/hppfs_kern.c
@@ -43,7 +43,7 @@ static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode)
#define HPPFS_SUPER_MAGIC 0xb00000ee
-static struct super_operations hppfs_sbops;
+static const struct super_operations hppfs_sbops;
static int is_pid(struct dentry *dentry)
{
@@ -212,7 +212,7 @@ static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
return(ERR_PTR(err));
}
-static struct inode_operations hppfs_file_iops = {
+static const struct inode_operations hppfs_file_iops = {
};
static ssize_t read_proc(struct file *file, char __user *buf, ssize_t count,
@@ -649,7 +649,7 @@ static void hppfs_destroy_inode(struct inode *inode)
kfree(HPPFS_I(inode));
}
-static struct super_operations hppfs_sbops = {
+static const struct super_operations hppfs_sbops = {
.alloc_inode = hppfs_alloc_inode,
.destroy_inode = hppfs_destroy_inode,
.read_inode = hppfs_read_inode,
@@ -693,11 +693,11 @@ static void* hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
return ret;
}
-static struct inode_operations hppfs_dir_iops = {
+static const struct inode_operations hppfs_dir_iops = {
.lookup = hppfs_lookup,
};
-static struct inode_operations hppfs_link_iops = {
+static const struct inode_operations hppfs_link_iops = {
.readlink = hppfs_readlink,
.follow_link = hppfs_follow_link,
};
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 4f4cd132b571..8c718a3d413f 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -33,11 +33,11 @@
/* some random number */
#define HUGETLBFS_MAGIC 0x958458f6
-static struct super_operations hugetlbfs_ops;
+static const struct super_operations hugetlbfs_ops;
static const struct address_space_operations hugetlbfs_aops;
const struct file_operations hugetlbfs_file_operations;
-static struct inode_operations hugetlbfs_dir_inode_operations;
-static struct inode_operations hugetlbfs_inode_operations;
+static const struct inode_operations hugetlbfs_dir_inode_operations;
+static const struct inode_operations hugetlbfs_inode_operations;
static struct backing_dev_info hugetlbfs_backing_dev_info = {
.ra_pages = 0, /* No readahead */
@@ -449,10 +449,13 @@ static int hugetlbfs_symlink(struct inode *dir,
}
/*
- * For direct-IO reads into hugetlb pages
+ * mark the head page dirty
*/
static int hugetlbfs_set_page_dirty(struct page *page)
{
+ struct page *head = (struct page *)page_private(page);
+
+ SetPageDirty(head);
return 0;
}
@@ -560,7 +563,7 @@ const struct file_operations hugetlbfs_file_operations = {
.get_unmapped_area = hugetlb_get_unmapped_area,
};
-static struct inode_operations hugetlbfs_dir_inode_operations = {
+static const struct inode_operations hugetlbfs_dir_inode_operations = {
.create = hugetlbfs_create,
.lookup = simple_lookup,
.link = simple_link,
@@ -573,11 +576,11 @@ static struct inode_operations hugetlbfs_dir_inode_operations = {
.setattr = hugetlbfs_setattr,
};
-static struct inode_operations hugetlbfs_inode_operations = {
+static const struct inode_operations hugetlbfs_inode_operations = {
.setattr = hugetlbfs_setattr,
};
-static struct super_operations hugetlbfs_ops = {
+static const struct super_operations hugetlbfs_ops = {
.alloc_inode = hugetlbfs_alloc_inode,
.destroy_inode = hugetlbfs_destroy_inode,
.statfs = hugetlbfs_statfs,
diff --git a/fs/inode.c b/fs/inode.c
index bf21dc6d0dbd..5abb097ab1b0 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -414,7 +414,8 @@ static void prune_icache(int nr_to_scan)
__iget(inode);
spin_unlock(&inode_lock);
if (remove_inode_buffers(inode))
- reap += invalidate_inode_pages(&inode->i_data);
+ reap += invalidate_mapping_pages(&inode->i_data,
+ 0, -1);
iput(inode);
spin_lock(&inode_lock);
@@ -709,7 +710,7 @@ EXPORT_SYMBOL(iunique);
struct inode *igrab(struct inode *inode)
{
spin_lock(&inode_lock);
- if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
+ if (!(inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)))
__iget(inode);
else
/*
@@ -999,7 +1000,7 @@ EXPORT_SYMBOL(remove_inode_hash);
*/
void generic_delete_inode(struct inode *inode)
{
- struct super_operations *op = inode->i_sb->s_op;
+ const struct super_operations *op = inode->i_sb->s_op;
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
@@ -1092,7 +1093,7 @@ EXPORT_SYMBOL_GPL(generic_drop_inode);
*/
static inline void iput_final(struct inode *inode)
{
- struct super_operations *op = inode->i_sb->s_op;
+ const struct super_operations *op = inode->i_sb->s_op;
void (*drop)(struct inode *) = generic_drop_inode;
if (op && op->drop_inode)
@@ -1112,7 +1113,7 @@ static inline void iput_final(struct inode *inode)
void iput(struct inode *inode)
{
if (inode) {
- struct super_operations *op = inode->i_sb->s_op;
+ const struct super_operations *op = inode->i_sb->s_op;
BUG_ON(inode->i_state == I_CLEAR);
@@ -1160,11 +1161,9 @@ void touch_atime(struct vfsmount *mnt, struct dentry *dentry)
struct inode *inode = dentry->d_inode;
struct timespec now;
- if (IS_RDONLY(inode))
- return;
if (inode->i_flags & S_NOATIME)
return;
- if (inode->i_sb->s_flags & MS_NOATIME)
+ if (IS_NOATIME(inode))
return;
if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode))
return;
@@ -1252,33 +1251,6 @@ int inode_needs_sync(struct inode *inode)
EXPORT_SYMBOL(inode_needs_sync);
-/*
- * Quota functions that want to walk the inode lists..
- */
-#ifdef CONFIG_QUOTA
-
-void remove_dquot_ref(struct super_block *sb, int type,
- struct list_head *tofree_head)
-{
- struct inode *inode;
-
- if (!sb->dq_op)
- return; /* nothing to do */
- spin_lock(&inode_lock); /* This lock is for inodes code */
-
- /*
- * We don't have to lock against quota code - test IS_QUOTAINIT is
- * just for speedup...
- */
- list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
- if (!IS_NOQUOTA(inode))
- remove_inode_dquot_ref(inode, type, tofree_head);
-
- spin_unlock(&inode_lock);
-}
-
-#endif
-
int inode_wait(void *word)
{
schedule();
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 55f6da55b7c0..9f2224f65a18 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -455,8 +455,16 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
break;
kevent = inotify_dev_get_event(dev);
- if (event_size + kevent->event.len > count)
+ if (event_size + kevent->event.len > count) {
+ if (ret == 0 && count > 0) {
+ /*
+ * could not get a single event because we
+ * didn't have enough buffer space.
+ */
+ ret = -EINVAL;
+ }
break;
+ }
if (copy_to_user(buf, &kevent->event, event_size)) {
ret = -EFAULT;
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 89e8da112a75..10d2c211d18b 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -60,6 +60,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
int data = IOPRIO_PRIO_DATA(ioprio);
struct task_struct *p, *g;
struct user_struct *user;
+ struct pid *pgrp;
int ret;
switch (class) {
@@ -98,12 +99,14 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio)
break;
case IOPRIO_WHO_PGRP:
if (!who)
- who = process_group(current);
- do_each_task_pid(who, PIDTYPE_PGID, p) {
+ pgrp = task_pgrp(current);
+ else
+ pgrp = find_pid(who);
+ do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
ret = set_task_ioprio(p, ioprio);
if (ret)
break;
- } while_each_task_pid(who, PIDTYPE_PGID, p);
+ } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
break;
case IOPRIO_WHO_USER:
if (!who)
@@ -167,6 +170,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
{
struct task_struct *g, *p;
struct user_struct *user;
+ struct pid *pgrp;
int ret = -ESRCH;
int tmpio;
@@ -182,8 +186,10 @@ asmlinkage long sys_ioprio_get(int which, int who)
break;
case IOPRIO_WHO_PGRP:
if (!who)
- who = process_group(current);
- do_each_task_pid(who, PIDTYPE_PGID, p) {
+ pgrp = task_pgrp(current);
+ else
+ pgrp = find_pid(who);
+ do_each_pid_task(pgrp, PIDTYPE_PGID, p) {
tmpio = get_task_ioprio(p);
if (tmpio < 0)
continue;
@@ -191,7 +197,7 @@ asmlinkage long sys_ioprio_get(int which, int who)
ret = tmpio;
else
ret = ioprio_best(ret, tmpio);
- } while_each_task_pid(who, PIDTYPE_PGID, p);
+ } while_each_pid_task(pgrp, PIDTYPE_PGID, p);
break;
case IOPRIO_WHO_USER:
if (!who)
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 4af2548f97a9..0e94c31cad9b 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -24,7 +24,7 @@ const struct file_operations isofs_dir_operations =
/*
* directories can handle most operations...
*/
-struct inode_operations isofs_dir_inode_operations =
+const struct inode_operations isofs_dir_inode_operations =
{
.lookup = isofs_lookup,
};
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index ea55b6c469ec..64a96cdfe3a4 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -106,7 +106,7 @@ static int isofs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static struct super_operations isofs_sops = {
+static const struct super_operations isofs_sops = {
.alloc_inode = isofs_alloc_inode,
.destroy_inode = isofs_destroy_inode,
.read_inode = isofs_read_inode,
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index e6308c8b5735..efe2872cd4e3 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -174,7 +174,7 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de,
}
}
-extern struct inode_operations isofs_dir_inode_operations;
+extern const struct inode_operations isofs_dir_inode_operations;
extern const struct file_operations isofs_dir_operations;
extern const struct address_space_operations isofs_symlink_aops;
extern struct export_operations isofs_export_ops;
diff --git a/fs/jffs/Makefile b/fs/jffs/Makefile
deleted file mode 100644
index 9c1c0bb59696..000000000000
--- a/fs/jffs/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-#
-# Makefile for the linux Journalling Flash FileSystem (JFFS) routines.
-#
-# $Id: Makefile,v 1.11 2001/09/25 20:59:41 dwmw2 Exp $
-#
-
-obj-$(CONFIG_JFFS_FS) += jffs.o
-
-jffs-y := jffs_fm.o intrep.o inode-v23.o
-jffs-$(CONFIG_JFFS_PROC_FS) += jffs_proc.o
-jffs-objs := $(jffs-y)
diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c
deleted file mode 100644
index 43baa1afa021..000000000000
--- a/fs/jffs/inode-v23.c
+++ /dev/null
@@ -1,1847 +0,0 @@
-/*
- * JFFS -- Journalling Flash File System, Linux implementation.
- *
- * Copyright (C) 1999, 2000 Axis Communications AB.
- *
- * Created by Finn Hakansson <finn@axis.com>.
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * $Id: inode-v23.c,v 1.70 2001/10/02 09:16:02 dwmw2 Exp $
- *
- * Ported to Linux 2.3.x and MTD:
- * Copyright (C) 2000 Alexander Larsson (alex@cendio.se), Cendio Systems AB
- *
- * Copyright 2000, 2001 Red Hat, Inc.
- */
-
-/* inode.c -- Contains the code that is called from the VFS. */
-
-/* TODO-ALEX:
- * uid and gid are just 16 bit.
- * jffs_file_write reads from user-space pointers without xx_from_user
- * maybe other stuff do to.
- */
-
-#include <linux/time.h>
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/jffs.h>
-#include <linux/fs.h>
-#include <linux/smp_lock.h>
-#include <linux/ioctl.h>
-#include <linux/stat.h>
-#include <linux/blkdev.h>
-#include <linux/quotaops.h>
-#include <linux/highmem.h>
-#include <linux/vfs.h>
-#include <linux/mutex.h>
-#include <asm/byteorder.h>
-#include <asm/uaccess.h>
-
-#include "jffs_fm.h"
-#include "intrep.h"
-#ifdef CONFIG_JFFS_PROC_FS
-#include "jffs_proc.h"
-#endif
-
-static int jffs_remove(struct inode *dir, struct dentry *dentry, int type);
-
-static struct super_operations jffs_ops;
-static const struct file_operations jffs_file_operations;
-static struct inode_operations jffs_file_inode_operations;
-static const struct file_operations jffs_dir_operations;
-static struct inode_operations jffs_dir_inode_operations;
-static const struct address_space_operations jffs_address_operations;
-
-struct kmem_cache *node_cache = NULL;
-struct kmem_cache *fm_cache = NULL;
-
-/* Called by the VFS at mount time to initialize the whole file system. */
-static int jffs_fill_super(struct super_block *sb, void *data, int silent)
-{
- struct inode *root_inode;
- struct jffs_control *c;
-
- sb->s_flags |= MS_NODIRATIME;
-
- D1(printk(KERN_NOTICE "JFFS: Trying to mount device %s.\n",
- sb->s_id));
-
- if (MAJOR(sb->s_dev) != MTD_BLOCK_MAJOR) {
- printk(KERN_WARNING "JFFS: Trying to mount a "
- "non-mtd device.\n");
- return -EINVAL;
- }
-
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
- sb->s_fs_info = (void *) 0;
- sb->s_maxbytes = 0xFFFFFFFF;
-
- /* Build the file system. */
- if (jffs_build_fs(sb) < 0) {
- goto jffs_sb_err1;
- }
-
- /*
- * set up enough so that we can read an inode
- */
- sb->s_magic = JFFS_MAGIC_SB_BITMASK;
- sb->s_op = &jffs_ops;
-
- root_inode = iget(sb, JFFS_MIN_INO);
- if (!root_inode)
- goto jffs_sb_err2;
-
- /* Get the root directory of this file system. */
- if (!(sb->s_root = d_alloc_root(root_inode))) {
- goto jffs_sb_err3;
- }
-
- c = (struct jffs_control *) sb->s_fs_info;
-
-#ifdef CONFIG_JFFS_PROC_FS
- /* Set up the jffs proc file system. */
- if (jffs_register_jffs_proc_dir(MINOR(sb->s_dev), c) < 0) {
- printk(KERN_WARNING "JFFS: Failed to initialize the JFFS "
- "proc file system for device %s.\n",
- sb->s_id);
- }
-#endif
-
- /* Set the Garbage Collection thresholds */
-
- /* GC if free space goes below 5% of the total size */
- c->gc_minfree_threshold = c->fmc->flash_size / 20;
-
- if (c->gc_minfree_threshold < c->fmc->sector_size)
- c->gc_minfree_threshold = c->fmc->sector_size;
-
- /* GC if dirty space exceeds 33% of the total size. */
- c->gc_maxdirty_threshold = c->fmc->flash_size / 3;
-
- if (c->gc_maxdirty_threshold < c->fmc->sector_size)
- c->gc_maxdirty_threshold = c->fmc->sector_size;
-
-
- c->thread_pid = kernel_thread (jffs_garbage_collect_thread,
- (void *) c,
- CLONE_KERNEL);
- D1(printk(KERN_NOTICE "JFFS: GC thread pid=%d.\n", (int) c->thread_pid));
-
- D1(printk(KERN_NOTICE "JFFS: Successfully mounted device %s.\n",
- sb->s_id));
- return 0;
-
-jffs_sb_err3:
- iput(root_inode);
-jffs_sb_err2:
- jffs_cleanup_control((struct jffs_control *)sb->s_fs_info);
-jffs_sb_err1:
- printk(KERN_WARNING "JFFS: Failed to mount device %s.\n",
- sb->s_id);
- return -EINVAL;
-}
-
-
-/* This function is called when the file system is umounted. */
-static void
-jffs_put_super(struct super_block *sb)
-{
- struct jffs_control *c = (struct jffs_control *) sb->s_fs_info;
-
- D2(printk("jffs_put_super()\n"));
-
-#ifdef CONFIG_JFFS_PROC_FS
- jffs_unregister_jffs_proc_dir(c);
-#endif
-
- if (c->gc_task) {
- D1(printk (KERN_NOTICE "jffs_put_super(): Telling gc thread to die.\n"));
- send_sig(SIGKILL, c->gc_task, 1);
- }
- wait_for_completion(&c->gc_thread_comp);
-
- D1(printk (KERN_NOTICE "jffs_put_super(): Successfully waited on thread.\n"));
-
- jffs_cleanup_control((struct jffs_control *)sb->s_fs_info);
- D1(printk(KERN_NOTICE "JFFS: Successfully unmounted device %s.\n",
- sb->s_id));
-}
-
-
-/* This function is called when user commands like chmod, chgrp and
- chown are executed. System calls like trunc() results in a call
- to this function. */
-static int
-jffs_setattr(struct dentry *dentry, struct iattr *iattr)
-{
- struct inode *inode = dentry->d_inode;
- struct jffs_raw_inode raw_inode;
- struct jffs_control *c;
- struct jffs_fmcontrol *fmc;
- struct jffs_file *f;
- struct jffs_node *new_node;
- int update_all;
- int res = 0;
- int recoverable = 0;
-
- lock_kernel();
-
- if ((res = inode_change_ok(inode, iattr)))
- goto out;
-
- c = (struct jffs_control *)inode->i_sb->s_fs_info;
- fmc = c->fmc;
-
- D3(printk (KERN_NOTICE "notify_change(): down biglock\n"));
- mutex_lock(&fmc->biglock);
-
- f = jffs_find_file(c, inode->i_ino);
-
- ASSERT(if (!f) {
- printk("jffs_setattr(): Invalid inode number: %lu\n",
- inode->i_ino);
- D3(printk (KERN_NOTICE "notify_change(): up biglock\n"));
- mutex_unlock(&fmc->biglock);
- res = -EINVAL;
- goto out;
- });
-
- D1(printk("***jffs_setattr(): file: \"%s\", ino: %u\n",
- f->name, f->ino));
-
- update_all = iattr->ia_valid & ATTR_FORCE;
-
- if ( (update_all || iattr->ia_valid & ATTR_SIZE)
- && (iattr->ia_size + 128 < f->size) ) {
- /* We're shrinking the file by more than 128 bytes.
- We'll be able to GC and recover this space, so
- allow it to go into the reserved space. */
- recoverable = 1;
- }
-
- if (!(new_node = jffs_alloc_node())) {
- D(printk("jffs_setattr(): Allocation failed!\n"));
- D3(printk (KERN_NOTICE "notify_change(): up biglock\n"));
- mutex_unlock(&fmc->biglock);
- res = -ENOMEM;
- goto out;
- }
-
- new_node->data_offset = 0;
- new_node->removed_size = 0;
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.ino = f->ino;
- raw_inode.pino = f->pino;
- raw_inode.mode = f->mode;
- raw_inode.uid = f->uid;
- raw_inode.gid = f->gid;
- raw_inode.atime = f->atime;
- raw_inode.mtime = f->mtime;
- raw_inode.ctime = f->ctime;
- raw_inode.dsize = 0;
- raw_inode.offset = 0;
- raw_inode.rsize = 0;
- raw_inode.dsize = 0;
- raw_inode.nsize = f->nsize;
- raw_inode.nlink = f->nlink;
- raw_inode.spare = 0;
- raw_inode.rename = 0;
- raw_inode.deleted = 0;
-
- if (update_all || iattr->ia_valid & ATTR_MODE) {
- raw_inode.mode = iattr->ia_mode;
- inode->i_mode = iattr->ia_mode;
- }
- if (update_all || iattr->ia_valid & ATTR_UID) {
- raw_inode.uid = iattr->ia_uid;
- inode->i_uid = iattr->ia_uid;
- }
- if (update_all || iattr->ia_valid & ATTR_GID) {
- raw_inode.gid = iattr->ia_gid;
- inode->i_gid = iattr->ia_gid;
- }
- if (update_all || iattr->ia_valid & ATTR_SIZE) {
- int len;
- D1(printk("jffs_notify_change(): Changing size "
- "to %lu bytes!\n", (long)iattr->ia_size));
- raw_inode.offset = iattr->ia_size;
-
- /* Calculate how many bytes need to be removed from
- the end. */
- if (f->size < iattr->ia_size) {
- len = 0;
- }
- else {
- len = f->size - iattr->ia_size;
- }
-
- raw_inode.rsize = len;
-
- /* The updated node will be a removal node, with
- base at the new size and size of the nbr of bytes
- to be removed. */
- new_node->data_offset = iattr->ia_size;
- new_node->removed_size = len;
- inode->i_size = iattr->ia_size;
- inode->i_blocks = (inode->i_size + 511) >> 9;
-
- if (len) {
- invalidate_inode_pages(inode->i_mapping);
- }
- inode->i_ctime = CURRENT_TIME_SEC;
- inode->i_mtime = inode->i_ctime;
- }
- if (update_all || iattr->ia_valid & ATTR_ATIME) {
- raw_inode.atime = iattr->ia_atime.tv_sec;
- inode->i_atime = iattr->ia_atime;
- }
- if (update_all || iattr->ia_valid & ATTR_MTIME) {
- raw_inode.mtime = iattr->ia_mtime.tv_sec;
- inode->i_mtime = iattr->ia_mtime;
- }
- if (update_all || iattr->ia_valid & ATTR_CTIME) {
- raw_inode.ctime = iattr->ia_ctime.tv_sec;
- inode->i_ctime = iattr->ia_ctime;
- }
-
- /* Write this node to the flash. */
- if ((res = jffs_write_node(c, new_node, &raw_inode, f->name, NULL, recoverable, f)) < 0) {
- D(printk("jffs_notify_change(): The write failed!\n"));
- jffs_free_node(new_node);
- D3(printk (KERN_NOTICE "n_c(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- goto out;
- }
-
- jffs_insert_node(c, f, &raw_inode, NULL, new_node);
-
- mark_inode_dirty(inode);
- D3(printk (KERN_NOTICE "n_c(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
-out:
- unlock_kernel();
- return res;
-} /* jffs_notify_change() */
-
-
-static struct inode *
-jffs_new_inode(const struct inode * dir, struct jffs_raw_inode *raw_inode,
- int * err)
-{
- struct super_block * sb;
- struct inode * inode;
- struct jffs_control *c;
- struct jffs_file *f;
-
- sb = dir->i_sb;
- inode = new_inode(sb);
- if (!inode) {
- *err = -ENOMEM;
- return NULL;
- }
-
- c = (struct jffs_control *)sb->s_fs_info;
-
- inode->i_ino = raw_inode->ino;
- inode->i_mode = raw_inode->mode;
- inode->i_nlink = raw_inode->nlink;
- inode->i_uid = raw_inode->uid;
- inode->i_gid = raw_inode->gid;
- inode->i_size = raw_inode->dsize;
- inode->i_atime.tv_sec = raw_inode->atime;
- inode->i_mtime.tv_sec = raw_inode->mtime;
- inode->i_ctime.tv_sec = raw_inode->ctime;
- inode->i_ctime.tv_nsec = 0;
- inode->i_mtime.tv_nsec = 0;
- inode->i_atime.tv_nsec = 0;
- inode->i_blocks = (inode->i_size + 511) >> 9;
-
- f = jffs_find_file(c, raw_inode->ino);
-
- inode->i_private = (void *)f;
- insert_inode_hash(inode);
-
- return inode;
-}
-
-/* Get statistics of the file system. */
-static int
-jffs_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
- struct jffs_control *c = (struct jffs_control *) dentry->d_sb->s_fs_info;
- struct jffs_fmcontrol *fmc;
-
- lock_kernel();
-
- fmc = c->fmc;
-
- D2(printk("jffs_statfs()\n"));
-
- buf->f_type = JFFS_MAGIC_SB_BITMASK;
- buf->f_bsize = PAGE_CACHE_SIZE;
- buf->f_blocks = (fmc->flash_size / PAGE_CACHE_SIZE)
- - (fmc->min_free_size / PAGE_CACHE_SIZE);
- buf->f_bfree = (jffs_free_size1(fmc) + jffs_free_size2(fmc) +
- fmc->dirty_size - fmc->min_free_size)
- >> PAGE_CACHE_SHIFT;
- buf->f_bavail = buf->f_bfree;
-
- /* Find out how many files there are in the filesystem. */
- buf->f_files = jffs_foreach_file(c, jffs_file_count);
- buf->f_ffree = buf->f_bfree;
- /* buf->f_fsid = 0; */
- buf->f_namelen = JFFS_MAX_NAME_LEN;
-
- unlock_kernel();
-
- return 0;
-}
-
-
-/* Rename a file. */
-static int
-jffs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-{
- struct jffs_raw_inode raw_inode;
- struct jffs_control *c;
- struct jffs_file *old_dir_f;
- struct jffs_file *new_dir_f;
- struct jffs_file *del_f;
- struct jffs_file *f;
- struct jffs_node *node;
- struct inode *inode;
- int result = 0;
- __u32 rename_data = 0;
-
- D2(printk("***jffs_rename()\n"));
-
- D(printk("jffs_rename(): old_dir: 0x%p, old name: 0x%p, "
- "new_dir: 0x%p, new name: 0x%p\n",
- old_dir, old_dentry->d_name.name,
- new_dir, new_dentry->d_name.name));
-
- lock_kernel();
- c = (struct jffs_control *)old_dir->i_sb->s_fs_info;
- ASSERT(if (!c) {
- printk(KERN_ERR "jffs_rename(): The old_dir inode "
- "didn't have a reference to a jffs_file struct\n");
- unlock_kernel();
- return -EIO;
- });
-
- result = -ENOTDIR;
- if (!(old_dir_f = old_dir->i_private)) {
- D(printk("jffs_rename(): Old dir invalid.\n"));
- goto jffs_rename_end;
- }
-
- /* Try to find the file to move. */
- result = -ENOENT;
- if (!(f = jffs_find_child(old_dir_f, old_dentry->d_name.name,
- old_dentry->d_name.len))) {
- goto jffs_rename_end;
- }
-
- /* Find the new directory. */
- result = -ENOTDIR;
- if (!(new_dir_f = new_dir->i_private)) {
- D(printk("jffs_rename(): New dir invalid.\n"));
- goto jffs_rename_end;
- }
- D3(printk (KERN_NOTICE "rename(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
- /* Create a node and initialize as much as needed. */
- result = -ENOMEM;
- if (!(node = jffs_alloc_node())) {
- D(printk("jffs_rename(): Allocation failed: node == 0\n"));
- goto jffs_rename_end;
- }
- node->data_offset = 0;
- node->removed_size = 0;
-
- /* Initialize the raw inode. */
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.ino = f->ino;
- raw_inode.pino = new_dir_f->ino;
-/* raw_inode.version = f->highest_version + 1; */
- raw_inode.mode = f->mode;
- raw_inode.uid = current->fsuid;
- raw_inode.gid = current->fsgid;
-#if 0
- raw_inode.uid = f->uid;
- raw_inode.gid = f->gid;
-#endif
- raw_inode.atime = get_seconds();
- raw_inode.mtime = raw_inode.atime;
- raw_inode.ctime = f->ctime;
- raw_inode.offset = 0;
- raw_inode.dsize = 0;
- raw_inode.rsize = 0;
- raw_inode.nsize = new_dentry->d_name.len;
- raw_inode.nlink = f->nlink;
- raw_inode.spare = 0;
- raw_inode.rename = 0;
- raw_inode.deleted = 0;
-
- /* See if there already exists a file with the same name as
- new_name. */
- if ((del_f = jffs_find_child(new_dir_f, new_dentry->d_name.name,
- new_dentry->d_name.len))) {
- raw_inode.rename = 1;
- raw_inode.dsize = sizeof(__u32);
- rename_data = del_f->ino;
- }
-
- /* Write the new node to the flash memory. */
- if ((result = jffs_write_node(c, node, &raw_inode,
- new_dentry->d_name.name,
- (unsigned char*)&rename_data, 0, f)) < 0) {
- D(printk("jffs_rename(): Failed to write node to flash.\n"));
- jffs_free_node(node);
- goto jffs_rename_end;
- }
- raw_inode.dsize = 0;
-
- if (raw_inode.rename) {
- /* The file with the same name must be deleted. */
- //FIXME deadlock down(&c->fmc->gclock);
- if ((result = jffs_remove(new_dir, new_dentry,
- del_f->mode)) < 0) {
- /* This is really bad. */
- printk(KERN_ERR "JFFS: An error occurred in "
- "rename().\n");
- }
- // up(&c->fmc->gclock);
- }
-
- if (old_dir_f != new_dir_f) {
- /* Remove the file from its old position in the
- filesystem tree. */
- jffs_unlink_file_from_tree(f);
- }
-
- /* Insert the new node into the file system. */
- if ((result = jffs_insert_node(c, f, &raw_inode,
- new_dentry->d_name.name, node)) < 0) {
- D(printk(KERN_ERR "jffs_rename(): jffs_insert_node() "
- "failed!\n"));
- }
-
- if (old_dir_f != new_dir_f) {
- /* Insert the file to its new position in the
- file system. */
- jffs_insert_file_into_tree(f);
- }
-
- /* This is a kind of update of the inode we're about to make
- here. This is what they do in ext2fs. Kind of. */
- if ((inode = iget(new_dir->i_sb, f->ino))) {
- inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
- iput(inode);
- }
-
-jffs_rename_end:
- D3(printk (KERN_NOTICE "rename(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return result;
-} /* jffs_rename() */
-
-
-/* Read the contents of a directory. Used by programs like `ls'
- for instance. */
-static int
-jffs_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
- struct jffs_file *f;
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
- struct jffs_control *c = (struct jffs_control *)inode->i_sb->s_fs_info;
- int j;
- int ddino;
- lock_kernel();
- D3(printk (KERN_NOTICE "readdir(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
-
- D2(printk("jffs_readdir(): inode: 0x%p, filp: 0x%p\n", inode, filp));
- if (filp->f_pos == 0) {
- D3(printk("jffs_readdir(): \".\" %lu\n", inode->i_ino));
- if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR) < 0) {
- D3(printk (KERN_NOTICE "readdir(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return 0;
- }
- filp->f_pos = 1;
- }
- if (filp->f_pos == 1) {
- if (inode->i_ino == JFFS_MIN_INO) {
- ddino = JFFS_MIN_INO;
- }
- else {
- ddino = ((struct jffs_file *)
- inode->i_private)->pino;
- }
- D3(printk("jffs_readdir(): \"..\" %u\n", ddino));
- if (filldir(dirent, "..", 2, filp->f_pos, ddino, DT_DIR) < 0) {
- D3(printk (KERN_NOTICE "readdir(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return 0;
- }
- filp->f_pos++;
- }
- f = ((struct jffs_file *)inode->i_private)->children;
-
- j = 2;
- while(f && (f->deleted || j++ < filp->f_pos )) {
- f = f->sibling_next;
- }
-
- while (f) {
- D3(printk("jffs_readdir(): \"%s\" ino: %u\n",
- (f->name ? f->name : ""), f->ino));
- if (filldir(dirent, f->name, f->nsize,
- filp->f_pos , f->ino, DT_UNKNOWN) < 0) {
- D3(printk (KERN_NOTICE "readdir(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return 0;
- }
- filp->f_pos++;
- do {
- f = f->sibling_next;
- } while(f && f->deleted);
- }
- D3(printk (KERN_NOTICE "readdir(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return filp->f_pos;
-} /* jffs_readdir() */
-
-
-/* Find a file in a directory. If the file exists, return its
- corresponding dentry. */
-static struct dentry *
-jffs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-{
- struct jffs_file *d;
- struct jffs_file *f;
- struct jffs_control *c = (struct jffs_control *)dir->i_sb->s_fs_info;
- int len;
- int r = 0;
- const char *name;
- struct inode *inode = NULL;
-
- len = dentry->d_name.len;
- name = dentry->d_name.name;
-
- lock_kernel();
-
- D3({
- char *s = kmalloc(len + 1, GFP_KERNEL);
- memcpy(s, name, len);
- s[len] = '\0';
- printk("jffs_lookup(): dir: 0x%p, name: \"%s\"\n", dir, s);
- kfree(s);
- });
-
- D3(printk (KERN_NOTICE "lookup(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
-
- r = -ENAMETOOLONG;
- if (len > JFFS_MAX_NAME_LEN) {
- goto jffs_lookup_end;
- }
-
- r = -EACCES;
- if (!(d = (struct jffs_file *)dir->i_private)) {
- D(printk("jffs_lookup(): No such inode! (%lu)\n",
- dir->i_ino));
- goto jffs_lookup_end;
- }
-
- /* Get the corresponding inode to the file. */
-
- /* iget calls jffs_read_inode, so we need to drop the biglock
- before calling iget. Unfortunately, the GC has a tendency
- to sneak in here, because iget sometimes calls schedule ().
- */
-
- if ((len == 1) && (name[0] == '.')) {
- D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- if (!(inode = iget(dir->i_sb, d->ino))) {
- D(printk("jffs_lookup(): . iget() ==> NULL\n"));
- goto jffs_lookup_end_no_biglock;
- }
- D3(printk (KERN_NOTICE "lookup(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
- } else if ((len == 2) && (name[0] == '.') && (name[1] == '.')) {
- D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- if (!(inode = iget(dir->i_sb, d->pino))) {
- D(printk("jffs_lookup(): .. iget() ==> NULL\n"));
- goto jffs_lookup_end_no_biglock;
- }
- D3(printk (KERN_NOTICE "lookup(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
- } else if ((f = jffs_find_child(d, name, len))) {
- D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- if (!(inode = iget(dir->i_sb, f->ino))) {
- D(printk("jffs_lookup(): iget() ==> NULL\n"));
- goto jffs_lookup_end_no_biglock;
- }
- D3(printk (KERN_NOTICE "lookup(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
- } else {
- D3(printk("jffs_lookup(): Couldn't find the file. "
- "f = 0x%p, name = \"%s\", d = 0x%p, d->ino = %u\n",
- f, name, d, d->ino));
- inode = NULL;
- }
-
- d_add(dentry, inode);
- D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return NULL;
-
-jffs_lookup_end:
- D3(printk (KERN_NOTICE "lookup(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
-
-jffs_lookup_end_no_biglock:
- unlock_kernel();
- return ERR_PTR(r);
-} /* jffs_lookup() */
-
-
-/* Try to read a page of data from a file. */
-static int
-jffs_do_readpage_nolock(struct file *file, struct page *page)
-{
- void *buf;
- unsigned long read_len;
- int result;
- struct inode *inode = (struct inode*)page->mapping->host;
- struct jffs_file *f = (struct jffs_file *)inode->i_private;
- struct jffs_control *c = (struct jffs_control *)inode->i_sb->s_fs_info;
- int r;
- loff_t offset;
-
- D2(printk("***jffs_readpage(): file = \"%s\", page->index = %lu\n",
- (f->name ? f->name : ""), (long)page->index));
-
- get_page(page);
- /* Don't SetPageLocked(page), should be locked already */
- ClearPageUptodate(page);
- ClearPageError(page);
-
- D3(printk (KERN_NOTICE "readpage(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
-
- read_len = 0;
- result = 0;
- offset = page_offset(page);
-
- kmap(page);
- buf = page_address(page);
- if (offset < inode->i_size) {
- read_len = min_t(long, inode->i_size - offset, PAGE_SIZE);
- r = jffs_read_data(f, buf, offset, read_len);
- if (r != read_len) {
- result = -EIO;
- D(
- printk("***jffs_readpage(): Read error! "
- "Wanted to read %lu bytes but only "
- "read %d bytes.\n", read_len, r);
- );
- }
-
- }
-
- /* This handles the case of partial or no read in above */
- if(read_len < PAGE_SIZE)
- memset(buf + read_len, 0, PAGE_SIZE - read_len);
- flush_dcache_page(page);
- kunmap(page);
-
- D3(printk (KERN_NOTICE "readpage(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
-
- if (result) {
- SetPageError(page);
- }else {
- SetPageUptodate(page);
- }
-
- page_cache_release(page);
-
- D3(printk("jffs_readpage(): Leaving...\n"));
-
- return result;
-} /* jffs_do_readpage_nolock() */
-
-static int jffs_readpage(struct file *file, struct page *page)
-{
- int ret = jffs_do_readpage_nolock(file, page);
- unlock_page(page);
- return ret;
-}
-
-/* Create a new directory. */
-static int
-jffs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
- struct jffs_raw_inode raw_inode;
- struct jffs_control *c;
- struct jffs_node *node;
- struct jffs_file *dir_f;
- struct inode *inode;
- int dir_mode;
- int result = 0;
- int err;
-
- D1({
- int len = dentry->d_name.len;
- char *_name = kmalloc(len + 1, GFP_KERNEL);
- memcpy(_name, dentry->d_name.name, len);
- _name[len] = '\0';
- printk("***jffs_mkdir(): dir = 0x%p, name = \"%s\", "
- "len = %d, mode = 0x%08x\n", dir, _name, len, mode);
- kfree(_name);
- });
-
- lock_kernel();
- dir_f = dir->i_private;
-
- ASSERT(if (!dir_f) {
- printk(KERN_ERR "jffs_mkdir(): No reference to a "
- "jffs_file struct in inode.\n");
- unlock_kernel();
- return -EIO;
- });
-
- c = dir_f->c;
- D3(printk (KERN_NOTICE "mkdir(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
-
- dir_mode = S_IFDIR | (mode & (S_IRWXUGO|S_ISVTX)
- & ~current->fs->umask);
- if (dir->i_mode & S_ISGID) {
- dir_mode |= S_ISGID;
- }
-
- /* Create a node and initialize it as much as needed. */
- if (!(node = jffs_alloc_node())) {
- D(printk("jffs_mkdir(): Allocation failed: node == 0\n"));
- result = -ENOMEM;
- goto jffs_mkdir_end;
- }
- node->data_offset = 0;
- node->removed_size = 0;
-
- /* Initialize the raw inode. */
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.ino = c->next_ino++;
- raw_inode.pino = dir_f->ino;
- raw_inode.version = 1;
- raw_inode.mode = dir_mode;
- raw_inode.uid = current->fsuid;
- raw_inode.gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
- /* raw_inode.gid = current->fsgid; */
- raw_inode.atime = get_seconds();
- raw_inode.mtime = raw_inode.atime;
- raw_inode.ctime = raw_inode.atime;
- raw_inode.offset = 0;
- raw_inode.dsize = 0;
- raw_inode.rsize = 0;
- raw_inode.nsize = dentry->d_name.len;
- raw_inode.nlink = 1;
- raw_inode.spare = 0;
- raw_inode.rename = 0;
- raw_inode.deleted = 0;
-
- /* Write the new node to the flash. */
- if ((result = jffs_write_node(c, node, &raw_inode,
- dentry->d_name.name, NULL, 0, NULL)) < 0) {
- D(printk("jffs_mkdir(): jffs_write_node() failed.\n"));
- jffs_free_node(node);
- goto jffs_mkdir_end;
- }
-
- /* Insert the new node into the file system. */
- if ((result = jffs_insert_node(c, NULL, &raw_inode, dentry->d_name.name,
- node)) < 0) {
- goto jffs_mkdir_end;
- }
-
- inode = jffs_new_inode(dir, &raw_inode, &err);
- if (inode == NULL) {
- result = err;
- goto jffs_mkdir_end;
- }
-
- inode->i_op = &jffs_dir_inode_operations;
- inode->i_fop = &jffs_dir_operations;
-
- mark_inode_dirty(dir);
- d_instantiate(dentry, inode);
-
- result = 0;
-jffs_mkdir_end:
- D3(printk (KERN_NOTICE "mkdir(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return result;
-} /* jffs_mkdir() */
-
-
-/* Remove a directory. */
-static int
-jffs_rmdir(struct inode *dir, struct dentry *dentry)
-{
- struct jffs_control *c = (struct jffs_control *)dir->i_sb->s_fs_info;
- int ret;
- D3(printk("***jffs_rmdir()\n"));
- D3(printk (KERN_NOTICE "rmdir(): down biglock\n"));
- lock_kernel();
- mutex_lock(&c->fmc->biglock);
- ret = jffs_remove(dir, dentry, S_IFDIR);
- D3(printk (KERN_NOTICE "rmdir(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return ret;
-}
-
-
-/* Remove any kind of file except for directories. */
-static int
-jffs_unlink(struct inode *dir, struct dentry *dentry)
-{
- struct jffs_control *c = (struct jffs_control *)dir->i_sb->s_fs_info;
- int ret;
-
- lock_kernel();
- D3(printk("***jffs_unlink()\n"));
- D3(printk (KERN_NOTICE "unlink(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
- ret = jffs_remove(dir, dentry, 0);
- D3(printk (KERN_NOTICE "unlink(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return ret;
-}
-
-
-/* Remove a JFFS entry, i.e. plain files, directories, etc. Here we
- shouldn't test for free space on the device. */
-static int
-jffs_remove(struct inode *dir, struct dentry *dentry, int type)
-{
- struct jffs_raw_inode raw_inode;
- struct jffs_control *c;
- struct jffs_file *dir_f; /* The file-to-remove's parent. */
- struct jffs_file *del_f; /* The file to remove. */
- struct jffs_node *del_node;
- struct inode *inode = NULL;
- int result = 0;
-
- D1({
- int len = dentry->d_name.len;
- const char *name = dentry->d_name.name;
- char *_name = kmalloc(len + 1, GFP_KERNEL);
- memcpy(_name, name, len);
- _name[len] = '\0';
- printk("***jffs_remove(): file = \"%s\", ino = %ld\n", _name, dentry->d_inode->i_ino);
- kfree(_name);
- });
-
- dir_f = dir->i_private;
- c = dir_f->c;
-
- result = -ENOENT;
- if (!(del_f = jffs_find_child(dir_f, dentry->d_name.name,
- dentry->d_name.len))) {
- D(printk("jffs_remove(): jffs_find_child() failed.\n"));
- goto jffs_remove_end;
- }
-
- if (S_ISDIR(type)) {
- struct jffs_file *child = del_f->children;
- while(child) {
- if( !child->deleted ) {
- result = -ENOTEMPTY;
- goto jffs_remove_end;
- }
- child = child->sibling_next;
- }
- }
- else if (S_ISDIR(del_f->mode)) {
- D(printk("jffs_remove(): node is a directory "
- "but it shouldn't be.\n"));
- result = -EPERM;
- goto jffs_remove_end;
- }
-
- inode = dentry->d_inode;
-
- result = -EIO;
- if (del_f->ino != inode->i_ino)
- goto jffs_remove_end;
-
- if (!inode->i_nlink) {
- printk("Deleting nonexistent file inode: %lu, nlink: %d\n",
- inode->i_ino, inode->i_nlink);
- inode->i_nlink=1;
- }
-
- /* Create a node for the deletion. */
- result = -ENOMEM;
- if (!(del_node = jffs_alloc_node())) {
- D(printk("jffs_remove(): Allocation failed!\n"));
- goto jffs_remove_end;
- }
- del_node->data_offset = 0;
- del_node->removed_size = 0;
-
- /* Initialize the raw inode. */
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.ino = del_f->ino;
- raw_inode.pino = del_f->pino;
-/* raw_inode.version = del_f->highest_version + 1; */
- raw_inode.mode = del_f->mode;
- raw_inode.uid = current->fsuid;
- raw_inode.gid = current->fsgid;
- raw_inode.atime = get_seconds();
- raw_inode.mtime = del_f->mtime;
- raw_inode.ctime = raw_inode.atime;
- raw_inode.offset = 0;
- raw_inode.dsize = 0;
- raw_inode.rsize = 0;
- raw_inode.nsize = 0;
- raw_inode.nlink = del_f->nlink;
- raw_inode.spare = 0;
- raw_inode.rename = 0;
- raw_inode.deleted = 1;
-
- /* Write the new node to the flash memory. */
- if (jffs_write_node(c, del_node, &raw_inode, NULL, NULL, 1, del_f) < 0) {
- jffs_free_node(del_node);
- result = -EIO;
- goto jffs_remove_end;
- }
-
- /* Update the file. This operation will make the file disappear
- from the in-memory file system structures. */
- jffs_insert_node(c, del_f, &raw_inode, NULL, del_node);
-
- dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
- mark_inode_dirty(dir);
- inode->i_ctime = dir->i_ctime;
- inode_dec_link_count(inode);
-
- d_delete(dentry); /* This also frees the inode */
-
- result = 0;
-jffs_remove_end:
- return result;
-} /* jffs_remove() */
-
-
-static int
-jffs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
-{
- struct jffs_raw_inode raw_inode;
- struct jffs_file *dir_f;
- struct jffs_node *node = NULL;
- struct jffs_control *c;
- struct inode *inode;
- int result = 0;
- u16 data = old_encode_dev(rdev);
- int err;
-
- D1(printk("***jffs_mknod()\n"));
-
- if (!old_valid_dev(rdev))
- return -EINVAL;
- lock_kernel();
- dir_f = dir->i_private;
- c = dir_f->c;
-
- D3(printk (KERN_NOTICE "mknod(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
-
- /* Create and initialize a new node. */
- if (!(node = jffs_alloc_node())) {
- D(printk("jffs_mknod(): Allocation failed!\n"));
- result = -ENOMEM;
- goto jffs_mknod_err;
- }
- node->data_offset = 0;
- node->removed_size = 0;
-
- /* Initialize the raw inode. */
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.ino = c->next_ino++;
- raw_inode.pino = dir_f->ino;
- raw_inode.version = 1;
- raw_inode.mode = mode;
- raw_inode.uid = current->fsuid;
- raw_inode.gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
- /* raw_inode.gid = current->fsgid; */
- raw_inode.atime = get_seconds();
- raw_inode.mtime = raw_inode.atime;
- raw_inode.ctime = raw_inode.atime;
- raw_inode.offset = 0;
- raw_inode.dsize = 2;
- raw_inode.rsize = 0;
- raw_inode.nsize = dentry->d_name.len;
- raw_inode.nlink = 1;
- raw_inode.spare = 0;
- raw_inode.rename = 0;
- raw_inode.deleted = 0;
-
- /* Write the new node to the flash. */
- if ((err = jffs_write_node(c, node, &raw_inode, dentry->d_name.name,
- (unsigned char *)&data, 0, NULL)) < 0) {
- D(printk("jffs_mknod(): jffs_write_node() failed.\n"));
- result = err;
- goto jffs_mknod_err;
- }
-
- /* Insert the new node into the file system. */
- if ((err = jffs_insert_node(c, NULL, &raw_inode, dentry->d_name.name,
- node)) < 0) {
- result = err;
- goto jffs_mknod_end;
- }
-
- inode = jffs_new_inode(dir, &raw_inode, &err);
- if (inode == NULL) {
- result = err;
- goto jffs_mknod_end;
- }
-
- init_special_inode(inode, mode, rdev);
-
- d_instantiate(dentry, inode);
-
- goto jffs_mknod_end;
-
-jffs_mknod_err:
- if (node) {
- jffs_free_node(node);
- }
-
-jffs_mknod_end:
- D3(printk (KERN_NOTICE "mknod(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return result;
-} /* jffs_mknod() */
-
-
-static int
-jffs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
-{
- struct jffs_raw_inode raw_inode;
- struct jffs_control *c;
- struct jffs_file *dir_f;
- struct jffs_node *node;
- struct inode *inode;
-
- int symname_len = strlen(symname);
- int err;
-
- lock_kernel();
- D1({
- int len = dentry->d_name.len;
- char *_name = kmalloc(len + 1, GFP_KERNEL);
- char *_symname = kmalloc(symname_len + 1, GFP_KERNEL);
- memcpy(_name, dentry->d_name.name, len);
- _name[len] = '\0';
- memcpy(_symname, symname, symname_len);
- _symname[symname_len] = '\0';
- printk("***jffs_symlink(): dir = 0x%p, "
- "dentry->dname.name = \"%s\", "
- "symname = \"%s\"\n", dir, _name, _symname);
- kfree(_name);
- kfree(_symname);
- });
-
- dir_f = dir->i_private;
- ASSERT(if (!dir_f) {
- printk(KERN_ERR "jffs_symlink(): No reference to a "
- "jffs_file struct in inode.\n");
- unlock_kernel();
- return -EIO;
- });
-
- c = dir_f->c;
-
- /* Create a node and initialize it as much as needed. */
- if (!(node = jffs_alloc_node())) {
- D(printk("jffs_symlink(): Allocation failed: node = NULL\n"));
- unlock_kernel();
- return -ENOMEM;
- }
- D3(printk (KERN_NOTICE "symlink(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
-
- node->data_offset = 0;
- node->removed_size = 0;
-
- /* Initialize the raw inode. */
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.ino = c->next_ino++;
- raw_inode.pino = dir_f->ino;
- raw_inode.version = 1;
- raw_inode.mode = S_IFLNK | S_IRWXUGO;
- raw_inode.uid = current->fsuid;
- raw_inode.gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
- raw_inode.atime = get_seconds();
- raw_inode.mtime = raw_inode.atime;
- raw_inode.ctime = raw_inode.atime;
- raw_inode.offset = 0;
- raw_inode.dsize = symname_len;
- raw_inode.rsize = 0;
- raw_inode.nsize = dentry->d_name.len;
- raw_inode.nlink = 1;
- raw_inode.spare = 0;
- raw_inode.rename = 0;
- raw_inode.deleted = 0;
-
- /* Write the new node to the flash. */
- if ((err = jffs_write_node(c, node, &raw_inode, dentry->d_name.name,
- (const unsigned char *)symname, 0, NULL)) < 0) {
- D(printk("jffs_symlink(): jffs_write_node() failed.\n"));
- jffs_free_node(node);
- goto jffs_symlink_end;
- }
-
- /* Insert the new node into the file system. */
- if ((err = jffs_insert_node(c, NULL, &raw_inode, dentry->d_name.name,
- node)) < 0) {
- goto jffs_symlink_end;
- }
-
- inode = jffs_new_inode(dir, &raw_inode, &err);
- if (inode == NULL) {
- goto jffs_symlink_end;
- }
- err = 0;
- inode->i_op = &page_symlink_inode_operations;
- inode->i_mapping->a_ops = &jffs_address_operations;
-
- d_instantiate(dentry, inode);
- jffs_symlink_end:
- D3(printk (KERN_NOTICE "symlink(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return err;
-} /* jffs_symlink() */
-
-
-/* Create an inode inside a JFFS directory (dir) and return it.
- *
- * By the time this is called, we already have created
- * the directory cache entry for the new file, but it
- * is so far negative - it has no inode.
- *
- * If the create succeeds, we fill in the inode information
- * with d_instantiate().
- */
-static int
-jffs_create(struct inode *dir, struct dentry *dentry, int mode,
- struct nameidata *nd)
-{
- struct jffs_raw_inode raw_inode;
- struct jffs_control *c;
- struct jffs_node *node;
- struct jffs_file *dir_f; /* JFFS representation of the directory. */
- struct inode *inode;
- int err;
-
- lock_kernel();
- D1({
- int len = dentry->d_name.len;
- char *s = kmalloc(len + 1, GFP_KERNEL);
- memcpy(s, dentry->d_name.name, len);
- s[len] = '\0';
- printk("jffs_create(): dir: 0x%p, name: \"%s\"\n", dir, s);
- kfree(s);
- });
-
- dir_f = dir->i_private;
- ASSERT(if (!dir_f) {
- printk(KERN_ERR "jffs_create(): No reference to a "
- "jffs_file struct in inode.\n");
- unlock_kernel();
- return -EIO;
- });
-
- c = dir_f->c;
-
- /* Create a node and initialize as much as needed. */
- if (!(node = jffs_alloc_node())) {
- D(printk("jffs_create(): Allocation failed: node == 0\n"));
- unlock_kernel();
- return -ENOMEM;
- }
- D3(printk (KERN_NOTICE "create(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
-
- node->data_offset = 0;
- node->removed_size = 0;
-
- /* Initialize the raw inode. */
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.ino = c->next_ino++;
- raw_inode.pino = dir_f->ino;
- raw_inode.version = 1;
- raw_inode.mode = mode;
- raw_inode.uid = current->fsuid;
- raw_inode.gid = (dir->i_mode & S_ISGID) ? dir->i_gid : current->fsgid;
- raw_inode.atime = get_seconds();
- raw_inode.mtime = raw_inode.atime;
- raw_inode.ctime = raw_inode.atime;
- raw_inode.offset = 0;
- raw_inode.dsize = 0;
- raw_inode.rsize = 0;
- raw_inode.nsize = dentry->d_name.len;
- raw_inode.nlink = 1;
- raw_inode.spare = 0;
- raw_inode.rename = 0;
- raw_inode.deleted = 0;
-
- /* Write the new node to the flash. */
- if ((err = jffs_write_node(c, node, &raw_inode,
- dentry->d_name.name, NULL, 0, NULL)) < 0) {
- D(printk("jffs_create(): jffs_write_node() failed.\n"));
- jffs_free_node(node);
- goto jffs_create_end;
- }
-
- /* Insert the new node into the file system. */
- if ((err = jffs_insert_node(c, NULL, &raw_inode, dentry->d_name.name,
- node)) < 0) {
- goto jffs_create_end;
- }
-
- /* Initialize an inode. */
- inode = jffs_new_inode(dir, &raw_inode, &err);
- if (inode == NULL) {
- goto jffs_create_end;
- }
- err = 0;
- inode->i_op = &jffs_file_inode_operations;
- inode->i_fop = &jffs_file_operations;
- inode->i_mapping->a_ops = &jffs_address_operations;
- inode->i_mapping->nrpages = 0;
-
- d_instantiate(dentry, inode);
- jffs_create_end:
- D3(printk (KERN_NOTICE "create(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- unlock_kernel();
- return err;
-} /* jffs_create() */
-
-
-/* Write, append or rewrite data to an existing file. */
-static ssize_t
-jffs_file_write(struct file *filp, const char *buf, size_t count,
- loff_t *ppos)
-{
- struct jffs_raw_inode raw_inode;
- struct jffs_control *c;
- struct jffs_file *f;
- struct jffs_node *node;
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
- int recoverable = 0;
- size_t written = 0;
- __u32 thiscount = count;
- loff_t pos = *ppos;
- int err;
-
- inode = filp->f_path.dentry->d_inode;
-
- D2(printk("***jffs_file_write(): inode: 0x%p (ino: %lu), "
- "filp: 0x%p, buf: 0x%p, count: %d\n",
- inode, inode->i_ino, filp, buf, count));
-
-#if 0
- if (inode->i_sb->s_flags & MS_RDONLY) {
- D(printk("jffs_file_write(): MS_RDONLY\n"));
- err = -EROFS;
- goto out_isem;
- }
-#endif
- err = -EINVAL;
-
- if (!S_ISREG(inode->i_mode)) {
- D(printk("jffs_file_write(): inode->i_mode == 0x%08x\n",
- inode->i_mode));
- goto out_isem;
- }
-
- if (!(f = inode->i_private)) {
- D(printk("jffs_file_write(): inode->i_private = 0x%p\n",
- inode->i_private));
- goto out_isem;
- }
-
- c = f->c;
-
- /*
- * This will never trigger with sane page sizes. leave it in
- * anyway, since I'm thinking about how to merge larger writes
- * (the current idea is to poke a thread that does the actual
- * I/O and starts by doing a mutex_lock(&inode->i_mutex). then we
- * would need to get the page cache pages and have a list of
- * I/O requests and do write-merging here.
- * -- prumpf
- */
- thiscount = min(c->fmc->max_chunk_size - sizeof(struct jffs_raw_inode), count);
-
- D3(printk (KERN_NOTICE "file_write(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
-
- /* Urgh. POSIX says we can do short writes if we feel like it.
- * In practice, we can't. Nothing will cope. So we loop until
- * we're done.
- *
- * <_Anarchy_> posix and reality are not interconnected on this issue
- */
- while (count) {
- /* Things are going to be written so we could allocate and
- initialize the necessary data structures now. */
- if (!(node = jffs_alloc_node())) {
- D(printk("jffs_file_write(): node == 0\n"));
- err = -ENOMEM;
- goto out;
- }
-
- node->data_offset = pos;
- node->removed_size = 0;
-
- /* Initialize the raw inode. */
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.ino = f->ino;
- raw_inode.pino = f->pino;
-
- raw_inode.mode = f->mode;
-
- raw_inode.uid = f->uid;
- raw_inode.gid = f->gid;
- raw_inode.atime = get_seconds();
- raw_inode.mtime = raw_inode.atime;
- raw_inode.ctime = f->ctime;
- raw_inode.offset = pos;
- raw_inode.dsize = thiscount;
- raw_inode.rsize = 0;
- raw_inode.nsize = f->nsize;
- raw_inode.nlink = f->nlink;
- raw_inode.spare = 0;
- raw_inode.rename = 0;
- raw_inode.deleted = 0;
-
- if (pos < f->size) {
- node->removed_size = raw_inode.rsize = min(thiscount, (__u32)(f->size - pos));
-
- /* If this node is going entirely over the top of old data,
- we can allow it to go into the reserved space, because
- we know that GC can reclaim the space later.
- */
- if (pos + thiscount < f->size) {
- /* If all the data we're overwriting are _real_,
- not just holes, then:
- recoverable = 1;
- */
- }
- }
-
- /* Write the new node to the flash. */
- /* NOTE: We would be quite happy if jffs_write_node() wrote a
- smaller node than we were expecting. There's no need for it
- to waste the space at the end of the flash just because it's
- a little smaller than what we asked for. But that's a whole
- new can of worms which I'm not going to open this week.
- -- dwmw2.
- */
- if ((err = jffs_write_node(c, node, &raw_inode, f->name,
- (const unsigned char *)buf,
- recoverable, f)) < 0) {
- D(printk("jffs_file_write(): jffs_write_node() failed.\n"));
- jffs_free_node(node);
- goto out;
- }
-
- written += err;
- buf += err;
- count -= err;
- pos += err;
-
- /* Insert the new node into the file system. */
- if ((err = jffs_insert_node(c, f, &raw_inode, NULL, node)) < 0) {
- goto out;
- }
-
- D3(printk("jffs_file_write(): new f_pos %ld.\n", (long)pos));
-
- thiscount = min(c->fmc->max_chunk_size - sizeof(struct jffs_raw_inode), count);
- }
- out:
- D3(printk (KERN_NOTICE "file_write(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
-
- /* Fix things in the real inode. */
- if (pos > inode->i_size) {
- inode->i_size = pos;
- inode->i_blocks = (inode->i_size + 511) >> 9;
- }
- inode->i_ctime = inode->i_mtime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
- invalidate_inode_pages(inode->i_mapping);
-
- out_isem:
- return err;
-} /* jffs_file_write() */
-
-static int
-jffs_prepare_write(struct file *filp, struct page *page,
- unsigned from, unsigned to)
-{
- /* FIXME: we should detect some error conditions here */
-
- /* Bugger that. We should make sure the page is uptodate */
- if (!PageUptodate(page) && (from || to < PAGE_CACHE_SIZE))
- return jffs_do_readpage_nolock(filp, page);
-
- return 0;
-} /* jffs_prepare_write() */
-
-static int
-jffs_commit_write(struct file *filp, struct page *page,
- unsigned from, unsigned to)
-{
- void *addr = page_address(page) + from;
- /* XXX: PAGE_CACHE_SHIFT or PAGE_SHIFT */
- loff_t pos = page_offset(page) + from;
-
- return jffs_file_write(filp, addr, to-from, &pos);
-} /* jffs_commit_write() */
-
-/* This is our ioctl() routine. */
-static int
-jffs_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
- unsigned long arg)
-{
- struct jffs_control *c;
- int ret = 0;
-
- D2(printk("***jffs_ioctl(): cmd = 0x%08x, arg = 0x%08lx\n",
- cmd, arg));
-
- if (!(c = (struct jffs_control *)inode->i_sb->s_fs_info)) {
- printk(KERN_ERR "JFFS: Bad inode in ioctl() call. "
- "(cmd = 0x%08x)\n", cmd);
- return -EIO;
- }
- D3(printk (KERN_NOTICE "ioctl(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
-
- switch (cmd) {
- case JFFS_PRINT_HASH:
- jffs_print_hash_table(c);
- break;
- case JFFS_PRINT_TREE:
- jffs_print_tree(c->root, 0);
- break;
- case JFFS_GET_STATUS:
- {
- struct jffs_flash_status fst;
- struct jffs_fmcontrol *fmc = c->fmc;
- printk("Flash status -- ");
- if (!access_ok(VERIFY_WRITE,
- (struct jffs_flash_status __user *)arg,
- sizeof(struct jffs_flash_status))) {
- D(printk("jffs_ioctl(): Bad arg in "
- "JFFS_GET_STATUS ioctl!\n"));
- ret = -EFAULT;
- break;
- }
- fst.size = fmc->flash_size;
- fst.used = fmc->used_size;
- fst.dirty = fmc->dirty_size;
- fst.begin = fmc->head->offset;
- fst.end = fmc->tail->offset + fmc->tail->size;
- printk("size: %d, used: %d, dirty: %d, "
- "begin: %d, end: %d\n",
- fst.size, fst.used, fst.dirty,
- fst.begin, fst.end);
- if (copy_to_user((struct jffs_flash_status __user *)arg,
- &fst,
- sizeof(struct jffs_flash_status))) {
- ret = -EFAULT;
- }
- }
- break;
- default:
- ret = -ENOTTY;
- }
- D3(printk (KERN_NOTICE "ioctl(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- return ret;
-} /* jffs_ioctl() */
-
-
-static const struct address_space_operations jffs_address_operations = {
- .readpage = jffs_readpage,
- .prepare_write = jffs_prepare_write,
- .commit_write = jffs_commit_write,
-};
-
-static int jffs_fsync(struct file *f, struct dentry *d, int datasync)
-{
- /* We currently have O_SYNC operations at all times.
- Do nothing.
- */
- return 0;
-}
-
-
-static const struct file_operations jffs_file_operations =
-{
- .open = generic_file_open,
- .llseek = generic_file_llseek,
- .read = do_sync_read,
- .aio_read = generic_file_aio_read,
- .write = do_sync_write,
- .aio_write = generic_file_aio_write,
- .ioctl = jffs_ioctl,
- .mmap = generic_file_readonly_mmap,
- .fsync = jffs_fsync,
- .sendfile = generic_file_sendfile,
-};
-
-
-static struct inode_operations jffs_file_inode_operations =
-{
- .lookup = jffs_lookup, /* lookup */
- .setattr = jffs_setattr,
-};
-
-
-static const struct file_operations jffs_dir_operations =
-{
- .readdir = jffs_readdir,
-};
-
-
-static struct inode_operations jffs_dir_inode_operations =
-{
- .create = jffs_create,
- .lookup = jffs_lookup,
- .unlink = jffs_unlink,
- .symlink = jffs_symlink,
- .mkdir = jffs_mkdir,
- .rmdir = jffs_rmdir,
- .mknod = jffs_mknod,
- .rename = jffs_rename,
- .setattr = jffs_setattr,
-};
-
-
-/* Initialize an inode for the VFS. */
-static void
-jffs_read_inode(struct inode *inode)
-{
- struct jffs_file *f;
- struct jffs_control *c;
-
- D3(printk("jffs_read_inode(): inode->i_ino == %lu\n", inode->i_ino));
-
- if (!inode->i_sb) {
- D(printk("jffs_read_inode(): !inode->i_sb ==> "
- "No super block!\n"));
- return;
- }
- c = (struct jffs_control *)inode->i_sb->s_fs_info;
- D3(printk (KERN_NOTICE "read_inode(): down biglock\n"));
- mutex_lock(&c->fmc->biglock);
- if (!(f = jffs_find_file(c, inode->i_ino))) {
- D(printk("jffs_read_inode(): No such inode (%lu).\n",
- inode->i_ino));
- D3(printk (KERN_NOTICE "read_inode(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
- return;
- }
- inode->i_private = f;
- inode->i_mode = f->mode;
- inode->i_nlink = f->nlink;
- inode->i_uid = f->uid;
- inode->i_gid = f->gid;
- inode->i_size = f->size;
- inode->i_atime.tv_sec = f->atime;
- inode->i_mtime.tv_sec = f->mtime;
- inode->i_ctime.tv_sec = f->ctime;
- inode->i_atime.tv_nsec =
- inode->i_mtime.tv_nsec =
- inode->i_ctime.tv_nsec = 0;
-
- inode->i_blocks = (inode->i_size + 511) >> 9;
- if (S_ISREG(inode->i_mode)) {
- inode->i_op = &jffs_file_inode_operations;
- inode->i_fop = &jffs_file_operations;
- inode->i_mapping->a_ops = &jffs_address_operations;
- }
- else if (S_ISDIR(inode->i_mode)) {
- inode->i_op = &jffs_dir_inode_operations;
- inode->i_fop = &jffs_dir_operations;
- }
- else if (S_ISLNK(inode->i_mode)) {
- inode->i_op = &page_symlink_inode_operations;
- inode->i_mapping->a_ops = &jffs_address_operations;
- }
- else {
- /* If the node is a device of some sort, then the number of
- the device should be read from the flash memory and then
- added to the inode's i_rdev member. */
- u16 val;
- jffs_read_data(f, (char *)&val, 0, 2);
- init_special_inode(inode, inode->i_mode,
- old_decode_dev(val));
- }
-
- D3(printk (KERN_NOTICE "read_inode(): up biglock\n"));
- mutex_unlock(&c->fmc->biglock);
-}
-
-
-static void
-jffs_delete_inode(struct inode *inode)
-{
- struct jffs_file *f;
- struct jffs_control *c;
- D3(printk("jffs_delete_inode(): inode->i_ino == %lu\n",
- inode->i_ino));
-
- truncate_inode_pages(&inode->i_data, 0);
- lock_kernel();
- inode->i_size = 0;
- inode->i_blocks = 0;
- inode->i_private = NULL;
- clear_inode(inode);
- if (inode->i_nlink == 0) {
- c = (struct jffs_control *) inode->i_sb->s_fs_info;
- f = (struct jffs_file *) jffs_find_file (c, inode->i_ino);
- jffs_possibly_delete_file(f);
- }
-
- unlock_kernel();
-}
-
-
-static void
-jffs_write_super(struct super_block *sb)
-{
- struct jffs_control *c = (struct jffs_control *)sb->s_fs_info;
- lock_kernel();
- jffs_garbage_collect_trigger(c);
- unlock_kernel();
-}
-
-static int jffs_remount(struct super_block *sb, int *flags, char *data)
-{
- *flags |= MS_NODIRATIME;
- return 0;
-}
-
-static struct super_operations jffs_ops =
-{
- .read_inode = jffs_read_inode,
- .delete_inode = jffs_delete_inode,
- .put_super = jffs_put_super,
- .write_super = jffs_write_super,
- .statfs = jffs_statfs,
- .remount_fs = jffs_remount,
-};
-
-static int jffs_get_sb(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data, struct vfsmount *mnt)
-{
- return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super,
- mnt);
-}
-
-static struct file_system_type jffs_fs_type = {
- .owner = THIS_MODULE,
- .name = "jffs",
- .get_sb = jffs_get_sb,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-
-static int __init
-init_jffs_fs(void)
-{
- printk(KERN_INFO "JFFS version " JFFS_VERSION_STRING
- ", (C) 1999, 2000 Axis Communications AB\n");
-
-#ifdef CONFIG_JFFS_PROC_FS
- jffs_proc_root = proc_mkdir("jffs", proc_root_fs);
- if (!jffs_proc_root) {
- printk(KERN_WARNING "cannot create /proc/jffs entry\n");
- }
-#endif
- fm_cache = kmem_cache_create("jffs_fm", sizeof(struct jffs_fm),
- 0,
- SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
- NULL, NULL);
- if (!fm_cache) {
- return -ENOMEM;
- }
-
- node_cache = kmem_cache_create("jffs_node",sizeof(struct jffs_node),
- 0,
- SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD,
- NULL, NULL);
- if (!node_cache) {
- kmem_cache_destroy(fm_cache);
- return -ENOMEM;
- }
-
- return register_filesystem(&jffs_fs_type);
-}
-
-static void __exit
-exit_jffs_fs(void)
-{
- unregister_filesystem(&jffs_fs_type);
- kmem_cache_destroy(fm_cache);
- kmem_cache_destroy(node_cache);
-}
-
-module_init(init_jffs_fs)
-module_exit(exit_jffs_fs)
-
-MODULE_DESCRIPTION("The Journalling Flash File System");
-MODULE_AUTHOR("Axis Communications AB.");
-MODULE_LICENSE("GPL");
diff --git a/fs/jffs/intrep.c b/fs/jffs/intrep.c
deleted file mode 100644
index 6dd18911b44c..000000000000
--- a/fs/jffs/intrep.c
+++ /dev/null
@@ -1,3449 +0,0 @@
-/*
- * JFFS -- Journaling Flash File System, Linux implementation.
- *
- * Copyright (C) 1999, 2000 Axis Communications, Inc.
- *
- * Created by Finn Hakansson <finn@axis.com>.
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * $Id: intrep.c,v 1.102 2001/09/23 23:28:36 dwmw2 Exp $
- *
- * Ported to Linux 2.3.x and MTD:
- * Copyright (C) 2000 Alexander Larsson (alex@cendio.se), Cendio Systems AB
- *
- */
-
-/* This file contains the code for the internal structure of the
- Journaling Flash File System, JFFS. */
-
-/*
- * Todo list:
- *
- * memcpy_to_flash() and memcpy_from_flash() functions.
- *
- * Implementation of hard links.
- *
- * Organize the source code in a better way. Against the VFS we could
- * have jffs_ext.c, and against the block device jffs_int.c.
- * A better file-internal organization too.
- *
- * A better checksum algorithm.
- *
- * Consider endianness stuff. ntohl() etc.
- *
- * Are we handling the atime, mtime, ctime members of the inode right?
- *
- * Remove some duplicated code. Take a look at jffs_write_node() and
- * jffs_rewrite_data() for instance.
- *
- * Implement more meaning of the nlink member in various data structures.
- * nlink could be used in conjunction with hard links for instance.
- *
- * Better memory management. Allocate data structures in larger chunks
- * if possible.
- *
- * If too much meta data is stored, a garbage collect should be issued.
- * We have experienced problems with too much meta data with for instance
- * log files.
- *
- * Improve the calls to jffs_ioctl(). We would like to retrieve more
- * information to be able to debug (or to supervise) JFFS during run-time.
- *
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/jffs.h>
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/pagemap.h>
-#include <linux/mutex.h>
-#include <asm/byteorder.h>
-#include <linux/smp_lock.h>
-#include <linux/time.h>
-#include <linux/ctype.h>
-#include <linux/freezer.h>
-
-#include "intrep.h"
-#include "jffs_fm.h"
-
-long no_jffs_node = 0;
-static long no_jffs_file = 0;
-#if defined(JFFS_MEMORY_DEBUG) && JFFS_MEMORY_DEBUG
-long no_jffs_control = 0;
-long no_jffs_raw_inode = 0;
-long no_jffs_node_ref = 0;
-long no_jffs_fm = 0;
-long no_jffs_fmcontrol = 0;
-long no_hash = 0;
-long no_name = 0;
-#endif
-
-static int jffs_scan_flash(struct jffs_control *c);
-static int jffs_update_file(struct jffs_file *f, struct jffs_node *node);
-static int jffs_build_file(struct jffs_file *f);
-static int jffs_free_file(struct jffs_file *f);
-static int jffs_free_node_list(struct jffs_file *f);
-static int jffs_garbage_collect_now(struct jffs_control *c);
-static int jffs_insert_file_into_hash(struct jffs_file *f);
-static int jffs_remove_redundant_nodes(struct jffs_file *f);
-
-/* Is there enough space on the flash? */
-static inline int JFFS_ENOUGH_SPACE(struct jffs_control *c, __u32 space)
-{
- struct jffs_fmcontrol *fmc = c->fmc;
-
- while (1) {
- if ((fmc->flash_size - (fmc->used_size + fmc->dirty_size))
- >= fmc->min_free_size + space) {
- return 1;
- }
- if (fmc->dirty_size < fmc->sector_size)
- return 0;
-
- if (jffs_garbage_collect_now(c)) {
- D1(printk("JFFS_ENOUGH_SPACE: jffs_garbage_collect_now() failed.\n"));
- return 0;
- }
- }
-}
-
-#if CONFIG_JFFS_FS_VERBOSE > 0
-static __u8
-flash_read_u8(struct mtd_info *mtd, loff_t from)
-{
- size_t retlen;
- __u8 ret;
- int res;
-
- res = MTD_READ(mtd, from, 1, &retlen, &ret);
- if (retlen != 1) {
- printk("Didn't read a byte in flash_read_u8(). Returned %d\n", res);
- return 0;
- }
-
- return ret;
-}
-
-static void
-jffs_hexdump(struct mtd_info *mtd, loff_t pos, int size)
-{
- char line[16];
- int j = 0;
-
- while (size > 0) {
- int i;
-
- printk("%ld:", (long) pos);
- for (j = 0; j < 16; j++) {
- line[j] = flash_read_u8(mtd, pos++);
- }
- for (i = 0; i < j; i++) {
- if (!(i & 1)) {
- printk(" %.2x", line[i] & 0xff);
- }
- else {
- printk("%.2x", line[i] & 0xff);
- }
- }
-
- /* Print empty space */
- for (; i < 16; i++) {
- if (!(i & 1)) {
- printk(" ");
- }
- else {
- printk(" ");
- }
- }
- printk(" ");
-
- for (i = 0; i < j; i++) {
- if (isgraph(line[i])) {
- printk("%c", line[i]);
- }
- else {
- printk(".");
- }
- }
- printk("\n");
- size -= 16;
- }
-}
-
-/* Print the contents of a node. */
-static void
-jffs_print_node(struct jffs_node *n)
-{
- D(printk("jffs_node: 0x%p\n", n));
- D(printk("{\n"));
- D(printk(" 0x%08x, /* version */\n", n->version));
- D(printk(" 0x%08x, /* data_offset */\n", n->data_offset));
- D(printk(" 0x%08x, /* data_size */\n", n->data_size));
- D(printk(" 0x%08x, /* removed_size */\n", n->removed_size));
- D(printk(" 0x%08x, /* fm_offset */\n", n->fm_offset));
- D(printk(" 0x%02x, /* name_size */\n", n->name_size));
- D(printk(" 0x%p, /* fm, fm->offset: %u */\n",
- n->fm, (n->fm ? n->fm->offset : 0)));
- D(printk(" 0x%p, /* version_prev */\n", n->version_prev));
- D(printk(" 0x%p, /* version_next */\n", n->version_next));
- D(printk(" 0x%p, /* range_prev */\n", n->range_prev));
- D(printk(" 0x%p, /* range_next */\n", n->range_next));
- D(printk("}\n"));
-}
-
-#endif
-
-/* Print the contents of a raw inode. */
-static void
-jffs_print_raw_inode(struct jffs_raw_inode *raw_inode)
-{
- D(printk("jffs_raw_inode: inode number: %u\n", raw_inode->ino));
- D(printk("{\n"));
- D(printk(" 0x%08x, /* magic */\n", raw_inode->magic));
- D(printk(" 0x%08x, /* ino */\n", raw_inode->ino));
- D(printk(" 0x%08x, /* pino */\n", raw_inode->pino));
- D(printk(" 0x%08x, /* version */\n", raw_inode->version));
- D(printk(" 0x%08x, /* mode */\n", raw_inode->mode));
- D(printk(" 0x%04x, /* uid */\n", raw_inode->uid));
- D(printk(" 0x%04x, /* gid */\n", raw_inode->gid));
- D(printk(" 0x%08x, /* atime */\n", raw_inode->atime));
- D(printk(" 0x%08x, /* mtime */\n", raw_inode->mtime));
- D(printk(" 0x%08x, /* ctime */\n", raw_inode->ctime));
- D(printk(" 0x%08x, /* offset */\n", raw_inode->offset));
- D(printk(" 0x%08x, /* dsize */\n", raw_inode->dsize));
- D(printk(" 0x%08x, /* rsize */\n", raw_inode->rsize));
- D(printk(" 0x%02x, /* nsize */\n", raw_inode->nsize));
- D(printk(" 0x%02x, /* nlink */\n", raw_inode->nlink));
- D(printk(" 0x%02x, /* spare */\n",
- raw_inode->spare));
- D(printk(" %u, /* rename */\n",
- raw_inode->rename));
- D(printk(" %u, /* deleted */\n",
- raw_inode->deleted));
- D(printk(" 0x%02x, /* accurate */\n",
- raw_inode->accurate));
- D(printk(" 0x%08x, /* dchksum */\n", raw_inode->dchksum));
- D(printk(" 0x%04x, /* nchksum */\n", raw_inode->nchksum));
- D(printk(" 0x%04x, /* chksum */\n", raw_inode->chksum));
- D(printk("}\n"));
-}
-
-#define flash_safe_acquire(arg)
-#define flash_safe_release(arg)
-
-
-static int
-flash_safe_read(struct mtd_info *mtd, loff_t from,
- u_char *buf, size_t count)
-{
- size_t retlen;
- int res;
-
- D3(printk(KERN_NOTICE "flash_safe_read(%p, %08x, %p, %08x)\n",
- mtd, (unsigned int) from, buf, count));
-
- res = mtd->read(mtd, from, count, &retlen, buf);
- if (retlen != count) {
- panic("Didn't read all bytes in flash_safe_read(). Returned %d\n", res);
- }
- return res?res:retlen;
-}
-
-
-static __u32
-flash_read_u32(struct mtd_info *mtd, loff_t from)
-{
- size_t retlen;
- __u32 ret;
- int res;
-
- res = mtd->read(mtd, from, 4, &retlen, (unsigned char *)&ret);
- if (retlen != 4) {
- printk("Didn't read all bytes in flash_read_u32(). Returned %d\n", res);
- return 0;
- }
-
- return ret;
-}
-
-
-static int
-flash_safe_write(struct mtd_info *mtd, loff_t to,
- const u_char *buf, size_t count)
-{
- size_t retlen;
- int res;
-
- D3(printk(KERN_NOTICE "flash_safe_write(%p, %08x, %p, %08x)\n",
- mtd, (unsigned int) to, buf, count));
-
- res = mtd->write(mtd, to, count, &retlen, buf);
- if (retlen != count) {
- printk("Didn't write all bytes in flash_safe_write(). Returned %d\n", res);
- }
- return res?res:retlen;
-}
-
-
-static int
-flash_safe_writev(struct mtd_info *mtd, const struct kvec *vecs,
- unsigned long iovec_cnt, loff_t to)
-{
- size_t retlen, retlen_a;
- int i;
- int res;
-
- D3(printk(KERN_NOTICE "flash_safe_writev(%p, %08x, %p)\n",
- mtd, (unsigned int) to, vecs));
-
- if (mtd->writev) {
- res = mtd->writev(mtd, vecs, iovec_cnt, to, &retlen);
- return res ? res : retlen;
- }
- /* Not implemented writev. Repeatedly use write - on the not so
- unreasonable assumption that the mtd driver doesn't care how
- many write cycles we use. */
- res=0;
- retlen=0;
-
- for (i=0; !res && i<iovec_cnt; i++) {
- res = mtd->write(mtd, to, vecs[i].iov_len, &retlen_a,
- vecs[i].iov_base);
- if (retlen_a != vecs[i].iov_len) {
- printk("Didn't write all bytes in flash_safe_writev(). Returned %d\n", res);
- if (i != iovec_cnt-1)
- return -EIO;
- }
- /* If res is non-zero, retlen_a is undefined, but we don't
- care because in that case it's not going to be
- returned anyway.
- */
- to += retlen_a;
- retlen += retlen_a;
- }
- return res?res:retlen;
-}
-
-
-static int
-flash_memset(struct mtd_info *mtd, loff_t to,
- const u_char c, size_t size)
-{
- static unsigned char pattern[64];
- int i;
-
- /* fill up pattern */
-
- for(i = 0; i < 64; i++)
- pattern[i] = c;
-
- /* write as many 64-byte chunks as we can */
-
- while (size >= 64) {
- flash_safe_write(mtd, to, pattern, 64);
- size -= 64;
- to += 64;
- }
-
- /* and the rest */
-
- if(size)
- flash_safe_write(mtd, to, pattern, size);
-
- return size;
-}
-
-
-static void
-intrep_erase_callback(struct erase_info *done)
-{
- wait_queue_head_t *wait_q;
-
- wait_q = (wait_queue_head_t *)done->priv;
-
- wake_up(wait_q);
-}
-
-
-static int
-flash_erase_region(struct mtd_info *mtd, loff_t start,
- size_t size)
-{
- struct erase_info *erase;
- DECLARE_WAITQUEUE(wait, current);
- wait_queue_head_t wait_q;
-
- erase = kmalloc(sizeof(struct erase_info), GFP_KERNEL);
- if (!erase)
- return -ENOMEM;
-
- init_waitqueue_head(&wait_q);
-
- erase->mtd = mtd;
- erase->callback = intrep_erase_callback;
- erase->addr = start;
- erase->len = size;
- erase->priv = (u_long)&wait_q;
-
- /* FIXME: Use TASK_INTERRUPTIBLE and deal with being interrupted */
- set_current_state(TASK_UNINTERRUPTIBLE);
- add_wait_queue(&wait_q, &wait);
-
- if (mtd->erase(mtd, erase) < 0) {
- set_current_state(TASK_RUNNING);
- remove_wait_queue(&wait_q, &wait);
- kfree(erase);
-
- printk(KERN_WARNING "flash: erase of region [0x%lx, 0x%lx] "
- "totally failed\n", (long)start, (long)start + size);
-
- return -1;
- }
-
- schedule(); /* Wait for flash to finish. */
- remove_wait_queue(&wait_q, &wait);
-
- kfree(erase);
-
- return 0;
-}
-
-/* This routine calculates checksums in JFFS. */
-static __u32
-jffs_checksum(const void *data, int size)
-{
- __u32 sum = 0;
- __u8 *ptr = (__u8 *)data;
- while (size-- > 0) {
- sum += *ptr++;
- }
- D3(printk(", result: 0x%08x\n", sum));
- return sum;
-}
-
-
-static int
-jffs_checksum_flash(struct mtd_info *mtd, loff_t start, int size, __u32 *result)
-{
- __u32 sum = 0;
- loff_t ptr = start;
- __u8 *read_buf;
- int i, length;
-
- /* Allocate read buffer */
- read_buf = kmalloc(sizeof(__u8) * 4096, GFP_KERNEL);
- if (!read_buf) {
- printk(KERN_NOTICE "kmalloc failed in jffs_checksum_flash()\n");
- return -ENOMEM;
- }
- /* Loop until checksum done */
- while (size) {
- /* Get amount of data to read */
- if (size < 4096)
- length = size;
- else
- length = 4096;
-
- /* Perform flash read */
- D3(printk(KERN_NOTICE "jffs_checksum_flash\n"));
- flash_safe_read(mtd, ptr, &read_buf[0], length);
-
- /* Compute checksum */
- for (i=0; i < length ; i++)
- sum += read_buf[i];
-
- /* Update pointer and size */
- size -= length;
- ptr += length;
- }
-
- /* Free read buffer */
- kfree(read_buf);
-
- /* Return result */
- D3(printk("checksum result: 0x%08x\n", sum));
- *result = sum;
- return 0;
-}
-
-static __inline__ void jffs_fm_write_lock(struct jffs_fmcontrol *fmc)
-{
- // down(&fmc->wlock);
-}
-
-static __inline__ void jffs_fm_write_unlock(struct jffs_fmcontrol *fmc)
-{
- // up(&fmc->wlock);
-}
-
-
-/* Create and initialize a new struct jffs_file. */
-static struct jffs_file *
-jffs_create_file(struct jffs_control *c,
- const struct jffs_raw_inode *raw_inode)
-{
- struct jffs_file *f;
-
- if (!(f = kzalloc(sizeof(*f), GFP_KERNEL))) {
- D(printk("jffs_create_file(): Failed!\n"));
- return NULL;
- }
- no_jffs_file++;
- f->ino = raw_inode->ino;
- f->pino = raw_inode->pino;
- f->nlink = raw_inode->nlink;
- f->deleted = raw_inode->deleted;
- f->c = c;
-
- return f;
-}
-
-
-/* Build a control block for the file system.
-
-   Allocates the struct jffs_control and its hash table of JFFS_HASH_SIZE
-   list heads, then obtains the flash memory control from
-   jffs_build_begin() for the minor device number of `sb'.
-
-   Only root, gc_task, hash_len, hash, fmc, next_ino and delete_list are
-   assigned here; the block comes from kmalloc(), so every other field is
-   left for the caller to set.
-
-   Returns the new control block, or NULL if any step failed.  On failure
-   everything allocated by this function is released again. */
-static struct jffs_control *
-jffs_create_control(struct super_block *sb)
-{
-	struct jffs_control *c;
-	register int s = sizeof(struct jffs_control);
-	int i;
-	D(char *t = 0);
-
-	D2(printk("jffs_create_control()\n"));
-
-	if (!(c = kmalloc(s, GFP_KERNEL))) {
-		goto fail_control;
-	}
-	DJM(no_jffs_control++);
-	c->root = NULL;
-	c->gc_task = NULL;
-	c->hash_len = JFFS_HASH_SIZE;
-	s = sizeof(struct list_head) * c->hash_len;
-	if (!(c->hash = kmalloc(s, GFP_KERNEL))) {
-		goto fail_hash;
-	}
-	DJM(no_hash++);
-	for (i = 0; i < c->hash_len; i++)
-		INIT_LIST_HEAD(&c->hash[i]);
-	if (!(c->fmc = jffs_build_begin(c, MINOR(sb->s_dev)))) {
-		goto fail_fminit;
-	}
-	c->next_ino = JFFS_MIN_INO + 1;
-	c->delete_list = (struct jffs_delete_list *) 0;
-	return c;
-
-fail_fminit:
-	/* The hash table already exists on this path.  Free it here,
-	   otherwise it is leaked when `c' itself is freed below. */
-	kfree(c->hash);
-	DJM(no_hash--);
-	D(t = "c->fmc");
-fail_hash:
-	kfree(c);
-	DJM(no_jffs_control--);
-	D(t = t ? t : "c->hash");
-fail_control:
-	D(t = t ? t : "control");
-	D(printk("jffs_create_control(): Allocation failed: (%s)\n", t));
-	return (struct jffs_control *)0;
-}
-
-
-/* Tear down a control block: free the pending delete list, every file
-   and node reachable through the hash table, the hash table itself, the
-   flash memory control and finally `c'.  A NULL `c' is only logged. */
-void
-jffs_cleanup_control(struct jffs_control *c)
-{
-	struct jffs_delete_list *entry;
-
-	D2(printk("jffs_cleanup_control()\n"));
-
-	if (c == NULL) {
-		D(printk("jffs_cleanup_control(): c == NULL !!!\n"));
-		return;
-	}
-
-	/* Pop and free the delete list one element at a time. */
-	while ((entry = c->delete_list) != NULL) {
-		c->delete_list = entry->next;
-		kfree(entry);
-	}
-
-	/* First every file's node list, then the files themselves. */
-	if (c->hash != NULL) {
-		jffs_foreach_file(c, jffs_free_node_list);
-		jffs_foreach_file(c, jffs_free_file);
-		kfree(c->hash);
-		DJM(no_hash--);
-	}
-
-	jffs_cleanup_fmcontrol(c->fmc);
-	kfree(c);
-	DJM(no_jffs_control--);
-	D3(printk("jffs_cleanup_control(): Leaving...\n"));
-}
-
-
-/* Create an in-RAM root directory (inode JFFS_MIN_INO) backed by a single
-   zeroed node and put it into the hash table of `c'.  Called by
-   jffs_build_fs().  Returns 0 on success or -ENOMEM. */
-static int
-jffs_add_virtual_root(struct jffs_control *c)
-{
-	struct jffs_file *vroot;
-	struct jffs_node *vnode;
-
-	D2(printk("jffs_add_virtual_root(): "
-		  "Creating a virtual root directory.\n"));
-
-	vroot = kzalloc(sizeof(struct jffs_file), GFP_KERNEL);
-	if (vroot == NULL)
-		return -ENOMEM;
-	no_jffs_file++;
-
-	vnode = jffs_alloc_node();
-	if (vnode == NULL) {
-		/* Undo the file allocation and its accounting. */
-		kfree(vroot);
-		no_jffs_file--;
-		return -ENOMEM;
-	}
-	DJM(no_jffs_node++);
-
-	memset(vnode, 0, sizeof(struct jffs_node));
-	vnode->ino = JFFS_MIN_INO;
-
-	vroot->ino = JFFS_MIN_INO;
-	/* Directory, rwx for the owner, r-x for group and others. */
-	vroot->mode = S_IFDIR | S_IRWXU | S_IRGRP
-		| S_IXGRP | S_IROTH | S_IXOTH;
-	vroot->atime = vroot->mtime = vroot->ctime = get_seconds();
-	vroot->nlink = 1;
-	vroot->c = c;
-	vroot->version_head = vroot->version_tail = vnode;
-
-	jffs_insert_file_into_hash(vroot);
-	return 0;
-}
-
-
-/* Build the in-RAM representation of the file system for `sb'.
-
-   Creates a control block, scans the flash (once more from scratch if
-   the first scan answers -EAGAIN), adds a virtual root if none was
-   found, applies the delete list, prunes deleted files and redundant
-   nodes, builds the directory tree and finally the files themselves.
-   On success the control block is stored in sb->s_fs_info and 0 is
-   returned; on failure the control block is cleaned up and a negative
-   value is returned. */
-int
-jffs_build_fs(struct super_block *sb)
-{
-	struct jffs_control *c;
-	struct jffs_delete_list *dl;
-	struct jffs_file *victim;
-	int err = 0;
-
-	D2(printk("jffs_build_fs()\n"));
-
-	c = jffs_create_control(sb);
-	if (c == NULL)
-		return -ENOMEM;
-	c->building_fs = 1;
-	c->sb = sb;
-
-	err = jffs_scan_flash(c);
-	if (err == -EAGAIN) {
-		/* scan_flash() wants us to try once more.  A sector with
-		   flipping bits was detected in the middle of the scan.
-		   Throw away everything built so far before retrying. */
-		D1(printk("jffs_build_fs: Cleaning up all control structures,"
-			  " reallocating them and trying mount again.\n"));
-		jffs_cleanup_control(c);
-		c = jffs_create_control(sb);
-		if (c == NULL)
-			return -ENOMEM;
-		c->building_fs = 1;
-		c->sb = sb;
-
-		err = jffs_scan_flash(c);
-	}
-	if (err < 0)
-		goto jffs_build_fs_fail;
-
-	/* Add a virtual root node if no one exists. */
-	if (jffs_find_file(c, JFFS_MIN_INO) == NULL) {
-		err = jffs_add_virtual_root(c);
-		if (err < 0)
-			goto jffs_build_fs_fail;
-	}
-
-	/* Flag every file named on the delete list, consuming the list. */
-	while ((dl = c->delete_list) != NULL) {
-		victim = jffs_find_file(c, dl->ino);
-		if (victim != NULL)
-			victim->deleted = 1;
-		c->delete_list = dl->next;
-		kfree(dl);
-	}
-
-	/* Remove deleted nodes. */
-	err = jffs_foreach_file(c, jffs_possibly_delete_file);
-	if (err < 0) {
-		printk(KERN_ERR "JFFS: Failed to remove deleted nodes.\n");
-		goto jffs_build_fs_fail;
-	}
-
-	/* Remove redundant nodes.  (The return value is of no interest
-	   here.) */
-	jffs_foreach_file(c, jffs_remove_redundant_nodes);
-
-	/* Try to build a tree from all the nodes. */
-	err = jffs_foreach_file(c, jffs_insert_file_into_tree);
-	if (err < 0) {
-		printk("JFFS: Failed to build tree.\n");
-		goto jffs_build_fs_fail;
-	}
-
-	/* Compute the sizes of all files in the filesystem.  Adjust if
-	   necessary. */
-	err = jffs_foreach_file(c, jffs_build_file);
-	if (err < 0) {
-		printk("JFFS: Failed to build file system.\n");
-		goto jffs_build_fs_fail;
-	}
-
-	sb->s_fs_info = (void *)c;
-	c->building_fs = 0;
-
-	D1(jffs_print_hash_table(c));
-	D1(jffs_print_tree(c->root, 0));
-
-	return 0;
-
-jffs_build_fs_fail:
-	jffs_cleanup_control(c);
-	return err;
-} /* jffs_build_fs() */
-
-
-/*
- This checks for sectors that were being erased in their previous
- lifetimes and for some reason or the other (power fail etc.),
- the erase cycles never completed.
- As the flash array would have reverted back to read status,
- these sectors are detected by the symptom of the "flipping bits",
- i.e. bits being read back differently from the same location in
- flash if read multiple times.
- The only solution to this is to re-erase the entire
- sector.
- Unfortunately detecting "flipping bits" is not a simple exercise
- as a bit may be read back at 1 or 0 depending on the alignment
- of the stars in the universe.
- The level of confidence is in direct proportion to the number of
- scans done. By power fail testing I (Vipin) have been able to
- prove that reading twice is not enough.
- Maybe 4 times? Change NUM_REREADS to a higher number if you want
- a (even) higher degree of confidence in your mount process.
- A higher number would of course slow down your mount.
-
- How it works, as implemented below: the flash is walked from offset 0
- to fmc->flash_size in steps of READ_AHEAD_BYTES. Each chunk is read
- once into read_buf1 and then re-read NUM_REREADS times into read_buf2;
- after every re-read the two buffers are compared 32 bits at a time.
- On the first mismatch the sector containing the offending offset is
- erased and the walk resumes at the start of the following sector.
-
- Returns 0 when the walk completes, -ENOMEM if a read buffer cannot be
- allocated and -EIO if erasing a sector fails.
-*/
-static int check_partly_erased_sectors(struct jffs_fmcontrol *fmc){
-
-#define NUM_REREADS 4 /* see note above */
-#define READ_AHEAD_BYTES 4096 /* must be a multiple of 4,
- usually set to kernel page size */
-
- __u8 *read_buf1;
- __u8 *read_buf2;
-
- int err = 0;
- int retlen;
- int i;
- int cnt;
- __u32 offset;
- loff_t pos = 0;
- loff_t end = fmc->flash_size;
-
-
- /* Allocate read buffers */
- read_buf1 = kmalloc(sizeof(__u8) * READ_AHEAD_BYTES, GFP_KERNEL);
- if (!read_buf1)
- return -ENOMEM;
-
- read_buf2 = kmalloc(sizeof(__u8) * READ_AHEAD_BYTES, GFP_KERNEL);
- if (!read_buf2) {
- kfree(read_buf1);
- return -ENOMEM;
- }
-
- CHECK_NEXT:
- while(pos < end){
-
- D1(printk("check_partly_erased_sector():checking sector which contains"
- " offset 0x%x for flipping bits..\n", (__u32)pos));
-
- retlen = flash_safe_read(fmc->mtd, pos,
- &read_buf1[0], READ_AHEAD_BYTES);
- retlen &= ~3; /* round down to a whole number of 32-bit words */
-
- for(cnt = 0; cnt < NUM_REREADS; cnt++){
- (void)flash_safe_read(fmc->mtd, pos,
- &read_buf2[0], READ_AHEAD_BYTES); /* NOTE(review): the result of
- the re-read is discarded; the compare length below comes from
- the first read only -- confirm this is intended. */
-
- for (i=0 ; i < retlen ; i+=4) {
- /* buffers MUST match, double word for word! */
- if(*((__u32 *) &read_buf1[i]) !=
- *((__u32 *) &read_buf2[i])
- ){
- /* flipping bits detected, time to erase sector */
- /* This will help us log some statistics etc. */
- D1(printk("Flipping bits detected in re-read round:%i of %i\n",
- cnt, NUM_REREADS));
- D1(printk("check_partly_erased_sectors:flipping bits detected"
- " @offset:0x%x(0x%x!=0x%x)\n",
- (__u32)pos+i, *((__u32 *) &read_buf1[i]),
- *((__u32 *) &read_buf2[i])));
-
- /* calculate start of present sector */
- offset = (((__u32)pos+i)/(__u32)fmc->sector_size) * (__u32)fmc->sector_size;
-
- D1(printk("check_partly_erased_sector():erasing sector starting 0x%x.\n",
- offset));
-
- if (flash_erase_region(fmc->mtd,
- offset, fmc->sector_size) < 0) {
- printk(KERN_ERR "JFFS: Erase of flash failed. "
- "offset = %u, erase_size = %d\n",
- offset , fmc->sector_size);
-
- err = -EIO;
- goto returnBack;
-
- }else{
- D1(printk("JFFS: Erase of flash sector @0x%x successful.\n",
- offset));
- /* skip ahead to the next sector */
- pos = (((__u32)pos+i)/(__u32)fmc->sector_size) * (__u32)fmc->sector_size;
- pos += fmc->sector_size;
- goto CHECK_NEXT; /* re-enter the outer loop at the new pos,
- bypassing the pos += READ_AHEAD_BYTES step below */
- }
- }
- }
- }
- pos += READ_AHEAD_BYTES;
- }
-
- returnBack: /* common exit: both buffers are freed on every path */
- kfree(read_buf1);
- kfree(read_buf2);
-
- D2(printk("check_partly_erased_sector():Done checking all sectors till offset 0x%x for flipping bits.\n",
- (__u32)pos));
-
- return err;
-
-}/* end check_partly_erased_sectors() */
-
-
-
-/* Scan the whole flash memory in order to find all nodes in the
- file system.
-
- The flash is first checked for partly erased sectors and then walked
- from offset 0 to fmc->flash_size. A run of JFFS_EMPTY_BITMASK words is
- accepted as free space or marked dirty, a run of JFFS_DIRTY_BITMASK
- words is marked dirty, and JFFS_MAGIC_BITMASK starts a raw inode whose
- header, name and data checksums are verified before the node is handed
- to jffs_insert_node(). Anything else is marked dirty up to the next
- magic or empty word.
-
- Returns 0 on success, -EIO if the partly-erased-sector check fails,
- -ENOMEM on allocation failure, -EAGAIN after a sector showing flipping
- bits has been erased (the caller is expected to start over), and -1 if
- that erase fails or if the free size accounting does not add up. */
-static int
-jffs_scan_flash(struct jffs_control *c)
-{
- char name[JFFS_MAX_NAME_LEN + 2];
- struct jffs_raw_inode raw_inode;
- struct jffs_node *node = NULL;
- struct jffs_fmcontrol *fmc = c->fmc;
- __u32 checksum;
- __u8 tmp_accurate;
- __u16 tmp_chksum;
- __u32 deleted_file;
- loff_t pos = 0;
- loff_t start;
- loff_t test_start;
- loff_t end = fmc->flash_size;
- __u8 *read_buf;
- int i, len, retlen;
- __u32 offset;
-
- __u32 free_chunk_size1;
- __u32 free_chunk_size2;
-
-
-#define NUMFREEALLOWED 2 /* 2 chunks of at least erase size space allowed */
- int num_free_space = 0; /* Flag err if more than TWO
- free blocks found. This is NOT allowed
- by the current jffs design.
- */
- int num_free_spc_not_accp = 0; /* For debugging purposed keep count
- of how much free space was rejected and
- marked dirty
- */
-
- D1(printk("jffs_scan_flash(): start pos = 0x%lx, end = 0x%lx\n",
- (long)pos, (long)end));
-
- flash_safe_acquire(fmc->mtd);
-
- /*
- check and make sure that any sector does not suffer
- from the "partly erased, bit flipping syndrome" (TM Vipin :)
- If so, offending sectors will be erased.
- */
- if(check_partly_erased_sectors(fmc) < 0){
-
- flash_safe_release(fmc->mtd);
- return -EIO; /* bad, bad, bad error. Cannot continue.*/
- }
-
- /* Allocate read buffer */
- read_buf = kmalloc(sizeof(__u8) * 4096, GFP_KERNEL);
- if (!read_buf) {
- flash_safe_release(fmc->mtd);
- return -ENOMEM;
- }
-
- /* Start the scan. */
- while (pos < end) {
- deleted_file = 0;
-
- /* Remember the position from where we started this scan. */
- start = pos;
-
- switch (flash_read_u32(fmc->mtd, pos)) {
- case JFFS_EMPTY_BITMASK:
- /* We have found 0xffffffff at this position. We have to
- scan the rest of the flash till the end or till
- something else than 0xffffffff is found.
- Keep going till we do not find JFFS_EMPTY_BITMASK
- anymore */
-
- D1(printk("jffs_scan_flash(): 0xffffffff at pos 0x%lx.\n",
- (long)pos));
-
- while(pos < end){
-
- len = end - pos < 4096 ? end - pos : 4096;
-
- retlen = flash_safe_read(fmc->mtd, pos,
- &read_buf[0], len);
-
- /* Only whole 32-bit words are examined. */
- retlen &= ~3;
-
- for (i=0 ; i < retlen ; i+=4, pos += 4) {
- if(*((__u32 *) &read_buf[i]) !=
- JFFS_EMPTY_BITMASK)
- break;
- }
- if (i == retlen)
- continue;
- else
- break;
- }
-
- D1(printk("jffs_scan_flash():0xffffffff ended at pos 0x%lx.\n",
- (long)pos));
-
- /* If some free space ends in the middle of a sector,
- treat it as dirty rather than clean.
- This is to handle the case where one thread
- allocated space for a node, but didn't get to
- actually _write_ it before power was lost, leaving
- a gap in the log. Shifting all node writes into
- a single kernel thread will fix the original problem.
- */
- if ((__u32) pos % fmc->sector_size) {
- /* If there was free space in previous
- sectors, don't mark that dirty too -
- only from the beginning of this sector
- (or from start)
- */
-
- test_start = pos & ~(fmc->sector_size-1); /* end of last sector */
-
- if (start < test_start) {
-
- /* free space started in the previous sector! */
-
- if((num_free_space < NUMFREEALLOWED) &&
- ((unsigned int)(test_start - start) >= fmc->sector_size)){
-
- /*
- Count it in if we are still under NUMFREEALLOWED *and* it is
- at least 1 erase sector in length. This will keep us from
- picking any little ole' space as "free".
- */
-
- D1(printk("Reducing end of free space to 0x%x from 0x%x\n",
- (unsigned int)test_start, (unsigned int)pos));
-
- D1(printk("Free space accepted: Starting 0x%x for 0x%x bytes\n",
- (unsigned int) start,
- (unsigned int)(test_start - start)));
-
- /* below, space from "start" to "pos" will be marked dirty. */
- start = test_start;
-
- /* Being in here means that we have found at least an entire
- erase sector size of free space ending on a sector boundary.
- Keep track of free spaces accepted.
- */
- num_free_space++;
- }else{
- num_free_spc_not_accp++;
- D1(printk("Free space (#%i) found but *Not* accepted: Starting"
- " 0x%x for 0x%x bytes\n",
- num_free_spc_not_accp, (unsigned int)start,
- (unsigned int)((unsigned int)(pos & ~(fmc->sector_size-1)) - (unsigned int)start)));
-
- }
-
- }
- if((((__u32)(pos - start)) != 0)){
-
- D1(printk("Dirty space: Starting 0x%x for 0x%x bytes\n",
- (unsigned int) start, (unsigned int) (pos - start)));
- jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start), NULL);
- }else{
- /* "Flipping bits" detected. This means that our scan for them
- did not catch this offset. See check_partly_erased_sectors() for
- more info.
- */
-
- D1(printk("jffs_scan_flash():wants to allocate dirty flash "
- "space for 0 bytes.\n"));
- D1(printk("jffs_scan_flash(): Flipping bits! We will free "
- "all allocated memory, erase this sector and remount\n"));
-
- /* calculate start of present sector */
- offset = (((__u32)pos)/(__u32)fmc->sector_size) * (__u32)fmc->sector_size;
-
- D1(printk("jffs_scan_flash():erasing sector starting 0x%x.\n",
- offset));
-
- if (flash_erase_region(fmc->mtd,
- offset, fmc->sector_size) < 0) {
- printk(KERN_ERR "JFFS: Erase of flash failed. "
- "offset = %u, erase_size = %d\n",
- offset , fmc->sector_size);
-
- flash_safe_release(fmc->mtd);
- kfree(read_buf);
- return -1; /* bad, bad, bad! */
-
- }
- flash_safe_release(fmc->mtd);
- kfree(read_buf);
-
- return -EAGAIN; /* erased offending sector. Try mount one more time please. */
- }
- }else{
- /* Being in here means that we have found free space that ends on an erase sector
- boundary.
- Count it in if we are still under NUMFREEALLOWED *and* it is at least 1 erase
- sector in length. This will keep us from picking any little ole' space as "free".
- */
- if((num_free_space < NUMFREEALLOWED) &&
- ((unsigned int)(pos - start) >= fmc->sector_size)){
- /* We really don't do anything to mark space as free, except *not*
- mark it dirty and just advance the "pos" location pointer.
- It will automatically be picked up as free space.
- */
- num_free_space++;
- D1(printk("Free space accepted: Starting 0x%x for 0x%x bytes\n",
- (unsigned int) start, (unsigned int) (pos - start)));
- }else{
- num_free_spc_not_accp++;
- D1(printk("Free space (#%i) found but *Not* accepted: Starting "
- "0x%x for 0x%x bytes\n", num_free_spc_not_accp,
- (unsigned int) start,
- (unsigned int) (pos - start)));
-
- /* Mark this space as dirty. We already have our free space. */
- D1(printk("Dirty space: Starting 0x%x for 0x%x bytes\n",
- (unsigned int) start, (unsigned int) (pos - start)));
- jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start), NULL);
- }
-
- }
- if(num_free_space > NUMFREEALLOWED){
- printk(KERN_WARNING "jffs_scan_flash(): Found free space "
- "number %i. Only %i free space is allowed.\n",
- num_free_space, NUMFREEALLOWED);
- }
- continue;
-
- case JFFS_DIRTY_BITMASK:
- /* We have found 0x00000000 at this position. Scan as far
- as possible to find out how much is dirty. */
- D1(printk("jffs_scan_flash(): 0x00000000 at pos 0x%lx.\n",
- (long)pos));
- for (; pos < end
- && JFFS_DIRTY_BITMASK == flash_read_u32(fmc->mtd, pos);
- pos += 4);
- D1(printk("jffs_scan_flash(): 0x00 ended at "
- "pos 0x%lx.\n", (long)pos));
- jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start), NULL);
- continue;
-
- case JFFS_MAGIC_BITMASK:
- /* We have probably found a new raw inode. */
- break;
-
- default:
- bad_inode:
- /* We're f*cked. This is not solved yet. We have
- to scan for the magic pattern. */
- D1(printk("*************** Dirty flash memory or "
- "bad inode: "
- "hexdump(pos = 0x%lx, len = 128):\n",
- (long)pos));
- D1(jffs_hexdump(fmc->mtd, pos, 128));
-
- for (pos += 4; pos < end; pos += 4) {
- switch (flash_read_u32(fmc->mtd, pos)) {
- case JFFS_MAGIC_BITMASK:
- case JFFS_EMPTY_BITMASK:
- /* handle these in the main switch() loop */
- goto cont_scan;
-
- default:
- break;
- }
- }
-
- cont_scan:
- /* First, mark as dirty the region
- which really does contain crap. */
- jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start),
- NULL);
-
- continue;
- }/* switch */
-
- /* We have found the beginning of an inode. Create a
- node for it unless there already is one available. */
- if (!node) {
- if (!(node = jffs_alloc_node())) {
- /* Free read buffer */
- kfree(read_buf);
-
- /* Release the flash device */
- flash_safe_release(fmc->mtd);
-
- return -ENOMEM;
- }
- DJM(no_jffs_node++);
- }
-
- /* Read the next raw inode. */
-
- flash_safe_read(fmc->mtd, pos, (u_char *) &raw_inode,
- sizeof(struct jffs_raw_inode));
-
- /* When we compute the checksum for the inode, we never
- count the 'accurate' or the 'checksum' fields. */
- tmp_accurate = raw_inode.accurate;
- tmp_chksum = raw_inode.chksum;
- raw_inode.accurate = 0;
- raw_inode.chksum = 0;
- checksum = jffs_checksum(&raw_inode,
- sizeof(struct jffs_raw_inode));
- raw_inode.accurate = tmp_accurate;
- raw_inode.chksum = tmp_chksum;
-
- D3(printk("*** We have found this raw inode at pos 0x%lx "
- "on the flash:\n", (long)pos));
- D3(jffs_print_raw_inode(&raw_inode));
-
- if (checksum != raw_inode.chksum) {
- D1(printk("jffs_scan_flash(): Bad checksum: "
- "checksum = %u, "
- "raw_inode.chksum = %u\n",
- checksum, raw_inode.chksum));
- pos += sizeof(struct jffs_raw_inode);
- jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start), NULL);
- /* Reuse this unused struct jffs_node. */
- continue;
- }
-
- /* Check the raw inode read so far. Start with the
- maximum length of the filename. */
- if (raw_inode.nsize > JFFS_MAX_NAME_LEN) {
- printk(KERN_WARNING "jffs_scan_flash: Found a "
- "JFFS node with name too large\n");
- goto bad_inode;
- }
-
- if (raw_inode.rename && raw_inode.dsize != sizeof(__u32)) {
- printk(KERN_WARNING "jffs_scan_flash: Found a "
- "rename node with dsize %u.\n",
- raw_inode.dsize);
- jffs_print_raw_inode(&raw_inode);
- goto bad_inode;
- }
-
- /* The node's data segment should not exceed a
- certain length. */
- if (raw_inode.dsize > fmc->max_chunk_size) {
- printk(KERN_WARNING "jffs_scan_flash: Found a "
- "JFFS node with dsize (0x%x) > max_chunk_size (0x%x)\n",
- raw_inode.dsize, fmc->max_chunk_size);
- goto bad_inode;
- }
-
- pos += sizeof(struct jffs_raw_inode);
-
- /* This shouldn't be necessary because a node that
- violates the flash boundaries shouldn't be written
- in the first place. */
- if (pos >= end) {
- goto check_node;
- }
-
- /* Read the name. */
- *name = 0;
- if (raw_inode.nsize) {
- flash_safe_read(fmc->mtd, pos, name, raw_inode.nsize);
- name[raw_inode.nsize] = '\0';
- pos += raw_inode.nsize
- + JFFS_GET_PAD_BYTES(raw_inode.nsize);
- D3(printk("name == \"%s\"\n", name));
- checksum = jffs_checksum(name, raw_inode.nsize);
- if (checksum != raw_inode.nchksum) {
- D1(printk("jffs_scan_flash(): Bad checksum: "
- "checksum = %u, "
- "raw_inode.nchksum = %u\n",
- checksum, raw_inode.nchksum));
- jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start), NULL);
- /* Reuse this unused struct jffs_node. */
- continue;
- }
- if (pos >= end) {
- goto check_node;
- }
- }
-
- /* Read the data, if it exists, in order to be sure it
- matches the checksum. */
- if (raw_inode.dsize) {
- if (raw_inode.rename) {
- /* For a rename node the first data word is recorded as the
- inode number that goes onto the delete list below. */
- deleted_file = flash_read_u32(fmc->mtd, pos);
- }
- if (jffs_checksum_flash(fmc->mtd, pos, raw_inode.dsize, &checksum)) {
- printk("jffs_checksum_flash() failed to calculate a checksum\n");
- jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start), NULL);
- /* Reuse this unused struct jffs_node. */
- continue;
- }
- pos += raw_inode.dsize
- + JFFS_GET_PAD_BYTES(raw_inode.dsize);
-
- if (checksum != raw_inode.dchksum) {
- D1(printk("jffs_scan_flash(): Bad checksum: "
- "checksum = %u, "
- "raw_inode.dchksum = %u\n",
- checksum, raw_inode.dchksum));
- jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start), NULL);
- /* Reuse this unused struct jffs_node. */
- continue;
- }
- }
-
- check_node:
-
- /* Remember the highest inode number in the whole file
- system. This information will be used when assigning
- new files new inode numbers. */
- if (c->next_ino <= raw_inode.ino) {
- c->next_ino = raw_inode.ino + 1;
- }
-
- if (raw_inode.accurate) {
- int err;
- node->data_offset = raw_inode.offset;
- node->data_size = raw_inode.dsize;
- node->removed_size = raw_inode.rsize;
- /* Compute the offset to the actual data in the
- on-flash node. */
- node->fm_offset
- = sizeof(struct jffs_raw_inode)
- + raw_inode.nsize
- + JFFS_GET_PAD_BYTES(raw_inode.nsize);
- node->fm = jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start),
- node);
- if (!node->fm) {
- D(printk("jffs_scan_flash(): !node->fm\n"));
- jffs_free_node(node);
- DJM(no_jffs_node--);
-
- /* Free read buffer */
- kfree(read_buf);
-
- /* Release the flash device */
- flash_safe_release(fmc->mtd);
-
- return -ENOMEM;
- }
- if ((err = jffs_insert_node(c, NULL, &raw_inode,
- name, node)) < 0) {
- printk("JFFS: Failed to handle raw inode. "
- "(err = %d)\n", err);
- /* NOTE(review): this leaves the scan loop, after which `node'
- is freed and `err' is dropped, although `node' was already
- passed to jffs_fmalloced() above -- confirm this is safe. */
- break;
- }
- if (raw_inode.rename) {
- struct jffs_delete_list *dl
- = (struct jffs_delete_list *)
- kmalloc(sizeof(struct jffs_delete_list),
- GFP_KERNEL);
- if (!dl) {
- D(printk("jffs_scan_flash: !dl\n"));
- /* NOTE(review): `node' has already been handed to
- jffs_insert_node() at this point -- confirm that freeing
- it here cannot leave a stale pointer behind. */
- jffs_free_node(node);
- DJM(no_jffs_node--);
-
- /* Release the flash device. Every other exit path of this
- function releases fmc->mtd, so do the same here. */
- flash_safe_release(fmc->mtd);
-
- /* Free read buffer */
- kfree(read_buf);
-
- return -ENOMEM;
- }
- dl->ino = deleted_file;
- dl->next = c->delete_list;
- c->delete_list = dl;
- node->data_size = 0;
- }
- D3(jffs_print_node(node));
- node = NULL; /* Don't free the node! */
- }
- else {
- jffs_fmalloced(fmc, (__u32) start,
- (__u32) (pos - start), NULL);
- D3(printk("jffs_scan_flash(): Just found an obsolete "
- "raw_inode. Continuing the scan...\n"));
- /* Reuse this unused struct jffs_node. */
- }
- }
-
- /* A node left over here was allocated but never handed on. */
- if (node) {
- jffs_free_node(node);
- DJM(no_jffs_node--);
- }
- jffs_build_end(fmc);
-
- /* Free read buffer */
- kfree(read_buf);
-
- if(!num_free_space){
- printk(KERN_WARNING "jffs_scan_flash(): Did not find even a single "
- "chunk of free space. This is BAD!\n");
- }
-
- /* Return happy */
- D3(printk("jffs_scan_flash(): Leaving...\n"));
- flash_safe_release(fmc->mtd);
-
- /* This is to trap the "free size accounting screwed" error. */
- free_chunk_size1 = jffs_free_size1(fmc);
- free_chunk_size2 = jffs_free_size2(fmc);
-
- if (free_chunk_size1 + free_chunk_size2 != fmc->free_size) {
-
- printk(KERN_WARNING "jffs_scan_flash():Free size accounting screwed\n");
- printk(KERN_WARNING "jffs_scan_flash():free_chunk_size1 == 0x%x, "
- "free_chunk_size2 == 0x%x, fmc->free_size == 0x%x\n",
- free_chunk_size1, free_chunk_size2, fmc->free_size);
-
- return -1; /* Do NOT mount f/s so that we can inspect what happened.
- Mounting this screwed up f/s will screw us up anyway.
- */
- }
-
- return 0; /* as far as we are concerned, we are happy! */
-} /* jffs_scan_flash() */
-
-
-/* Insert any kind of node into the file system. Take care of data
- insertions and deletions. Also remove redundant information.
-
- c: control block of the file system.
- f: file the node belongs to, or NULL to have it looked up by
- raw_inode->ino (and created and hashed if it does not exist yet).
- raw_inode: header describing the node.
- name: file name carried by the node; may be NULL or empty.
- node: the in-RAM node to link into the file's version list.
-
- Returns 0 on success or -ENOMEM if the file or the name buffer could
- not be allocated.
-
- The memory allocated for the `name' is regarded as "given away" in the
- caller's perspective.
- NOTE(review): the code below copies `name' into a freshly kmalloc'ed
- buffer and never stores the caller's pointer -- confirm which of the
- two is the intended contract. */
-int
-jffs_insert_node(struct jffs_control *c, struct jffs_file *f,
- const struct jffs_raw_inode *raw_inode,
- const char *name, struct jffs_node *node)
-{
- int update_name = 0;
- int insert_into_tree = 0;
-
- D2(printk("jffs_insert_node(): ino = %u, version = %u, "
- "name = \"%s\", deleted = %d\n",
- raw_inode->ino, raw_inode->version,
- ((name && *name) ? name : ""), raw_inode->deleted));
-
- /* If there doesn't exist an associated jffs_file, then
- create, initialize and insert one into the file system. */
- if (!f && !(f = jffs_find_file(c, raw_inode->ino))) {
- if (!(f = jffs_create_file(c, raw_inode))) {
- return -ENOMEM;
- }
- jffs_insert_file_into_hash(f);
- insert_into_tree = 1; /* only acted upon when !c->building_fs, below */
- }
- node->ino = raw_inode->ino;
- node->version = raw_inode->version;
- node->data_size = raw_inode->dsize;
- node->fm_offset = sizeof(struct jffs_raw_inode) + raw_inode->nsize
- + JFFS_GET_PAD_BYTES(raw_inode->nsize);
- node->name_size = raw_inode->nsize;
-
- /* Now insert the node at the correct position into the file's
- version list. The list runs from version_head (lowest version)
- to version_tail (highest version). */
- if (!f->version_head) {
- /* This is the first node. */
- f->version_head = node;
- f->version_tail = node;
- node->version_prev = NULL;
- node->version_next = NULL;
- f->highest_version = node->version;
- update_name = 1;
- f->mode = raw_inode->mode;
- f->uid = raw_inode->uid;
- f->gid = raw_inode->gid;
- f->atime = raw_inode->atime;
- f->mtime = raw_inode->mtime;
- f->ctime = raw_inode->ctime;
- }
- else if ((f->highest_version < node->version)
- || (node->version == 0)) {
- /* Insert at the end of the list. I.e. this node is the
- newest one so far. Unlike the first-node case this also
- refreshes f->pino. */
- node->version_prev = f->version_tail;
- node->version_next = NULL;
- f->version_tail->version_next = node;
- f->version_tail = node;
- f->highest_version = node->version;
- update_name = 1;
- f->pino = raw_inode->pino;
- f->mode = raw_inode->mode;
- f->uid = raw_inode->uid;
- f->gid = raw_inode->gid;
- f->atime = raw_inode->atime;
- f->mtime = raw_inode->mtime;
- f->ctime = raw_inode->ctime;
- }
- else if (f->version_head->version > node->version) {
- /* Insert at the bottom of the list. */
- node->version_prev = NULL;
- node->version_next = f->version_head;
- f->version_head->version_prev = node;
- f->version_head = node;
- if (!f->name) {
- update_name = 1;
- }
- }
- else {
- struct jffs_node *n;
- int newer_name = 0;
- /* Search for the insertion position starting from
- the tail (newest node).
- NOTE(review): if no node with a strictly lower version is
- found, `node' is not linked at all, and n->version_next is
- dereferenced without a NULL check -- confirm neither can
- happen in practice. */
- for (n = f->version_tail; n; n = n->version_prev) {
- if (n->version < node->version) {
- node->version_prev = n;
- node->version_next = n->version_next;
- node->version_next->version_prev = node;
- n->version_next = node;
- if (!newer_name) {
- update_name = 1;
- }
- break;
- }
- if (n->name_size) {
- newer_name = 1; /* a newer node already carries a name */
- }
- }
- }
-
- /* Deletion is irreversible. If any 'deleted' node is ever
- written, the file is deleted */
- if (raw_inode->deleted)
- f->deleted = raw_inode->deleted;
-
- /* Perhaps update the name. */
- if (raw_inode->nsize && update_name && name && *name && (name != f->name)) {
- if (f->name) {
- kfree(f->name);
- DJM(no_name--);
- }
- if (!(f->name = kmalloc(raw_inode->nsize + 1,
- GFP_KERNEL))) {
- return -ENOMEM;
- }
- DJM(no_name++);
- memcpy(f->name, name, raw_inode->nsize);
- f->name[raw_inode->nsize] = '\0';
- f->nsize = raw_inode->nsize;
- D3(printk("jffs_insert_node(): Updated the name of "
- "the file to \"%s\".\n", name));
- }
-
- if (!c->building_fs) {
- D3(printk("jffs_insert_node(): ---------------------------"
- "------------------------------------------- 1\n"));
- if (insert_into_tree) {
- jffs_insert_file_into_tree(f);
- }
- /* Once upon a time, we would call jffs_possibly_delete_file()
- here. That causes an oops if someone's still got the file
- open, so now we only do it in jffs_delete_inode()
- -- dwmw2
- */
- if (node->data_size || node->removed_size) {
- jffs_update_file(f, node);
- }
- jffs_remove_redundant_nodes(f);
-
- jffs_garbage_collect_trigger(c);
-
- D3(printk("jffs_insert_node(): ---------------------------"
- "------------------------------------------- 2\n"));
- }
-
- return 0;
-} /* jffs_insert_node() */
-
-
-/* Take `node' out of the doubly linked version list of `f', fixing up
-   the head and tail pointers of the file when the node sat at an end.
-   The node's own link fields are left untouched. */
-static inline void
-jffs_unlink_node_from_version_list(struct jffs_file *f,
-				   struct jffs_node *node)
-{
-	struct jffs_node *before = node->version_prev;
-	struct jffs_node *after = node->version_next;
-
-	if (before)
-		before->version_next = after;
-	else
-		f->version_head = after;
-
-	if (after)
-		after->version_prev = before;
-	else
-		f->version_tail = before;
-}
-
-
-/* Take `node' out of the doubly linked range list of `f', fixing up the
-   head and tail pointers of the file when the node sat at an end.  The
-   node's own link fields are left untouched. */
-static inline void
-jffs_unlink_node_from_range_list(struct jffs_file *f, struct jffs_node *node)
-{
-	struct jffs_node *before = node->range_prev;
-	struct jffs_node *after = node->range_next;
-
-	if (before)
-		before->range_next = after;
-	else
-		f->range_head = after;
-
-	if (after)
-		after->range_prev = before;
-	else
-		f->range_tail = before;
-}
-
-
-/* Helper for jffs_remove_redundant_nodes(): report, as a bit mask, what
-   kind of information `node' contributes to its file.  JFFS_MODIFY_INODE
-   is always set; JFFS_MODIFY_NAME is added when the node has a name and
-   JFFS_MODIFY_DATA when it adds or removes data. */
-static inline __u8
-jffs_classify_node(struct jffs_node *node)
-{
-	__u8 kind = JFFS_MODIFY_INODE;
-
-	if (node->name_size)
-		kind |= JFFS_MODIFY_NAME;
-
-	if (node->data_size || node->removed_size)
-		kind |= JFFS_MODIFY_DATA;
-
-	return kind;
-}
-
-
-/* Remove redundant nodes from a file and mark their on-flash memory as
-   dirty.  The version list is walked from the node just before the tail
-   towards the head; the tail itself is never removed.  Always returns 0. */
-static int
-jffs_remove_redundant_nodes(struct jffs_file *f)
-{
-	struct jffs_node *tail = f->version_tail;
-	struct jffs_node *cur;
-	struct jffs_node *before;
-	__u8 tail_type;
-	__u8 cur_type;
-	__u8 name_seen_later = 0;
-	int redundant;
-
-	if (tail == NULL)
-		return 0;
-
-	/* What does the tail node modify? */
-	tail_type = jffs_classify_node(tail);
-	name_seen_later = tail_type & JFFS_MODIFY_NAME;
-
-	D3(printk("jffs_remove_redundant_nodes(): ino: %u, name: \"%s\", "
-		  "newest_type: %u\n", f->ino, (f->name ? f->name : ""),
-		  tail_type));
-
-	cur = tail->version_prev;
-	while (cur != NULL) {
-		/* Fetch the predecessor first; `cur' may be freed below. */
-		before = cur->version_prev;
-		cur_type = jffs_classify_node(cur);
-
-		if (cur_type <= JFFS_MODIFY_INODE) {
-			/* Adds nothing beyond inode information. */
-			redundant = 1;
-		} else if ((tail_type & JFFS_MODIFY_NAME)
-			   && (cur_type
-			       <= (JFFS_MODIFY_INODE + JFFS_MODIFY_NAME))) {
-			/* Carries no data and the tail already names the file. */
-			redundant = 1;
-		} else {
-			/* First node of the list that only removes data, with a
-			   name provided by some later node. */
-			redundant = (cur->data_size == 0 && cur->removed_size
-				     && !cur->version_prev && name_seen_later);
-		}
-
-		if (redundant) {
-			D2(printk("jffs_remove_redundant_nodes(): "
-				  "Removing node: ino: %u, version: %u, "
-				  "mod_type: %u\n", cur->ino, cur->version,
-				  cur_type));
-			jffs_unlink_node_from_version_list(f, cur);
-			jffs_fmfree(f->c->fmc, cur->fm, cur);
-			jffs_free_node(cur);
-			DJM(no_jffs_node--);
-		} else {
-			name_seen_later |= (cur_type & JFFS_MODIFY_NAME);
-		}
-
-		cur = before;
-	}
-
-	return 0;
-}
-
-
-/* Add `f' to the hash table of its control block.  The bucket is chosen
-   by the inode number modulo the table length.  Always returns 0. */
-static int
-jffs_insert_file_into_hash(struct jffs_file *f)
-{
-	struct jffs_control *ctl = f->c;
-	int bucket = f->ino % ctl->hash_len;
-
-	D3(printk("jffs_insert_file_into_hash(): f->ino: %u\n", f->ino));
-
-	list_add(&f->hash, &ctl->hash[bucket]);
-	return 0;
-}
-
-
-/* Hook `f' into the directory tree.  When a file with inode number
-   f->pino exists, `f' is pushed onto the front of that file's children
-   list.  Otherwise `f' becomes the root if its pino is 0.  Returns 0 on
-   success and -1 when the parent is missing and pino is not 0. */
-int
-jffs_insert_file_into_tree(struct jffs_file *f)
-{
-	struct jffs_file *parent;
-
-	D3(printk("jffs_insert_file_into_tree(): name: \"%s\"\n",
-		  (f->name ? f->name : "")));
-
-	parent = jffs_find_file(f->c, f->pino);
-	if (parent != NULL) {
-		/* Link in at the head of the parent's children. */
-		f->parent = parent;
-		f->sibling_next = parent->children;
-		if (f->sibling_next)
-			f->sibling_next->sibling_prev = f;
-		f->sibling_prev = NULL;
-		parent->children = f;
-		return 0;
-	}
-
-	if (f->pino != 0) {
-		D1(printk("jffs_insert_file_into_tree(): Found "
-			  "inode with no parent and pino == %u\n",
-			  f->pino));
-		return -1;
-	}
-
-	/* No parent and pino == 0: this file is the root. */
-	f->c->root = f;
-	f->parent = NULL;
-	f->sibling_prev = NULL;
-	f->sibling_next = NULL;
-	return 0;
-}
-
-
-/* Drop `f' from whichever hash bucket list it is on.  Nothing is freed.
-   Always returns 0. */
-static int
-jffs_unlink_file_from_hash(struct jffs_file *f)
-{
-	struct list_head *link = &f->hash;
-
-	D3(printk("jffs_unlink_file_from_hash(): f: 0x%p, "
-		  "ino %u\n", f, f->ino));
-
-	list_del(link);
-	return 0;
-}
-
-
-/* Detach `f' from its siblings and, when it was the first child, from
-   its parent's children pointer.  No memory is freed and the link fields
-   of `f' itself are left as they were.  Always returns 0. */
-int
-jffs_unlink_file_from_tree(struct jffs_file *f)
-{
-	struct jffs_file *before = f->sibling_prev;
-	struct jffs_file *after = f->sibling_next;
-
-	D3(printk("jffs_unlink_file_from_tree(): ino: %d, pino: %d, name: "
-		  "\"%s\"\n", f->ino, f->pino, (f->name ? f->name : "")));
-
-	if (before) {
-		before->sibling_next = after;
-	} else if (f->parent) {
-		/* `f' was the first child; the parent now starts at `after'. */
-		D3(printk("f->parent=%p\n", f->parent));
-		f->parent->children = after;
-	}
-
-	if (after)
-		after->sibling_prev = before;
-
-	return 0;
-}
-
-
-/* Look up the file with inode number `ino' in the hash table of `c'.
-   Returns the file, or NULL when the matching bucket has no such entry. */
-struct jffs_file *
-jffs_find_file(struct jffs_control *c, __u32 ino)
-{
-	struct jffs_file *f;
-	int bucket = ino % c->hash_len;
-
-	D3(printk("jffs_find_file(): ino: %u\n", ino));
-
-	list_for_each_entry(f, &c->hash[bucket], hash) {
-		if (f->ino == ino) {
-			D3(printk("jffs_find_file(): Found file with ino "
-				  "%u. (name: \"%s\")\n",
-				  ino, (f->name ? f->name : "")));
-			return f;
-		}
-	}
-
-	D3(printk("jffs_find_file(): Didn't find file "
-		  "with ino %u.\n", ino));
-	return NULL;
-}
-
-
-/* Find a file in a directory. We are comparing the names. */
-struct jffs_file *
-jffs_find_child(struct jffs_file *dir, const char *name, int len)
-{
- struct jffs_file *f;
-
- D3(printk("jffs_find_child()\n"));
-
- for (f = dir->children; f; f = f->sibling_next) {
- if (!f->deleted && f->name
- && !strncmp(f->name, name, len)
- && f->name[len] == '\0') {
- break;
- }
- }
-
- D3(if (f) {
- printk("jffs_find_child(): Found \"%s\".\n", f->name);
- }
- else {
- char *copy = kmalloc(len + 1, GFP_KERNEL);
- if (copy) {
- memcpy(copy, name, len);
- copy[len] = '\0';
- }
- printk("jffs_find_child(): Didn't find the file \"%s\".\n",
- (copy ? copy : ""));
- kfree(copy);
- });
-
- return f;
-}
-
-
-/* Write a raw inode that takes up a certain amount of space in the flash
- memory. At the end of the flash device, there is often space that is
- impossible to use. At these times we want to mark this space as not
- used. In the cases when the amount of space is greater or equal than
- a struct jffs_raw_inode, we write a "dummy node" that takes up this
- space. The space after the raw inode, if it exists, is left as it is.
- Since this space after the raw inode contains JFFS_EMPTY_BITMASK bytes,
- we can compute the checksum of it; we don't have to manipulate it any
- further.
-
- If the space left on the device is less than the size of a struct
- jffs_raw_inode, this space is filled with JFFS_DIRTY_BITMASK bytes.
- No raw inode is written this time. */
-static int
-jffs_write_dummy_node(struct jffs_control *c, struct jffs_fm *dirty_fm)
-{
- struct jffs_fmcontrol *fmc = c->fmc;
- int err;
-
- D1(printk("jffs_write_dummy_node(): dirty_fm->offset = 0x%08x, "
- "dirty_fm->size = %u\n",
- dirty_fm->offset, dirty_fm->size));
-
- if (dirty_fm->size >= sizeof(struct jffs_raw_inode)) {
- struct jffs_raw_inode raw_inode;
- memset(&raw_inode, 0, sizeof(struct jffs_raw_inode));
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.dsize = dirty_fm->size
- - sizeof(struct jffs_raw_inode);
- raw_inode.dchksum = raw_inode.dsize * 0xff;
- raw_inode.chksum
- = jffs_checksum(&raw_inode, sizeof(struct jffs_raw_inode));
-
- if ((err = flash_safe_write(fmc->mtd,
- dirty_fm->offset,
- (u_char *)&raw_inode,
- sizeof(struct jffs_raw_inode)))
- < 0) {
- printk(KERN_ERR "JFFS: jffs_write_dummy_node: "
- "flash_safe_write failed!\n");
- return err;
- }
- }
- else {
- flash_safe_acquire(fmc->mtd);
- flash_memset(fmc->mtd, dirty_fm->offset, 0, dirty_fm->size);
- flash_safe_release(fmc->mtd);
- }
-
- D3(printk("jffs_write_dummy_node(): Leaving...\n"));
- return 0;
-}
-
-
-/* Write a raw inode, possibly its name and possibly some data. */
-int
-jffs_write_node(struct jffs_control *c, struct jffs_node *node,
- struct jffs_raw_inode *raw_inode,
- const char *name, const unsigned char *data,
- int recoverable,
- struct jffs_file *f)
-{
- struct jffs_fmcontrol *fmc = c->fmc;
- struct jffs_fm *fm;
- struct kvec node_iovec[4];
- unsigned long iovec_cnt;
-
- __u32 pos;
- int err;
- __u32 slack = 0;
-
- __u32 total_name_size = raw_inode->nsize
- + JFFS_GET_PAD_BYTES(raw_inode->nsize);
- __u32 total_data_size = raw_inode->dsize
- + JFFS_GET_PAD_BYTES(raw_inode->dsize);
- __u32 total_size = sizeof(struct jffs_raw_inode)
- + total_name_size + total_data_size;
-
- /* If this node isn't something that will eventually let
- GC free even more space, then don't allow it unless
- there's at least max_chunk_size space still available
- */
- if (!recoverable)
- slack = fmc->max_chunk_size;
-
-
- /* Fire the retrorockets and shoot the fruiton torpedoes, sir! */
-
- ASSERT(if (!node) {
- printk("jffs_write_node(): node == NULL\n");
- return -EINVAL;
- });
- ASSERT(if (raw_inode && raw_inode->nsize && !name) {
- printk("*** jffs_write_node(): nsize = %u but name == NULL\n",
- raw_inode->nsize);
- return -EINVAL;
- });
-
- D1(printk("jffs_write_node(): filename = \"%s\", ino = %u, "
- "total_size = %u\n",
- (name ? name : ""), raw_inode->ino,
- total_size));
-
- jffs_fm_write_lock(fmc);
-
-retry:
- fm = NULL;
- err = 0;
- while (!fm) {
-
- /* Deadlocks suck. */
- while(fmc->free_size < fmc->min_free_size + total_size + slack) {
- jffs_fm_write_unlock(fmc);
- if (!JFFS_ENOUGH_SPACE(c, total_size + slack))
- return -ENOSPC;
- jffs_fm_write_lock(fmc);
- }
-
- /* First try to allocate some flash memory. */
- err = jffs_fmalloc(fmc, total_size, node, &fm);
-
- if (err == -ENOSPC) {
- /* Just out of space. GC and try again */
- if (fmc->dirty_size < fmc->sector_size) {
- D(printk("jffs_write_node(): jffs_fmalloc(0x%p, %u) "
- "failed, no dirty space to GC\n", fmc,
- total_size));
- return err;
- }
-
- D1(printk(KERN_INFO "jffs_write_node(): Calling jffs_garbage_collect_now()\n"));
- jffs_fm_write_unlock(fmc);
- if ((err = jffs_garbage_collect_now(c))) {
- D(printk("jffs_write_node(): jffs_garbage_collect_now() failed\n"));
- return err;
- }
- jffs_fm_write_lock(fmc);
- continue;
- }
-
- if (err < 0) {
- jffs_fm_write_unlock(fmc);
-
- D(printk("jffs_write_node(): jffs_fmalloc(0x%p, %u) "
- "failed!\n", fmc, total_size));
- return err;
- }
-
- if (!fm->nodes) {
- /* The jffs_fm struct that we got is not good enough.
- Make that space dirty and try again */
- if ((err = jffs_write_dummy_node(c, fm)) < 0) {
- kfree(fm);
- DJM(no_jffs_fm--);
- jffs_fm_write_unlock(fmc);
- D(printk("jffs_write_node(): "
- "jffs_write_dummy_node(): Failed!\n"));
- return err;
- }
- fm = NULL;
- }
- } /* while(!fm) */
- node->fm = fm;
-
- ASSERT(if (fm->nodes == 0) {
- printk(KERN_ERR "jffs_write_node(): fm->nodes == 0\n");
- });
-
- pos = node->fm->offset;
-
- /* Increment the version number here. We can't let the caller
- set it beforehand, because we might have had to do GC on a node
- of this file - and we'd end up reusing version numbers.
- */
- if (f) {
- raw_inode->version = f->highest_version + 1;
- D1(printk (KERN_NOTICE "jffs_write_node(): setting version of %s to %d\n", f->name, raw_inode->version));
-
- /* if the file was deleted, set the deleted bit in the raw inode */
- if (f->deleted)
- raw_inode->deleted = 1;
- }
-
- /* Compute the checksum for the data and name chunks. */
- raw_inode->dchksum = jffs_checksum(data, raw_inode->dsize);
- raw_inode->nchksum = jffs_checksum(name, raw_inode->nsize);
-
- /* The checksum is calculated without the chksum and accurate
- fields so set them to zero first. */
- raw_inode->accurate = 0;
- raw_inode->chksum = 0;
- raw_inode->chksum = jffs_checksum(raw_inode,
- sizeof(struct jffs_raw_inode));
- raw_inode->accurate = 0xff;
-
- D3(printk("jffs_write_node(): About to write this raw inode to the "
- "flash at pos 0x%lx:\n", (long)pos));
- D3(jffs_print_raw_inode(raw_inode));
-
- /* The actual raw JFFS node */
- node_iovec[0].iov_base = (void *) raw_inode;
- node_iovec[0].iov_len = (size_t) sizeof(struct jffs_raw_inode);
- iovec_cnt = 1;
-
- /* Get name and size if there is one */
- if (raw_inode->nsize) {
- node_iovec[iovec_cnt].iov_base = (void *) name;
- node_iovec[iovec_cnt].iov_len = (size_t) raw_inode->nsize;
- iovec_cnt++;
-
- if (JFFS_GET_PAD_BYTES(raw_inode->nsize)) {
- static unsigned char allff[3]={255,255,255};
- /* Add some extra padding if necessary */
- node_iovec[iovec_cnt].iov_base = allff;
- node_iovec[iovec_cnt].iov_len =
- JFFS_GET_PAD_BYTES(raw_inode->nsize);
- iovec_cnt++;
- }
- }
-
- /* Get data and size if there is any */
- if (raw_inode->dsize) {
- node_iovec[iovec_cnt].iov_base = (void *) data;
- node_iovec[iovec_cnt].iov_len = (size_t) raw_inode->dsize;
- iovec_cnt++;
- /* No need to pad this because we're not actually putting
- anything after it.
- */
- }
-
- if ((err = flash_safe_writev(fmc->mtd, node_iovec, iovec_cnt,
- pos)) < 0) {
- jffs_fmfree_partly(fmc, fm, 0);
- jffs_fm_write_unlock(fmc);
- printk(KERN_ERR "JFFS: jffs_write_node: Failed to write, "
- "requested %i, wrote %i\n", total_size, err);
- goto retry;
- }
- if (raw_inode->deleted)
- f->deleted = 1;
-
- jffs_fm_write_unlock(fmc);
- D3(printk("jffs_write_node(): Leaving...\n"));
- return raw_inode->dsize;
-} /* jffs_write_node() */
-
-
-/* Read data from the node and write it to the buffer. 'node_offset'
- is how much we have read from this particular node before and which
- shouldn't be read again. 'max_size' is how much space there is in
- the buffer. */
-static int
-jffs_get_node_data(struct jffs_file *f, struct jffs_node *node,
- unsigned char *buf,__u32 node_offset, __u32 max_size)
-{
- struct jffs_fmcontrol *fmc = f->c->fmc;
- __u32 pos = node->fm->offset + node->fm_offset + node_offset;
- __u32 avail = node->data_size - node_offset;
- __u32 r;
-
- D2(printk(" jffs_get_node_data(): file: \"%s\", ino: %u, "
- "version: %u, node_offset: %u\n",
- f->name, node->ino, node->version, node_offset));
-
- r = min(avail, max_size);
- D3(printk(KERN_NOTICE "jffs_get_node_data\n"));
- flash_safe_read(fmc->mtd, pos, buf, r);
-
- D3(printk(" jffs_get_node_data(): Read %u byte%s.\n",
- r, (r == 1 ? "" : "s")));
-
- return r;
-}
-
-
-/* Read data from the file's nodes. Write the data to the buffer
- 'buf'. 'read_offset' tells how much data we should skip. */
-int
-jffs_read_data(struct jffs_file *f, unsigned char *buf, __u32 read_offset,
- __u32 size)
-{
- struct jffs_node *node;
- __u32 read_data = 0; /* Total amount of read data. */
- __u32 node_offset = 0;
- __u32 pos = 0; /* Number of bytes traversed. */
-
- D2(printk("jffs_read_data(): file = \"%s\", read_offset = %d, "
- "size = %u\n",
- (f->name ? f->name : ""), read_offset, size));
-
- if (read_offset >= f->size) {
- D(printk(" f->size: %d\n", f->size));
- return 0;
- }
-
- /* First find the node to read data from. */
- node = f->range_head;
- while (pos <= read_offset) {
- node_offset = read_offset - pos;
- if (node_offset >= node->data_size) {
- pos += node->data_size;
- node = node->range_next;
- }
- else {
- break;
- }
- }
-
- /* "Cats are living proof that not everything in nature
- has to be useful."
- - Garrison Keilor ('97) */
-
- /* Fill the buffer. */
- while (node && (read_data < size)) {
- int r;
- if (!node->fm) {
- /* This node does not refer to real data. */
- r = min(size - read_data,
- node->data_size - node_offset);
- memset(&buf[read_data], 0, r);
- }
- else if ((r = jffs_get_node_data(f, node, &buf[read_data],
- node_offset,
- size - read_data)) < 0) {
- return r;
- }
- read_data += r;
- node_offset = 0;
- node = node->range_next;
- }
- D3(printk(" jffs_read_data(): Read %u bytes.\n", read_data));
- return read_data;
-}
-
-
-/* Used for traversing all nodes in the hash table. */
-int
-jffs_foreach_file(struct jffs_control *c, int (*func)(struct jffs_file *))
-{
- int pos;
- int r;
- int result = 0;
-
- for (pos = 0; pos < c->hash_len; pos++) {
- struct jffs_file *f, *next;
-
- /* We must do _safe, because 'func' might remove the
- current file 'f' from the list. */
- list_for_each_entry_safe(f, next, &c->hash[pos], hash) {
- r = func(f);
- if (r < 0)
- return r;
- result += r;
- }
- }
-
- return result;
-}
-
-
-/* Free all nodes associated with a file. */
-static int
-jffs_free_node_list(struct jffs_file *f)
-{
- struct jffs_node *node;
- struct jffs_node *p;
-
- D3(printk("jffs_free_node_list(): f #%u, \"%s\"\n",
- f->ino, (f->name ? f->name : "")));
- node = f->version_head;
- while (node) {
- p = node;
- node = node->version_next;
- jffs_free_node(p);
- DJM(no_jffs_node--);
- }
- return 0;
-}
-
-
-/* Free a file and its name. */
-static int
-jffs_free_file(struct jffs_file *f)
-{
- D3(printk("jffs_free_file: f #%u, \"%s\"\n",
- f->ino, (f->name ? f->name : "")));
-
- if (f->name) {
- kfree(f->name);
- DJM(no_name--);
- }
- kfree(f);
- no_jffs_file--;
- return 0;
-}
-
-static long
-jffs_get_file_count(void)
-{
- return no_jffs_file;
-}
-
-/* See if a file is deleted. If so, mark that file's nodes as obsolete. */
-int
-jffs_possibly_delete_file(struct jffs_file *f)
-{
- struct jffs_node *n;
-
- D3(printk("jffs_possibly_delete_file(): ino: %u\n",
- f->ino));
-
- ASSERT(if (!f) {
- printk(KERN_ERR "jffs_possibly_delete_file(): f == NULL\n");
- return -1;
- });
-
- if (f->deleted) {
- /* First try to remove all older versions. Commence with
- the oldest node. */
- for (n = f->version_head; n; n = n->version_next) {
- if (!n->fm) {
- continue;
- }
- if (jffs_fmfree(f->c->fmc, n->fm, n) < 0) {
- break;
- }
- }
- /* Unlink the file from the filesystem. */
- if (!f->c->building_fs) {
- jffs_unlink_file_from_tree(f);
- }
- jffs_unlink_file_from_hash(f);
- jffs_free_node_list(f);
- jffs_free_file(f);
- }
- return 0;
-}
-
-
-/* Used in conjunction with jffs_foreach_file() to count the number
- of files in the file system. */
-int
-jffs_file_count(struct jffs_file *f)
-{
- return 1;
-}
-
-
-/* Build up a file's range list from scratch by going through the
- version list. */
-static int
-jffs_build_file(struct jffs_file *f)
-{
- struct jffs_node *n;
-
- D3(printk("jffs_build_file(): ino: %u, name: \"%s\"\n",
- f->ino, (f->name ? f->name : "")));
-
- for (n = f->version_head; n; n = n->version_next) {
- jffs_update_file(f, n);
- }
- return 0;
-}
-
-
-/* Remove an amount of data from a file. If this amount of data is
- zero, that could mean that a node should be split in two parts.
- We remove or change the appropriate nodes in the lists.
-
- Starting offset of area to be removed is node->data_offset,
- and the length of the area is in node->removed_size. */
-static int
-jffs_delete_data(struct jffs_file *f, struct jffs_node *node)
-{
- struct jffs_node *n;
- __u32 offset = node->data_offset;
- __u32 remove_size = node->removed_size;
-
- D3(printk("jffs_delete_data(): offset = %u, remove_size = %u\n",
- offset, remove_size));
-
- if (remove_size == 0
- && f->range_tail
- && f->range_tail->data_offset + f->range_tail->data_size
- == offset) {
- /* A simple append; nothing to remove or no node to split. */
- return 0;
- }
-
- /* Find the node where we should begin the removal. */
- for (n = f->range_head; n; n = n->range_next) {
- if (n->data_offset + n->data_size > offset) {
- break;
- }
- }
- if (!n) {
- /* If there's no data in the file there's no data to
- remove either. */
- return 0;
- }
-
- if (n->data_offset > offset) {
- /* XXX: Not implemented yet. */
- printk(KERN_WARNING "JFFS: An unexpected situation "
- "occurred in jffs_delete_data.\n");
- }
- else if (n->data_offset < offset) {
- /* See if the node has to be split into two parts. */
- if (n->data_offset + n->data_size > offset + remove_size) {
- /* Do the split. */
- struct jffs_node *new_node;
- D3(printk("jffs_delete_data(): Split node with "
- "version number %u.\n", n->version));
-
- if (!(new_node = jffs_alloc_node())) {
- D(printk("jffs_delete_data(): -ENOMEM\n"));
- return -ENOMEM;
- }
- DJM(no_jffs_node++);
-
- new_node->ino = n->ino;
- new_node->version = n->version;
- new_node->data_offset = offset;
- new_node->data_size = n->data_size - (remove_size + (offset - n->data_offset));
- new_node->fm_offset = n->fm_offset + (remove_size + (offset - n->data_offset));
- new_node->name_size = n->name_size;
- new_node->fm = n->fm;
- new_node->version_prev = n;
- new_node->version_next = n->version_next;
- if (new_node->version_next) {
- new_node->version_next->version_prev
- = new_node;
- }
- else {
- f->version_tail = new_node;
- }
- n->version_next = new_node;
- new_node->range_prev = n;
- new_node->range_next = n->range_next;
- if (new_node->range_next) {
- new_node->range_next->range_prev = new_node;
- }
- else {
- f->range_tail = new_node;
- }
- /* A very interesting can of worms. */
- n->range_next = new_node;
- n->data_size = offset - n->data_offset;
- if (new_node->fm)
- jffs_add_node(new_node);
- else {
- D1(printk(KERN_WARNING "jffs_delete_data(): Splitting an empty node (file hold).\n!"));
- D1(printk(KERN_WARNING "FIXME: Did dwmw2 do the right thing here?\n"));
- }
- n = new_node->range_next;
- remove_size = 0;
- }
- else {
- /* No. No need to split the node. Just remove
- the end of the node. */
- int r = min(n->data_offset + n->data_size
- - offset, remove_size);
- n->data_size -= r;
- remove_size -= r;
- n = n->range_next;
- }
- }
-
- /* Remove as many nodes as necessary. */
- while (n && remove_size) {
- if (n->data_size <= remove_size) {
- struct jffs_node *p = n;
- remove_size -= n->data_size;
- n = n->range_next;
- D3(printk("jffs_delete_data(): Removing node: "
- "ino: %u, version: %u%s\n",
- p->ino, p->version,
- (p->fm ? "" : " (virtual)")));
- if (p->fm) {
- jffs_fmfree(f->c->fmc, p->fm, p);
- }
- jffs_unlink_node_from_range_list(f, p);
- jffs_unlink_node_from_version_list(f, p);
- jffs_free_node(p);
- DJM(no_jffs_node--);
- }
- else {
- n->data_size -= remove_size;
- n->fm_offset += remove_size;
- n->data_offset -= (node->removed_size - remove_size);
- n = n->range_next;
- break;
- }
- }
-
- /* Adjust the following nodes' information about offsets etc. */
- while (n && node->removed_size) {
- n->data_offset -= node->removed_size;
- n = n->range_next;
- }
-
- if (node->removed_size > (f->size - node->data_offset)) {
- /* It's possible that the removed_size is in fact
- * greater than the amount of data we actually thought
- * were present in the first place - some of the nodes
- * which this node originally obsoleted may already have
- * been deleted from the flash by subsequent garbage
- * collection.
- *
- * If this is the case, don't let f->size go negative.
- * Bad things would happen :)
- */
- f->size = node->data_offset;
- } else {
- f->size -= node->removed_size;
- }
- D3(printk("jffs_delete_data(): f->size = %d\n", f->size));
- return 0;
-} /* jffs_delete_data() */
-
-
-/* Insert some data into a file. Prior to the call to this function,
- jffs_delete_data should be called. */
-static int
-jffs_insert_data(struct jffs_file *f, struct jffs_node *node)
-{
- D3(printk("jffs_insert_data(): node->data_offset = %u, "
- "node->data_size = %u, f->size = %u\n",
- node->data_offset, node->data_size, f->size));
-
- /* Find the position where we should insert data. */
- retry:
- if (node->data_offset == f->size) {
- /* A simple append. This is the most common operation. */
- node->range_next = NULL;
- node->range_prev = f->range_tail;
- if (node->range_prev) {
- node->range_prev->range_next = node;
- }
- f->range_tail = node;
- f->size += node->data_size;
- if (!f->range_head) {
- f->range_head = node;
- }
- }
- else if (node->data_offset < f->size) {
- /* Trying to insert data into the middle of the file. This
- means no problem because jffs_delete_data() has already
- prepared the range list for us. */
- struct jffs_node *n;
-
- /* Find the correct place for the insertion and then insert
- the node. */
- for (n = f->range_head; n; n = n->range_next) {
- D2(printk("Cool stuff's happening!\n"));
-
- if (n->data_offset == node->data_offset) {
- node->range_prev = n->range_prev;
- if (node->range_prev) {
- node->range_prev->range_next = node;
- }
- else {
- f->range_head = node;
- }
- node->range_next = n;
- n->range_prev = node;
- break;
- }
- ASSERT(else if (n->data_offset + n->data_size >
- node->data_offset) {
- printk(KERN_ERR "jffs_insert_data(): "
- "Couldn't find a place to insert "
- "the data!\n");
- return -1;
- });
- }
-
- /* Adjust later nodes' offsets etc. */
- n = node->range_next;
- while (n) {
- n->data_offset += node->data_size;
- n = n->range_next;
- }
- f->size += node->data_size;
- }
- else if (node->data_offset > f->size) {
- /* Okay. This is tricky. This means that we want to insert
- data at a place that is beyond the limits of the file as
- it is constructed right now. This is actually a common
- event that for instance could occur during the mounting
- of the file system if a large file have been truncated,
- rewritten and then only partially garbage collected. */
-
- struct jffs_node *n;
-
- /* We need a place holder for the data that is missing in
- front of this insertion. This "virtual node" will not
- be associated with any space on the flash device. */
- struct jffs_node *virtual_node;
- if (!(virtual_node = jffs_alloc_node())) {
- return -ENOMEM;
- }
-
- D(printk("jffs_insert_data: Inserting a virtual node.\n"));
- D(printk(" node->data_offset = %u\n", node->data_offset));
- D(printk(" f->size = %u\n", f->size));
-
- virtual_node->ino = node->ino;
- virtual_node->version = node->version;
- virtual_node->removed_size = 0;
- virtual_node->fm_offset = 0;
- virtual_node->name_size = 0;
- virtual_node->fm = NULL; /* This is a virtual data holder. */
- virtual_node->version_prev = NULL;
- virtual_node->version_next = NULL;
- virtual_node->range_next = NULL;
-
- /* Are there any data at all in the file yet? */
- if (f->range_head) {
- virtual_node->data_offset
- = f->range_tail->data_offset
- + f->range_tail->data_size;
- virtual_node->data_size
- = node->data_offset - virtual_node->data_offset;
- virtual_node->range_prev = f->range_tail;
- f->range_tail->range_next = virtual_node;
- }
- else {
- virtual_node->data_offset = 0;
- virtual_node->data_size = node->data_offset;
- virtual_node->range_prev = NULL;
- f->range_head = virtual_node;
- }
-
- f->range_tail = virtual_node;
- f->size += virtual_node->data_size;
-
- /* Insert this virtual node in the version list as well. */
- for (n = f->version_head; n ; n = n->version_next) {
- if (n->version == virtual_node->version) {
- virtual_node->version_prev = n->version_prev;
- n->version_prev = virtual_node;
- if (virtual_node->version_prev) {
- virtual_node->version_prev
- ->version_next = virtual_node;
- }
- else {
- f->version_head = virtual_node;
- }
- virtual_node->version_next = n;
- break;
- }
- }
-
- D(jffs_print_node(virtual_node));
-
- /* Make a new try to insert the node. */
- goto retry;
- }
-
- D3(printk("jffs_insert_data(): f->size = %d\n", f->size));
- return 0;
-}
-
-
-/* A new node (with data) has been added to the file and now the range
- list has to be modified. */
-static int
-jffs_update_file(struct jffs_file *f, struct jffs_node *node)
-{
- int err;
-
- D3(printk("jffs_update_file(): ino: %u, version: %u\n",
- f->ino, node->version));
-
- if (node->data_size == 0) {
- if (node->removed_size == 0) {
- /* data_offset == X */
- /* data_size == 0 */
- /* remove_size == 0 */
- }
- else {
- /* data_offset == X */
- /* data_size == 0 */
- /* remove_size != 0 */
- if ((err = jffs_delete_data(f, node)) < 0) {
- return err;
- }
- }
- }
- else {
- /* data_offset == X */
- /* data_size != 0 */
- /* remove_size == Y */
- if ((err = jffs_delete_data(f, node)) < 0) {
- return err;
- }
- if ((err = jffs_insert_data(f, node)) < 0) {
- return err;
- }
- }
- return 0;
-}
-
-/* Print the contents of a file. */
-#if 0
-int
-jffs_print_file(struct jffs_file *f)
-{
- D(int i);
- D(printk("jffs_file: 0x%p\n", f));
- D(printk("{\n"));
- D(printk(" 0x%08x, /* ino */\n", f->ino));
- D(printk(" 0x%08x, /* pino */\n", f->pino));
- D(printk(" 0x%08x, /* mode */\n", f->mode));
- D(printk(" 0x%04x, /* uid */\n", f->uid));
- D(printk(" 0x%04x, /* gid */\n", f->gid));
- D(printk(" 0x%08x, /* atime */\n", f->atime));
- D(printk(" 0x%08x, /* mtime */\n", f->mtime));
- D(printk(" 0x%08x, /* ctime */\n", f->ctime));
- D(printk(" 0x%02x, /* nsize */\n", f->nsize));
- D(printk(" 0x%02x, /* nlink */\n", f->nlink));
- D(printk(" 0x%02x, /* deleted */\n", f->deleted));
- D(printk(" \"%s\", ", (f->name ? f->name : "")));
- D(for (i = strlen(f->name ? f->name : ""); i < 8; ++i) {
- printk(" ");
- });
- D(printk("/* name */\n"));
- D(printk(" 0x%08x, /* size */\n", f->size));
- D(printk(" 0x%08x, /* highest_version */\n",
- f->highest_version));
- D(printk(" 0x%p, /* c */\n", f->c));
- D(printk(" 0x%p, /* parent */\n", f->parent));
- D(printk(" 0x%p, /* children */\n", f->children));
- D(printk(" 0x%p, /* sibling_prev */\n", f->sibling_prev));
- D(printk(" 0x%p, /* sibling_next */\n", f->sibling_next));
- D(printk(" 0x%p, /* hash_prev */\n", f->hash.prev));
- D(printk(" 0x%p, /* hash_next */\n", f->hash.next));
- D(printk(" 0x%p, /* range_head */\n", f->range_head));
- D(printk(" 0x%p, /* range_tail */\n", f->range_tail));
- D(printk(" 0x%p, /* version_head */\n", f->version_head));
- D(printk(" 0x%p, /* version_tail */\n", f->version_tail));
- D(printk("}\n"));
- return 0;
-}
-#endif /* 0 */
-
-void
-jffs_print_hash_table(struct jffs_control *c)
-{
- int i;
-
- printk("JFFS: Dumping the file system's hash table...\n");
- for (i = 0; i < c->hash_len; i++) {
- struct jffs_file *f;
- list_for_each_entry(f, &c->hash[i], hash) {
- printk("*** c->hash[%u]: \"%s\" "
- "(ino: %u, pino: %u)\n",
- i, (f->name ? f->name : ""),
- f->ino, f->pino);
- }
- }
-}
-
-
-void
-jffs_print_tree(struct jffs_file *first_file, int indent)
-{
- struct jffs_file *f;
- char *space;
- int dir;
-
- if (!first_file) {
- return;
- }
-
- if (!(space = kmalloc(indent + 1, GFP_KERNEL))) {
- printk("jffs_print_tree(): Out of memory!\n");
- return;
- }
-
- memset(space, ' ', indent);
- space[indent] = '\0';
-
- for (f = first_file; f; f = f->sibling_next) {
- dir = S_ISDIR(f->mode);
- printk("%s%s%s (ino: %u, highest_version: %u, size: %u)\n",
- space, (f->name ? f->name : ""), (dir ? "/" : ""),
- f->ino, f->highest_version, f->size);
- if (dir) {
- jffs_print_tree(f->children, indent + 2);
- }
- }
-
- kfree(space);
-}
-
-
-#if defined(JFFS_MEMORY_DEBUG) && JFFS_MEMORY_DEBUG
-void
-jffs_print_memory_allocation_statistics(void)
-{
- static long printout;
- printk("________ Memory printout #%ld ________\n", ++printout);
- printk("no_jffs_file = %ld\n", no_jffs_file);
- printk("no_jffs_node = %ld\n", no_jffs_node);
- printk("no_jffs_control = %ld\n", no_jffs_control);
- printk("no_jffs_raw_inode = %ld\n", no_jffs_raw_inode);
- printk("no_jffs_node_ref = %ld\n", no_jffs_node_ref);
- printk("no_jffs_fm = %ld\n", no_jffs_fm);
- printk("no_jffs_fmcontrol = %ld\n", no_jffs_fmcontrol);
- printk("no_hash = %ld\n", no_hash);
- printk("no_name = %ld\n", no_name);
- printk("\n");
-}
-#endif
-
-
-/* Rewrite `size' bytes, and begin at `node'. */
-static int
-jffs_rewrite_data(struct jffs_file *f, struct jffs_node *node, __u32 size)
-{
- struct jffs_control *c = f->c;
- struct jffs_fmcontrol *fmc = c->fmc;
- struct jffs_raw_inode raw_inode;
- struct jffs_node *new_node;
- struct jffs_fm *fm;
- __u32 pos;
- __u32 pos_dchksum;
- __u32 total_name_size;
- __u32 total_data_size;
- __u32 total_size;
- int err;
-
- D1(printk("***jffs_rewrite_data(): node: %u, name: \"%s\", size: %u\n",
- f->ino, (f->name ? f->name : "(null)"), size));
-
- /* Create and initialize the new node. */
- if (!(new_node = jffs_alloc_node())) {
- D(printk("jffs_rewrite_data(): "
- "Failed to allocate node.\n"));
- return -ENOMEM;
- }
- DJM(no_jffs_node++);
- new_node->data_offset = node->data_offset;
- new_node->removed_size = size;
- total_name_size = JFFS_PAD(f->nsize);
- total_data_size = JFFS_PAD(size);
- total_size = sizeof(struct jffs_raw_inode)
- + total_name_size + total_data_size;
- new_node->fm_offset = sizeof(struct jffs_raw_inode)
- + total_name_size;
-
-retry:
- jffs_fm_write_lock(fmc);
- err = 0;
-
- if ((err = jffs_fmalloc(fmc, total_size, new_node, &fm)) < 0) {
- DJM(no_jffs_node--);
- jffs_fm_write_unlock(fmc);
- D(printk("jffs_rewrite_data(): Failed to allocate fm.\n"));
- jffs_free_node(new_node);
- return err;
- }
- else if (!fm->nodes) {
- /* The jffs_fm struct that we got is not big enough. */
- /* This should never happen, because we deal with this case
- in jffs_garbage_collect_next().*/
- printk(KERN_WARNING "jffs_rewrite_data(): Allocated node is too small (%d bytes of %d)\n", fm->size, total_size);
- if ((err = jffs_write_dummy_node(c, fm)) < 0) {
- D(printk("jffs_rewrite_data(): "
- "jffs_write_dummy_node() Failed!\n"));
- } else {
- err = -ENOSPC;
- }
- DJM(no_jffs_fm--);
- jffs_fm_write_unlock(fmc);
- kfree(fm);
-
- return err;
- }
- new_node->fm = fm;
-
- /* Initialize the raw inode. */
- raw_inode.magic = JFFS_MAGIC_BITMASK;
- raw_inode.ino = f->ino;
- raw_inode.pino = f->pino;
- raw_inode.version = f->highest_version + 1;
- raw_inode.mode = f->mode;
- raw_inode.uid = f->uid;
- raw_inode.gid = f->gid;
- raw_inode.atime = f->atime;
- raw_inode.mtime = f->mtime;
- raw_inode.ctime = f->ctime;
- raw_inode.offset = node->data_offset;
- raw_inode.dsize = size;
- raw_inode.rsize = size;
- raw_inode.nsize = f->nsize;
- raw_inode.nlink = f->nlink;
- raw_inode.spare = 0;
- raw_inode.rename = 0;
- raw_inode.deleted = f->deleted;
- raw_inode.accurate = 0xff;
- raw_inode.dchksum = 0;
- raw_inode.nchksum = 0;
-
- pos = new_node->fm->offset;
- pos_dchksum = pos +JFFS_RAW_INODE_DCHKSUM_OFFSET;
-
- D3(printk("jffs_rewrite_data(): Writing this raw inode "
- "to pos 0x%ul.\n", pos));
- D3(jffs_print_raw_inode(&raw_inode));
-
- if ((err = flash_safe_write(fmc->mtd, pos,
- (u_char *) &raw_inode,
- sizeof(struct jffs_raw_inode)
- - sizeof(__u32)
- - sizeof(__u16) - sizeof(__u16))) < 0) {
- jffs_fmfree_partly(fmc, fm,
- total_name_size + total_data_size);
- jffs_fm_write_unlock(fmc);
- printk(KERN_ERR "JFFS: jffs_rewrite_data: Write error during "
- "rewrite. (raw inode)\n");
- printk(KERN_ERR "JFFS: jffs_rewrite_data: Now retrying "
- "rewrite. (raw inode)\n");
- goto retry;
- }
- pos += sizeof(struct jffs_raw_inode);
-
- /* Write the name to the flash memory. */
- if (f->nsize) {
- D3(printk("jffs_rewrite_data(): Writing name \"%s\" to "
- "pos 0x%ul.\n", f->name, (unsigned int) pos));
- if ((err = flash_safe_write(fmc->mtd, pos,
- (u_char *)f->name,
- f->nsize)) < 0) {
- jffs_fmfree_partly(fmc, fm, total_data_size);
- jffs_fm_write_unlock(fmc);
- printk(KERN_ERR "JFFS: jffs_rewrite_data: Write "
- "error during rewrite. (name)\n");
- printk(KERN_ERR "JFFS: jffs_rewrite_data: Now retrying "
- "rewrite. (name)\n");
- goto retry;
- }
- pos += total_name_size;
- raw_inode.nchksum = jffs_checksum(f->name, f->nsize);
- }
-
- /* Write the data. */
- if (size) {
- int r;
- unsigned char *page;
- __u32 offset = node->data_offset;
-
- if (!(page = (unsigned char *)__get_free_page(GFP_KERNEL))) {
- jffs_fmfree_partly(fmc, fm, 0);
- return -1;
- }
-
- while (size) {
- __u32 s = min(size, (__u32)PAGE_SIZE);
- if ((r = jffs_read_data(f, (char *)page,
- offset, s)) < s) {
- free_page((unsigned long)page);
- jffs_fmfree_partly(fmc, fm, 0);
- jffs_fm_write_unlock(fmc);
- printk(KERN_ERR "JFFS: jffs_rewrite_data: "
- "jffs_read_data() "
- "failed! (r = %d)\n", r);
- return -1;
- }
- if ((err = flash_safe_write(fmc->mtd,
- pos, page, r)) < 0) {
- free_page((unsigned long)page);
- jffs_fmfree_partly(fmc, fm, 0);
- jffs_fm_write_unlock(fmc);
- printk(KERN_ERR "JFFS: jffs_rewrite_data: "
- "Write error during rewrite. "
- "(data)\n");
- goto retry;
- }
- pos += r;
- size -= r;
- offset += r;
- raw_inode.dchksum += jffs_checksum(page, r);
- }
-
- free_page((unsigned long)page);
- }
-
- raw_inode.accurate = 0;
- raw_inode.chksum = jffs_checksum(&raw_inode,
- sizeof(struct jffs_raw_inode)
- - sizeof(__u16));
-
- /* Add the checksum. */
- if ((err
- = flash_safe_write(fmc->mtd, pos_dchksum,
- &((u_char *)
- &raw_inode)[JFFS_RAW_INODE_DCHKSUM_OFFSET],
- sizeof(__u32) + sizeof(__u16)
- + sizeof(__u16))) < 0) {
- jffs_fmfree_partly(fmc, fm, 0);
- jffs_fm_write_unlock(fmc);
- printk(KERN_ERR "JFFS: jffs_rewrite_data: Write error during "
- "rewrite. (checksum)\n");
- goto retry;
- }
-
- /* Now make the file system aware of the newly written node. */
- jffs_insert_node(c, f, &raw_inode, f->name, new_node);
- jffs_fm_write_unlock(fmc);
-
- D3(printk("jffs_rewrite_data(): Leaving...\n"));
- return 0;
-} /* jffs_rewrite_data() */
-
-
-/* jffs_garbage_collect_next implements one step in the garbage collect
- process and is often called multiple times at each occasion of a
- garbage collect. */
-
-static int
-jffs_garbage_collect_next(struct jffs_control *c)
-{
- struct jffs_fmcontrol *fmc = c->fmc;
- struct jffs_node *node;
- struct jffs_file *f;
- int err = 0;
- __u32 size;
- __u32 data_size;
- __u32 total_name_size;
- __u32 extra_available;
- __u32 space_needed;
- __u32 free_chunk_size1 = jffs_free_size1(fmc);
- D2(__u32 free_chunk_size2 = jffs_free_size2(fmc));
-
- /* Get the oldest node in the flash. */
- node = jffs_get_oldest_node(fmc);
- ASSERT(if (!node) {
- printk(KERN_ERR "JFFS: jffs_garbage_collect_next: "
- "No oldest node found!\n");
- err = -1;
- goto jffs_garbage_collect_next_end;
-
-
- });
-
- /* Find its corresponding file too. */
- f = jffs_find_file(c, node->ino);
-
- if (!f) {
- printk (KERN_ERR "JFFS: jffs_garbage_collect_next: "
- "No file to garbage collect! "
- "(ino = 0x%08x)\n", node->ino);
- /* FIXME: Free the offending node and recover. */
- err = -1;
- goto jffs_garbage_collect_next_end;
- }
-
- /* We always write out the name. Theoretically, we don't need
- to, but for now it's easier - because otherwise we'd have
- to keep track of how many times the current name exists on
- the flash and make sure it never reaches zero.
-
- The current approach means that would be possible to cause
- the GC to end up eating its tail by writing lots of nodes
- with no name for it to garbage-collect. Hence the change in
- inode.c to write names with _every_ node.
-
- It sucks, but it _should_ work.
- */
- total_name_size = JFFS_PAD(f->nsize);
-
- D1(printk("jffs_garbage_collect_next(): \"%s\", "
- "ino: %u, version: %u, location 0x%x, dsize %u\n",
- (f->name ? f->name : ""), node->ino, node->version,
- node->fm->offset, node->data_size));
-
- /* Compute how many data it's possible to rewrite at the moment. */
- data_size = f->size - node->data_offset;
-
- /* And from that, the total size of the chunk we want to write */
- size = sizeof(struct jffs_raw_inode) + total_name_size
- + data_size + JFFS_GET_PAD_BYTES(data_size);
-
- /* If that's more than max_chunk_size, reduce it accordingly */
- if (size > fmc->max_chunk_size) {
- size = fmc->max_chunk_size;
- data_size = size - sizeof(struct jffs_raw_inode)
- - total_name_size;
- }
-
- /* If we're asking to take up more space than free_chunk_size1
- but we _could_ fit in it, shrink accordingly.
- */
- if (size > free_chunk_size1) {
-
- if (free_chunk_size1 <
- (sizeof(struct jffs_raw_inode) + total_name_size + BLOCK_SIZE)){
- /* The space left is too small to be of any
- use really. */
- struct jffs_fm *dirty_fm
- = jffs_fmalloced(fmc,
- fmc->tail->offset + fmc->tail->size,
- free_chunk_size1, NULL);
- if (!dirty_fm) {
- printk(KERN_ERR "JFFS: "
- "jffs_garbage_collect_next: "
- "Failed to allocate `dirty' "
- "flash memory!\n");
- err = -1;
- goto jffs_garbage_collect_next_end;
- }
- D1(printk("Dirtying end of flash - too small\n"));
- jffs_write_dummy_node(c, dirty_fm);
- err = 0;
- goto jffs_garbage_collect_next_end;
- }
- D1(printk("Reducing size of new node from %d to %d to avoid "
- " exceeding free_chunk_size1\n",
- size, free_chunk_size1));
-
- size = free_chunk_size1;
- data_size = size - sizeof(struct jffs_raw_inode)
- - total_name_size;
- }
-
-
- /* Calculate the amount of space needed to hold the nodes
- which are remaining in the tail */
- space_needed = fmc->min_free_size - (node->fm->offset % fmc->sector_size);
-
- /* From that, calculate how much 'extra' space we can use to
- increase the size of the node we're writing from the size
- of the node we're obsoleting
- */
- if (space_needed > fmc->free_size) {
- /* If we've gone below min_free_size for some reason,
- don't fuck up. This is why we have
- min_free_size > sector_size. Whinge about it though,
- just so I can convince myself my maths is right.
- */
- D1(printk(KERN_WARNING "jffs_garbage_collect_next(): "
- "space_needed %d exceeded free_size %d\n",
- space_needed, fmc->free_size));
- extra_available = 0;
- } else {
- extra_available = fmc->free_size - space_needed;
- }
-
- /* Check that we don't use up any more 'extra' space than
- what's available */
- if (size > JFFS_PAD(node->data_size) + total_name_size +
- sizeof(struct jffs_raw_inode) + extra_available) {
- D1(printk("Reducing size of new node from %d to %ld to avoid "
- "catching our tail\n", size,
- (long) (JFFS_PAD(node->data_size) + JFFS_PAD(node->name_size) +
- sizeof(struct jffs_raw_inode) + extra_available)));
- D1(printk("space_needed = %d, extra_available = %d\n",
- space_needed, extra_available));
-
- size = JFFS_PAD(node->data_size) + total_name_size +
- sizeof(struct jffs_raw_inode) + extra_available;
- data_size = size - sizeof(struct jffs_raw_inode)
- - total_name_size;
- };
-
- D2(printk(" total_name_size: %u\n", total_name_size));
- D2(printk(" data_size: %u\n", data_size));
- D2(printk(" size: %u\n", size));
- D2(printk(" f->nsize: %u\n", f->nsize));
- D2(printk(" f->size: %u\n", f->size));
- D2(printk(" node->data_offset: %u\n", node->data_offset));
- D2(printk(" free_chunk_size1: %u\n", free_chunk_size1));
- D2(printk(" free_chunk_size2: %u\n", free_chunk_size2));
- D2(printk(" node->fm->offset: 0x%08x\n", node->fm->offset));
-
- if ((err = jffs_rewrite_data(f, node, data_size))) {
- printk(KERN_WARNING "jffs_rewrite_data() failed: %d\n", err);
- return err;
- }
-
-jffs_garbage_collect_next_end:
- D3(printk("jffs_garbage_collect_next: Leaving...\n"));
- return err;
-} /* jffs_garbage_collect_next */
-
-
-/* If an obsolete node is partly going to be erased due to garbage
- collection, the part that isn't going to be erased must be filled
- with zeroes so that the scan of the flash will work smoothly next
- time. (The data in the file could for instance be a JFFS image
- which could cause enormous confusion during a scan of the flash
- device if we didn't do this.)
- There are two phases in this procedure: First, the clearing of
- the name and data parts of the node. Second, possibly also clearing
- a part of the raw inode as well. If the box is power cycled during
- the first phase, only the checksum of this node-to-be-cleared-at-
- the-end will be wrong. If the box is power cycled during, or after,
- the clearing of the raw inode, the information like the length of
- the name and data parts are zeroed. The next time the box is
- powered up, the scanning algorithm manages this faulty data too
- because:
-
- - The checksum is invalid and thus the raw inode must be discarded
- in any case.
- - If the lengths of the data part or the name part are zeroed, the
- scanning just continues after the raw inode. But after the inode
- the scanning procedure just finds zeroes which is the same as
- dirt.
-
- So, in the end, this could never fail. :-) Even if it does fail,
- the scanning algorithm should manage that too. */
-
-static int
-jffs_clear_end_of_node(struct jffs_control *c, __u32 erase_size)
-{
- struct jffs_fm *fm;
- struct jffs_fmcontrol *fmc = c->fmc;
- __u32 zero_offset;
- __u32 zero_size;
- __u32 zero_offset_data;
- __u32 zero_size_data;
- __u32 cutting_raw_inode = 0;
-
- if (!(fm = jffs_cut_node(fmc, erase_size))) {
- D3(printk("jffs_clear_end_of_node(): fm == NULL\n"));
- return 0;
- }
-
- /* Where and how much shall we clear? */
- zero_offset = fmc->head->offset + erase_size;
- zero_size = fm->offset + fm->size - zero_offset;
-
- /* Do we have to clear the raw_inode explicitly? */
- if (fm->size - zero_size < sizeof(struct jffs_raw_inode)) {
- cutting_raw_inode = sizeof(struct jffs_raw_inode)
- - (fm->size - zero_size);
- }
-
- /* First, clear the name and data fields. */
- zero_offset_data = zero_offset + cutting_raw_inode;
- zero_size_data = zero_size - cutting_raw_inode;
- flash_safe_acquire(fmc->mtd);
- flash_memset(fmc->mtd, zero_offset_data, 0, zero_size_data);
- flash_safe_release(fmc->mtd);
-
- /* Should we clear a part of the raw inode? */
- if (cutting_raw_inode) {
- /* I guess it is ok to clear the raw inode in this order. */
- flash_safe_acquire(fmc->mtd);
- flash_memset(fmc->mtd, zero_offset, 0,
- cutting_raw_inode);
- flash_safe_release(fmc->mtd);
- }
-
- return 0;
-} /* jffs_clear_end_of_node() */
-
-/* Try to erase as much as possible of the dirt in the flash memory. */
-static long
-jffs_try_to_erase(struct jffs_control *c)
-{
- struct jffs_fmcontrol *fmc = c->fmc;
- long erase_size;
- int err;
- __u32 offset;
-
- D3(printk("jffs_try_to_erase()\n"));
-
- erase_size = jffs_erasable_size(fmc);
-
- D2(printk("jffs_try_to_erase(): erase_size = %ld\n", erase_size));
-
- if (erase_size == 0) {
- return 0;
- }
- else if (erase_size < 0) {
- printk(KERN_ERR "JFFS: jffs_try_to_erase: "
- "jffs_erasable_size returned %ld.\n", erase_size);
- return erase_size;
- }
-
- if ((err = jffs_clear_end_of_node(c, erase_size)) < 0) {
- printk(KERN_ERR "JFFS: jffs_try_to_erase: "
- "Clearing of node failed.\n");
- return err;
- }
-
- offset = fmc->head->offset;
-
- /* Now, let's try to do the erase. */
- if ((err = flash_erase_region(fmc->mtd,
- offset, erase_size)) < 0) {
- printk(KERN_ERR "JFFS: Erase of flash failed. "
- "offset = %u, erase_size = %ld\n",
- offset, erase_size);
- /* XXX: Here we should allocate this area as dirty
- with jffs_fmalloced or something similar. Now
- we just report the error. */
- return err;
- }
-
-#if 0
- /* Check if the erased sectors really got erased. */
- {
- __u32 pos;
- __u32 end;
-
- pos = (__u32)flash_get_direct_pointer(to_kdev_t(c->sb->s_dev), offset);
- end = pos + erase_size;
-
- D2(printk("JFFS: Checking erased sector(s)...\n"));
-
- flash_safe_acquire(fmc->mtd);
-
- for (; pos < end; pos += 4) {
- if (*(__u32 *)pos != JFFS_EMPTY_BITMASK) {
- printk("JFFS: Erase failed! pos = 0x%lx\n",
- (long)pos);
- jffs_hexdump(fmc->mtd, pos,
- jffs_min(256, end - pos));
- err = -1;
- break;
- }
- }
-
- flash_safe_release(fmc->mtd);
-
- if (!err) {
- D2(printk("JFFS: Erase succeeded.\n"));
- }
- else {
- /* XXX: Here we should allocate the memory
- with jffs_fmalloced() in order to prevent
- JFFS from using this area accidentally. */
- return err;
- }
- }
-#endif
-
- /* Update the flash memory data structures. */
- jffs_sync_erase(fmc, erase_size);
-
- return erase_size;
-}
-
-
-/* There are different criteria that should trigger a garbage collect:
-
- 1. There is too much dirt in the memory.
- 2. The free space is becoming small.
- 3. There are many versions of a node.
-
- The garbage collect should always be done in a manner that guarantees
- that future garbage collects cannot be locked. E.g. Rewritten chunks
- should not be too large (span more than one sector in the flash memory
- for exemple). Of course there is a limit on how intelligent this garbage
- collection can be. */
-
-
-static int
-jffs_garbage_collect_now(struct jffs_control *c)
-{
- struct jffs_fmcontrol *fmc = c->fmc;
- long erased = 0;
- int result = 0;
- D1(int i = 1);
- D2(printk("***jffs_garbage_collect_now(): fmc->dirty_size = %u, fmc->free_size = 0x%x\n, fcs1=0x%x, fcs2=0x%x",
- fmc->dirty_size, fmc->free_size, jffs_free_size1(fmc), jffs_free_size2(fmc)));
- D2(jffs_print_fmcontrol(fmc));
-
- // down(&fmc->gclock);
-
- /* If it is possible to garbage collect, do so. */
-
- while (erased == 0) {
- D1(printk("***jffs_garbage_collect_now(): round #%u, "
- "fmc->dirty_size = %u\n", i++, fmc->dirty_size));
- D2(jffs_print_fmcontrol(fmc));
-
- if ((erased = jffs_try_to_erase(c)) < 0) {
- printk(KERN_WARNING "JFFS: Error in "
- "garbage collector.\n");
- result = erased;
- goto gc_end;
- }
- if (erased)
- break;
-
- if (fmc->free_size == 0) {
- /* Argh */
- printk(KERN_ERR "jffs_garbage_collect_now(): free_size == 0. This is BAD.\n");
- result = -ENOSPC;
- break;
- }
-
- if (fmc->dirty_size < fmc->sector_size) {
- /* Actually, we _may_ have been able to free some,
- * if there are many overlapping nodes which aren't
- * actually marked dirty because they still have
- * some valid data in each.
- */
- result = -ENOSPC;
- break;
- }
-
- /* Let's dare to make a garbage collect. */
- if ((result = jffs_garbage_collect_next(c)) < 0) {
- printk(KERN_ERR "JFFS: Something "
- "has gone seriously wrong "
- "with a garbage collect.\n");
- goto gc_end;
- }
-
- D1(printk(" jffs_garbage_collect_now(): erased: %ld\n", erased));
- DJM(jffs_print_memory_allocation_statistics());
- }
-
-gc_end:
- // up(&fmc->gclock);
-
- D3(printk(" jffs_garbage_collect_now(): Leaving...\n"));
- D1(if (erased) {
- printk("jffs_g_c_now(): erased = %ld\n", erased);
- jffs_print_fmcontrol(fmc);
- });
-
- if (!erased && !result)
- return -ENOSPC;
-
- return result;
-} /* jffs_garbage_collect_now() */
-
-
-/* Determine if it is reasonable to start garbage collection.
- We start a gc pass if either:
- - The number of free bytes < MIN_FREE_BYTES && at least one
- block is dirty, OR
- - The number of dirty bytes > MAX_DIRTY_BYTES
-*/
-static inline int thread_should_wake (struct jffs_control *c)
-{
- D1(printk (KERN_NOTICE "thread_should_wake(): free=%d, dirty=%d, blocksize=%d.\n",
- c->fmc->free_size, c->fmc->dirty_size, c->fmc->sector_size));
-
- /* If there's not enough dirty space to free a block, there's no point. */
- if (c->fmc->dirty_size < c->fmc->sector_size) {
- D2(printk(KERN_NOTICE "thread_should_wake(): Not waking. Insufficient dirty space\n"));
- return 0;
- }
-#if 1
- /* If there is too much RAM used by the various structures, GC */
- if (jffs_get_node_inuse() > (c->fmc->used_size/c->fmc->max_chunk_size * 5 + jffs_get_file_count() * 2 + 50)) {
- /* FIXME: Provide proof that this test can be satisfied. We
- don't want a filesystem doing endless GC just because this
- condition cannot ever be false.
- */
- D2(printk(KERN_NOTICE "thread_should_wake(): Waking due to number of nodes\n"));
- return 1;
- }
-#endif
- /* If there are fewer free bytes than the threshold, GC */
- if (c->fmc->free_size < c->gc_minfree_threshold) {
- D2(printk(KERN_NOTICE "thread_should_wake(): Waking due to insufficent free space\n"));
- return 1;
- }
- /* If there are more dirty bytes than the threshold, GC */
- if (c->fmc->dirty_size > c->gc_maxdirty_threshold) {
- D2(printk(KERN_NOTICE "thread_should_wake(): Waking due to excessive dirty space\n"));
- return 1;
- }
- /* FIXME: What about the "There are many versions of a node" condition? */
-
- return 0;
-}
-
-
-void jffs_garbage_collect_trigger(struct jffs_control *c)
-{
- /* NOTE: We rely on the fact that we have the BKL here.
- * Otherwise, the gc_task could go away between the check
- * and the wake_up_process()
- */
- if (c->gc_task && thread_should_wake(c))
- send_sig(SIGHUP, c->gc_task, 1);
-}
-
-
-/* Kernel threads take (void *) as arguments. Thus we pass
- the jffs_control data as a (void *) and then cast it. */
-int
-jffs_garbage_collect_thread(void *ptr)
-{
- struct jffs_control *c = (struct jffs_control *) ptr;
- struct jffs_fmcontrol *fmc = c->fmc;
- long erased;
- int result = 0;
- D1(int i = 1);
-
- daemonize("jffs_gcd");
-
- c->gc_task = current;
-
- lock_kernel();
- init_completion(&c->gc_thread_comp); /* barrier */
- spin_lock_irq(&current->sighand->siglock);
- siginitsetinv (&current->blocked, sigmask(SIGHUP) | sigmask(SIGKILL) | sigmask(SIGSTOP) | sigmask(SIGCONT));
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
-
- D1(printk (KERN_NOTICE "jffs_garbage_collect_thread(): Starting infinite loop.\n"));
-
- for (;;) {
-
- /* See if we need to start gc. If we don't, go to sleep.
-
- Current implementation is a BAD THING(tm). If we try
- to unmount the FS, the unmount operation will sleep waiting
- for this thread to exit. We need to arrange to send it a
- sig before the umount process sleeps.
- */
-
- if (!thread_should_wake(c))
- set_current_state (TASK_INTERRUPTIBLE);
-
- schedule(); /* Yes, we do this even if we want to go
- on immediately - we're a low priority
- background task. */
-
- /* Put_super will send a SIGKILL and then wait on the sem.
- */
- while (signal_pending(current)) {
- siginfo_t info;
- unsigned long signr = 0;
-
- if (try_to_freeze())
- continue;
-
- spin_lock_irq(&current->sighand->siglock);
- signr = dequeue_signal(current, &current->blocked, &info);
- spin_unlock_irq(&current->sighand->siglock);
-
- switch(signr) {
- case SIGSTOP:
- D1(printk("jffs_garbage_collect_thread(): SIGSTOP received.\n"));
- set_current_state(TASK_STOPPED);
- schedule();
- break;
-
- case SIGKILL:
- D1(printk("jffs_garbage_collect_thread(): SIGKILL received.\n"));
- c->gc_task = NULL;
- complete_and_exit(&c->gc_thread_comp, 0);
- }
- }
-
-
- D1(printk (KERN_NOTICE "jffs_garbage_collect_thread(): collecting.\n"));
-
- D3(printk (KERN_NOTICE "g_c_thread(): down biglock\n"));
- mutex_lock(&fmc->biglock);
-
- D1(printk("***jffs_garbage_collect_thread(): round #%u, "
- "fmc->dirty_size = %u\n", i++, fmc->dirty_size));
- D2(jffs_print_fmcontrol(fmc));
-
- if ((erased = jffs_try_to_erase(c)) < 0) {
- printk(KERN_WARNING "JFFS: Error in "
- "garbage collector: %ld.\n", erased);
- }
-
- if (erased)
- goto gc_end;
-
- if (fmc->free_size == 0) {
- /* Argh. Might as well commit suicide. */
- printk(KERN_ERR "jffs_garbage_collect_thread(): free_size == 0. This is BAD.\n");
- send_sig(SIGQUIT, c->gc_task, 1);
- // panic()
- goto gc_end;
- }
-
- /* Let's dare to make a garbage collect. */
- if ((result = jffs_garbage_collect_next(c)) < 0) {
- printk(KERN_ERR "JFFS: Something "
- "has gone seriously wrong "
- "with a garbage collect: %d\n", result);
- }
-
- gc_end:
- D3(printk (KERN_NOTICE "g_c_thread(): up biglock\n"));
- mutex_unlock(&fmc->biglock);
- } /* for (;;) */
-} /* jffs_garbage_collect_thread() */
diff --git a/fs/jffs/intrep.h b/fs/jffs/intrep.h
deleted file mode 100644
index 5c7abe0e2695..000000000000
--- a/fs/jffs/intrep.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * JFFS -- Journaling Flash File System, Linux implementation.
- *
- * Copyright (C) 1999, 2000 Axis Communications AB.
- *
- * Created by Finn Hakansson <finn@axis.com>.
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * $Id: intrep.h,v 1.14 2001/09/23 23:28:37 dwmw2 Exp $
- *
- */
-
-#ifndef __LINUX_JFFS_INTREP_H__
-#define __LINUX_JFFS_INTREP_H__
-#include "jffs_fm.h"
-struct jffs_node *jffs_alloc_node(void);
-void jffs_free_node(struct jffs_node *n);
-int jffs_get_node_inuse(void);
-
-void jffs_cleanup_control(struct jffs_control *c);
-int jffs_build_fs(struct super_block *sb);
-
-int jffs_insert_node(struct jffs_control *c, struct jffs_file *f,
- const struct jffs_raw_inode *raw_inode,
- const char *name, struct jffs_node *node);
-struct jffs_file *jffs_find_file(struct jffs_control *c, __u32 ino);
-struct jffs_file *jffs_find_child(struct jffs_file *dir, const char *name, int len);
-
-void jffs_free_node(struct jffs_node *node);
-
-int jffs_foreach_file(struct jffs_control *c, int (*func)(struct jffs_file *));
-int jffs_possibly_delete_file(struct jffs_file *f);
-int jffs_insert_file_into_tree(struct jffs_file *f);
-int jffs_unlink_file_from_tree(struct jffs_file *f);
-int jffs_file_count(struct jffs_file *f);
-
-int jffs_write_node(struct jffs_control *c, struct jffs_node *node,
- struct jffs_raw_inode *raw_inode,
- const char *name, const unsigned char *buf,
- int recoverable, struct jffs_file *f);
-int jffs_read_data(struct jffs_file *f, unsigned char *buf, __u32 read_offset, __u32 size);
-
-/* Garbage collection stuff. */
-int jffs_garbage_collect_thread(void *c);
-void jffs_garbage_collect_trigger(struct jffs_control *c);
-
-/* For debugging purposes. */
-#if 0
-int jffs_print_file(struct jffs_file *f);
-#endif /* 0 */
-void jffs_print_hash_table(struct jffs_control *c);
-void jffs_print_tree(struct jffs_file *first_file, int indent);
-
-#endif /* __LINUX_JFFS_INTREP_H__ */
diff --git a/fs/jffs/jffs_fm.c b/fs/jffs/jffs_fm.c
deleted file mode 100644
index 5a95fbdd6fdb..000000000000
--- a/fs/jffs/jffs_fm.c
+++ /dev/null
@@ -1,798 +0,0 @@
-/*
- * JFFS -- Journaling Flash File System, Linux implementation.
- *
- * Copyright (C) 1999, 2000 Axis Communications AB.
- *
- * Created by Finn Hakansson <finn@axis.com>.
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * $Id: jffs_fm.c,v 1.27 2001/09/20 12:29:47 dwmw2 Exp $
- *
- * Ported to Linux 2.3.x and MTD:
- * Copyright (C) 2000 Alexander Larsson (alex@cendio.se), Cendio Systems AB
- *
- */
-#include <linux/slab.h>
-#include <linux/err.h>
-#include <linux/blkdev.h>
-#include <linux/jffs.h>
-#include "jffs_fm.h"
-#include "intrep.h"
-
-#if defined(JFFS_MARK_OBSOLETE) && JFFS_MARK_OBSOLETE
-static int jffs_mark_obsolete(struct jffs_fmcontrol *fmc, __u32 fm_offset);
-#endif
-
-static struct jffs_fm *jffs_alloc_fm(void);
-static void jffs_free_fm(struct jffs_fm *n);
-
-extern struct kmem_cache *fm_cache;
-extern struct kmem_cache *node_cache;
-
-#if CONFIG_JFFS_FS_VERBOSE > 0
-void
-jffs_print_fmcontrol(struct jffs_fmcontrol *fmc)
-{
- D(printk("struct jffs_fmcontrol: 0x%p\n", fmc));
- D(printk("{\n"));
- D(printk(" %u, /* flash_size */\n", fmc->flash_size));
- D(printk(" %u, /* used_size */\n", fmc->used_size));
- D(printk(" %u, /* dirty_size */\n", fmc->dirty_size));
- D(printk(" %u, /* free_size */\n", fmc->free_size));
- D(printk(" %u, /* sector_size */\n", fmc->sector_size));
- D(printk(" %u, /* min_free_size */\n", fmc->min_free_size));
- D(printk(" %u, /* max_chunk_size */\n", fmc->max_chunk_size));
- D(printk(" 0x%p, /* mtd */\n", fmc->mtd));
- D(printk(" 0x%p, /* head */ "
- "(head->offset = 0x%08x)\n",
- fmc->head, (fmc->head ? fmc->head->offset : 0)));
- D(printk(" 0x%p, /* tail */ "
- "(tail->offset + tail->size = 0x%08x)\n",
- fmc->tail,
- (fmc->tail ? fmc->tail->offset + fmc->tail->size : 0)));
- D(printk(" 0x%p, /* head_extra */\n", fmc->head_extra));
- D(printk(" 0x%p, /* tail_extra */\n", fmc->tail_extra));
- D(printk("}\n"));
-}
-#endif /* CONFIG_JFFS_FS_VERBOSE > 0 */
-
-#if CONFIG_JFFS_FS_VERBOSE > 2
-static void
-jffs_print_fm(struct jffs_fm *fm)
-{
- D(printk("struct jffs_fm: 0x%p\n", fm));
- D(printk("{\n"));
- D(printk(" 0x%08x, /* offset */\n", fm->offset));
- D(printk(" %u, /* size */\n", fm->size));
- D(printk(" 0x%p, /* prev */\n", fm->prev));
- D(printk(" 0x%p, /* next */\n", fm->next));
- D(printk(" 0x%p, /* nodes */\n", fm->nodes));
- D(printk("}\n"));
-}
-#endif /* CONFIG_JFFS_FS_VERBOSE > 2 */
-
-#if 0
-void
-jffs_print_node_ref(struct jffs_node_ref *ref)
-{
- D(printk("struct jffs_node_ref: 0x%p\n", ref));
- D(printk("{\n"));
- D(printk(" 0x%p, /* node */\n", ref->node));
- D(printk(" 0x%p, /* next */\n", ref->next));
- D(printk("}\n"));
-}
-#endif /* 0 */
-
-/* This function creates a new shiny flash memory control structure. */
-struct jffs_fmcontrol *
-jffs_build_begin(struct jffs_control *c, int unit)
-{
- struct jffs_fmcontrol *fmc;
- struct mtd_info *mtd;
-
- D3(printk("jffs_build_begin()\n"));
- fmc = kmalloc(sizeof(*fmc), GFP_KERNEL);
- if (!fmc) {
- D(printk("jffs_build_begin(): Allocation of "
- "struct jffs_fmcontrol failed!\n"));
- return (struct jffs_fmcontrol *)0;
- }
- DJM(no_jffs_fmcontrol++);
-
- mtd = get_mtd_device(NULL, unit);
-
- if (IS_ERR(mtd)) {
- kfree(fmc);
- DJM(no_jffs_fmcontrol--);
- return NULL;
- }
-
- /* Retrieve the size of the flash memory. */
- fmc->flash_size = mtd->size;
- D3(printk(" fmc->flash_size = %d bytes\n", fmc->flash_size));
-
- fmc->used_size = 0;
- fmc->dirty_size = 0;
- fmc->free_size = mtd->size;
- fmc->sector_size = mtd->erasesize;
- fmc->max_chunk_size = fmc->sector_size >> 1;
- /* min_free_size:
- 1 sector, obviously.
- + 1 x max_chunk_size, for when a nodes overlaps the end of a sector
- + 1 x max_chunk_size again, which ought to be enough to handle
- the case where a rename causes a name to grow, and GC has
- to write out larger nodes than the ones it's obsoleting.
- We should fix it so it doesn't have to write the name
- _every_ time. Later.
- + another 2 sectors because people keep getting GC stuck and
- we don't know why. This scares me - I want formal proof
- of correctness of whatever number we put here. dwmw2.
- */
- fmc->min_free_size = fmc->sector_size << 2;
- fmc->mtd = mtd;
- fmc->c = c;
- fmc->head = NULL;
- fmc->tail = NULL;
- fmc->head_extra = NULL;
- fmc->tail_extra = NULL;
- mutex_init(&fmc->biglock);
- return fmc;
-}
-
-
-/* When the flash memory scan has completed, this function should be called
- before use of the control structure. */
-void
-jffs_build_end(struct jffs_fmcontrol *fmc)
-{
- D3(printk("jffs_build_end()\n"));
-
- if (!fmc->head) {
- fmc->head = fmc->head_extra;
- fmc->tail = fmc->tail_extra;
- }
- else if (fmc->head_extra) {
- fmc->tail_extra->next = fmc->head;
- fmc->head->prev = fmc->tail_extra;
- fmc->head = fmc->head_extra;
- }
- fmc->head_extra = NULL; /* These two instructions should be omitted. */
- fmc->tail_extra = NULL;
- D3(jffs_print_fmcontrol(fmc));
-}
-
-
-/* Call this function when the file system is unmounted. This function
- frees all memory used by this module. */
-void
-jffs_cleanup_fmcontrol(struct jffs_fmcontrol *fmc)
-{
- if (fmc) {
- struct jffs_fm *next = fmc->head;
- while (next) {
- struct jffs_fm *cur = next;
- next = next->next;
- jffs_free_fm(cur);
- }
- put_mtd_device(fmc->mtd);
- kfree(fmc);
- DJM(no_jffs_fmcontrol--);
- }
-}
-
-
-/* This function returns the size of the first chunk of free space on the
- flash memory. This function will return something nonzero if the flash
- memory contains any free space. */
-__u32
-jffs_free_size1(struct jffs_fmcontrol *fmc)
-{
- __u32 head;
- __u32 tail;
- __u32 end = fmc->flash_size;
-
- if (!fmc->head) {
- /* There is nothing on the flash. */
- return fmc->flash_size;
- }
-
- /* Compute the beginning and ending of the contents of the flash. */
- head = fmc->head->offset;
- tail = fmc->tail->offset + fmc->tail->size;
- if (tail == end) {
- tail = 0;
- }
- ASSERT(else if (tail > end) {
- printk(KERN_WARNING "jffs_free_size1(): tail > end\n");
- tail = 0;
- });
-
- if (head <= tail) {
- return end - tail;
- }
- else {
- return head - tail;
- }
-}
-
-/* This function will return something nonzero in case there are two free
- areas on the flash. Like this:
-
- +----------------+------------------+----------------+
- | FREE 1 | USED / DIRTY | FREE 2 |
- +----------------+------------------+----------------+
- fmc->head -----^
- fmc->tail ------------------------^
-
- The value returned, will be the size of the first empty area on the
- flash, in this case marked "FREE 1". */
-__u32
-jffs_free_size2(struct jffs_fmcontrol *fmc)
-{
- if (fmc->head) {
- __u32 head = fmc->head->offset;
- __u32 tail = fmc->tail->offset + fmc->tail->size;
- if (tail == fmc->flash_size) {
- tail = 0;
- }
-
- if (tail >= head) {
- return head;
- }
- }
- return 0;
-}
-
-
-/* Allocate a chunk of flash memory. If there is enough space on the
- device, a reference to the associated node is stored in the jffs_fm
- struct. */
-int
-jffs_fmalloc(struct jffs_fmcontrol *fmc, __u32 size, struct jffs_node *node,
- struct jffs_fm **result)
-{
- struct jffs_fm *fm;
- __u32 free_chunk_size1;
- __u32 free_chunk_size2;
-
- D2(printk("jffs_fmalloc(): fmc = 0x%p, size = %d, "
- "node = 0x%p\n", fmc, size, node));
-
- *result = NULL;
-
- if (!(fm = jffs_alloc_fm())) {
- D(printk("jffs_fmalloc(): kmalloc() failed! (fm)\n"));
- return -ENOMEM;
- }
-
- free_chunk_size1 = jffs_free_size1(fmc);
- free_chunk_size2 = jffs_free_size2(fmc);
- if (free_chunk_size1 + free_chunk_size2 != fmc->free_size) {
- printk(KERN_WARNING "Free size accounting screwed\n");
- printk(KERN_WARNING "free_chunk_size1 == 0x%x, free_chunk_size2 == 0x%x, fmc->free_size == 0x%x\n", free_chunk_size1, free_chunk_size2, fmc->free_size);
- }
-
- D3(printk("jffs_fmalloc(): free_chunk_size1 = %u, "
- "free_chunk_size2 = %u\n",
- free_chunk_size1, free_chunk_size2));
-
- if (size <= free_chunk_size1) {
- if (!(fm->nodes = (struct jffs_node_ref *)
- kmalloc(sizeof(struct jffs_node_ref),
- GFP_KERNEL))) {
- D(printk("jffs_fmalloc(): kmalloc() failed! "
- "(node_ref)\n"));
- jffs_free_fm(fm);
- return -ENOMEM;
- }
- DJM(no_jffs_node_ref++);
- fm->nodes->node = node;
- fm->nodes->next = NULL;
- if (fmc->tail) {
- fm->offset = fmc->tail->offset + fmc->tail->size;
- if (fm->offset == fmc->flash_size) {
- fm->offset = 0;
- }
- ASSERT(else if (fm->offset > fmc->flash_size) {
- printk(KERN_WARNING "jffs_fmalloc(): "
- "offset > flash_end\n");
- fm->offset = 0;
- });
- }
- else {
- /* There don't have to be files in the file
- system yet. */
- fm->offset = 0;
- }
- fm->size = size;
- fmc->free_size -= size;
- fmc->used_size += size;
- }
- else if (size > free_chunk_size2) {
- printk(KERN_WARNING "JFFS: Tried to allocate a too "
- "large flash memory chunk. (size = %u)\n", size);
- jffs_free_fm(fm);
- return -ENOSPC;
- }
- else {
- fm->offset = fmc->tail->offset + fmc->tail->size;
- fm->size = free_chunk_size1;
- fm->nodes = NULL;
- fmc->free_size -= fm->size;
- fmc->dirty_size += fm->size; /* Changed by simonk. This seemingly fixes a
- bug that caused infinite garbage collection.
- It previously set fmc->dirty_size to size (which is the
- size of the requested chunk).
- */
- }
-
- fm->next = NULL;
- if (!fmc->head) {
- fm->prev = NULL;
- fmc->head = fm;
- fmc->tail = fm;
- }
- else {
- fm->prev = fmc->tail;
- fmc->tail->next = fm;
- fmc->tail = fm;
- }
-
- D3(jffs_print_fmcontrol(fmc));
- D3(jffs_print_fm(fm));
- *result = fm;
- return 0;
-}
-
-
-/* The on-flash space is not needed anymore by the passed node. Remove
- the reference to the node from the node list. If the data chunk in
- the flash memory isn't used by any more nodes anymore (fm->nodes == 0),
- then mark that chunk as dirty. */
-int
-jffs_fmfree(struct jffs_fmcontrol *fmc, struct jffs_fm *fm, struct jffs_node *node)
-{
- struct jffs_node_ref *ref;
- struct jffs_node_ref *prev;
- ASSERT(int del = 0);
-
- D2(printk("jffs_fmfree(): node->ino = %u, node->version = %u\n",
- node->ino, node->version));
-
- ASSERT(if (!fmc || !fm || !fm->nodes) {
- printk(KERN_ERR "jffs_fmfree(): fmc: 0x%p, fm: 0x%p, "
- "fm->nodes: 0x%p\n",
- fmc, fm, (fm ? fm->nodes : NULL));
- return -1;
- });
-
- /* Find the reference to the node that is going to be removed
- and remove it. */
- for (ref = fm->nodes, prev = NULL; ref; ref = ref->next) {
- if (ref->node == node) {
- if (prev) {
- prev->next = ref->next;
- }
- else {
- fm->nodes = ref->next;
- }
- kfree(ref);
- DJM(no_jffs_node_ref--);
- ASSERT(del = 1);
- break;
- }
- prev = ref;
- }
-
- /* If the data chunk in the flash memory isn't used anymore
- just mark it as obsolete. */
- if (!fm->nodes) {
- /* No node uses this chunk so let's remove it. */
- fmc->used_size -= fm->size;
- fmc->dirty_size += fm->size;
-#if defined(JFFS_MARK_OBSOLETE) && JFFS_MARK_OBSOLETE
- if (jffs_mark_obsolete(fmc, fm->offset) < 0) {
- D1(printk("jffs_fmfree(): Failed to mark an on-flash "
- "node obsolete!\n"));
- return -1;
- }
-#endif
- }
-
- ASSERT(if (!del) {
- printk(KERN_WARNING "***jffs_fmfree(): "
- "Didn't delete any node reference!\n");
- });
-
- return 0;
-}
-
-
-/* This allocation function is used during the initialization of
- the file system. */
-struct jffs_fm *
-jffs_fmalloced(struct jffs_fmcontrol *fmc, __u32 offset, __u32 size,
- struct jffs_node *node)
-{
- struct jffs_fm *fm;
-
- D3(printk("jffs_fmalloced()\n"));
-
- if (!(fm = jffs_alloc_fm())) {
- D(printk("jffs_fmalloced(0x%p, %u, %u, 0x%p): failed!\n",
- fmc, offset, size, node));
- return NULL;
- }
- fm->offset = offset;
- fm->size = size;
- fm->prev = NULL;
- fm->next = NULL;
- fm->nodes = NULL;
- if (node) {
- /* `node' exists and it should be associated with the
- jffs_fm structure `fm'. */
- if (!(fm->nodes = (struct jffs_node_ref *)
- kmalloc(sizeof(struct jffs_node_ref),
- GFP_KERNEL))) {
- D(printk("jffs_fmalloced(): !fm->nodes\n"));
- jffs_free_fm(fm);
- return NULL;
- }
- DJM(no_jffs_node_ref++);
- fm->nodes->node = node;
- fm->nodes->next = NULL;
- fmc->used_size += size;
- fmc->free_size -= size;
- }
- else {
- /* If there is no node, then this is just a chunk of dirt. */
- fmc->dirty_size += size;
- fmc->free_size -= size;
- }
-
- if (fmc->head_extra) {
- fm->prev = fmc->tail_extra;
- fmc->tail_extra->next = fm;
- fmc->tail_extra = fm;
- }
- else if (!fmc->head) {
- fmc->head = fm;
- fmc->tail = fm;
- }
- else if (fmc->tail->offset + fmc->tail->size < offset) {
- fmc->head_extra = fm;
- fmc->tail_extra = fm;
- }
- else {
- fm->prev = fmc->tail;
- fmc->tail->next = fm;
- fmc->tail = fm;
- }
- D3(jffs_print_fmcontrol(fmc));
- D3(jffs_print_fm(fm));
- return fm;
-}
-
-
-/* Add a new node to an already existing jffs_fm struct. */
-int
-jffs_add_node(struct jffs_node *node)
-{
- struct jffs_node_ref *ref;
-
- D3(printk("jffs_add_node(): ino = %u\n", node->ino));
-
- ref = kmalloc(sizeof(*ref), GFP_KERNEL);
- if (!ref)
- return -ENOMEM;
-
- DJM(no_jffs_node_ref++);
- ref->node = node;
- ref->next = node->fm->nodes;
- node->fm->nodes = ref;
- return 0;
-}
-
-
-/* Free a part of some allocated space. */
-void
-jffs_fmfree_partly(struct jffs_fmcontrol *fmc, struct jffs_fm *fm, __u32 size)
-{
- D1(printk("***jffs_fmfree_partly(): fm = 0x%p, fm->nodes = 0x%p, "
- "fm->nodes->node->ino = %u, size = %u\n",
- fm, (fm ? fm->nodes : 0),
- (!fm ? 0 : (!fm->nodes ? 0 : fm->nodes->node->ino)), size));
-
- if (fm->nodes) {
- kfree(fm->nodes);
- DJM(no_jffs_node_ref--);
- fm->nodes = NULL;
- }
- fmc->used_size -= fm->size;
- if (fm == fmc->tail) {
- fm->size -= size;
- fmc->free_size += size;
- }
- fmc->dirty_size += fm->size;
-}
-
-
-/* Find the jffs_fm struct that contains the end of the data chunk that
- begins at the logical beginning of the flash memory and spans `size'
- bytes. If we want to erase a sector of the flash memory, we use this
- function to find where the sector limit cuts a chunk of data. */
-struct jffs_fm *
-jffs_cut_node(struct jffs_fmcontrol *fmc, __u32 size)
-{
- struct jffs_fm *fm;
- __u32 pos = 0;
-
- if (size == 0) {
- return NULL;
- }
-
- ASSERT(if (!fmc) {
- printk(KERN_ERR "jffs_cut_node(): fmc == NULL\n");
- return NULL;
- });
-
- fm = fmc->head;
-
- while (fm) {
- pos += fm->size;
- if (pos < size) {
- fm = fm->next;
- }
- else if (pos > size) {
- break;
- }
- else {
- fm = NULL;
- break;
- }
- }
-
- return fm;
-}
-
-
-/* Move the head of the fmc structures and delete the obsolete parts. */
-void
-jffs_sync_erase(struct jffs_fmcontrol *fmc, int erased_size)
-{
- struct jffs_fm *fm;
- struct jffs_fm *del;
-
- ASSERT(if (!fmc) {
- printk(KERN_ERR "jffs_sync_erase(): fmc == NULL\n");
- return;
- });
-
- fmc->dirty_size -= erased_size;
- fmc->free_size += erased_size;
-
- for (fm = fmc->head; fm && (erased_size > 0);) {
- if (erased_size >= fm->size) {
- erased_size -= fm->size;
- del = fm;
- fm = fm->next;
- fm->prev = NULL;
- fmc->head = fm;
- jffs_free_fm(del);
- }
- else {
- fm->size -= erased_size;
- fm->offset += erased_size;
- break;
- }
- }
-}
-
-
-/* Return the oldest used node in the flash memory. */
-struct jffs_node *
-jffs_get_oldest_node(struct jffs_fmcontrol *fmc)
-{
- struct jffs_fm *fm;
- struct jffs_node_ref *nref;
- struct jffs_node *node = NULL;
-
- ASSERT(if (!fmc) {
- printk(KERN_ERR "jffs_get_oldest_node(): fmc == NULL\n");
- return NULL;
- });
-
- for (fm = fmc->head; fm && !fm->nodes; fm = fm->next);
-
- if (!fm) {
- return NULL;
- }
-
- /* The oldest node is the last one in the reference list. This list
- shouldn't be too long; just one or perhaps two elements. */
- for (nref = fm->nodes; nref; nref = nref->next) {
- node = nref->node;
- }
-
- D2(printk("jffs_get_oldest_node(): ino = %u, version = %u\n",
- (node ? node->ino : 0), (node ? node->version : 0)));
-
- return node;
-}
-
-
-#if defined(JFFS_MARK_OBSOLETE) && JFFS_MARK_OBSOLETE
-
-/* Mark an on-flash node as obsolete.
-
- Note that this is just an optimization that isn't necessary for the
- filesystem to work. */
-
-static int
-jffs_mark_obsolete(struct jffs_fmcontrol *fmc, __u32 fm_offset)
-{
- /* The `accurate_pos' holds the position of the accurate byte
- in the jffs_raw_inode structure that we are going to mark
- as obsolete. */
- __u32 accurate_pos = fm_offset + JFFS_RAW_INODE_ACCURATE_OFFSET;
- unsigned char zero = 0x00;
- size_t len;
-
- D3(printk("jffs_mark_obsolete(): accurate_pos = %u\n", accurate_pos));
- ASSERT(if (!fmc) {
- printk(KERN_ERR "jffs_mark_obsolete(): fmc == NULL\n");
- return -1;
- });
-
- /* Write 0x00 to the raw inode's accurate member. Don't care
- about the return value. */
- MTD_WRITE(fmc->mtd, accurate_pos, 1, &len, &zero);
- return 0;
-}
-
-#endif /* JFFS_MARK_OBSOLETE */
-
-/* check if it's possible to erase the wanted range, and if not, return
- * the range that IS erasable, or a negative error code.
- */
-static long
-jffs_flash_erasable_size(struct mtd_info *mtd, __u32 offset, __u32 size)
-{
- u_long ssize;
-
- /* assume that sector size for a partition is constant even
- * if it spans more than one chip (you usually put the same
- * type of chips in a system)
- */
-
- ssize = mtd->erasesize;
-
- if (offset % ssize) {
- printk(KERN_WARNING "jffs_flash_erasable_size() given non-aligned offset %x (erasesize %lx)\n", offset, ssize);
- /* The offset is not sector size aligned. */
- return -1;
- }
- else if (offset > mtd->size) {
- printk(KERN_WARNING "jffs_flash_erasable_size given offset off the end of device (%x > %x)\n", offset, mtd->size);
- return -2;
- }
- else if (offset + size > mtd->size) {
- printk(KERN_WARNING "jffs_flash_erasable_size() given length which runs off the end of device (ofs %x + len %x = %x, > %x)\n", offset,size, offset+size, mtd->size);
- return -3;
- }
-
- return (size / ssize) * ssize;
-}
-
-
-/* How much dirty flash memory is possible to erase at the moment? */
-long
-jffs_erasable_size(struct jffs_fmcontrol *fmc)
-{
- struct jffs_fm *fm;
- __u32 size = 0;
- long ret;
-
- ASSERT(if (!fmc) {
- printk(KERN_ERR "jffs_erasable_size(): fmc = NULL\n");
- return -1;
- });
-
- if (!fmc->head) {
- /* The flash memory is totally empty. No nodes. No dirt.
- Just return. */
- return 0;
- }
-
- /* Calculate how much space that is dirty. */
- for (fm = fmc->head; fm && !fm->nodes; fm = fm->next) {
- if (size && fm->offset == 0) {
- /* We have reached the beginning of the flash. */
- break;
- }
- size += fm->size;
- }
-
- /* Someone's signature contained this:
- There's a fine line between fishing and just standing on
- the shore like an idiot... */
- ret = jffs_flash_erasable_size(fmc->mtd, fmc->head->offset, size);
-
- ASSERT(if (ret < 0) {
- printk("jffs_erasable_size: flash_erasable_size() "
- "returned something less than zero (%ld).\n", ret);
- printk("jffs_erasable_size: offset = 0x%08x\n",
- fmc->head->offset);
- });
-
- /* If there is dirt on the flash (which is the reason to why
- this function was called in the first place) but no space is
- possible to erase right now, the initial part of the list of
- jffs_fm structs, that hold place for dirty space, could perhaps
- be shortened. The list's initial "dirty" elements are merged
- into just one large dirty jffs_fm struct. This operation must
- only be performed if nothing is possible to erase. Otherwise,
- jffs_clear_end_of_node() won't work as expected. */
- if (ret == 0) {
- struct jffs_fm *head = fmc->head;
- struct jffs_fm *del;
- /* While there are two dirty nodes beside each other.*/
- while (head->nodes == 0
- && head->next
- && head->next->nodes == 0) {
- del = head->next;
- head->size += del->size;
- head->next = del->next;
- if (del->next) {
- del->next->prev = head;
- }
- jffs_free_fm(del);
- }
- }
-
- return (ret >= 0 ? ret : 0);
-}
-
-static struct jffs_fm *jffs_alloc_fm(void)
-{
- struct jffs_fm *fm;
-
- fm = kmem_cache_alloc(fm_cache,GFP_KERNEL);
- DJM(if (fm) no_jffs_fm++;);
-
- return fm;
-}
-
-static void jffs_free_fm(struct jffs_fm *n)
-{
- kmem_cache_free(fm_cache,n);
- DJM(no_jffs_fm--);
-}
-
-
-
-struct jffs_node *jffs_alloc_node(void)
-{
- struct jffs_node *n;
-
- n = (struct jffs_node *)kmem_cache_alloc(node_cache,GFP_KERNEL);
- if(n != NULL)
- no_jffs_node++;
- return n;
-}
-
-void jffs_free_node(struct jffs_node *n)
-{
- kmem_cache_free(node_cache,n);
- no_jffs_node--;
-}
-
-
-int jffs_get_node_inuse(void)
-{
- return no_jffs_node;
-}
diff --git a/fs/jffs/jffs_fm.h b/fs/jffs/jffs_fm.h
deleted file mode 100644
index 9ee6ad29eff5..000000000000
--- a/fs/jffs/jffs_fm.h
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * JFFS -- Journaling Flash File System, Linux implementation.
- *
- * Copyright (C) 1999, 2000 Axis Communications AB.
- *
- * Created by Finn Hakansson <finn@axis.com>.
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * $Id: jffs_fm.h,v 1.13 2001/01/11 12:03:25 dwmw2 Exp $
- *
- * Ported to Linux 2.3.x and MTD:
- * Copyright (C) 2000 Alexander Larsson (alex@cendio.se), Cendio Systems AB
- *
- */
-
-#ifndef __LINUX_JFFS_FM_H__
-#define __LINUX_JFFS_FM_H__
-
-#include <linux/types.h>
-#include <linux/jffs.h>
-#include <linux/mtd/mtd.h>
-#include <linux/mutex.h>
-
-/* The alignment between two nodes in the flash memory. */
-#define JFFS_ALIGN_SIZE 4
-
-/* Mark the on-flash space as obsolete when appropriate. */
-#define JFFS_MARK_OBSOLETE 0
-
-#ifndef CONFIG_JFFS_FS_VERBOSE
-#define CONFIG_JFFS_FS_VERBOSE 1
-#endif
-
-#if CONFIG_JFFS_FS_VERBOSE > 0
-#define D(x) x
-#define D1(x) D(x)
-#else
-#define D(x)
-#define D1(x)
-#endif
-
-#if CONFIG_JFFS_FS_VERBOSE > 1
-#define D2(x) D(x)
-#else
-#define D2(x)
-#endif
-
-#if CONFIG_JFFS_FS_VERBOSE > 2
-#define D3(x) D(x)
-#else
-#define D3(x)
-#endif
-
-#define ASSERT(x) x
-
-/* How many padding bytes should be inserted between two chunks of data
- on the flash? */
-#define JFFS_GET_PAD_BYTES(size) ( (JFFS_ALIGN_SIZE-1) & -(__u32)(size) )
-#define JFFS_PAD(size) ( (size + (JFFS_ALIGN_SIZE-1)) & ~(JFFS_ALIGN_SIZE-1) )
-
-
-
-struct jffs_node_ref
-{
- struct jffs_node *node;
- struct jffs_node_ref *next;
-};
-
-
-/* The struct jffs_fm represents a chunk of data in the flash memory. */
-struct jffs_fm
-{
- __u32 offset;
- __u32 size;
- struct jffs_fm *prev;
- struct jffs_fm *next;
- struct jffs_node_ref *nodes; /* USED if != 0. */
-};
-
-struct jffs_fmcontrol
-{
- __u32 flash_size;
- __u32 used_size;
- __u32 dirty_size;
- __u32 free_size;
- __u32 sector_size;
- __u32 min_free_size; /* The minimum free space needed to be able
- to perform garbage collections. */
- __u32 max_chunk_size; /* The maximum size of a chunk of data. */
- struct mtd_info *mtd;
- struct jffs_control *c;
- struct jffs_fm *head;
- struct jffs_fm *tail;
- struct jffs_fm *head_extra;
- struct jffs_fm *tail_extra;
- struct mutex biglock;
-};
-
-/* Notice the two members head_extra and tail_extra in the jffs_control
- structure above. Those are only used during the scanning of the flash
- memory; while the file system is being built. If the data in the flash
- memory is organized like
-
- +----------------+------------------+----------------+
- | USED / DIRTY | FREE | USED / DIRTY |
- +----------------+------------------+----------------+
-
- then the scan is split in two parts. The first scanned part of the
- flash memory is organized through the members head and tail. The
- second scanned part is organized with head_extra and tail_extra. When
- the scan is completed, the two lists are merged together. The jffs_fm
- struct that head_extra references is the logical beginning of the
- flash memory so it will be referenced by the head member. */
-
-
-
-struct jffs_fmcontrol *jffs_build_begin(struct jffs_control *c, int unit);
-void jffs_build_end(struct jffs_fmcontrol *fmc);
-void jffs_cleanup_fmcontrol(struct jffs_fmcontrol *fmc);
-
-int jffs_fmalloc(struct jffs_fmcontrol *fmc, __u32 size,
- struct jffs_node *node, struct jffs_fm **result);
-int jffs_fmfree(struct jffs_fmcontrol *fmc, struct jffs_fm *fm,
- struct jffs_node *node);
-
-__u32 jffs_free_size1(struct jffs_fmcontrol *fmc);
-__u32 jffs_free_size2(struct jffs_fmcontrol *fmc);
-void jffs_sync_erase(struct jffs_fmcontrol *fmc, int erased_size);
-struct jffs_fm *jffs_cut_node(struct jffs_fmcontrol *fmc, __u32 size);
-struct jffs_node *jffs_get_oldest_node(struct jffs_fmcontrol *fmc);
-long jffs_erasable_size(struct jffs_fmcontrol *fmc);
-struct jffs_fm *jffs_fmalloced(struct jffs_fmcontrol *fmc, __u32 offset,
- __u32 size, struct jffs_node *node);
-int jffs_add_node(struct jffs_node *node);
-void jffs_fmfree_partly(struct jffs_fmcontrol *fmc, struct jffs_fm *fm,
- __u32 size);
-
-#if CONFIG_JFFS_FS_VERBOSE > 0
-void jffs_print_fmcontrol(struct jffs_fmcontrol *fmc);
-#endif
-#if 0
-void jffs_print_node_ref(struct jffs_node_ref *ref);
-#endif /* 0 */
-
-#endif /* __LINUX_JFFS_FM_H__ */
diff --git a/fs/jffs/jffs_proc.c b/fs/jffs/jffs_proc.c
deleted file mode 100644
index 9bdd99a557c2..000000000000
--- a/fs/jffs/jffs_proc.c
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * JFFS -- Journaling Flash File System, Linux implementation.
- *
- * Copyright (C) 2000 Axis Communications AB.
- *
- * Created by Simon Kagstrom <simonk@axis.com>.
- *
- * $Id: jffs_proc.c,v 1.5 2001/06/02 14:34:55 dwmw2 Exp $
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * Overview:
- * This file defines JFFS partition entries in the proc file system.
- *
- * TODO:
- * Create some more proc files for different kinds of info, i.e. statistics
- * about written and read bytes, number of calls to different routines,
- * reports about failures.
- */
-
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/jffs.h>
-#include <linux/slab.h>
-#include <linux/proc_fs.h>
-#include <linux/time.h>
-#include <linux/types.h>
-#include "jffs_fm.h"
-#include "jffs_proc.h"
-
-/*
- * Structure for a JFFS partition in the system
- */
-struct jffs_partition_dir {
- struct jffs_control *c;
- struct proc_dir_entry *part_root;
- struct proc_dir_entry *part_info;
- struct proc_dir_entry *part_layout;
- struct jffs_partition_dir *next;
-};
-
-/*
- * Structure for top-level entry in '/proc/fs' directory
- */
-struct proc_dir_entry *jffs_proc_root;
-
-/*
- * Linked list of 'jffs_partition_dirs' to help us track
- * the mounted JFFS partitions in the system
- */
-static struct jffs_partition_dir *jffs_part_dirs;
-
-/*
- * Read functions for entries
- */
-static int jffs_proc_info_read(char *page, char **start, off_t off,
- int count, int *eof, void *data);
-static int jffs_proc_layout_read (char *page, char **start, off_t off,
- int count, int *eof, void *data);
-
-
-/*
- * Register a JFFS partition directory (called upon mount)
- */
-int jffs_register_jffs_proc_dir(int mtd, struct jffs_control *c)
-{
- struct jffs_partition_dir *part_dir;
- struct proc_dir_entry *part_info = NULL;
- struct proc_dir_entry *part_layout = NULL;
- struct proc_dir_entry *part_root = NULL;
- char name[10];
-
- sprintf(name, "%d", mtd);
- /* Allocate structure for local JFFS partition table */
- part_dir = (struct jffs_partition_dir *)
- kmalloc(sizeof (struct jffs_partition_dir), GFP_KERNEL);
- if (!part_dir)
- goto out;
-
- /* Create entry for this partition */
- part_root = proc_mkdir(name, jffs_proc_root);
- if (!part_root)
- goto out1;
-
- /* Create entry for 'info' file */
- part_info = create_proc_entry ("info", 0, part_root);
- if (!part_info)
- goto out2;
- part_info->read_proc = jffs_proc_info_read;
- part_info->data = (void *) c;
-
- /* Create entry for 'layout' file */
- part_layout = create_proc_entry ("layout", 0, part_root);
- if (!part_layout)
- goto out3;
- part_layout->read_proc = jffs_proc_layout_read;
- part_layout->data = (void *) c;
-
- /* Fill in structure for table and insert in the list */
- part_dir->c = c;
- part_dir->part_root = part_root;
- part_dir->part_info = part_info;
- part_dir->part_layout = part_layout;
- part_dir->next = jffs_part_dirs;
- jffs_part_dirs = part_dir;
-
- /* Return happy */
- return 0;
-
-out3:
- remove_proc_entry("info", part_root);
-out2:
- remove_proc_entry(name, jffs_proc_root);
-out1:
- kfree(part_dir);
-out:
- return -ENOMEM;
-}
-
-
-/*
- * Unregister a JFFS partition directory (called at umount)
- */
-int jffs_unregister_jffs_proc_dir(struct jffs_control *c)
-{
- struct jffs_partition_dir *part_dir = jffs_part_dirs;
- struct jffs_partition_dir *prev_part_dir = NULL;
-
- while (part_dir) {
- if (part_dir->c == c) {
- /* Remove entries for partition */
- remove_proc_entry (part_dir->part_info->name,
- part_dir->part_root);
- remove_proc_entry (part_dir->part_layout->name,
- part_dir->part_root);
- remove_proc_entry (part_dir->part_root->name,
- jffs_proc_root);
-
- /* Remove entry from list */
- if (prev_part_dir)
- prev_part_dir->next = part_dir->next;
- else
- jffs_part_dirs = part_dir->next;
-
- /*
- * Check to see if this is the last one
- * and remove the entry from '/proc/fs'
- * if it is.
- */
- if (jffs_part_dirs == part_dir->next)
- remove_proc_entry ("jffs", proc_root_fs);
-
- /* Free memory for entry */
- kfree(part_dir);
-
- /* Return happy */
- return 0;
- }
-
- /* Move to next entry */
- prev_part_dir = part_dir;
- part_dir = part_dir->next;
- }
-
- /* Return unhappy */
- return -1;
-}
-
-
-/*
- * Read a JFFS partition's `info' file
- */
-static int jffs_proc_info_read (char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- struct jffs_control *c = (struct jffs_control *) data;
- int len = 0;
-
- /* Get information on the parition */
- len += sprintf (page,
- "partition size: %08lX (%u)\n"
- "sector size: %08lX (%u)\n"
- "used size: %08lX (%u)\n"
- "dirty size: %08lX (%u)\n"
- "free size: %08lX (%u)\n\n",
- (unsigned long) c->fmc->flash_size, c->fmc->flash_size,
- (unsigned long) c->fmc->sector_size, c->fmc->sector_size,
- (unsigned long) c->fmc->used_size, c->fmc->used_size,
- (unsigned long) c->fmc->dirty_size, c->fmc->dirty_size,
- (unsigned long) (c->fmc->flash_size -
- (c->fmc->used_size + c->fmc->dirty_size)),
- c->fmc->flash_size - (c->fmc->used_size + c->fmc->dirty_size));
-
- /* We're done */
- *eof = 1;
-
- /* Return length */
- return len;
-}
-
-
-/*
- * Read a JFFS partition's `layout' file
- */
-static int jffs_proc_layout_read (char *page, char **start, off_t off,
- int count, int *eof, void *data)
-{
- struct jffs_control *c = (struct jffs_control *) data;
- struct jffs_fm *fm = NULL;
- struct jffs_fm *last_fm = NULL;
- int len = 0;
-
- /* Get the first item in the list */
- fm = c->fmc->head;
-
- /* Print free space */
- if (fm && fm->offset) {
- len += sprintf (page, "00000000 %08lX free\n",
- (unsigned long) fm->offset);
- }
-
- /* Loop through all of the flash control structures */
- while (fm && (len < (off + count))) {
- if (fm->nodes) {
- len += sprintf (page + len,
- "%08lX %08lX ino=%08lX, ver=%08lX\n",
- (unsigned long) fm->offset,
- (unsigned long) fm->size,
- (unsigned long) fm->nodes->node->ino,
- (unsigned long) fm->nodes->node->version);
- }
- else {
- len += sprintf (page + len,
- "%08lX %08lX dirty\n",
- (unsigned long) fm->offset,
- (unsigned long) fm->size);
- }
- last_fm = fm;
- fm = fm->next;
- }
-
- /* Print free space */
- if ((len < (off + count)) && last_fm
- && (last_fm->offset < c->fmc->flash_size)) {
- len += sprintf (page + len,
- "%08lX %08lX free\n",
- (unsigned long) last_fm->offset +
- last_fm->size,
- (unsigned long) (c->fmc->flash_size -
- (last_fm->offset + last_fm->size)));
- }
-
- /* We're done */
- *eof = 1;
-
- /* Return length */
- return len;
-}
diff --git a/fs/jffs/jffs_proc.h b/fs/jffs/jffs_proc.h
deleted file mode 100644
index 39a1c5d162b0..000000000000
--- a/fs/jffs/jffs_proc.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * JFFS -- Journaling Flash File System, Linux implementation.
- *
- * Copyright (C) 2000 Axis Communications AB.
- *
- * Created by Simon Kagstrom <simonk@axis.com>.
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * $Id: jffs_proc.h,v 1.2 2000/11/15 22:04:12 sjhill Exp $
- */
-
-/* jffs_proc.h defines a structure for inclusion in the proc-file system. */
-#ifndef __LINUX_JFFS_PROC_H__
-#define __LINUX_JFFS_PROC_H__
-
-#include <linux/proc_fs.h>
-
-/* The proc_dir_entry for jffs (defined in jffs_proc.c). */
-extern struct proc_dir_entry *jffs_proc_root;
-
-int jffs_register_jffs_proc_dir(int mtd, struct jffs_control *c);
-int jffs_unregister_jffs_proc_dir(struct jffs_control *c);
-
-#endif /* __LINUX_JFFS_PROC_H__ */
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 02826967ab58..07119c42a861 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -348,23 +348,27 @@ int jffs2_do_mount_fs(struct jffs2_sb_info *c)
ret = jffs2_sum_init(c);
if (ret)
- return ret;
+ goto out_free;
if (jffs2_build_filesystem(c)) {
dbg_fsbuild("build_fs failed\n");
jffs2_free_ino_caches(c);
jffs2_free_raw_node_refs(c);
-#ifndef __ECOS
- if (jffs2_blocks_use_vmalloc(c))
- vfree(c->blocks);
- else
-#endif
- kfree(c->blocks);
-
- return -EIO;
+ ret = -EIO;
+ goto out_free;
}
jffs2_calc_trigger_levels(c);
return 0;
+
+ out_free:
+#ifndef __ECOS
+ if (jffs2_blocks_use_vmalloc(c))
+ vfree(c->blocks);
+ else
+#endif
+ kfree(c->blocks);
+
+ return ret;
}
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 3681d0728ac7..0c1fc6e20b43 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -16,7 +16,6 @@
#endif
#include <linux/kernel.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/zlib.h>
#include <linux/zutil.h>
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index da6034d50718..9fa2e27f0641 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -13,7 +13,6 @@
#include <linux/kernel.h>
#include <linux/slab.h>
-#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/crc32.h>
#include <linux/jffs2.h>
@@ -46,7 +45,7 @@ const struct file_operations jffs2_dir_operations =
};
-struct inode_operations jffs2_dir_inode_operations =
+const struct inode_operations jffs2_dir_inode_operations =
{
.create = jffs2_create,
.lookup = jffs2_lookup,
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 242875f77cb3..e82eeaf7590d 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -54,7 +54,7 @@ const struct file_operations jffs2_file_operations =
/* jffs2_file_inode_operations */
-struct inode_operations jffs2_file_inode_operations =
+const struct inode_operations jffs2_file_inode_operations =
{
.permission = jffs2_permission,
.setattr = jffs2_setattr,
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index b98594992eed..ea88f69af130 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -98,20 +98,14 @@ struct jffs2_sb_info {
uint32_t wbuf_pagesize; /* 0 for NOR and other flashes with no wbuf */
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
- /* Write-behind buffer for NAND flash */
- unsigned char *wbuf;
- unsigned char *oobbuf;
+ unsigned char *wbuf; /* Write-behind buffer for NAND flash */
uint32_t wbuf_ofs;
uint32_t wbuf_len;
struct jffs2_inodirty *wbuf_inodes;
-
struct rw_semaphore wbuf_sem; /* Protects the write buffer */
- /* Information about out-of-band area usage... */
- struct nand_ecclayout *ecclayout;
- uint32_t badblock_pos;
- uint32_t fsdata_pos;
- uint32_t fsdata_len;
+ unsigned char *oobbuf;
+ int oobavail; /* How many bytes are available for JFFS2 in OOB */
#endif
struct jffs2_summary *summary; /* Summary information */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 9f41fc01a371..e07a0edcdb4f 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -153,11 +153,11 @@ void jffs2_garbage_collect_trigger(struct jffs2_sb_info *c);
/* dir.c */
extern const struct file_operations jffs2_dir_operations;
-extern struct inode_operations jffs2_dir_inode_operations;
+extern const struct inode_operations jffs2_dir_inode_operations;
/* file.c */
extern const struct file_operations jffs2_file_operations;
-extern struct inode_operations jffs2_file_inode_operations;
+extern const struct inode_operations jffs2_file_inode_operations;
extern const struct address_space_operations jffs2_file_address_operations;
int jffs2_fsync(struct file *, struct dentry *, int);
int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
@@ -166,7 +166,7 @@ int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long);
/* symlink.c */
-extern struct inode_operations jffs2_symlink_inode_operations;
+extern const struct inode_operations jffs2_symlink_inode_operations;
/* fs.c */
int jffs2_setattr (struct dentry *, struct iattr *);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 3af746eaff0e..31c1475d922a 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -450,16 +450,20 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
if (jffs2_cleanmarker_oob(c)) {
- int ret = jffs2_check_nand_cleanmarker(c, jeb);
+ int ret;
+
+ if (c->mtd->block_isbad(c->mtd, jeb->offset))
+ return BLK_STATE_BADBLOCK;
+
+ ret = jffs2_check_nand_cleanmarker(c, jeb);
D2(printk(KERN_NOTICE "jffs_check_nand_cleanmarker returned %d\n",ret));
+
/* Even if it's not found, we still scan to see
if the block is empty. We use this information
to decide whether to erase it or not. */
switch (ret) {
case 0: cleanmarkerfound = 1; break;
case 1: break;
- case 2: return BLK_STATE_BADBLOCK;
- case 3: return BLK_STATE_ALLDIRTY; /* Block has failed to erase min. once */
default: return ret;
}
}
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 25265965bdc1..30f888414ce7 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -14,7 +14,6 @@
*/
#include <linux/kernel.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mtd/mtd.h>
#include <linux/pagemap.h>
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 08a0e6c49e61..cc7e8e71ad46 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -66,7 +66,7 @@ static int jffs2_sync_fs(struct super_block *sb, int wait)
return 0;
}
-static struct super_operations jffs2_super_operations =
+static const struct super_operations jffs2_super_operations =
{
.alloc_inode = jffs2_alloc_inode,
.destroy_inode =jffs2_destroy_inode,
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index b90d5aa3d969..7e4882c8a7ed 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -20,7 +20,7 @@
static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd);
-struct inode_operations jffs2_symlink_inode_operations =
+const struct inode_operations jffs2_symlink_inode_operations =
{
.readlink = generic_readlink,
.follow_link = jffs2_follow_link,
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 9c99859f5edd..de718e3a1692 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -957,43 +957,48 @@ exit:
return ret;
}
-#define NR_OOB_SCAN_PAGES 4
+#define NR_OOB_SCAN_PAGES 4
+
+/* For historical reasons we use only 12 bytes for OOB clean marker */
+#define OOB_CM_SIZE 12
+
+static const struct jffs2_unknown_node oob_cleanmarker =
+{
+ .magic = cpu_to_je16(JFFS2_MAGIC_BITMASK),
+ .nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER),
+ .totlen = cpu_to_je32(8)
+};
/*
- * Check, if the out of band area is empty
+ * Check, if the out of band area is empty. This function knows about the clean
+ * marker and if it is present in OOB, treats the OOB as empty anyway.
*/
int jffs2_check_oob_empty(struct jffs2_sb_info *c,
struct jffs2_eraseblock *jeb, int mode)
{
- int i, page, ret;
- int oobsize = c->mtd->oobsize;
+ int i, ret;
+ int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
struct mtd_oob_ops ops;
- ops.ooblen = NR_OOB_SCAN_PAGES * oobsize;
+ ops.mode = MTD_OOB_AUTO;
+ ops.ooblen = NR_OOB_SCAN_PAGES * c->oobavail;
ops.oobbuf = c->oobbuf;
- ops.ooboffs = 0;
+ ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
ops.datbuf = NULL;
- ops.mode = MTD_OOB_PLACE;
ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
- if (ret) {
- D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB "
- "failed %d for block at %08x\n", ret, jeb->offset));
+ if (ret || ops.oobretlen != ops.ooblen) {
+ printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
+ " bytes, read %zd bytes, error %d\n",
+ jeb->offset, ops.ooblen, ops.oobretlen, ret);
+ if (!ret)
+ ret = -EIO;
return ret;
}
- if (ops.oobretlen < ops.ooblen) {
- D1(printk(KERN_WARNING "jffs2_check_oob_empty(): Read OOB "
- "returned short read (%zd bytes not %d) for block "
- "at %08x\n", ops.oobretlen, ops.ooblen, jeb->offset));
- return -EIO;
- }
-
- /* Special check for first page */
- for(i = 0; i < oobsize ; i++) {
- /* Yeah, we know about the cleanmarker. */
- if (mode && i >= c->fsdata_pos &&
- i < c->fsdata_pos + c->fsdata_len)
+ for(i = 0; i < ops.ooblen; i++) {
+ if (mode && i < cmlen)
+ /* Yeah, we know about the cleanmarker */
continue;
if (ops.oobbuf[i] != 0xFF) {
@@ -1003,111 +1008,63 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
}
}
- /* we know, we are aligned :) */
- for (page = oobsize; page < ops.ooblen; page += sizeof(long)) {
- long dat = *(long *)(&ops.oobbuf[page]);
- if(dat != -1)
- return 1;
- }
return 0;
}
/*
- * Scan for a valid cleanmarker and for bad blocks
+ * Check for a valid cleanmarker.
+ * Returns: 0 if a valid cleanmarker was found
+ * 1 if no cleanmarker was found
+ * negative error code if an error occurred
*/
-int jffs2_check_nand_cleanmarker (struct jffs2_sb_info *c,
- struct jffs2_eraseblock *jeb)
+int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
+ struct jffs2_eraseblock *jeb)
{
- struct jffs2_unknown_node n;
struct mtd_oob_ops ops;
- int oobsize = c->mtd->oobsize;
- unsigned char *p,*b;
- int i, ret;
- size_t offset = jeb->offset;
-
- /* Check first if the block is bad. */
- if (c->mtd->block_isbad(c->mtd, offset)) {
- D1 (printk(KERN_WARNING "jffs2_check_nand_cleanmarker()"
- ": Bad block at %08x\n", jeb->offset));
- return 2;
- }
+ int ret, cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
- ops.ooblen = oobsize;
+ ops.mode = MTD_OOB_AUTO;
+ ops.ooblen = cmlen;
ops.oobbuf = c->oobbuf;
- ops.ooboffs = 0;
+ ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
ops.datbuf = NULL;
- ops.mode = MTD_OOB_PLACE;
- ret = c->mtd->read_oob(c->mtd, offset, &ops);
- if (ret) {
- D1 (printk(KERN_WARNING "jffs2_check_nand_cleanmarker(): "
- "Read OOB failed %d for block at %08x\n",
- ret, jeb->offset));
+ ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
+ if (ret || ops.oobretlen != ops.ooblen) {
+ printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
+ " bytes, read %zd bytes, error %d\n",
+ jeb->offset, ops.ooblen, ops.oobretlen, ret);
+ if (!ret)
+ ret = -EIO;
return ret;
}
- if (ops.oobretlen < ops.ooblen) {
- D1 (printk (KERN_WARNING "jffs2_check_nand_cleanmarker(): "
- "Read OOB return short read (%zd bytes not %d) "
- "for block at %08x\n", ops.oobretlen, ops.ooblen,
- jeb->offset));
- return -EIO;
- }
-
- n.magic = cpu_to_je16 (JFFS2_MAGIC_BITMASK);
- n.nodetype = cpu_to_je16 (JFFS2_NODETYPE_CLEANMARKER);
- n.totlen = cpu_to_je32 (8);
- p = (unsigned char *) &n;
- b = c->oobbuf + c->fsdata_pos;
-
- for (i = c->fsdata_len; i; i--) {
- if (*b++ != *p++)
- ret = 1;
- }
-
- D1(if (ret == 1) {
- printk(KERN_WARNING "jffs2_check_nand_cleanmarker(): "
- "Cleanmarker node not detected in block at %08x\n",
- offset);
- printk(KERN_WARNING "OOB at %08zx was ", offset);
- for (i=0; i < oobsize; i++)
- printk("%02x ", c->oobbuf[i]);
- printk("\n");
- });
- return ret;
+ return !!memcmp(&oob_cleanmarker, c->oobbuf, cmlen);
}
int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
struct jffs2_eraseblock *jeb)
{
- struct jffs2_unknown_node n;
- int ret;
+ int ret;
struct mtd_oob_ops ops;
+ int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
- n.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
- n.nodetype = cpu_to_je16(JFFS2_NODETYPE_CLEANMARKER);
- n.totlen = cpu_to_je32(8);
-
- ops.ooblen = c->fsdata_len;
- ops.oobbuf = (uint8_t *)&n;
- ops.ooboffs = c->fsdata_pos;
+ ops.mode = MTD_OOB_AUTO;
+ ops.ooblen = cmlen;
+ ops.oobbuf = (uint8_t *)&oob_cleanmarker;
+ ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
ops.datbuf = NULL;
- ops.mode = MTD_OOB_PLACE;
ret = c->mtd->write_oob(c->mtd, jeb->offset, &ops);
-
- if (ret) {
- D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): "
- "Write failed for block at %08x: error %d\n",
- jeb->offset, ret));
+ if (ret || ops.oobretlen != ops.ooblen) {
+ printk(KERN_ERR "cannot write OOB for EB at %08x, requested %zd"
+ " bytes, read %zd bytes, error %d\n",
+ jeb->offset, ops.ooblen, ops.oobretlen, ret);
+ if (!ret)
+ ret = -EIO;
return ret;
}
- if (ops.oobretlen != ops.ooblen) {
- D1(printk(KERN_WARNING "jffs2_write_nand_cleanmarker(): "
- "Short write for block at %08x: %zd not %d\n",
- jeb->offset, ops.oobretlen, ops.ooblen));
- return -EIO;
- }
+
return 0;
}
@@ -1140,41 +1097,24 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock *
return 1;
}
-static int jffs2_nand_set_oobinfo(struct jffs2_sb_info *c)
+int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
{
struct nand_ecclayout *oinfo = c->mtd->ecclayout;
- /* Do this only, if we have an oob buffer */
if (!c->mtd->oobsize)
return 0;
/* Cleanmarker is out-of-band, so inline size zero */
c->cleanmarker_size = 0;
- /* Should we use autoplacement ? */
- if (!oinfo) {
- D1(printk(KERN_DEBUG "JFFS2 on NAND. No autoplacment info found\n"));
+ if (!oinfo || oinfo->oobavail == 0) {
+ printk(KERN_ERR "inconsistent device description\n");
return -EINVAL;
}
- D1(printk(KERN_DEBUG "JFFS2 using autoplace on NAND\n"));
- /* Get the position of the free bytes */
- if (!oinfo->oobfree[0].length) {
- printk (KERN_WARNING "jffs2_nand_set_oobinfo(): Eeep."
- " Autoplacement selected and no empty space in oob\n");
- return -ENOSPC;
- }
- c->fsdata_pos = oinfo->oobfree[0].offset;
- c->fsdata_len = oinfo->oobfree[0].length;
- if (c->fsdata_len > 8)
- c->fsdata_len = 8;
+ D1(printk(KERN_DEBUG "JFFS2 using OOB on NAND\n"));
- return 0;
-}
-
-int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
-{
- int res;
+ c->oobavail = oinfo->oobavail;
/* Initialise write buffer */
init_rwsem(&c->wbuf_sem);
@@ -1185,22 +1125,13 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
if (!c->wbuf)
return -ENOMEM;
- c->oobbuf = kmalloc(NR_OOB_SCAN_PAGES * c->mtd->oobsize, GFP_KERNEL);
- if (!c->oobbuf)
- return -ENOMEM;
-
- res = jffs2_nand_set_oobinfo(c);
-
-#ifdef BREAKME
- if (!brokenbuf)
- brokenbuf = kmalloc(c->wbuf_pagesize, GFP_KERNEL);
- if (!brokenbuf) {
+ c->oobbuf = kmalloc(NR_OOB_SCAN_PAGES * c->oobavail, GFP_KERNEL);
+ if (!c->oobbuf) {
kfree(c->wbuf);
return -ENOMEM;
}
- memset(brokenbuf, 0xdb, c->wbuf_pagesize);
-#endif
- return res;
+
+ return 0;
}
void jffs2_nand_flash_cleanup(struct jffs2_sb_info *c)
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index aa9132d04920..f7f8eff19b7b 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -88,7 +88,7 @@ static int jfs_release(struct inode *inode, struct file *file)
return 0;
}
-struct inode_operations jfs_file_inode_operations = {
+const struct inode_operations jfs_file_inode_operations = {
.truncate = jfs_truncate,
.setxattr = jfs_setxattr,
.getxattr = jfs_getxattr,
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index f5719117edfe..e285022f006c 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -182,9 +182,9 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
* Take appropriate lock on inode
*/
if (create)
- IWRITE_LOCK(ip);
+ IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
else
- IREAD_LOCK(ip);
+ IREAD_LOCK(ip, RDWRLOCK_NORMAL);
if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
(!xtLookup(ip, lblock64, xlen, &xflag, &xaddr, &xlen, 0)) &&
@@ -359,7 +359,7 @@ void jfs_truncate(struct inode *ip)
nobh_truncate_page(ip->i_mapping, ip->i_size);
- IWRITE_LOCK(ip);
+ IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
jfs_truncate_nolock(ip, ip->i_size);
IWRITE_UNLOCK(ip);
}
diff --git a/fs/jfs/jfs_debug.h b/fs/jfs/jfs_debug.h
index ddffbbd4d955..7378798f0b21 100644
--- a/fs/jfs/jfs_debug.h
+++ b/fs/jfs/jfs_debug.h
@@ -39,10 +39,6 @@ extern void jfs_proc_clean(void);
/*
* assert with traditional printf/panic
*/
-#ifdef CONFIG_KERNEL_ASSERTS
-/* kgdb stuff */
-#define assert(p) KERNEL_ASSERT(#p, p)
-#else
#define assert(p) do { \
if (!(p)) { \
printk(KERN_CRIT "BUG at %s:%d assert(%s)\n", \
@@ -50,7 +46,6 @@ extern void jfs_proc_clean(void);
BUG(); \
} \
} while (0)
-#endif
/*
* debug ON
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 23546c8fd48b..82b0544bd76d 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -337,7 +337,7 @@ int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
- IREAD_LOCK(ipbmap);
+ IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
/* block to be freed better be within the mapsize. */
if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) {
@@ -733,7 +733,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
* allocation group size, try to allocate anywhere.
*/
if (l2nb > bmp->db_agl2size) {
- IWRITE_LOCK(ipbmap);
+ IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
rc = dbAllocAny(bmp, nblocks, l2nb, results);
@@ -774,7 +774,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
* the hint using a tiered strategy.
*/
if (nblocks <= BPERDMAP) {
- IREAD_LOCK(ipbmap);
+ IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
/* get the buffer for the dmap containing the hint.
*/
@@ -844,7 +844,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
/* try to satisfy the allocation request with blocks within
* the same allocation group as the hint.
*/
- IWRITE_LOCK(ipbmap);
+ IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
goto write_unlock;
@@ -856,7 +856,7 @@ int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
* Let dbNextAG recommend a preferred allocation group
*/
agno = dbNextAG(ipbmap);
- IWRITE_LOCK(ipbmap);
+ IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
/* Try to allocate within this allocation group. if that fails, try to
* allocate anywhere in the map.
@@ -900,7 +900,7 @@ int dbAllocExact(struct inode *ip, s64 blkno, int nblocks)
s64 lblkno;
struct metapage *mp;
- IREAD_LOCK(ipbmap);
+ IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
/*
* validate extent request:
@@ -1050,7 +1050,7 @@ static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
*/
extblkno = lastblkno + 1;
- IREAD_LOCK(ipbmap);
+ IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
/* better be within the file system */
bmp = sbi->bmap;
@@ -3116,7 +3116,7 @@ int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
- IREAD_LOCK(ipbmap);
+ IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
/* block to be allocated better be within the mapsize. */
ASSERT(nblocks <= bmp->db_mapsize - blkno);
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 53f63b47a6d3..aa5124b643b1 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -331,7 +331,7 @@ int diRead(struct inode *ip)
/* read the iag */
imap = JFS_IP(ipimap)->i_imap;
- IREAD_LOCK(ipimap);
+ IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
rc = diIAGRead(imap, iagno, &mp);
IREAD_UNLOCK(ipimap);
if (rc) {
@@ -920,7 +920,7 @@ int diFree(struct inode *ip)
/* Obtain read lock in imap inode. Don't release it until we have
* read all of the IAG's that we are going to.
*/
- IREAD_LOCK(ipimap);
+ IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
/* read the iag.
*/
@@ -1415,7 +1415,7 @@ int diAlloc(struct inode *pip, bool dir, struct inode *ip)
AG_LOCK(imap, agno);
/* Get read lock on imap inode */
- IREAD_LOCK(ipimap);
+ IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
/* get the iag number and read the iag */
iagno = INOTOIAG(inum);
@@ -1808,7 +1808,7 @@ static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
return -ENOSPC;
/* obtain read lock on imap inode */
- IREAD_LOCK(imap->im_ipimap);
+ IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
/* read the iag at the head of the list.
*/
@@ -1946,7 +1946,7 @@ static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
} else {
/* read the iag.
*/
- IREAD_LOCK(imap->im_ipimap);
+ IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
if ((rc = diIAGRead(imap, iagno, &mp))) {
IREAD_UNLOCK(imap->im_ipimap);
jfs_error(ip->i_sb, "diAllocExt: error reading iag");
@@ -2509,7 +2509,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
*/
/* acquire inode map lock */
- IWRITE_LOCK(ipimap);
+ IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);
if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
IWRITE_UNLOCK(ipimap);
@@ -2648,7 +2648,7 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
}
/* obtain read lock on map */
- IREAD_LOCK(ipimap);
+ IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
/* read the iag */
if ((rc = diIAGRead(imap, iagno, &mp))) {
@@ -2779,7 +2779,7 @@ diUpdatePMap(struct inode *ipimap,
return -EIO;
}
/* read the iag */
- IREAD_LOCK(ipimap);
+ IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
rc = diIAGRead(imap, iagno, &mp);
IREAD_UNLOCK(ipimap);
if (rc)
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 94005584445a..8f453eff3c83 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -109,9 +109,11 @@ struct jfs_inode_info {
#define JFS_ACL_NOT_CACHED ((void *)-1)
-#define IREAD_LOCK(ip) down_read(&JFS_IP(ip)->rdwrlock)
+#define IREAD_LOCK(ip, subclass) \
+ down_read_nested(&JFS_IP(ip)->rdwrlock, subclass)
#define IREAD_UNLOCK(ip) up_read(&JFS_IP(ip)->rdwrlock)
-#define IWRITE_LOCK(ip) down_write(&JFS_IP(ip)->rdwrlock)
+#define IWRITE_LOCK(ip, subclass) \
+ down_write_nested(&JFS_IP(ip)->rdwrlock, subclass)
#define IWRITE_UNLOCK(ip) up_write(&JFS_IP(ip)->rdwrlock)
/*
@@ -127,6 +129,29 @@ enum cflags {
COMMIT_Synclist, /* metadata pages on group commit synclist */
};
+/*
+ * commit_mutex nesting subclasses:
+ */
+enum commit_mutex_class
+{
+ COMMIT_MUTEX_PARENT,
+ COMMIT_MUTEX_CHILD,
+ COMMIT_MUTEX_SECOND_PARENT, /* Renaming */
+ COMMIT_MUTEX_VICTIM /* Inode being unlinked due to rename */
+};
+
+/*
+ * rdwrlock subclasses:
+ * The dmap inode may be locked while a normal inode or the imap inode are
+ * locked.
+ */
+enum rdwrlock_class
+{
+ RDWRLOCK_NORMAL,
+ RDWRLOCK_IMAP,
+ RDWRLOCK_DMAP
+};
+
#define set_cflag(flag, ip) set_bit(flag, &(JFS_IP(ip)->cflag))
#define clear_cflag(flag, ip) clear_bit(flag, &(JFS_IP(ip)->cflag))
#define test_cflag(flag, ip) test_bit(flag, &(JFS_IP(ip)->cflag))
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index 0d06ccfaff0e..6802837f757e 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -35,10 +35,10 @@ extern void jfs_set_inode_flags(struct inode *);
extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
extern const struct address_space_operations jfs_aops;
-extern struct inode_operations jfs_dir_inode_operations;
+extern const struct inode_operations jfs_dir_inode_operations;
extern const struct file_operations jfs_dir_operations;
-extern struct inode_operations jfs_file_inode_operations;
+extern const struct inode_operations jfs_file_inode_operations;
extern const struct file_operations jfs_file_operations;
-extern struct inode_operations jfs_symlink_inode_operations;
+extern const struct inode_operations jfs_symlink_inode_operations;
extern struct dentry_operations jfs_ci_dentry_operations;
#endif /* _H_JFS_INODE */
diff --git a/fs/jfs/jfs_lock.h b/fs/jfs/jfs_lock.h
index 7d78e83d7c40..df48ece4b7a3 100644
--- a/fs/jfs/jfs_lock.h
+++ b/fs/jfs/jfs_lock.h
@@ -42,7 +42,7 @@ do { \
if (cond) \
break; \
unlock_cmd; \
- schedule(); \
+ io_schedule(); \
lock_cmd; \
} \
current->state = TASK_RUNNING; \
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index ceaf03b94935..58deae007507 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -56,7 +56,7 @@ static inline void __lock_metapage(struct metapage *mp)
set_current_state(TASK_UNINTERRUPTIBLE);
if (metapage_locked(mp)) {
unlock_page(mp->page);
- schedule();
+ io_schedule();
lock_page(mp->page);
}
} while (trylock_metapage(mp));
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index d558e51b0df8..6988a1082f58 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -135,7 +135,7 @@ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event)
add_wait_queue(event, &wait);
set_current_state(TASK_UNINTERRUPTIBLE);
TXN_UNLOCK();
- schedule();
+ io_schedule();
current->state = TASK_RUNNING;
remove_wait_queue(event, &wait);
}
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index e98eb03e5310..acc97c46d8a4 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -757,6 +757,11 @@ static int xtSearch(struct inode *ip, s64 xoff, s64 *nextp,
nsplit = 0;
/* push (bn, index) of the parent page/entry */
+ if (BT_STACK_FULL(btstack)) {
+ jfs_error(ip->i_sb, "stack overrun in xtSearch!");
+ XT_PUTPAGE(mp);
+ return -EIO;
+ }
BT_PUSH(btstack, bn, index);
/* get the child page block number */
@@ -3915,6 +3920,11 @@ s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
*/
getChild:
/* save current parent entry for the child page */
+ if (BT_STACK_FULL(&btstack)) {
+ jfs_error(ip->i_sb, "stack overrun in xtTruncate!");
+ XT_PUTPAGE(mp);
+ return -EIO;
+ }
BT_PUSH(&btstack, bn, index);
/* get child page */
@@ -4112,6 +4122,11 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
*/
getChild:
/* save current parent entry for the child page */
+ if (BT_STACK_FULL(&btstack)) {
+ jfs_error(ip->i_sb, "stack overrun in xtTruncate_pmap!");
+ XT_PUTPAGE(mp);
+ return -EIO;
+ }
BT_PUSH(&btstack, bn, index);
/* get child page */
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a6a8c16c872c..41c204771262 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -104,8 +104,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, int mode,
tid = txBegin(dip->i_sb, 0);
- mutex_lock(&JFS_IP(dip)->commit_mutex);
- mutex_lock(&JFS_IP(ip)->commit_mutex);
+ mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
+ mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
rc = jfs_init_acl(tid, ip, dip);
if (rc)
@@ -238,8 +238,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, int mode)
tid = txBegin(dip->i_sb, 0);
- mutex_lock(&JFS_IP(dip)->commit_mutex);
- mutex_lock(&JFS_IP(ip)->commit_mutex);
+ mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
+ mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
rc = jfs_init_acl(tid, ip, dip);
if (rc)
@@ -365,8 +365,8 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
tid = txBegin(dip->i_sb, 0);
- mutex_lock(&JFS_IP(dip)->commit_mutex);
- mutex_lock(&JFS_IP(ip)->commit_mutex);
+ mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
+ mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
iplist[0] = dip;
iplist[1] = ip;
@@ -483,12 +483,12 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
if ((rc = get_UCSname(&dname, dentry)))
goto out;
- IWRITE_LOCK(ip);
+ IWRITE_LOCK(ip, RDWRLOCK_NORMAL);
tid = txBegin(dip->i_sb, 0);
- mutex_lock(&JFS_IP(dip)->commit_mutex);
- mutex_lock(&JFS_IP(ip)->commit_mutex);
+ mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
+ mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
iplist[0] = dip;
iplist[1] = ip;
@@ -802,8 +802,8 @@ static int jfs_link(struct dentry *old_dentry,
tid = txBegin(ip->i_sb, 0);
- mutex_lock(&JFS_IP(dir)->commit_mutex);
- mutex_lock(&JFS_IP(ip)->commit_mutex);
+ mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
+ mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
/*
* scan parent directory for entry/freespace
@@ -913,8 +913,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
tid = txBegin(dip->i_sb, 0);
- mutex_lock(&JFS_IP(dip)->commit_mutex);
- mutex_lock(&JFS_IP(ip)->commit_mutex);
+ mutex_lock_nested(&JFS_IP(dip)->commit_mutex, COMMIT_MUTEX_PARENT);
+ mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
rc = jfs_init_security(tid, ip, dip);
if (rc)
@@ -1127,7 +1127,7 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out3;
}
} else if (new_ip) {
- IWRITE_LOCK(new_ip);
+ IWRITE_LOCK(new_ip, RDWRLOCK_NORMAL);
/* Init inode for quota operations. */
DQUOT_INIT(new_ip);
}
@@ -1137,13 +1137,21 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
tid = txBegin(new_dir->i_sb, 0);
- mutex_lock(&JFS_IP(new_dir)->commit_mutex);
- mutex_lock(&JFS_IP(old_ip)->commit_mutex);
+ /*
+ * How do we know the locking is safe from deadlocks?
+ * The vfs does the hard part for us. Any time we are taking nested
+ * commit_mutexes, the vfs already has i_mutex held on the parent.
+ * Here, the vfs has already taken i_mutex on both old_dir and new_dir.
+ */
+ mutex_lock_nested(&JFS_IP(new_dir)->commit_mutex, COMMIT_MUTEX_PARENT);
+ mutex_lock_nested(&JFS_IP(old_ip)->commit_mutex, COMMIT_MUTEX_CHILD);
if (old_dir != new_dir)
- mutex_lock(&JFS_IP(old_dir)->commit_mutex);
+ mutex_lock_nested(&JFS_IP(old_dir)->commit_mutex,
+ COMMIT_MUTEX_SECOND_PARENT);
if (new_ip) {
- mutex_lock(&JFS_IP(new_ip)->commit_mutex);
+ mutex_lock_nested(&JFS_IP(new_ip)->commit_mutex,
+ COMMIT_MUTEX_VICTIM);
/*
* Change existing directory entry to new inode number
*/
@@ -1357,8 +1365,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry,
tid = txBegin(dir->i_sb, 0);
- mutex_lock(&JFS_IP(dir)->commit_mutex);
- mutex_lock(&JFS_IP(ip)->commit_mutex);
+ mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
+ mutex_lock_nested(&JFS_IP(ip)->commit_mutex, COMMIT_MUTEX_CHILD);
rc = jfs_init_acl(tid, ip, dir);
if (rc)
@@ -1495,7 +1503,7 @@ struct dentry *jfs_get_parent(struct dentry *dentry)
return parent;
}
-struct inode_operations jfs_dir_inode_operations = {
+const struct inode_operations jfs_dir_inode_operations = {
.create = jfs_create,
.lookup = jfs_lookup,
.link = jfs_link,
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 846ac8f34513..52d73d54a931 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -46,7 +46,7 @@ MODULE_LICENSE("GPL");
static struct kmem_cache * jfs_inode_cachep;
-static struct super_operations jfs_super_operations;
+static const struct super_operations jfs_super_operations;
static struct export_operations jfs_export_operations;
static struct file_system_type jfs_fs_type;
@@ -716,7 +716,7 @@ out:
#endif
-static struct super_operations jfs_super_operations = {
+static const struct super_operations jfs_super_operations = {
.alloc_inode = jfs_alloc_inode,
.destroy_inode = jfs_destroy_inode,
.read_inode = jfs_read_inode,
diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c
index cee43f36f51d..4af1a05aad0a 100644
--- a/fs/jfs/symlink.c
+++ b/fs/jfs/symlink.c
@@ -29,7 +29,7 @@ static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-struct inode_operations jfs_symlink_inode_operations = {
+const struct inode_operations jfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = jfs_follow_link,
.setxattr = jfs_setxattr,
diff --git a/fs/libfs.c b/fs/libfs.c
index 503898d5c4a7..7d487047dbb8 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -186,7 +186,7 @@ const struct file_operations simple_dir_operations = {
.fsync = simple_sync_file,
};
-struct inode_operations simple_dir_inode_operations = {
+const struct inode_operations simple_dir_inode_operations = {
.lookup = simple_lookup,
};
@@ -195,11 +195,11 @@ struct inode_operations simple_dir_inode_operations = {
* will never be mountable)
*/
int get_sb_pseudo(struct file_system_type *fs_type, char *name,
- struct super_operations *ops, unsigned long magic,
+ const struct super_operations *ops, unsigned long magic,
struct vfsmount *mnt)
{
struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
- static struct super_operations default_ops = {.statfs = simple_statfs};
+ static const struct super_operations default_ops = {.statfs = simple_statfs};
struct dentry *dentry;
struct inode *root;
struct qstr d_name = {.name = name, .len = strlen(name)};
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 0b4acc1c5e7d..a5c019e1a447 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -361,7 +361,6 @@ static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *
{
struct nlm_host *host = req->a_host;
struct rpc_clnt *clnt;
- int status = -ENOLCK;
dprintk("lockd: call procedure %d on %s (async)\n",
(int)proc, host->h_name);
@@ -373,12 +372,10 @@ static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *
msg->rpc_proc = &clnt->cl_procinfo[proc];
/* bootstrap and kick off the async RPC call */
- status = rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
- if (status == 0)
- return 0;
+ return rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
out_err:
- nlm_release_call(req);
- return status;
+ tk_ops->rpc_release(req);
+ return -ENOLCK;
}
int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 3d4610c2a266..ad21c0713efa 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -9,7 +9,6 @@
*/
#include <linux/types.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/in.h>
#include <linux/sunrpc/clnt.h>
@@ -192,7 +191,7 @@ struct nlm_host *
nlmsvc_lookup_host(struct svc_rqst *rqstp,
const char *hostname, int hostname_len)
{
- return nlm_lookup_host(1, &rqstp->rq_addr,
+ return nlm_lookup_host(1, svc_addr_in(rqstp),
rqstp->rq_prot, rqstp->rq_vers,
hostname, hostname_len);
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 8ca18085e68d..126b1bf02c0e 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -141,6 +141,7 @@ lockd(struct svc_rqst *rqstp)
*/
while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) {
long timeout = MAX_SCHEDULE_TIMEOUT;
+ char buf[RPC_MAX_ADDRBUFLEN];
if (signalled()) {
flush_signals(current);
@@ -175,11 +176,10 @@ lockd(struct svc_rqst *rqstp)
break;
}
- dprintk("lockd: request from %08x\n",
- (unsigned)ntohl(rqstp->rq_addr.sin_addr.s_addr));
+ dprintk("lockd: request from %s\n",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
svc_process(rqstp);
-
}
flush_signals(current);
@@ -223,23 +223,29 @@ static int find_socket(struct svc_serv *serv, int proto)
return found;
}
+/*
+ * Make any sockets that are needed but not present.
+ * If nlm_udpport or nlm_tcpport were set as module
+ * options, make those sockets unconditionally
+ */
static int make_socks(struct svc_serv *serv, int proto)
{
- /* Make any sockets that are needed but not present.
- * If nlm_udpport or nlm_tcpport were set as module
- * options, make those sockets unconditionally
- */
- static int warned;
+ static int warned;
int err = 0;
+
if (proto == IPPROTO_UDP || nlm_udpport)
if (!find_socket(serv, IPPROTO_UDP))
- err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport);
- if (err == 0 && (proto == IPPROTO_TCP || nlm_tcpport))
+ err = svc_makesock(serv, IPPROTO_UDP, nlm_udpport,
+ SVC_SOCK_DEFAULTS);
+ if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport))
if (!find_socket(serv, IPPROTO_TCP))
- err= svc_makesock(serv, IPPROTO_TCP, nlm_tcpport);
- if (!err)
+ err = svc_makesock(serv, IPPROTO_TCP, nlm_tcpport,
+ SVC_SOCK_DEFAULTS);
+
+ if (err >= 0) {
warned = 0;
- else if (warned++ == 0)
+ err = 0;
+ } else if (warned++ == 0)
printk(KERN_WARNING
"lockd_up: makesock failed, error=%d\n", err);
return err;
@@ -434,7 +440,7 @@ static ctl_table nlm_sysctl_root[] = {
};
/*
- * Module (and driverfs) parameters.
+ * Module (and sysfs) parameters.
*/
#define param_set_min_max(name, type, which_strtol, min, max) \
@@ -506,7 +512,7 @@ module_param(nsm_use_hostnames, bool, 0644);
static int __init init_nlm(void)
{
- nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root, 0);
+ nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
return nlm_sysctl_table ? 0 : -ENOMEM;
}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index f67146a8199a..47a66aa5d55b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -224,7 +224,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->cookie = argp->cookie;
dprintk("lockd: GRANTED called\n");
- resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
+ resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock);
dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
return rpc_success;
}
@@ -421,15 +421,16 @@ static __be32
nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
void *resp)
{
- struct sockaddr_in saddr = rqstp->rq_addr;
+ struct sockaddr_in saddr;
+
+ memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr));
dprintk("lockd: SM_NOTIFY called\n");
if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
|| ntohs(saddr.sin_port) >= 1024) {
- printk(KERN_WARNING
- "lockd: rejected NSM callback from %08x:%d\n",
- ntohl(rqstp->rq_addr.sin_addr.s_addr),
- ntohs(rqstp->rq_addr.sin_port));
+ char buf[RPC_MAX_ADDRBUFLEN];
+ printk(KERN_WARNING "lockd: rejected NSM callback from %s\n",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
return rpc_system_err;
}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index c7db0a5bccdc..cf51f849e76c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -593,9 +593,7 @@ callback:
/* Call the client */
kref_get(&block->b_count);
- if (nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
- &nlmsvc_grant_ops) < 0)
- nlmsvc_release_block(block);
+ nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops);
}
/*
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 3707c3a23e93..31cb48425733 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -253,7 +253,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->cookie = argp->cookie;
dprintk("lockd: GRANTED called\n");
- resp->status = nlmclnt_grant(&rqstp->rq_addr, &argp->lock);
+ resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock);
dprintk("lockd: GRANTED status %d\n", ntohl(resp->status));
return rpc_success;
}
@@ -452,15 +452,16 @@ static __be32
nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp,
void *resp)
{
- struct sockaddr_in saddr = rqstp->rq_addr;
+ struct sockaddr_in saddr;
+
+ memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr));
dprintk("lockd: SM_NOTIFY called\n");
if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK)
|| ntohs(saddr.sin_port) >= 1024) {
- printk(KERN_WARNING
- "lockd: rejected NSM callback from %08x:%d\n",
- ntohl(rqstp->rq_addr.sin_addr.s_addr),
- ntohs(rqstp->rq_addr.sin_port));
+ char buf[RPC_MAX_ADDRBUFLEN];
+ printk(KERN_WARNING "lockd: rejected NSM callback from %s\n",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
return rpc_system_err;
}
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index df6b1075b549..c4a554df7b7e 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -26,14 +26,14 @@ static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, _
for (i=0; i<numblocks-1; i++) {
if (!(bh=map[i]))
return(0);
- for (j=0; j<BLOCK_SIZE; j++)
+ for (j=0; j<bh->b_size; j++)
sum += nibblemap[bh->b_data[j] & 0xf]
+ nibblemap[(bh->b_data[j]>>4) & 0xf];
}
if (numblocks==0 || !(bh=map[numblocks-1]))
return(0);
- i = ((numbits-(numblocks-1)*BLOCK_SIZE*8)/16)*2;
+ i = ((numbits - (numblocks-1) * bh->b_size * 8) / 16) * 2;
for (j=0; j<i; j++) {
sum += nibblemap[bh->b_data[j] & 0xf]
+ nibblemap[(bh->b_data[j]>>4) & 0xf];
@@ -48,28 +48,29 @@ static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, _
return(sum);
}
-void minix_free_block(struct inode * inode, int block)
+void minix_free_block(struct inode *inode, unsigned long block)
{
- struct super_block * sb = inode->i_sb;
- struct minix_sb_info * sbi = minix_sb(sb);
- struct buffer_head * bh;
- unsigned int bit,zone;
+ struct super_block *sb = inode->i_sb;
+ struct minix_sb_info *sbi = minix_sb(sb);
+ struct buffer_head *bh;
+ int k = sb->s_blocksize_bits + 3;
+ unsigned long bit, zone;
if (block < sbi->s_firstdatazone || block >= sbi->s_nzones) {
printk("Trying to free block not in datazone\n");
return;
}
zone = block - sbi->s_firstdatazone + 1;
- bit = zone & 8191;
- zone >>= 13;
+ bit = zone & ((1<<k) - 1);
+ zone >>= k;
if (zone >= sbi->s_zmap_blocks) {
printk("minix_free_block: nonexistent bitmap buffer\n");
return;
}
bh = sbi->s_zmap[zone];
lock_kernel();
- if (!minix_test_and_clear_bit(bit,bh->b_data))
- printk("free_block (%s:%d): bit already cleared\n",
+ if (!minix_test_and_clear_bit(bit, bh->b_data))
+ printk("minix_free_block (%s:%lu): bit already cleared\n",
sb->s_id, block);
unlock_kernel();
mark_buffer_dirty(bh);
@@ -79,6 +80,7 @@ void minix_free_block(struct inode * inode, int block)
int minix_new_block(struct inode * inode)
{
struct minix_sb_info *sbi = minix_sb(inode->i_sb);
+ int bits_per_zone = 8 * inode->i_sb->s_blocksize;
int i;
for (i = 0; i < sbi->s_zmap_blocks; i++) {
@@ -86,11 +88,12 @@ int minix_new_block(struct inode * inode)
int j;
lock_kernel();
- if ((j = minix_find_first_zero_bit(bh->b_data, 8192)) < 8192) {
- minix_set_bit(j,bh->b_data);
+ j = minix_find_first_zero_bit(bh->b_data, bits_per_zone);
+ if (j < bits_per_zone) {
+ minix_set_bit(j, bh->b_data);
unlock_kernel();
mark_buffer_dirty(bh);
- j += i*8192 + sbi->s_firstdatazone-1;
+ j += i * bits_per_zone + sbi->s_firstdatazone-1;
if (j < sbi->s_firstdatazone || j >= sbi->s_nzones)
break;
return j;
@@ -137,6 +140,7 @@ minix_V2_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh)
int block;
struct minix_sb_info *sbi = minix_sb(sb);
struct minix2_inode *p;
+ int minix2_inodes_per_block = sb->s_blocksize / sizeof(struct minix2_inode);
*bh = NULL;
if (!ino || ino > sbi->s_ninodes) {
@@ -146,14 +150,14 @@ minix_V2_raw_inode(struct super_block *sb, ino_t ino, struct buffer_head **bh)
}
ino--;
block = 2 + sbi->s_imap_blocks + sbi->s_zmap_blocks +
- ino / MINIX2_INODES_PER_BLOCK;
+ ino / minix2_inodes_per_block;
*bh = sb_bread(sb, block);
if (!*bh) {
printk("Unable to read inode block\n");
return NULL;
}
p = (void *)(*bh)->b_data;
- return p + ino % MINIX2_INODES_PER_BLOCK;
+ return p + ino % minix2_inodes_per_block;
}
/* Clear the link count and mode of a deleted inode on disk. */
@@ -185,26 +189,30 @@ static void minix_clear_inode(struct inode *inode)
void minix_free_inode(struct inode * inode)
{
+ struct super_block *sb = inode->i_sb;
struct minix_sb_info *sbi = minix_sb(inode->i_sb);
- struct buffer_head * bh;
- unsigned long ino;
+ struct buffer_head *bh;
+ int k = sb->s_blocksize_bits + 3;
+ unsigned long ino, bit;
ino = inode->i_ino;
if (ino < 1 || ino > sbi->s_ninodes) {
printk("minix_free_inode: inode 0 or nonexistent inode\n");
goto out;
}
- if ((ino >> 13) >= sbi->s_imap_blocks) {
+ bit = ino & ((1<<k) - 1);
+ ino >>= k;
+ if (ino >= sbi->s_imap_blocks) {
printk("minix_free_inode: nonexistent imap in superblock\n");
goto out;
}
minix_clear_inode(inode); /* clear on-disk copy */
- bh = sbi->s_imap[ino >> 13];
+ bh = sbi->s_imap[ino];
lock_kernel();
- if (!minix_test_and_clear_bit(ino & 8191, bh->b_data))
- printk("minix_free_inode: bit %lu already cleared\n", ino);
+ if (!minix_test_and_clear_bit(bit, bh->b_data))
+ printk("minix_free_inode: bit %lu already cleared\n", bit);
unlock_kernel();
mark_buffer_dirty(bh);
out:
@@ -217,35 +225,38 @@ struct inode * minix_new_inode(const struct inode * dir, int * error)
struct minix_sb_info *sbi = minix_sb(sb);
struct inode *inode = new_inode(sb);
struct buffer_head * bh;
- int i,j;
+ int bits_per_zone = 8 * sb->s_blocksize;
+ unsigned long j;
+ int i;
if (!inode) {
*error = -ENOMEM;
return NULL;
}
- j = 8192;
+ j = bits_per_zone;
bh = NULL;
*error = -ENOSPC;
lock_kernel();
for (i = 0; i < sbi->s_imap_blocks; i++) {
bh = sbi->s_imap[i];
- if ((j = minix_find_first_zero_bit(bh->b_data, 8192)) < 8192)
+ j = minix_find_first_zero_bit(bh->b_data, bits_per_zone);
+ if (j < bits_per_zone)
break;
}
- if (!bh || j >= 8192) {
+ if (!bh || j >= bits_per_zone) {
unlock_kernel();
iput(inode);
return NULL;
}
- if (minix_test_and_set_bit(j,bh->b_data)) { /* shouldn't happen */
- printk("new_inode: bit already set\n");
+ if (minix_test_and_set_bit(j, bh->b_data)) { /* shouldn't happen */
unlock_kernel();
+ printk("minix_new_inode: bit already set\n");
iput(inode);
return NULL;
}
unlock_kernel();
mark_buffer_dirty(bh);
- j += i*8192;
+ j += i * bits_per_zone;
if (!j || j > sbi->s_ninodes) {
iput(inode);
return NULL;
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index ab782c4086f5..cb4cb571fddf 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -4,6 +4,8 @@
* Copyright (C) 1991, 1992 Linus Torvalds
*
* minix directory handling functions
+ *
+ * Updated to filesystem version 3 by Daniel Aragones
*/
#include "minix.h"
@@ -11,6 +13,7 @@
#include <linux/smp_lock.h>
typedef struct minix_dir_entry minix_dirent;
+typedef struct minix3_dir_entry minix3_dirent;
static int minix_readdir(struct file *, void *, filldir_t);
@@ -89,6 +92,8 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
unsigned long npages = dir_pages(inode);
struct minix_sb_info *sbi = minix_sb(sb);
unsigned chunk_size = sbi->s_dirsize;
+ char *name;
+ __u32 inumber;
lock_kernel();
@@ -105,16 +110,24 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir)
kaddr = (char *)page_address(page);
p = kaddr+offset;
limit = kaddr + minix_last_byte(inode, n) - chunk_size;
- for ( ; p <= limit ; p = minix_next_entry(p, sbi)) {
- minix_dirent *de = (minix_dirent *)p;
- if (de->inode) {
+ for ( ; p <= limit; p = minix_next_entry(p, sbi)) {
+ if (sbi->s_version == MINIX_V3) {
+ minix3_dirent *de3 = (minix3_dirent *)p;
+ name = de3->name;
+ inumber = de3->inode;
+ } else {
+ minix_dirent *de = (minix_dirent *)p;
+ name = de->name;
+ inumber = de->inode;
+ }
+ if (inumber) {
int over;
- unsigned l = strnlen(de->name,sbi->s_namelen);
+ unsigned l = strnlen(name, sbi->s_namelen);
offset = p - kaddr;
- over = filldir(dirent, de->name, l,
- (n<<PAGE_CACHE_SHIFT) | offset,
- de->inode, DT_UNKNOWN);
+ over = filldir(dirent, name, l,
+ (n << PAGE_CACHE_SHIFT) | offset,
+ inumber, DT_UNKNOWN);
if (over) {
dir_put_page(page);
goto done;
@@ -156,23 +169,34 @@ minix_dirent *minix_find_entry(struct dentry *dentry, struct page **res_page)
unsigned long n;
unsigned long npages = dir_pages(dir);
struct page *page = NULL;
- struct minix_dir_entry *de;
+ char *p;
+ char *namx;
+ __u32 inumber;
*res_page = NULL;
for (n = 0; n < npages; n++) {
- char *kaddr;
+ char *kaddr, *limit;
+
page = dir_get_page(dir, n);
if (IS_ERR(page))
continue;
kaddr = (char*)page_address(page);
- de = (struct minix_dir_entry *) kaddr;
- kaddr += minix_last_byte(dir, n) - sbi->s_dirsize;
- for ( ; (char *) de <= kaddr ; de = minix_next_entry(de,sbi)) {
- if (!de->inode)
+ limit = kaddr + minix_last_byte(dir, n) - sbi->s_dirsize;
+ for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) {
+ if (sbi->s_version == MINIX_V3) {
+ minix3_dirent *de3 = (minix3_dirent *)p;
+ namx = de3->name;
+ inumber = de3->inode;
+ } else {
+ minix_dirent *de = (minix_dirent *)p;
+ namx = de->name;
+ inumber = de->inode;
+ }
+ if (!inumber)
continue;
- if (namecompare(namelen,sbi->s_namelen,name,de->name))
+ if (namecompare(namelen, sbi->s_namelen, name, namx))
goto found;
}
dir_put_page(page);
@@ -181,7 +205,7 @@ minix_dirent *minix_find_entry(struct dentry *dentry, struct page **res_page)
found:
*res_page = page;
- return de;
+ return (minix_dirent *)p;
}
int minix_add_link(struct dentry *dentry, struct inode *inode)
@@ -192,12 +216,15 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
struct super_block * sb = dir->i_sb;
struct minix_sb_info * sbi = minix_sb(sb);
struct page *page = NULL;
- struct minix_dir_entry * de;
unsigned long npages = dir_pages(dir);
unsigned long n;
- char *kaddr;
+ char *kaddr, *p;
+ minix_dirent *de;
+ minix3_dirent *de3;
unsigned from, to;
int err;
+ char *namx = NULL;
+ __u32 inumber;
/*
* We take care of directory expansion in the same loop
@@ -205,7 +232,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
* to protect that region.
*/
for (n = 0; n <= npages; n++) {
- char *dir_end;
+ char *limit, *dir_end;
page = dir_get_page(dir, n);
err = PTR_ERR(page);
@@ -214,20 +241,30 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
lock_page(page);
kaddr = (char*)page_address(page);
dir_end = kaddr + minix_last_byte(dir, n);
- de = (minix_dirent *)kaddr;
- kaddr += PAGE_CACHE_SIZE - sbi->s_dirsize;
- while ((char *)de <= kaddr) {
- if ((char *)de == dir_end) {
+ limit = kaddr + PAGE_CACHE_SIZE - sbi->s_dirsize;
+ for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) {
+ de = (minix_dirent *)p;
+ de3 = (minix3_dirent *)p;
+ if (sbi->s_version == MINIX_V3) {
+ namx = de3->name;
+ inumber = de3->inode;
+ } else {
+ namx = de->name;
+ inumber = de->inode;
+ }
+ if (p == dir_end) {
/* We hit i_size */
- de->inode = 0;
+ if (sbi->s_version == MINIX_V3)
+ de3->inode = 0;
+ else
+ de->inode = 0;
goto got_it;
}
- if (!de->inode)
+ if (!inumber)
goto got_it;
err = -EEXIST;
- if (namecompare(namelen,sbi->s_namelen,name,de->name))
+ if (namecompare(namelen, sbi->s_namelen, name, namx))
goto out_unlock;
- de = minix_next_entry(de, sbi);
}
unlock_page(page);
dir_put_page(page);
@@ -236,14 +273,19 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
return -EINVAL;
got_it:
- from = (char*)de - (char*)page_address(page);
+ from = p - (char*)page_address(page);
to = from + sbi->s_dirsize;
err = page->mapping->a_ops->prepare_write(NULL, page, from, to);
if (err)
goto out_unlock;
- memcpy (de->name, name, namelen);
- memset (de->name + namelen, 0, sbi->s_dirsize - namelen - 2);
- de->inode = inode->i_ino;
+ memcpy (namx, name, namelen);
+ if (sbi->s_version == MINIX_V3) {
+ memset (namx + namelen, 0, sbi->s_dirsize - namelen - 4);
+ de3->inode = inode->i_ino;
+ } else {
+ memset (namx + namelen, 0, sbi->s_dirsize - namelen - 2);
+ de->inode = inode->i_ino;
+ }
err = dir_commit_chunk(page, from, to);
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(dir);
@@ -283,8 +325,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
{
struct address_space *mapping = inode->i_mapping;
struct page *page = grab_cache_page(mapping, 0);
- struct minix_sb_info * sbi = minix_sb(inode->i_sb);
- struct minix_dir_entry * de;
+ struct minix_sb_info *sbi = minix_sb(inode->i_sb);
char *kaddr;
int err;
@@ -299,12 +340,23 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
kaddr = kmap_atomic(page, KM_USER0);
memset(kaddr, 0, PAGE_CACHE_SIZE);
- de = (struct minix_dir_entry *)kaddr;
- de->inode = inode->i_ino;
- strcpy(de->name,".");
- de = minix_next_entry(de, sbi);
- de->inode = dir->i_ino;
- strcpy(de->name,"..");
+ if (sbi->s_version == MINIX_V3) {
+ minix3_dirent *de3 = (minix3_dirent *)kaddr;
+
+ de3->inode = inode->i_ino;
+ strcpy(de3->name, ".");
+ de3 = minix_next_entry(de3, sbi);
+ de3->inode = dir->i_ino;
+ strcpy(de3->name, "..");
+ } else {
+ minix_dirent *de = (minix_dirent *)kaddr;
+
+ de->inode = inode->i_ino;
+ strcpy(de->name, ".");
+ de = minix_next_entry(de, sbi);
+ de->inode = dir->i_ino;
+ strcpy(de->name, "..");
+ }
kunmap_atomic(kaddr, KM_USER0);
err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize);
@@ -321,33 +373,41 @@ int minix_empty_dir(struct inode * inode)
struct page *page = NULL;
unsigned long i, npages = dir_pages(inode);
struct minix_sb_info *sbi = minix_sb(inode->i_sb);
+ char *name;
+ __u32 inumber;
for (i = 0; i < npages; i++) {
- char *kaddr;
- minix_dirent * de;
- page = dir_get_page(inode, i);
+ char *p, *kaddr, *limit;
+ page = dir_get_page(inode, i);
if (IS_ERR(page))
continue;
kaddr = (char *)page_address(page);
- de = (minix_dirent *)kaddr;
- kaddr += minix_last_byte(inode, i) - sbi->s_dirsize;
+ limit = kaddr + minix_last_byte(inode, i) - sbi->s_dirsize;
+ for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) {
+ if (sbi->s_version == MINIX_V3) {
+ minix3_dirent *de3 = (minix3_dirent *)p;
+ name = de3->name;
+ inumber = de3->inode;
+ } else {
+ minix_dirent *de = (minix_dirent *)p;
+ name = de->name;
+ inumber = de->inode;
+ }
- while ((char *)de <= kaddr) {
- if (de->inode != 0) {
+ if (inumber != 0) {
/* check for . and .. */
- if (de->name[0] != '.')
+ if (name[0] != '.')
goto not_empty;
- if (!de->name[1]) {
- if (de->inode != inode->i_ino)
+ if (!name[1]) {
+ if (inumber != inode->i_ino)
goto not_empty;
- } else if (de->name[1] != '.')
+ } else if (name[1] != '.')
goto not_empty;
- else if (de->name[2])
+ else if (name[2])
goto not_empty;
}
- de = minix_next_entry(de, sbi);
}
dir_put_page(page);
}
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 40eac2e60d25..f92baa1d7570 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -26,7 +26,7 @@ const struct file_operations minix_file_operations = {
.sendfile = generic_file_sendfile,
};
-struct inode_operations minix_file_inode_operations = {
+const struct inode_operations minix_file_inode_operations = {
.truncate = minix_truncate,
.getattr = minix_getattr,
};
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 629e09b38c5c..92e383af3709 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -7,6 +7,7 @@
* Minix V2 fs support.
*
* Modified for 680x0 by Andreas Schwab
+ * Updated to filesystem version 3 by Daniel Aragones
*/
#include <linux/module.h>
@@ -36,7 +37,8 @@ static void minix_put_super(struct super_block *sb)
struct minix_sb_info *sbi = minix_sb(sb);
if (!(sb->s_flags & MS_RDONLY)) {
- sbi->s_ms->s_state = sbi->s_mount_state;
+ if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
+ sbi->s_ms->s_state = sbi->s_mount_state;
mark_buffer_dirty(sbi->s_sbh);
}
for (i = 0; i < sbi->s_imap_blocks; i++)
@@ -93,7 +95,7 @@ static void destroy_inodecache(void)
kmem_cache_destroy(minix_inode_cachep);
}
-static struct super_operations minix_sops = {
+static const struct super_operations minix_sops = {
.alloc_inode = minix_alloc_inode,
.destroy_inode = minix_destroy_inode,
.read_inode = minix_read_inode,
@@ -117,12 +119,17 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
!(sbi->s_mount_state & MINIX_VALID_FS))
return 0;
/* Mounting a rw partition read-only. */
- ms->s_state = sbi->s_mount_state;
+ if (sbi->s_version != MINIX_V3)
+ ms->s_state = sbi->s_mount_state;
mark_buffer_dirty(sbi->s_sbh);
} else {
/* Mount a partition which is read-only, read-write. */
- sbi->s_mount_state = ms->s_state;
- ms->s_state &= ~MINIX_VALID_FS;
+ if (sbi->s_version != MINIX_V3) {
+ sbi->s_mount_state = ms->s_state;
+ ms->s_state &= ~MINIX_VALID_FS;
+ } else {
+ sbi->s_mount_state = MINIX_VALID_FS;
+ }
mark_buffer_dirty(sbi->s_sbh);
if (!(sbi->s_mount_state & MINIX_VALID_FS))
@@ -140,7 +147,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
struct buffer_head *bh;
struct buffer_head **map;
struct minix_super_block *ms;
- int i, block;
+ struct minix3_super_block *m3s = NULL;
+ unsigned long i, block;
struct inode *root_inode;
struct minix_sb_info *sbi;
@@ -192,6 +200,22 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
sbi->s_dirsize = 32;
sbi->s_namelen = 30;
sbi->s_link_max = MINIX2_LINK_MAX;
+ } else if ( *(__u16 *)(bh->b_data + 24) == MINIX3_SUPER_MAGIC) {
+ m3s = (struct minix3_super_block *) bh->b_data;
+ s->s_magic = m3s->s_magic;
+ sbi->s_imap_blocks = m3s->s_imap_blocks;
+ sbi->s_zmap_blocks = m3s->s_zmap_blocks;
+ sbi->s_firstdatazone = m3s->s_firstdatazone;
+ sbi->s_log_zone_size = m3s->s_log_zone_size;
+ sbi->s_max_size = m3s->s_max_size;
+ sbi->s_ninodes = m3s->s_ninodes;
+ sbi->s_nzones = m3s->s_zones;
+ sbi->s_dirsize = 64;
+ sbi->s_namelen = 60;
+ sbi->s_version = MINIX_V3;
+ sbi->s_link_max = MINIX2_LINK_MAX;
+ sbi->s_mount_state = MINIX_VALID_FS;
+ sb_set_blocksize(s, m3s->s_blocksize);
} else
goto out_no_fs;
@@ -236,7 +260,8 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
s->s_root->d_op = &minix_dentry_operations;
if (!(s->s_flags & MS_RDONLY)) {
- ms->s_state &= ~MINIX_VALID_FS;
+ if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
+ ms->s_state &= ~MINIX_VALID_FS;
mark_buffer_dirty(bh);
}
if (!(sbi->s_mount_state & MINIX_VALID_FS))
@@ -278,8 +303,8 @@ out_illegal_sb:
out_no_fs:
if (!silent)
- printk("VFS: Can't find a Minix or Minix V2 filesystem "
- "on device %s\n", s->s_id);
+ printk("VFS: Can't find a Minix filesystem V1 | V2 | V3 "
+ "on device %s.\n", s->s_id);
out_release:
brelse(bh);
goto out;
@@ -344,7 +369,7 @@ static const struct address_space_operations minix_aops = {
.bmap = minix_bmap
};
-static struct inode_operations minix_symlink_inode_operations = {
+static const struct inode_operations minix_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
@@ -537,12 +562,14 @@ int minix_sync_inode(struct inode * inode)
int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
+ struct inode *dir = dentry->d_parent->d_inode;
+ struct super_block *sb = dir->i_sb;
generic_fillattr(dentry->d_inode, stat);
if (INODE_VERSION(dentry->d_inode) == MINIX_V1)
- stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size);
+ stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
else
- stat->blocks = (BLOCK_SIZE / 512) * V2_minix_blocks(stat->size);
- stat->blksize = BLOCK_SIZE;
+ stat->blocks = (sb->s_blocksize / 512) * V2_minix_blocks(stat->size, sb);
+ stat->blksize = sb->s_blocksize;
return 0;
}
diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c
index 429baf8de105..a731cabf1540 100644
--- a/fs/minix/itree_common.c
+++ b/fs/minix/itree_common.c
@@ -23,7 +23,7 @@ static inline int verify_chain(Indirect *from, Indirect *to)
static inline block_t *block_end(struct buffer_head *bh)
{
- return (block_t *)((char*)bh->b_data + BLOCK_SIZE);
+ return (block_t *)((char*)bh->b_data + bh->b_size);
}
static inline Indirect *get_branch(struct inode *inode,
@@ -85,7 +85,7 @@ static int alloc_branch(struct inode *inode,
branch[n].key = cpu_to_block(nr);
bh = sb_getblk(inode->i_sb, parent);
lock_buffer(bh);
- memset(bh->b_data, 0, BLOCK_SIZE);
+ memset(bh->b_data, 0, bh->b_size);
branch[n].bh = bh;
branch[n].p = (block_t*) bh->b_data + offsets[n];
*branch[n].p = branch[n].key;
@@ -292,6 +292,7 @@ static void free_branches(struct inode *inode, block_t *p, block_t *q, int depth
static inline void truncate (struct inode * inode)
{
+ struct super_block *sb = inode->i_sb;
block_t *idata = i_data(inode);
int offsets[DEPTH];
Indirect chain[DEPTH];
@@ -301,7 +302,7 @@ static inline void truncate (struct inode * inode)
int first_whole;
long iblock;
- iblock = (inode->i_size + BLOCK_SIZE-1) >> 10;
+ iblock = (inode->i_size + sb->s_blocksize -1) >> sb->s_blocksize_bits;
block_truncate_page(inode->i_mapping, inode->i_size, get_block);
n = block_to_path(inode, iblock, offsets);
@@ -346,15 +347,16 @@ do_indirects:
mark_inode_dirty(inode);
}
-static inline unsigned nblocks(loff_t size)
+static inline unsigned nblocks(loff_t size, struct super_block *sb)
{
+ int k = sb->s_blocksize_bits - 10;
unsigned blocks, res, direct = DIRECT, i = DEPTH;
- blocks = (size + BLOCK_SIZE - 1) >> BLOCK_SIZE_BITS;
+ blocks = (size + sb->s_blocksize - 1) >> (BLOCK_SIZE_BITS + k);
res = blocks;
while (--i && blocks > direct) {
blocks -= direct;
- blocks += BLOCK_SIZE/sizeof(block_t) - 1;
- blocks /= BLOCK_SIZE/sizeof(block_t);
+ blocks += sb->s_blocksize/sizeof(block_t) - 1;
+ blocks /= sb->s_blocksize/sizeof(block_t);
res += blocks;
direct = 1;
}
diff --git a/fs/minix/itree_v1.c b/fs/minix/itree_v1.c
index 656b1347a25b..1a5f3bf0bcec 100644
--- a/fs/minix/itree_v1.c
+++ b/fs/minix/itree_v1.c
@@ -55,7 +55,7 @@ void V1_minix_truncate(struct inode * inode)
truncate(inode);
}
-unsigned V1_minix_blocks(loff_t size)
+unsigned V1_minix_blocks(loff_t size, struct super_block *sb)
{
- return nblocks(size);
+ return nblocks(size, sb);
}
diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c
index 9adcdc754e0f..ad8f0dec4ef4 100644
--- a/fs/minix/itree_v2.c
+++ b/fs/minix/itree_v2.c
@@ -23,10 +23,11 @@ static inline block_t *i_data(struct inode *inode)
static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
{
int n = 0;
+ struct super_block *sb = inode->i_sb;
if (block < 0) {
printk("minix_bmap: block<0\n");
- } else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) {
+ } else if (block >= (minix_sb(inode->i_sb)->s_max_size/sb->s_blocksize)) {
printk("minix_bmap: block>big\n");
} else if (block < 7) {
offsets[n++] = block;
@@ -60,7 +61,7 @@ void V2_minix_truncate(struct inode * inode)
truncate(inode);
}
-unsigned V2_minix_blocks(loff_t size)
+unsigned V2_minix_blocks(loff_t size, struct super_block *sb)
{
- return nblocks(size);
+ return nblocks(size, sb);
}
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index c55b77cdcc8e..73ef84f8fb0b 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -7,11 +7,10 @@
* truncated. Else they will be disallowed (ENAMETOOLONG).
*/
#define NO_TRUNCATE 1
-
#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version
-
#define MINIX_V1 0x0001 /* original minix fs */
#define MINIX_V2 0x0002 /* minix V2 fs */
+#define MINIX_V3 0x0003 /* minix V3 fs */
/*
* minix fs inode data in memory
@@ -52,12 +51,10 @@ extern struct inode * minix_new_inode(const struct inode * dir, int * error);
extern void minix_free_inode(struct inode * inode);
extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi);
extern int minix_new_block(struct inode * inode);
-extern void minix_free_block(struct inode * inode, int block);
+extern void minix_free_block(struct inode *inode, unsigned long block);
extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
-
extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-extern void V2_minix_truncate(struct inode *);
extern void V1_minix_truncate(struct inode *);
extern void V2_minix_truncate(struct inode *);
extern void minix_truncate(struct inode *);
@@ -65,8 +62,8 @@ extern int minix_sync_inode(struct inode *);
extern void minix_set_inode(struct inode *, dev_t);
extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int);
extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int);
-extern unsigned V1_minix_blocks(loff_t);
-extern unsigned V2_minix_blocks(loff_t);
+extern unsigned V1_minix_blocks(loff_t, struct super_block *);
+extern unsigned V2_minix_blocks(loff_t, struct super_block *);
extern struct minix_dir_entry *minix_find_entry(struct dentry*, struct page**);
extern int minix_add_link(struct dentry*, struct inode*);
@@ -76,11 +73,10 @@ extern int minix_empty_dir(struct inode*);
extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*);
extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**);
extern ino_t minix_inode_by_name(struct dentry*);
-
extern int minix_sync_file(struct file *, struct dentry *, int);
-extern struct inode_operations minix_file_inode_operations;
-extern struct inode_operations minix_dir_inode_operations;
+extern const struct inode_operations minix_file_inode_operations;
+extern const struct inode_operations minix_dir_inode_operations;
extern const struct file_operations minix_file_operations;
extern const struct file_operations minix_dir_operations;
extern struct dentry_operations minix_dentry_operations;
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 299bb66e3bde..f4aa7a939040 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -291,7 +291,7 @@ out:
/*
* directories can handle most operations...
*/
-struct inode_operations minix_dir_inode_operations = {
+const struct inode_operations minix_dir_inode_operations = {
.create = minix_create,
.lookup = minix_lookup,
.link = minix_link,
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 452461955cbd..30f7d0ae2215 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -646,7 +646,7 @@ out:
return err;
}
-static struct inode_operations msdos_dir_inode_operations = {
+static const struct inode_operations msdos_dir_inode_operations = {
.create = msdos_create,
.lookup = msdos_lookup,
.unlink = msdos_unlink,
diff --git a/fs/namei.c b/fs/namei.c
index e4f108f08230..ee60cc4d3453 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2688,10 +2688,11 @@ int __page_symlink(struct inode *inode, const char *symname, int len,
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
- int err = -ENOMEM;
+ int err;
char *kaddr;
retry:
+ err = -ENOMEM;
page = find_or_create_page(mapping, 0, gfp_mask);
if (!page)
goto fail;
@@ -2744,7 +2745,7 @@ int page_symlink(struct inode *inode, const char *symname, int len)
mapping_gfp_mask(inode->i_mapping));
}
-struct inode_operations page_symlink_inode_operations = {
+const struct inode_operations page_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
diff --git a/fs/namespace.c b/fs/namespace.c
index 5ef336c1103c..fd999cab7b57 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -53,9 +53,8 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
struct vfsmount *alloc_vfsmnt(const char *name)
{
- struct vfsmount *mnt = kmem_cache_alloc(mnt_cache, GFP_KERNEL);
+ struct vfsmount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
if (mnt) {
- memset(mnt, 0, sizeof(struct vfsmount));
atomic_set(&mnt->mnt_count, 1);
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 73747772c3bb..011ef0b6d2d4 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -58,7 +58,7 @@ const struct file_operations ncp_dir_operations =
#endif
};
-struct inode_operations ncp_dir_inode_operations =
+const struct inode_operations ncp_dir_inode_operations =
{
.create = ncp_create,
.lookup = ncp_lookup,
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index b91fea03b1c3..6b1f6d27099a 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -297,7 +297,7 @@ const struct file_operations ncp_file_operations =
.fsync = ncp_fsync,
};
-struct inode_operations ncp_file_inode_operations =
+const struct inode_operations ncp_file_inode_operations =
{
.setattr = ncp_notify_change,
};
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 67a90bf795d5..14939ddf74f1 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -90,7 +90,7 @@ static int ncp_remount(struct super_block *sb, int *flags, char* data)
return 0;
}
-static struct super_operations ncp_sops =
+static const struct super_operations ncp_sops =
{
.alloc_inode = ncp_alloc_inode,
.destroy_inode = ncp_destroy_inode,
@@ -229,7 +229,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
}
#if defined(CONFIG_NCPFS_EXTRAS) || defined(CONFIG_NCPFS_NFS_NS)
-static struct inode_operations ncp_symlink_inode_operations = {
+static const struct inode_operations ncp_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 7933e2e99dbc..75f309c8741a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -71,6 +71,8 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
complete(&nfs_callback_info.started);
for(;;) {
+ char buf[RPC_MAX_ADDRBUFLEN];
+
if (signalled()) {
if (nfs_callback_info.users == 0)
break;
@@ -88,8 +90,8 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
__FUNCTION__, -err);
break;
}
- dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__,
- NIPQUAD(rqstp->rq_addr.sin_addr.s_addr));
+ dprintk("%s: request from %s\n", __FUNCTION__,
+ svc_print_addr(rqstp, buf, sizeof(buf)));
svc_process(rqstp);
}
@@ -106,7 +108,6 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
int nfs_callback_up(void)
{
struct svc_serv *serv;
- struct svc_sock *svsk;
int ret = 0;
lock_kernel();
@@ -119,17 +120,14 @@ int nfs_callback_up(void)
ret = -ENOMEM;
if (!serv)
goto out_err;
- /* FIXME: We don't want to register this socket with the portmapper */
- ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport);
- if (ret < 0)
+
+ ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport,
+ SVC_SOCK_ANONYMOUS);
+ if (ret <= 0)
goto out_destroy;
- if (!list_empty(&serv->sv_permsocks)) {
- svsk = list_entry(serv->sv_permsocks.next,
- struct svc_sock, sk_list);
- nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport);
- dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport);
- } else
- BUG();
+ nfs_callback_tcpport = ret;
+ dprintk("Callback port = 0x%x\n", nfs_callback_tcpport);
+
ret = svc_create_thread(nfs_callback_svc, serv);
if (ret < 0)
goto out_destroy;
@@ -140,6 +138,8 @@ out:
unlock_kernel();
return ret;
out_destroy:
+ dprintk("Couldn't create callback socket or server thread; err = %d\n",
+ ret);
svc_destroy(serv);
out_err:
nfs_callback_info.users--;
@@ -166,15 +166,19 @@ void nfs_callback_down(void)
static int nfs_callback_authenticate(struct svc_rqst *rqstp)
{
- struct sockaddr_in *addr = &rqstp->rq_addr;
+ struct sockaddr_in *addr = svc_addr_in(rqstp);
struct nfs_client *clp;
+ char buf[RPC_MAX_ADDRBUFLEN];
/* Don't talk to strangers */
clp = nfs_find_client(addr, 4);
if (clp == NULL)
return SVC_DROP;
- dprintk("%s: %u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->sin_addr));
+
+ dprintk("%s: %s NFSv4 callback!\n", __FUNCTION__,
+ svc_print_addr(rqstp, buf, sizeof(buf)));
nfs_put_client(clp);
+
switch (rqstp->rq_authop->flavour) {
case RPC_AUTH_NULL:
if (rqstp->rq_proc != CB_NULL)
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index f8ea1f51f590..849a2029975d 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -176,7 +176,7 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
status = decode_fh(xdr, &args->fh);
if (unlikely(status != 0))
goto out;
- args->addr = &rqstp->rq_addr;
+ args->addr = svc_addr_in(rqstp);
status = decode_bitmap(xdr, args->bitmap);
out:
dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
@@ -188,7 +188,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
__be32 *p;
__be32 status;
- args->addr = &rqstp->rq_addr;
+ args->addr = svc_addr_in(rqstp);
status = decode_stateid(xdr, &args->stateid);
if (unlikely(status != 0))
goto out;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 23ab145daa2d..2190e6c2792e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -394,7 +394,8 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
unsigned int timeo,
unsigned int retrans,
- rpc_authflavor_t flavor)
+ rpc_authflavor_t flavor,
+ int flags)
{
struct rpc_timeout timeparms;
struct rpc_clnt *clnt = NULL;
@@ -407,6 +408,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
.program = &nfs_program,
.version = clp->rpc_ops->version,
.authflavor = flavor,
+ .flags = flags,
};
if (!IS_ERR(clp->cl_rpcclient))
@@ -548,7 +550,7 @@ static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *
* - RFC 2623, sec 2.3.2
*/
error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans,
- RPC_AUTH_UNIX);
+ RPC_AUTH_UNIX, 0);
if (error < 0)
goto error;
nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -868,7 +870,8 @@ static int nfs4_init_client(struct nfs_client *clp,
/* Check NFS protocol revision and initialize RPC op vector */
clp->rpc_ops = &nfs_v4_clientops;
- error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour);
+ error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour,
+ RPC_CLNT_CREATE_DISCRTRY);
if (error < 0)
goto error;
memcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1030,7 +1033,7 @@ error:
* Create an NFS4 referral server record
*/
struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
- struct nfs_fh *fh)
+ struct nfs_fh *mntfh)
{
struct nfs_client *parent_client;
struct nfs_server *server, *parent_server;
@@ -1069,8 +1072,13 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
BUG_ON(!server->nfs_client->rpc_ops);
BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+ /* Probe the root fh to retrieve its FSID and filehandle */
+ error = nfs4_path_walk(server, mntfh, data->mnt_path);
+ if (error < 0)
+ goto error;
+
/* probe the filesystem info for this server filesystem */
- error = nfs_probe_fsinfo(server, fh, &fattr);
+ error = nfs_probe_fsinfo(server, mntfh, &fattr);
if (error < 0)
goto error;
@@ -1173,7 +1181,7 @@ static struct seq_operations nfs_server_list_ops = {
.show = nfs_server_list_show,
};
-static struct file_operations nfs_server_list_fops = {
+static const struct file_operations nfs_server_list_fops = {
.open = nfs_server_list_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1193,7 +1201,7 @@ static struct seq_operations nfs_volume_list_ops = {
.show = nfs_volume_list_show,
};
-static struct file_operations nfs_volume_list_fops = {
+static const struct file_operations nfs_volume_list_fops = {
.open = nfs_volume_list_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d9ba8cb0ee75..92d8ec859e22 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -65,7 +65,7 @@ const struct file_operations nfs_dir_operations = {
.fsync = nfs_fsync_dir,
};
-struct inode_operations nfs_dir_inode_operations = {
+const struct inode_operations nfs_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
.link = nfs_link,
@@ -81,7 +81,7 @@ struct inode_operations nfs_dir_inode_operations = {
};
#ifdef CONFIG_NFS_V3
-struct inode_operations nfs3_dir_inode_operations = {
+const struct inode_operations nfs3_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_lookup,
.link = nfs_link,
@@ -104,7 +104,7 @@ struct inode_operations nfs3_dir_inode_operations = {
#ifdef CONFIG_NFS_V4
static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *);
-struct inode_operations nfs4_dir_inode_operations = {
+const struct inode_operations nfs4_dir_inode_operations = {
.create = nfs_create,
.lookup = nfs_atomic_lookup,
.link = nfs_link,
@@ -637,7 +637,7 @@ int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
* In the case it has, we assume that the dentries are untrustworthy
* and may need to be looked up again.
*/
-static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
+static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
{
if (IS_ROOT(dentry))
return 1;
@@ -652,6 +652,12 @@ static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
dentry->d_fsdata = (void *)verf;
}
+static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf)
+{
+ if (time_after(verf, (unsigned long)dentry->d_fsdata))
+ nfs_set_verifier(dentry, verf);
+}
+
/*
* Whenever an NFS operation succeeds, we know that the dentry
* is valid, so we update the revalidation timestamp.
@@ -785,7 +791,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd)
goto out_bad;
nfs_renew_times(dentry);
- nfs_set_verifier(dentry, verifier);
+ nfs_refresh_verifier(dentry, verifier);
out_valid:
unlock_kernel();
dput(parent);
@@ -1085,7 +1091,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
verifier = nfs_save_change_attribute(dir);
ret = nfs4_open_revalidate(dir, dentry, openflags, nd);
if (!ret)
- nfs_set_verifier(dentry, verifier);
+ nfs_refresh_verifier(dentry, verifier);
unlock_kernel();
out:
dput(parent);
@@ -1123,8 +1129,21 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
}
name.hash = full_name_hash(name.name, name.len);
dentry = d_lookup(parent, &name);
- if (dentry != NULL)
- return dentry;
+ if (dentry != NULL) {
+ /* Is this a positive dentry that matches the readdir info? */
+ if (dentry->d_inode != NULL &&
+ (NFS_FILEID(dentry->d_inode) == entry->ino ||
+ d_mountpoint(dentry))) {
+ if (!desc->plus || entry->fh->size == 0)
+ return dentry;
+ if (nfs_compare_fh(NFS_FH(dentry->d_inode),
+ entry->fh) == 0)
+ goto out_renew;
+ }
+ /* No, so d_drop to allow one to be created */
+ d_drop(dentry);
+ dput(dentry);
+ }
if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR))
return NULL;
/* Note: caller is already holding the dir->i_mutex! */
@@ -1149,6 +1168,10 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc)
nfs_renew_times(dentry);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
return dentry;
+out_renew:
+ nfs_renew_times(dentry);
+ nfs_refresh_verifier(dentry, nfs_save_change_attribute(dir));
+ return dentry;
}
/*
@@ -1443,6 +1466,8 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
if (atomic_read(&dentry->d_count) > 1) {
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
+ /* Start asynchronous writeout of the inode */
+ write_inode_now(dentry->d_inode, 0);
error = nfs_sillyrename(dir, dentry);
unlock_kernel();
return error;
@@ -1684,7 +1709,7 @@ out:
if (!error) {
d_move(old_dentry, new_dentry);
nfs_renew_times(new_dentry);
- nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
+ nfs_refresh_verifier(new_dentry, nfs_save_change_attribute(new_dir));
}
/* new dentry created? */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index bd21d7fde650..b1c98ea39b72 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -309,7 +309,8 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
rpc_execute(&data->task);
- dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
+ dprintk("NFS: %5u initiated direct read call "
+ "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
@@ -639,7 +640,8 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
rpc_execute(&data->task);
- dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
+ dprintk("NFS: %5u initiated direct write call "
+ "(req %s/%Ld, %zu bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
@@ -797,7 +799,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
const char __user *buf = iov[0].iov_base;
size_t count = iov[0].iov_len;
- dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n",
+ dprintk("nfs: direct write(%s/%s, %lu@%Ld)\n",
file->f_path.dentry->d_parent->d_name.name,
file->f_path.dentry->d_name.name,
(unsigned long) count, (long long) pos);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 9e4a2b70995a..8e66b5a2d490 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -68,14 +68,14 @@ const struct file_operations nfs_file_operations = {
.check_flags = nfs_check_flags,
};
-struct inode_operations nfs_file_inode_operations = {
+const struct inode_operations nfs_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
};
#ifdef CONFIG_NFS_V3
-struct inode_operations nfs3_file_inode_operations = {
+const struct inode_operations nfs3_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 8391bd7a83ce..6ef268f7c300 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -135,17 +135,15 @@ int nfs4_path_walk(struct nfs_server *server,
struct nfs_fh lastfh;
struct qstr name;
int ret;
- //int referral_count = 0;
dprintk("--> nfs4_path_walk(,,%s)\n", path);
fsinfo.fattr = &fattr;
nfs_fattr_init(&fattr);
- if (*path++ != '/') {
- dprintk("nfs4_get_root: Path does not begin with a slash\n");
- return -EINVAL;
- }
+ /* Eat leading slashes */
+ while (*path == '/')
+ path++;
/* Start by getting the root filehandle from the server */
ret = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
@@ -160,6 +158,7 @@ int nfs4_path_walk(struct nfs_server *server,
return -ENOTDIR;
}
+ /* FIXME: It is quite valid for the server to return a referral here */
if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
printk(KERN_ERR "nfs4_get_root:"
" getroot obtained referral\n");
@@ -187,6 +186,7 @@ eat_dot_dir:
goto eat_dot_dir;
}
+ /* FIXME: Why shouldn't the user be able to use ".." in the path? */
if (path[0] == '.' && path[1] == '.' && (path[2] == '/' || !path[2])
) {
printk(KERN_ERR "nfs4_get_root:"
@@ -212,6 +212,7 @@ eat_dot_dir:
return -ENOTDIR;
}
+ /* FIXME: Referrals are quite valid here too */
if (fattr.valid & NFS_ATTR_FATTR_V4_REFERRAL) {
printk(KERN_ERR "nfs4_get_root:"
" lookupfh obtained referral\n");
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d83498282837..af53c02f473b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -65,13 +65,18 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
int nfs_write_inode(struct inode *inode, int sync)
{
- int flags = sync ? FLUSH_SYNC : 0;
int ret;
- ret = nfs_commit_inode(inode, flags);
- if (ret < 0)
- return ret;
- return 0;
+ if (sync) {
+ ret = filemap_fdatawait(inode->i_mapping);
+ if (ret == 0)
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+ } else
+ ret = nfs_commit_inode(inode, 0);
+ if (ret >= 0)
+ return 0;
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ return ret;
}
void nfs_clear_inode(struct inode *inode)
@@ -235,6 +240,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
if (inode->i_state & I_NEW) {
struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long now = jiffies;
/* We set i_ino for the few things that still rely on it,
* such as stat(2) */
@@ -271,7 +277,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
init_special_inode(inode, inode->i_mode, fattr->rdev);
nfsi->read_cache_jiffies = fattr->time_start;
- nfsi->last_updated = jiffies;
+ nfsi->last_updated = now;
+ nfsi->cache_change_attribute = now;
inode->i_atime = fattr->atime;
inode->i_mtime = fattr->mtime;
inode->i_ctime = fattr->ctime;
@@ -290,7 +297,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
inode->i_blocks = fattr->du.nfs2.blocks;
}
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
- nfsi->attrtimeo_timestamp = jiffies;
+ nfsi->attrtimeo_timestamp = now;
memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
nfsi->access_cache = RB_ROOT;
@@ -783,20 +790,21 @@ void nfs_end_data_update(struct inode *inode)
static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long now = jiffies;
/* If we have atomic WCC data, we may update some attributes */
if ((fattr->valid & NFS_ATTR_WCC) != 0) {
if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) {
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
- nfsi->cache_change_attribute = jiffies;
+ nfsi->cache_change_attribute = now;
}
if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
- nfsi->cache_change_attribute = jiffies;
+ nfsi->cache_change_attribute = now;
}
if (inode->i_size == fattr->pre_size && nfsi->npages == 0) {
inode->i_size = fattr->size;
- nfsi->cache_change_attribute = jiffies;
+ nfsi->cache_change_attribute = now;
}
}
}
@@ -934,6 +942,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
struct nfs_inode *nfsi = NFS_I(inode);
loff_t cur_isize, new_isize;
unsigned int invalid = 0;
+ unsigned long now = jiffies;
int data_stable;
dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
@@ -959,7 +968,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
* Update the read time so we don't revalidate too often.
*/
nfsi->read_cache_jiffies = fattr->time_start;
- nfsi->last_updated = jiffies;
+ nfsi->last_updated = now;
+
+ /* Fix a wraparound issue with nfsi->cache_change_attribute */
+ if (time_before(now, nfsi->cache_change_attribute))
+ nfsi->cache_change_attribute = now - 600*HZ;
/* Are we racing with known updates of the metadata on the server? */
data_stable = nfs_verify_change_attribute(inode, fattr->time_start);
@@ -985,7 +998,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_size = new_isize;
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
}
- nfsi->cache_change_attribute = jiffies;
+ nfsi->cache_change_attribute = now;
dprintk("NFS: isize change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
}
@@ -996,14 +1009,14 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
dprintk("NFS: mtime change on server for file %s/%ld\n",
inode->i_sb->s_id, inode->i_ino);
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
- nfsi->cache_change_attribute = jiffies;
+ nfsi->cache_change_attribute = now;
}
/* If ctime has changed we should definitely clear access+acl caches */
if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) {
invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
- nfsi->cache_change_attribute = jiffies;
+ nfsi->cache_change_attribute = now;
}
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
@@ -1032,18 +1045,18 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_sb->s_id, inode->i_ino);
nfsi->change_attr = fattr->change_attr;
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- nfsi->cache_change_attribute = jiffies;
+ nfsi->cache_change_attribute = now;
}
/* Update attrtimeo value if we're out of the unstable period */
if (invalid & NFS_INO_INVALID_ATTR) {
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
- nfsi->attrtimeo_timestamp = jiffies;
- } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
+ nfsi->attrtimeo_timestamp = now;
+ } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
- nfsi->attrtimeo_timestamp = jiffies;
+ nfsi->attrtimeo_timestamp = now;
}
/* Don't invalidate the data if we were to blame */
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
@@ -1122,7 +1135,6 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
return NULL;
nfsi->flags = 0UL;
nfsi->cache_validity = 0UL;
- nfsi->cache_change_attribute = jiffies;
#ifdef CONFIG_NFS_V3_ACL
nfsi->acl_access = ERR_PTR(-EAGAIN);
nfsi->acl_default = ERR_PTR(-EAGAIN);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index a28f6ce2e131..6610f2b02077 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -107,10 +107,6 @@ extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus);
/* nfs4proc.c */
#ifdef CONFIG_NFS_V4
extern struct rpc_procinfo nfs4_procedures[];
-
-extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
- struct nfs4_fs_locations *fs_locations,
- struct page *page);
#endif
/* dir.c */
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 371b804e7cc8..7f86e65182e4 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -155,12 +155,12 @@ out_follow:
goto out;
}
-struct inode_operations nfs_mountpoint_inode_operations = {
+const struct inode_operations nfs_mountpoint_inode_operations = {
.follow_link = nfs_follow_mountpoint,
.getattr = nfs_getattr,
};
-struct inode_operations nfs_referral_inode_operations = {
+const struct inode_operations nfs_referral_inode_operations = {
.follow_link = nfs_follow_mountpoint,
};
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index acd8fe9762d3..7d0371e2bad5 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -253,29 +253,6 @@ static int nfs3_proc_readlink(struct inode *inode, struct page *page,
return status;
}
-static int nfs3_proc_read(struct nfs_read_data *rdata)
-{
- int flags = rdata->flags;
- struct inode * inode = rdata->inode;
- struct nfs_fattr * fattr = rdata->res.fattr;
- struct rpc_message msg = {
- .rpc_proc = &nfs3_procedures[NFS3PROC_READ],
- .rpc_argp = &rdata->args,
- .rpc_resp = &rdata->res,
- .rpc_cred = rdata->cred,
- };
- int status;
-
- dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
- (long long) rdata->args.offset);
- nfs_fattr_init(fattr);
- status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
- if (status >= 0)
- nfs_refresh_inode(inode, fattr);
- dprintk("NFS reply read: %d\n", status);
- return status;
-}
-
/*
* Create a regular file.
* For now, we don't implement O_EXCL.
@@ -855,7 +832,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.lookup = nfs3_proc_lookup,
.access = nfs3_proc_access,
.readlink = nfs3_proc_readlink,
- .read = nfs3_proc_read,
.create = nfs3_proc_create,
.remove = nfs3_proc_remove,
.unlink_setup = nfs3_proc_unlink_setup,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c26cd978c7cc..cf3a17eb5c09 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -151,7 +151,7 @@ struct nfs4_state_recovery_ops {
};
extern struct dentry_operations nfs4_dentry_operations;
-extern struct inode_operations nfs4_dir_inode_operations;
+extern const struct inode_operations nfs4_dir_inode_operations;
/* inode.c */
extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t);
@@ -169,7 +169,7 @@ extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
-extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
+extern int nfs4_proc_fs_locations(struct inode *dir, struct qstr *name,
struct nfs4_fs_locations *fs_locations, struct page *page);
extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops;
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index b872779d7cd5..dd5fef20c702 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -16,6 +16,7 @@
#include <linux/vfs.h>
#include <linux/inet.h>
#include "internal.h"
+#include "nfs4_fs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -130,7 +131,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
.authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
};
char *page = NULL, *page2 = NULL;
- char *devname;
int loc, s, error;
if (locations == NULL || locations->nlocations <= 0)
@@ -154,12 +154,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
goto out;
}
- devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE);
- if (IS_ERR(devname)) {
- mnt = (struct vfsmount *)devname;
- goto out;
- }
-
loc = 0;
while (loc < locations->nlocations && IS_ERR(mnt)) {
const struct nfs4_fs_location *location = &locations->locations[loc];
@@ -194,7 +188,11 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
addr.sin_port = htons(NFS_PORT);
mountdata.addr = &addr;
- mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, devname, &mountdata);
+ snprintf(page, PAGE_SIZE, "%s:%s",
+ mountdata.hostname,
+ mountdata.mnt_path);
+
+ mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, &mountdata);
if (!IS_ERR(mnt)) {
break;
}
@@ -242,7 +240,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
dprintk("%s: getting locations for %s/%s\n",
__FUNCTION__, parent->d_name.name, dentry->d_name.name);
- err = nfs4_proc_fs_locations(parent->d_inode, dentry, fs_locations, page);
+ err = nfs4_proc_fs_locations(parent->d_inode, &dentry->d_name, fs_locations, page);
dput(parent);
if (err != 0 ||
fs_locations->nlocations <= 0 ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b3fd29baadc3..f52cf5c33c6c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1140,7 +1140,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
break;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
- nfs4_schedule_state_recovery(server->nfs_client);
break;
default:
if (nfs4_async_handle_error(task, server) == -EAGAIN) {
@@ -1424,7 +1423,6 @@ static int nfs4_get_referral(struct inode *dir, struct qstr *name, struct nfs_fa
int status = -ENOMEM;
struct page *page = NULL;
struct nfs4_fs_locations *locations = NULL;
- struct dentry dentry = {};
page = alloc_page(GFP_KERNEL);
if (page == NULL)
@@ -1433,9 +1431,7 @@ static int nfs4_get_referral(struct inode *dir, struct qstr *name, struct nfs_fa
if (locations == NULL)
goto out;
- dentry.d_name.name = name->name;
- dentry.d_name.len = name->len;
- status = nfs4_proc_fs_locations(dir, &dentry, locations, page);
+ status = nfs4_proc_fs_locations(dir, name, locations, page);
if (status != 0)
goto out;
/* Make sure server returned a different fsid for the referral */
@@ -1737,44 +1733,6 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page,
return err;
}
-static int _nfs4_proc_read(struct nfs_read_data *rdata)
-{
- int flags = rdata->flags;
- struct inode *inode = rdata->inode;
- struct nfs_fattr *fattr = rdata->res.fattr;
- struct nfs_server *server = NFS_SERVER(inode);
- struct rpc_message msg = {
- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
- .rpc_argp = &rdata->args,
- .rpc_resp = &rdata->res,
- .rpc_cred = rdata->cred,
- };
- unsigned long timestamp = jiffies;
- int status;
-
- dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
- (long long) rdata->args.offset);
-
- nfs_fattr_init(fattr);
- status = rpc_call_sync(server->client, &msg, flags);
- if (!status)
- renew_lease(server, timestamp);
- dprintk("NFS reply read: %d\n", status);
- return status;
-}
-
-static int nfs4_proc_read(struct nfs_read_data *rdata)
-{
- struct nfs4_exception exception = { };
- int err;
- do {
- err = nfs4_handle_exception(NFS_SERVER(rdata->inode),
- _nfs4_proc_read(rdata),
- &exception);
- } while (exception.retry);
- return err;
-}
-
/*
* Got race?
* We will need to arrange for the VFS layer to provide an atomic open.
@@ -2753,11 +2711,15 @@ static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp)
might_sleep();
+ rwsem_acquire(&clp->cl_sem.dep_map, 0, 0, _RET_IP_);
+
rpc_clnt_sigmask(clnt, &oldset);
res = wait_on_bit(&clp->cl_state, NFS4CLNT_STATE_RECOVER,
nfs4_wait_bit_interruptible,
TASK_INTERRUPTIBLE);
rpc_clnt_sigunmask(clnt, &oldset);
+
+ rwsem_release(&clp->cl_sem.dep_map, 1, _RET_IP_);
return res;
}
@@ -2996,7 +2958,6 @@ int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4
switch (err) {
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
- nfs4_schedule_state_recovery(server->nfs_client);
case 0:
return 0;
}
@@ -3150,12 +3111,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
break;
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
- nfs4_schedule_state_recovery(calldata->server->nfs_client);
break;
default:
- if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN)
rpc_restart_call(task);
- }
}
}
@@ -3585,7 +3544,7 @@ ssize_t nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen)
return len;
}
-int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
+int nfs4_proc_fs_locations(struct inode *dir, struct qstr *name,
struct nfs4_fs_locations *fs_locations, struct page *page)
{
struct nfs_server *server = NFS_SERVER(dir);
@@ -3595,7 +3554,7 @@ int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
};
struct nfs4_fs_locations_arg args = {
.dir_fh = NFS_FH(dir),
- .name = &dentry->d_name,
+ .name = name,
.page = page,
.bitmask = bitmask,
};
@@ -3607,7 +3566,7 @@ int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry,
int status;
dprintk("%s: start\n", __FUNCTION__);
- fs_locations->fattr.valid = 0;
+ nfs_fattr_init(&fs_locations->fattr);
fs_locations->server = server;
fs_locations->nlocations = 0;
status = rpc_call_sync(server->client, &msg, 0);
@@ -3625,7 +3584,7 @@ struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = {
.recover_lock = nfs4_lock_expired,
};
-static struct inode_operations nfs4_file_inode_operations = {
+static const struct inode_operations nfs4_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
.setattr = nfs_setattr,
@@ -3646,7 +3605,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.lookup = nfs4_proc_lookup,
.access = nfs4_proc_access,
.readlink = nfs4_proc_readlink,
- .read = nfs4_proc_read,
.create = nfs4_proc_create,
.remove = nfs4_proc_remove,
.unlink_setup = nfs4_proc_unlink_setup,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 823298561c0a..f5f4430fb2a4 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -43,7 +43,6 @@
* child task framework of the RPC layer?
*/
-#include <linux/sched.h>
#include <linux/smp_lock.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0cf3fa312a33..f02d522fd788 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -387,8 +387,10 @@ static int nfs4_stat_to_errno(int);
decode_putfh_maxsz + \
op_decode_hdr_maxsz + 12)
#define NFS4_enc_server_caps_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
encode_getattr_maxsz)
#define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
decode_getattr_maxsz)
#define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 560536ad74a4..1dcf56de9482 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -186,35 +186,6 @@ static int nfs_proc_readlink(struct inode *inode, struct page *page,
return status;
}
-static int nfs_proc_read(struct nfs_read_data *rdata)
-{
- int flags = rdata->flags;
- struct inode * inode = rdata->inode;
- struct nfs_fattr * fattr = rdata->res.fattr;
- struct rpc_message msg = {
- .rpc_proc = &nfs_procedures[NFSPROC_READ],
- .rpc_argp = &rdata->args,
- .rpc_resp = &rdata->res,
- .rpc_cred = rdata->cred,
- };
- int status;
-
- dprintk("NFS call read %d @ %Ld\n", rdata->args.count,
- (long long) rdata->args.offset);
- nfs_fattr_init(fattr);
- status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
- if (status >= 0) {
- nfs_refresh_inode(inode, fattr);
- /* Emulate the eof flag, which isn't normally needed in NFSv2
- * as it is guaranteed to always return the file attributes
- */
- if (rdata->args.offset + rdata->args.count >= fattr->size)
- rdata->res.eof = 1;
- }
- dprintk("NFS reply read: %d\n", status);
- return status;
-}
-
static int
nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
int flags, struct nameidata *nd)
@@ -666,7 +637,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.lookup = nfs_proc_lookup,
.access = NULL, /* access */
.readlink = nfs_proc_readlink,
- .read = nfs_proc_read,
.create = nfs_proc_create,
.remove = nfs_proc_remove,
.unlink_setup = nfs_proc_unlink_setup,
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a9c26521a9e2..6ab4d5a9edf2 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -5,14 +5,6 @@
*
* Partial copy of Linus' read cache modifications to fs/nfs/file.c
* modified for async RPC by okir@monad.swb.de
- *
- * We do an ugly hack here in order to return proper error codes to the
- * user program when a read request failed: since generic_file_read
- * only checks the return value of inode->i_op->readpage() which is always 0
- * for async RPC, we set the error bit of the page to 1 when an error occurs,
- * and make nfs_readpage transmit requests synchronously when encountering this.
- * This is only a small problem, though, since we now retry all operations
- * within the RPC code when root squashing is suspected.
*/
#include <linux/time.h>
@@ -122,93 +114,6 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
}
}
-/*
- * Read a page synchronously.
- */
-static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
- struct page *page)
-{
- unsigned int rsize = NFS_SERVER(inode)->rsize;
- unsigned int count = PAGE_CACHE_SIZE;
- int result = -ENOMEM;
- struct nfs_read_data *rdata;
-
- rdata = nfs_readdata_alloc(count);
- if (!rdata)
- goto out_unlock;
-
- memset(rdata, 0, sizeof(*rdata));
- rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
- rdata->cred = ctx->cred;
- rdata->inode = inode;
- INIT_LIST_HEAD(&rdata->pages);
- rdata->args.fh = NFS_FH(inode);
- rdata->args.context = ctx;
- rdata->args.pages = &page;
- rdata->args.pgbase = 0UL;
- rdata->args.count = rsize;
- rdata->res.fattr = &rdata->fattr;
-
- dprintk("NFS: nfs_readpage_sync(%p)\n", page);
-
- /*
- * This works now because the socket layer never tries to DMA
- * into this buffer directly.
- */
- do {
- if (count < rsize)
- rdata->args.count = count;
- rdata->res.count = rdata->args.count;
- rdata->args.offset = page_offset(page) + rdata->args.pgbase;
-
- dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
- NFS_SERVER(inode)->nfs_client->cl_hostname,
- inode->i_sb->s_id,
- (long long)NFS_FILEID(inode),
- (unsigned long long)rdata->args.pgbase,
- rdata->args.count);
-
- lock_kernel();
- result = NFS_PROTO(inode)->read(rdata);
- unlock_kernel();
-
- /*
- * Even if we had a partial success we can't mark the page
- * cache valid.
- */
- if (result < 0) {
- if (result == -EISDIR)
- result = -EINVAL;
- goto io_error;
- }
- count -= result;
- rdata->args.pgbase += result;
- nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
-
- /* Note: result == 0 should only happen if we're caching
- * a write that extends the file and punches a hole.
- */
- if (rdata->res.eof != 0 || result == 0)
- break;
- } while (count);
- spin_lock(&inode->i_lock);
- NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
- spin_unlock(&inode->i_lock);
-
- if (rdata->res.eof || rdata->res.count == rdata->args.count) {
- SetPageUptodate(page);
- if (rdata->res.eof && count != 0)
- memclear_highpage_flush(page, rdata->args.pgbase, count);
- }
- result = 0;
-
-io_error:
- nfs_readdata_free(rdata);
-out_unlock:
- unlock_page(page);
- return result;
-}
-
static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
@@ -278,7 +183,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
data->task.tk_cookie = (unsigned long)inode;
- dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+ dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
@@ -452,7 +357,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
int status;
- dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid,
+ dprintk("NFS: %s: %5u, (status %d)\n", __FUNCTION__, task->tk_pid,
task->tk_status);
status = NFS_PROTO(data->inode)->read_done(task, data);
@@ -621,15 +526,9 @@ int nfs_readpage(struct file *file, struct page *page)
} else
ctx = get_nfs_open_context((struct nfs_open_context *)
file->private_data);
- if (!IS_SYNC(inode)) {
- error = nfs_readpage_async(ctx, inode, page);
- goto out;
- }
- error = nfs_readpage_sync(ctx, inode, page);
- if (error < 0 && IS_SWAPFILE(inode))
- printk("Aiee.. nfs swap-in of page failed!\n");
-out:
+ error = nfs_readpage_async(ctx, inode, page);
+
put_nfs_open_context(ctx);
return error;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 28108c82b887..bb516a2cfbaf 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -44,6 +44,7 @@
#include <linux/vfs.h>
#include <linux/inet.h>
#include <linux/nfs_xdr.h>
+#include <linux/magic.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -81,7 +82,7 @@ struct file_system_type nfs_xdev_fs_type = {
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
-static struct super_operations nfs_sops = {
+static const struct super_operations nfs_sops = {
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs_write_inode,
@@ -125,7 +126,7 @@ struct file_system_type nfs4_referral_fs_type = {
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
-static struct super_operations nfs4_sops = {
+static const struct super_operations nfs4_sops = {
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs_write_inode,
@@ -1044,7 +1045,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
nfs4_fill_super(s);
}
- mntroot = nfs4_get_root(s, data->fh);
+ mntroot = nfs4_get_root(s, &mntfh);
if (IS_ERR(mntroot)) {
error = PTR_ERR(mntroot);
goto error_splat_super;
diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c
index 525c136c7d8c..f4a0548b9ce8 100644
--- a/fs/nfs/symlink.c
+++ b/fs/nfs/symlink.c
@@ -78,7 +78,7 @@ read_failed:
/*
* symlinks can't do much...
*/
-struct inode_operations nfs_symlink_inode_operations = {
+const struct inode_operations nfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = nfs_follow_link,
.put_link = page_put_link,
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 3ea50ac64820..fcdcafbb3293 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -75,7 +75,7 @@ static ctl_table nfs_cb_sysctl_root[] = {
int nfs_register_sysctl(void)
{
- nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root, 0);
+ nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root);
if (nfs_callback_sysctl_table == NULL)
return -ENOMEM;
return 0;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 345492e78643..febdade91670 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1,47 +1,7 @@
/*
* linux/fs/nfs/write.c
*
- * Writing file data over NFS.
- *
- * We do it like this: When a (user) process wishes to write data to an
- * NFS file, a write request is allocated that contains the RPC task data
- * plus some info on the page to be written, and added to the inode's
- * write chain. If the process writes past the end of the page, an async
- * RPC call to write the page is scheduled immediately; otherwise, the call
- * is delayed for a few seconds.
- *
- * Just like readahead, no async I/O is performed if wsize < PAGE_SIZE.
- *
- * Write requests are kept on the inode's writeback list. Each entry in
- * that list references the page (portion) to be written. When the
- * cache timeout has expired, the RPC task is woken up, and tries to
- * lock the page. As soon as it manages to do so, the request is moved
- * from the writeback list to the writelock list.
- *
- * Note: we must make sure never to confuse the inode passed in the
- * write_page request with the one in page->inode. As far as I understand
- * it, these are different when doing a swap-out.
- *
- * To understand everything that goes on here and in the NFS read code,
- * one should be aware that a page is locked in exactly one of the following
- * cases:
- *
- * - A write request is in progress.
- * - A user process is in generic_file_write/nfs_update_page
- * - A user process is in generic_file_read
- *
- * Also note that because of the way pages are invalidated in
- * nfs_revalidate_inode, the following assertions hold:
- *
- * - If a page is dirty, there will be no read requests (a page will
- * not be re-read unless invalidated by nfs_revalidate_inode).
- * - If the page is not uptodate, there will be no pending write
- * requests, and no process will be in nfs_update_page.
- *
- * FIXME: Interaction with the vmscan routines is not optimal yet.
- * Either vmscan must be made nfs-savvy, or we need a different page
- * reclaim concept that supports something like FS-independent
- * buffer_heads with a b_ops-> field.
+ * Write file data over NFS.
*
* Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
*/
@@ -79,7 +39,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
unsigned int, unsigned int);
static void nfs_mark_request_dirty(struct nfs_page *req);
static int nfs_wait_on_write_congestion(struct address_space *, int);
-static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
static const struct rpc_call_ops nfs_write_partial_ops;
static const struct rpc_call_ops nfs_write_full_ops;
@@ -194,6 +153,13 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
i_size_write(inode, end);
}
+/* A writeback failed: mark the page as bad, and invalidate the page cache */
+static void nfs_set_pageerror(struct page *page)
+{
+ SetPageError(page);
+ nfs_zap_mapping(page->mapping->host, page->mapping);
+}
+
/* We can set the PG_uptodate flag if we see that a write request
* covers the full page.
*/
@@ -323,7 +289,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
err = 0;
out:
if (!wbc->for_writepages)
- nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
+ nfs_flush_mapping(page->mapping, wbc, FLUSH_STABLE|wb_priority(wbc));
return err;
}
@@ -360,14 +326,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
if (err < 0)
goto out;
nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
- if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
- err = nfs_wait_on_requests(inode, 0, 0);
- if (err < 0)
- goto out;
- }
- err = nfs_commit_inode(inode, wb_priority(wbc));
- if (err > 0)
- err = 0;
+ err = 0;
out:
clear_bit(BDI_write_congested, &bdi->state);
wake_up_all(&nfs_write_congestion);
@@ -516,17 +475,6 @@ static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_st
return res;
}
-static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
- int ret;
-
- spin_lock(&nfsi->req_lock);
- ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
- spin_unlock(&nfsi->req_lock);
- return ret;
-}
-
static void nfs_cancel_dirty_list(struct list_head *head)
{
struct nfs_page *req;
@@ -773,7 +721,7 @@ int nfs_updatepage(struct file *file, struct page *page,
dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
status, (long long)i_size_read(inode));
if (status < 0)
- ClearPageUptodate(page);
+ nfs_set_pageerror(page);
return status;
}
@@ -852,7 +800,8 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
data->task.tk_priority = flush_task_priority(how);
data->task.tk_cookie = (unsigned long)inode;
- dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
+ dprintk("NFS: %5u initiated write call "
+ "(req %s/%Ld, %u bytes @ offset %Lu)\n",
data->task.tk_pid,
inode->i_sb->s_id,
(long long)NFS_FILEID(inode),
@@ -1034,8 +983,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
return;
if (task->tk_status < 0) {
- ClearPageUptodate(page);
- SetPageError(page);
+ nfs_set_pageerror(page);
req->wb_context->error = task->tk_status;
dprintk(", error = %d\n", task->tk_status);
} else {
@@ -1092,8 +1040,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
(long long)req_offset(req));
if (task->tk_status < 0) {
- ClearPageUptodate(page);
- SetPageError(page);
+ nfs_set_pageerror(page);
req->wb_context->error = task->tk_status;
end_page_writeback(page);
nfs_inode_remove_request(req);
@@ -1134,7 +1081,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
struct nfs_writeres *resp = &data->res;
int status;
- dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
+ dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
task->tk_pid, task->tk_status);
/*
@@ -1250,7 +1197,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
data->task.tk_priority = flush_task_priority(how);
data->task.tk_cookie = (unsigned long)inode;
- dprintk("NFS: %4d initiated commit call\n", data->task.tk_pid);
+ dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
}
/*
@@ -1291,7 +1238,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
struct nfs_write_data *data = calldata;
struct nfs_page *req;
- dprintk("NFS: %4d nfs_commit_done (status %d)\n",
+ dprintk("NFS: %5u nfs_commit_done (status %d)\n",
task->tk_pid, task->tk_status);
/* Call the NFS version-specific code */
@@ -1516,6 +1463,8 @@ int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
if (ret < 0)
goto out;
}
+ if (!PagePrivate(page))
+ return 0;
ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
if (ret >= 0)
return 0;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 49c310b84923..6f24768272a1 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -16,7 +16,6 @@
#include <linux/unistd.h>
#include <linux/slab.h>
-#include <linux/sched.h>
#include <linux/stat.h>
#include <linux/in.h>
#include <linux/seq_file.h>
@@ -190,18 +189,17 @@ static int expkey_show(struct seq_file *m,
struct cache_head *h)
{
struct svc_expkey *ek ;
+ int i;
if (h ==NULL) {
seq_puts(m, "#domain fsidtype fsid [path]\n");
return 0;
}
ek = container_of(h, struct svc_expkey, h);
- seq_printf(m, "%s %d 0x%08x", ek->ek_client->name,
- ek->ek_fsidtype, ek->ek_fsid[0]);
- if (ek->ek_fsidtype != 1)
- seq_printf(m, "%08x", ek->ek_fsid[1]);
- if (ek->ek_fsidtype == 2)
- seq_printf(m, "%08x", ek->ek_fsid[2]);
+ seq_printf(m, "%s %d 0x", ek->ek_client->name,
+ ek->ek_fsidtype);
+ for (i=0; i < key_len(ek->ek_fsidtype)/4; i++)
+ seq_printf(m, "%08x", ek->ek_fsid[i]);
if (test_bit(CACHE_VALID, &h->flags) &&
!test_bit(CACHE_NEGATIVE, &h->flags)) {
seq_printf(m, " ");
@@ -232,9 +230,8 @@ static inline void expkey_init(struct cache_head *cnew,
kref_get(&item->ek_client->ref);
new->ek_client = item->ek_client;
new->ek_fsidtype = item->ek_fsidtype;
- new->ek_fsid[0] = item->ek_fsid[0];
- new->ek_fsid[1] = item->ek_fsid[1];
- new->ek_fsid[2] = item->ek_fsid[2];
+
+ memcpy(new->ek_fsid, item->ek_fsid, sizeof(new->ek_fsid));
}
static inline void expkey_update(struct cache_head *cnew,
@@ -363,7 +360,7 @@ static struct svc_export *svc_export_update(struct svc_export *new,
struct svc_export *old);
static struct svc_export *svc_export_lookup(struct svc_export *);
-static int check_export(struct inode *inode, int flags)
+static int check_export(struct inode *inode, int flags, unsigned char *uuid)
{
/* We currently export only dirs and regular files.
@@ -376,12 +373,13 @@ static int check_export(struct inode *inode, int flags)
/* There are two requirements on a filesystem to be exportable.
* 1: We must be able to identify the filesystem from a number.
* either a device number (so FS_REQUIRES_DEV needed)
- * or an FSID number (so NFSEXP_FSID needed).
+ * or an FSID number (so NFSEXP_FSID or ->uuid is needed).
* 2: We must be able to find an inode from a filehandle.
* This means that s_export_op must be set.
*/
if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
- !(flags & NFSEXP_FSID)) {
+ !(flags & NFSEXP_FSID) &&
+ uuid == NULL) {
dprintk("exp_export: export of non-dev fs without fsid\n");
return -EINVAL;
}
@@ -406,10 +404,6 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
int len;
int migrated, i, err;
- len = qword_get(mesg, buf, PAGE_SIZE);
- if (len != 5 || memcmp(buf, "fsloc", 5))
- return 0;
-
/* listsize */
err = get_int(mesg, &fsloc->locations_count);
if (err)
@@ -520,6 +514,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_fslocs.locations_count = 0;
exp.ex_fslocs.migrated = 0;
+ exp.ex_uuid = NULL;
+
/* flags */
err = get_int(&mesg, &an_int);
if (err == -ENOENT)
@@ -543,12 +539,33 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (err) goto out;
exp.ex_fsid = an_int;
- err = check_export(nd.dentry->d_inode, exp.ex_flags);
- if (err) goto out;
+ while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
+ if (strcmp(buf, "fsloc") == 0)
+ err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
+ else if (strcmp(buf, "uuid") == 0) {
+ /* expect a 16 byte uuid encoded as \xXXXX... */
+ len = qword_get(&mesg, buf, PAGE_SIZE);
+ if (len != 16)
+ err = -EINVAL;
+ else {
+ exp.ex_uuid =
+ kmemdup(buf, 16, GFP_KERNEL);
+ if (exp.ex_uuid == NULL)
+ err = -ENOMEM;
+ }
+ } else
+ /* quietly ignore unknown words and anything
+ * following. Newer user-space can try to set
+ * new values, then see what the result was.
+ */
+ break;
+ if (err)
+ goto out;
+ }
- err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
- if (err)
- goto out;
+ err = check_export(nd.dentry->d_inode, exp.ex_flags,
+ exp.ex_uuid);
+ if (err) goto out;
}
expp = svc_export_lookup(&exp);
@@ -562,6 +579,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
else
exp_put(expp);
out:
+ nfsd4_fslocs_free(&exp.ex_fslocs);
+ kfree(exp.ex_uuid);
kfree(exp.ex_path);
if (nd.dentry)
path_release(&nd);
@@ -591,9 +610,19 @@ static int svc_export_show(struct seq_file *m,
seq_escape(m, exp->ex_client->name, " \t\n\\");
seq_putc(m, '(');
if (test_bit(CACHE_VALID, &h->flags) &&
- !test_bit(CACHE_NEGATIVE, &h->flags))
+ !test_bit(CACHE_NEGATIVE, &h->flags)) {
exp_flags(m, exp->ex_flags, exp->ex_fsid,
exp->ex_anon_uid, exp->ex_anon_gid, &exp->ex_fslocs);
+ if (exp->ex_uuid) {
+ int i;
+ seq_puts(m, ",uuid=");
+ for (i=0; i<16; i++) {
+ if ((i&3) == 0 && i)
+ seq_putc(m, ':');
+ seq_printf(m, "%02x", exp->ex_uuid[i]);
+ }
+ }
+ }
seq_puts(m, ")\n");
return 0;
}
@@ -630,6 +659,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
new->ex_anon_uid = item->ex_anon_uid;
new->ex_anon_gid = item->ex_anon_gid;
new->ex_fsid = item->ex_fsid;
+ new->ex_uuid = item->ex_uuid;
+ item->ex_uuid = NULL;
new->ex_path = item->ex_path;
item->ex_path = NULL;
new->ex_fslocs.locations = item->ex_fslocs.locations;
@@ -752,11 +783,11 @@ exp_get_key(svc_client *clp, dev_t dev, ino_t ino)
u32 fsidv[3];
if (old_valid_dev(dev)) {
- mk_fsid_v0(fsidv, dev, ino);
- return exp_find_key(clp, 0, fsidv, NULL);
+ mk_fsid(FSID_DEV, fsidv, dev, ino, 0, NULL);
+ return exp_find_key(clp, FSID_DEV, fsidv, NULL);
}
- mk_fsid_v3(fsidv, dev, ino);
- return exp_find_key(clp, 3, fsidv, NULL);
+ mk_fsid(FSID_ENCODE_DEV, fsidv, dev, ino, 0, NULL);
+ return exp_find_key(clp, FSID_ENCODE_DEV, fsidv, NULL);
}
/*
@@ -767,9 +798,9 @@ exp_get_fsid_key(svc_client *clp, int fsid)
{
u32 fsidv[2];
- mk_fsid_v1(fsidv, fsid);
+ mk_fsid(FSID_NUM, fsidv, 0, 0, fsid, NULL);
- return exp_find_key(clp, 1, fsidv, NULL);
+ return exp_find_key(clp, FSID_NUM, fsidv, NULL);
}
svc_export *
@@ -883,8 +914,8 @@ static int exp_fsid_hash(svc_client *clp, struct svc_export *exp)
if ((exp->ex_flags & NFSEXP_FSID) == 0)
return 0;
- mk_fsid_v1(fsid, exp->ex_fsid);
- return exp_set_key(clp, 1, fsid, exp);
+ mk_fsid(FSID_NUM, fsid, 0, 0, exp->ex_fsid, NULL);
+ return exp_set_key(clp, FSID_NUM, fsid, exp);
}
static int exp_hash(struct auth_domain *clp, struct svc_export *exp)
@@ -894,11 +925,11 @@ static int exp_hash(struct auth_domain *clp, struct svc_export *exp)
dev_t dev = inode->i_sb->s_dev;
if (old_valid_dev(dev)) {
- mk_fsid_v0(fsid, dev, inode->i_ino);
- return exp_set_key(clp, 0, fsid, exp);
+ mk_fsid(FSID_DEV, fsid, dev, inode->i_ino, 0, NULL);
+ return exp_set_key(clp, FSID_DEV, fsid, exp);
}
- mk_fsid_v3(fsid, dev, inode->i_ino);
- return exp_set_key(clp, 3, fsid, exp);
+ mk_fsid(FSID_ENCODE_DEV, fsid, dev, inode->i_ino, 0, NULL);
+ return exp_set_key(clp, FSID_ENCODE_DEV, fsid, exp);
}
static void exp_unhash(struct svc_export *exp)
@@ -977,7 +1008,7 @@ exp_export(struct nfsctl_export *nxp)
goto finish;
}
- err = check_export(nd.dentry->d_inode, nxp->ex_flags);
+ err = check_export(nd.dentry->d_inode, nxp->ex_flags, NULL);
if (err) goto finish;
err = -ENOMEM;
@@ -1170,9 +1201,9 @@ exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
__be32 rv;
u32 fsidv[2];
- mk_fsid_v1(fsidv, 0);
+ mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
- exp = exp_find(clp, 1, fsidv, creq);
+ exp = exp_find(clp, FSID_NUM, fsidv, creq);
if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp));
if (exp == NULL)
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index edde5dc5f796..b61742885011 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -287,13 +287,20 @@ static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p,
return 1;
}
-static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p,
- struct nfsd_fhandle *resp)
+static int nfsaclsvc_release_attrstat(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd_attrstat *resp)
{
fh_put(&resp->fh);
return 1;
}
+static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
+ struct nfsd3_accessres *resp)
+{
+ fh_put(&resp->fh);
+ return 1;
+}
+
#define nfsaclsvc_decode_voidargs NULL
#define nfsaclsvc_encode_voidres NULL
#define nfsaclsvc_release_void NULL
@@ -322,9 +329,9 @@ struct nfsd3_voidargs { int dummy; };
static struct svc_procedure nfsd_acl_procedures2[] = {
PROC(null, void, void, void, RC_NOCACHE, ST),
PROC(getacl, getacl, getacl, getacl, RC_NOCACHE, ST+1+2*(1+ACL)),
- PROC(setacl, setacl, attrstat, fhandle, RC_NOCACHE, ST+AT),
- PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT),
- PROC(access, access, access, fhandle, RC_NOCACHE, ST+AT+1),
+ PROC(setacl, setacl, attrstat, attrstat, RC_NOCACHE, ST+AT),
+ PROC(getattr, fhandle, attrstat, attrstat, RC_NOCACHE, ST+AT),
+ PROC(access, access, access, access, RC_NOCACHE, ST+AT+1),
};
struct svc_version nfsd_acl_version2 = {
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index e695660921ec..6f677988c71d 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -149,6 +149,27 @@ decode_sattr3(__be32 *p, struct iattr *iap)
return p;
}
+static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp)
+{
+ u64 f;
+ switch(fsid_source(fhp)) {
+ default:
+ case FSIDSOURCE_DEV:
+ p = xdr_encode_hyper(p, (u64)huge_encode_dev
+ (fhp->fh_dentry->d_inode->i_sb->s_dev));
+ break;
+ case FSIDSOURCE_FSID:
+ p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
+ break;
+ case FSIDSOURCE_UUID:
+ f = ((u64*)fhp->fh_export->ex_uuid)[0];
+ f ^= ((u64*)fhp->fh_export->ex_uuid)[1];
+ p = xdr_encode_hyper(p, f);
+ break;
+ }
+ return p;
+}
+
static __be32 *
encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
struct kstat *stat)
@@ -169,10 +190,7 @@ encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9);
*p++ = htonl((u32) MAJOR(stat->rdev));
*p++ = htonl((u32) MINOR(stat->rdev));
- if (is_fsid(fhp, rqstp->rq_reffh))
- p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
- else
- p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat->dev));
+ p = encode_fsid(p, fhp);
p = xdr_encode_hyper(p, (u64) stat->ino);
p = encode_time3(p, &stat->atime);
lease_get_mtime(dentry->d_inode, &time);
@@ -203,10 +221,7 @@ encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
p = xdr_encode_hyper(p, ((u64)fhp->fh_post_blocks) << 9);
*p++ = fhp->fh_post_rdev[0];
*p++ = fhp->fh_post_rdev[1];
- if (is_fsid(fhp, rqstp->rq_reffh))
- p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
- else
- p = xdr_encode_hyper(p, (u64)huge_encode_dev(inode->i_sb->s_dev));
+ p = encode_fsid(p, fhp);
p = xdr_encode_hyper(p, (u64) inode->i_ino);
p = encode_time3(p, &fhp->fh_post_atime);
p = encode_time3(p, &fhp->fh_post_mtime);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 5d94555cdc83..832673b14587 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -61,9 +61,11 @@
/* flags used to simulate posix default ACLs */
#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
- | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE)
+ | NFS4_ACE_DIRECTORY_INHERIT_ACE)
-#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS | NFS4_ACE_IDENTIFIER_GROUP)
+#define NFS4_SUPPORTED_FLAGS (NFS4_INHERITANCE_FLAGS \
+ | NFS4_ACE_INHERIT_ONLY_ACE \
+ | NFS4_ACE_IDENTIFIER_GROUP)
#define MASK_EQUAL(mask1, mask2) \
( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
@@ -87,12 +89,19 @@ mask_from_posix(unsigned short perm, unsigned int flags)
}
static u32
-deny_mask(u32 allow_mask, unsigned int flags)
+deny_mask_from_posix(unsigned short perm, u32 flags)
{
- u32 ret = ~allow_mask & ~NFS4_MASK_UNSUPP;
- if (!(flags & NFS4_ACL_DIR))
- ret &= ~NFS4_ACE_DELETE_CHILD;
- return ret;
+ u32 mask = 0;
+
+ if (perm & ACL_READ)
+ mask |= NFS4_READ_MODE;
+ if (perm & ACL_WRITE)
+ mask |= NFS4_WRITE_MODE;
+ if ((perm & ACL_WRITE) && (flags & NFS4_ACL_DIR))
+ mask |= NFS4_ACE_DELETE_CHILD;
+ if (perm & ACL_EXECUTE)
+ mask |= NFS4_EXECUTE_MODE;
+ return mask;
}
/* XXX: modify functions to return NFS errors; they're only ever
@@ -126,108 +135,151 @@ struct ace_container {
};
static short ace2type(struct nfs4_ace *);
-static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int);
-static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int);
-int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
-static int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
+static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,
+ unsigned int);
+void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
struct nfs4_acl *
nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
unsigned int flags)
{
struct nfs4_acl *acl;
- int error = -EINVAL;
+ int size = 0;
- if ((pacl != NULL &&
- (posix_acl_valid(pacl) < 0 || pacl->a_count == 0)) ||
- (dpacl != NULL &&
- (posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0)))
- goto out_err;
-
- acl = nfs4_acl_new();
- if (acl == NULL) {
- error = -ENOMEM;
- goto out_err;
+ if (pacl) {
+ if (posix_acl_valid(pacl) < 0)
+ return ERR_PTR(-EINVAL);
+ size += 2*pacl->a_count;
}
-
- if (pacl != NULL) {
- error = _posix_to_nfsv4_one(pacl, acl,
- flags & ~NFS4_ACL_TYPE_DEFAULT);
- if (error < 0)
- goto out_acl;
+ if (dpacl) {
+ if (posix_acl_valid(dpacl) < 0)
+ return ERR_PTR(-EINVAL);
+ size += 2*dpacl->a_count;
}
- if (dpacl != NULL) {
- error = _posix_to_nfsv4_one(dpacl, acl,
- flags | NFS4_ACL_TYPE_DEFAULT);
- if (error < 0)
- goto out_acl;
- }
+ /* Allocate for worst case: one (deny, allow) pair each: */
+ acl = nfs4_acl_new(size);
+ if (acl == NULL)
+ return ERR_PTR(-ENOMEM);
- return acl;
+ if (pacl)
+ _posix_to_nfsv4_one(pacl, acl, flags & ~NFS4_ACL_TYPE_DEFAULT);
-out_acl:
- nfs4_acl_free(acl);
-out_err:
- acl = ERR_PTR(error);
+ if (dpacl)
+ _posix_to_nfsv4_one(dpacl, acl, flags | NFS4_ACL_TYPE_DEFAULT);
return acl;
}
-static int
-nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, int whotype,
- uid_t owner, unsigned int flags)
+struct posix_acl_summary {
+ unsigned short owner;
+ unsigned short users;
+ unsigned short group;
+ unsigned short groups;
+ unsigned short other;
+ unsigned short mask;
+};
+
+static void
+summarize_posix_acl(struct posix_acl *acl, struct posix_acl_summary *pas)
{
- int error;
-
- error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
- eflag, mask, whotype, owner);
- if (error < 0)
- return error;
- error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
- eflag, deny_mask(mask, flags), whotype, owner);
- return error;
+ struct posix_acl_entry *pa, *pe;
+ pas->users = 0;
+ pas->groups = 0;
+ pas->mask = 07;
+
+ pe = acl->a_entries + acl->a_count;
+
+ FOREACH_ACL_ENTRY(pa, acl, pe) {
+ switch (pa->e_tag) {
+ case ACL_USER_OBJ:
+ pas->owner = pa->e_perm;
+ break;
+ case ACL_GROUP_OBJ:
+ pas->group = pa->e_perm;
+ break;
+ case ACL_USER:
+ pas->users |= pa->e_perm;
+ break;
+ case ACL_GROUP:
+ pas->groups |= pa->e_perm;
+ break;
+ case ACL_OTHER:
+ pas->other = pa->e_perm;
+ break;
+ case ACL_MASK:
+ pas->mask = pa->e_perm;
+ break;
+ }
+ }
+ /* We'll only care about effective permissions: */
+ pas->users &= pas->mask;
+ pas->group &= pas->mask;
+ pas->groups &= pas->mask;
}
/* We assume the acl has been verified with posix_acl_valid. */
-static int
+static void
_posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
unsigned int flags)
{
- struct posix_acl_entry *pa, *pe, *group_owner_entry;
- int error = -EINVAL;
- u32 mask, mask_mask;
+ struct posix_acl_entry *pa, *group_owner_entry;
+ struct nfs4_ace *ace;
+ struct posix_acl_summary pas;
+ unsigned short deny;
int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ?
NFS4_INHERITANCE_FLAGS : 0);
BUG_ON(pacl->a_count < 3);
- pe = pacl->a_entries + pacl->a_count;
- pa = pe - 2; /* if mask entry exists, it's second from the last. */
- if (pa->e_tag == ACL_MASK)
- mask_mask = deny_mask(mask_from_posix(pa->e_perm, flags), flags);
- else
- mask_mask = 0;
+ summarize_posix_acl(pacl, &pas);
pa = pacl->a_entries;
- BUG_ON(pa->e_tag != ACL_USER_OBJ);
- mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER);
- error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_OWNER, 0, flags);
- if (error < 0)
- goto out;
- pa++;
+ ace = acl->aces + acl->naces;
- while (pa->e_tag == ACL_USER) {
- mask = mask_from_posix(pa->e_perm, flags);
- error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
- eflag, mask_mask, NFS4_ACL_WHO_NAMED, pa->e_id);
- if (error < 0)
- goto out;
+ /* We could deny everything not granted by the owner: */
+ deny = ~pas.owner;
+ /*
+ * but it is equivalent (and simpler) to deny only what is not
+ * granted by later entries:
+ */
+ deny &= pas.users | pas.group | pas.groups | pas.other;
+ if (deny) {
+ ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = deny_mask_from_posix(deny, flags);
+ ace->whotype = NFS4_ACL_WHO_OWNER;
+ ace++;
+ acl->naces++;
+ }
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER);
+ ace->whotype = NFS4_ACL_WHO_OWNER;
+ ace++;
+ acl->naces++;
+ pa++;
- error = nfs4_acl_add_pair(acl, eflag, mask,
- NFS4_ACL_WHO_NAMED, pa->e_id, flags);
- if (error < 0)
- goto out;
+ while (pa->e_tag == ACL_USER) {
+ deny = ~(pa->e_perm & pas.mask);
+ deny &= pas.groups | pas.group | pas.other;
+ if (deny) {
+ ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = deny_mask_from_posix(deny, flags);
+ ace->whotype = NFS4_ACL_WHO_NAMED;
+ ace->who = pa->e_id;
+ ace++;
+ acl->naces++;
+ }
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
+ flags);
+ ace->whotype = NFS4_ACL_WHO_NAMED;
+ ace->who = pa->e_id;
+ ace++;
+ acl->naces++;
pa++;
}
@@ -236,67 +288,65 @@ _posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
/* allow ACEs */
- if (pacl->a_count > 3) {
- BUG_ON(pa->e_tag != ACL_GROUP_OBJ);
- error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
- NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask,
- NFS4_ACL_WHO_GROUP, 0);
- if (error < 0)
- goto out;
- }
group_owner_entry = pa;
- mask = mask_from_posix(pa->e_perm, flags);
- error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
- NFS4_ACE_IDENTIFIER_GROUP | eflag, mask,
- NFS4_ACL_WHO_GROUP, 0);
- if (error < 0)
- goto out;
+
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = mask_from_posix(pas.group, flags);
+ ace->whotype = NFS4_ACL_WHO_GROUP;
+ ace++;
+ acl->naces++;
pa++;
while (pa->e_tag == ACL_GROUP) {
- mask = mask_from_posix(pa->e_perm, flags);
- error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
- NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask,
- NFS4_ACL_WHO_NAMED, pa->e_id);
- if (error < 0)
- goto out;
-
- error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
- NFS4_ACE_IDENTIFIER_GROUP | eflag, mask,
- NFS4_ACL_WHO_NAMED, pa->e_id);
- if (error < 0)
- goto out;
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
+ ace->access_mask = mask_from_posix(pa->e_perm & pas.mask,
+ flags);
+ ace->whotype = NFS4_ACL_WHO_NAMED;
+ ace->who = pa->e_id;
+ ace++;
+ acl->naces++;
pa++;
}
/* deny ACEs */
pa = group_owner_entry;
- mask = mask_from_posix(pa->e_perm, flags);
- error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
- NFS4_ACE_IDENTIFIER_GROUP | eflag,
- deny_mask(mask, flags), NFS4_ACL_WHO_GROUP, 0);
- if (error < 0)
- goto out;
+
+ deny = ~pas.group & pas.other;
+ if (deny) {
+ ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
+ ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
+ ace->access_mask = deny_mask_from_posix(deny, flags);
+ ace->whotype = NFS4_ACL_WHO_GROUP;
+ ace++;
+ acl->naces++;
+ }
pa++;
+
while (pa->e_tag == ACL_GROUP) {
- mask = mask_from_posix(pa->e_perm, flags);
- error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
- NFS4_ACE_IDENTIFIER_GROUP | eflag,
- deny_mask(mask, flags), NFS4_ACL_WHO_NAMED, pa->e_id);
- if (error < 0)
- goto out;
+ deny = ~(pa->e_perm & pas.mask);
+ deny &= pas.other;
+ if (deny) {
+ ace->type = NFS4_ACE_ACCESS_DENIED_ACE_TYPE;
+ ace->flag = eflag | NFS4_ACE_IDENTIFIER_GROUP;
+ ace->access_mask = mask_from_posix(deny, flags);
+ ace->whotype = NFS4_ACL_WHO_NAMED;
+ ace->who = pa->e_id;
+ ace++;
+ acl->naces++;
+ }
pa++;
}
if (pa->e_tag == ACL_MASK)
pa++;
- BUG_ON(pa->e_tag != ACL_OTHER);
- mask = mask_from_posix(pa->e_perm, flags);
- error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_EVERYONE, 0, flags);
-
-out:
- return error;
+ ace->type = NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE;
+ ace->flag = eflag;
+ ace->access_mask = mask_from_posix(pa->e_perm, flags);
+ ace->whotype = NFS4_ACL_WHO_EVERYONE;
+ acl->naces++;
}
static void
@@ -342,46 +392,6 @@ sort_pacl(struct posix_acl *pacl)
return;
}
-int
-nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
- struct posix_acl **dpacl, unsigned int flags)
-{
- struct nfs4_acl *dacl;
- int error = -ENOMEM;
-
- *pacl = NULL;
- *dpacl = NULL;
-
- dacl = nfs4_acl_new();
- if (dacl == NULL)
- goto out;
-
- error = nfs4_acl_split(acl, dacl);
- if (error)
- goto out_acl;
-
- *pacl = _nfsv4_to_posix_one(acl, flags);
- if (IS_ERR(*pacl)) {
- error = PTR_ERR(*pacl);
- *pacl = NULL;
- goto out_acl;
- }
-
- *dpacl = _nfsv4_to_posix_one(dacl, flags);
- if (IS_ERR(*dpacl)) {
- error = PTR_ERR(*dpacl);
- *dpacl = NULL;
- }
-out_acl:
- if (error) {
- posix_acl_release(*pacl);
- *pacl = NULL;
- }
- nfs4_acl_free(dacl);
-out:
- return error;
-}
-
/*
* While processing the NFSv4 ACE, this maintains bitmasks representing
* which permission bits have been allowed and which denied to a given
@@ -406,6 +416,7 @@ struct posix_ace_state_array {
* calculated so far: */
struct posix_acl_state {
+ int empty;
struct posix_ace_state owner;
struct posix_ace_state group;
struct posix_ace_state other;
@@ -421,6 +432,7 @@ init_state(struct posix_acl_state *state, int cnt)
int alloc;
memset(state, 0, sizeof(struct posix_acl_state));
+ state->empty = 1;
/*
* In the worst case, each individual acl could be for a distinct
* named user or group, but we don't know which, so we allocate
@@ -488,6 +500,20 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
int nace;
int i, error = 0;
+ /*
+ * ACLs with no ACEs are treated differently in the inheritable
+ * and effective cases: when there are no inheritable ACEs, we
+ * set a zero-length default posix acl:
+ */
+ if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) {
+ pacl = posix_acl_alloc(0, GFP_KERNEL);
+ return pacl ? pacl : ERR_PTR(-ENOMEM);
+ }
+ /*
+ * When there are no effective ACEs, the following will end
+ * up setting a 3-element effective posix ACL with all
+ * permissions zero.
+ */
nace = 4 + state->users->n + state->groups->n;
pacl = posix_acl_alloc(nace, GFP_KERNEL);
if (!pacl)
@@ -603,6 +629,8 @@ static void process_one_v4_ace(struct posix_acl_state *state,
u32 mask = ace->access_mask;
int i;
+ state->empty = 0;
+
switch (ace2type(ace)) {
case ACL_USER_OBJ:
if (ace->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) {
@@ -666,75 +694,62 @@ static void process_one_v4_ace(struct posix_acl_state *state,
}
}
-static struct posix_acl *
-_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags)
+int nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
+ struct posix_acl **dpacl, unsigned int flags)
{
- struct posix_acl_state state;
- struct posix_acl *pacl;
+ struct posix_acl_state effective_acl_state, default_acl_state;
struct nfs4_ace *ace;
int ret;
- ret = init_state(&state, n4acl->naces);
+ ret = init_state(&effective_acl_state, acl->naces);
if (ret)
- return ERR_PTR(ret);
-
- list_for_each_entry(ace, &n4acl->ace_head, l_ace)
- process_one_v4_ace(&state, ace);
-
- pacl = posix_state_to_acl(&state, flags);
-
- free_state(&state);
-
- if (!IS_ERR(pacl))
- sort_pacl(pacl);
- return pacl;
-}
-
-static int
-nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
-{
- struct list_head *h, *n;
- struct nfs4_ace *ace;
- int error = 0;
-
- list_for_each_safe(h, n, &acl->ace_head) {
- ace = list_entry(h, struct nfs4_ace, l_ace);
-
+ return ret;
+ ret = init_state(&default_acl_state, acl->naces);
+ if (ret)
+ goto out_estate;
+ ret = -EINVAL;
+ for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) {
if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
- return -EINVAL;
-
+ goto out_dstate;
if (ace->flag & ~NFS4_SUPPORTED_FLAGS)
- return -EINVAL;
-
- switch (ace->flag & NFS4_INHERITANCE_FLAGS) {
- case 0:
- /* Leave this ace in the effective acl: */
+ goto out_dstate;
+ if ((ace->flag & NFS4_INHERITANCE_FLAGS) == 0) {
+ process_one_v4_ace(&effective_acl_state, ace);
continue;
- case NFS4_INHERITANCE_FLAGS:
- /* Add this ace to the default acl and remove it
- * from the effective acl: */
- error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
- ace->access_mask, ace->whotype, ace->who);
- if (error)
- return error;
- list_del(h);
- kfree(ace);
- acl->naces--;
- break;
- case NFS4_INHERITANCE_FLAGS & ~NFS4_ACE_INHERIT_ONLY_ACE:
- /* Add this ace to the default, but leave it in
- * the effective acl as well: */
- error = nfs4_acl_add_ace(dacl, ace->type, ace->flag,
- ace->access_mask, ace->whotype, ace->who);
- if (error)
- return error;
- break;
- default:
- return -EINVAL;
}
+ if (!(flags & NFS4_ACL_DIR))
+ goto out_dstate;
+ /*
+ * Note that when only one of FILE_INHERIT or DIRECTORY_INHERIT
+ * is set, we're effectively turning on the other. That's OK,
+ * according to rfc 3530.
+ */
+ process_one_v4_ace(&default_acl_state, ace);
+
+ if (!(ace->flag & NFS4_ACE_INHERIT_ONLY_ACE))
+ process_one_v4_ace(&effective_acl_state, ace);
}
- return 0;
+ *pacl = posix_state_to_acl(&effective_acl_state, flags);
+ if (IS_ERR(*pacl)) {
+ ret = PTR_ERR(*pacl);
+ goto out_dstate;
+ }
+ *dpacl = posix_state_to_acl(&default_acl_state,
+ flags | NFS4_ACL_TYPE_DEFAULT);
+ if (IS_ERR(*dpacl)) {
+ ret = PTR_ERR(*dpacl);
+ posix_acl_release(*pacl);
+ goto out_dstate;
+ }
+ sort_pacl(*pacl);
+ sort_pacl(*dpacl);
+ ret = 0;
+out_dstate:
+ free_state(&default_acl_state);
+out_estate:
+ free_state(&effective_acl_state);
+ return ret;
}
static short
@@ -759,48 +774,22 @@ EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4);
EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix);
struct nfs4_acl *
-nfs4_acl_new(void)
+nfs4_acl_new(int n)
{
struct nfs4_acl *acl;
- if ((acl = kmalloc(sizeof(*acl), GFP_KERNEL)) == NULL)
+ acl = kmalloc(sizeof(*acl) + n*sizeof(struct nfs4_ace), GFP_KERNEL);
+ if (acl == NULL)
return NULL;
-
acl->naces = 0;
- INIT_LIST_HEAD(&acl->ace_head);
-
return acl;
}
void
-nfs4_acl_free(struct nfs4_acl *acl)
-{
- struct list_head *h;
- struct nfs4_ace *ace;
-
- if (!acl)
- return;
-
- while (!list_empty(&acl->ace_head)) {
- h = acl->ace_head.next;
- list_del(h);
- ace = list_entry(h, struct nfs4_ace, l_ace);
- kfree(ace);
- }
-
- kfree(acl);
-
- return;
-}
-
-int
nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
int whotype, uid_t who)
{
- struct nfs4_ace *ace;
-
- if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL)
- return -ENOMEM;
+ struct nfs4_ace *ace = acl->aces + acl->naces;
ace->type = type;
ace->flag = flag;
@@ -808,10 +797,7 @@ nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
ace->whotype = whotype;
ace->who = who;
- list_add_tail(&ace->l_ace, &acl->ace_head);
acl->naces++;
-
- return 0;
}
static struct {
@@ -865,7 +851,6 @@ nfs4_acl_write_who(int who, char *p)
}
EXPORT_SYMBOL(nfs4_acl_new);
-EXPORT_SYMBOL(nfs4_acl_free);
EXPORT_SYMBOL(nfs4_acl_add_ace);
EXPORT_SYMBOL(nfs4_acl_get_whotype);
EXPORT_SYMBOL(nfs4_acl_write_who);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f57655a7a2b6..fb14d68eacab 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -387,7 +387,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
.address = (struct sockaddr *)&addr,
.addrsize = sizeof(addr),
.timeout = &timeparms,
- .servername = clp->cl_name.data,
.program = program,
.version = nfs_cb_version[1]->number,
.authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */
@@ -397,6 +396,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
.rpc_argp = clp,
};
+ char clientname[16];
int status;
if (atomic_read(&cb->cb_set))
@@ -419,6 +419,11 @@ nfsd4_probe_callback(struct nfs4_client *clp)
memset(program->stats, 0, sizeof(cb->cb_stat));
program->stats->program = program;
+ /* Just here to make some printk's more useful: */
+ snprintf(clientname, sizeof(clientname),
+ "%u.%u.%u.%u", NIPQUAD(addr.sin_addr));
+ args.servername = clientname;
+
/* Create RPC client */
cb->cb_client = rpc_create(&args);
if (IS_ERR(cb->cb_client)) {
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index b1902ebaab41..e4a83d727afd 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -50,7 +50,6 @@
#include <linux/sunrpc/cache.h>
#include <linux/nfsd_idmap.h>
#include <linux/list.h>
-#include <linux/sched.h>
#include <linux/time.h>
#include <linux/seq_file.h>
#include <linux/sunrpc/svcauth.h>
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9de89df961f4..9e4067999209 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -714,7 +714,7 @@ __be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_setclientid *setclid)
{
- __be32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
+ struct sockaddr_in *sin = svc_addr_in(rqstp);
struct xdr_netobj clname = {
.len = setclid->se_namelen,
.data = setclid->se_name,
@@ -749,7 +749,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
*/
status = nfserr_clid_inuse;
if (!cmp_creds(&conf->cl_cred, &rqstp->rq_cred)
- || conf->cl_addr != ip_addr) {
+ || conf->cl_addr != sin->sin_addr.s_addr) {
printk("NFSD: setclientid: string in use by client"
"(clientid %08x/%08x)\n",
conf->cl_clientid.cl_boot, conf->cl_clientid.cl_id);
@@ -769,7 +769,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (new == NULL)
goto out;
copy_verf(new, &clverifier);
- new->cl_addr = ip_addr;
+ new->cl_addr = sin->sin_addr.s_addr;
copy_cred(&new->cl_cred,&rqstp->rq_cred);
gen_clid(new);
gen_confirm(new);
@@ -801,7 +801,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (new == NULL)
goto out;
copy_verf(new,&conf->cl_verifier);
- new->cl_addr = ip_addr;
+ new->cl_addr = sin->sin_addr.s_addr;
copy_cred(&new->cl_cred,&rqstp->rq_cred);
copy_clid(new, conf);
gen_confirm(new);
@@ -820,7 +820,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (new == NULL)
goto out;
copy_verf(new,&clverifier);
- new->cl_addr = ip_addr;
+ new->cl_addr = sin->sin_addr.s_addr;
copy_cred(&new->cl_cred,&rqstp->rq_cred);
gen_clid(new);
gen_confirm(new);
@@ -847,7 +847,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (new == NULL)
goto out;
copy_verf(new,&clverifier);
- new->cl_addr = ip_addr;
+ new->cl_addr = sin->sin_addr.s_addr;
copy_cred(&new->cl_cred,&rqstp->rq_cred);
gen_clid(new);
gen_confirm(new);
@@ -881,7 +881,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate,
struct nfsd4_setclientid_confirm *setclientid_confirm)
{
- __be32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
+ struct sockaddr_in *sin = svc_addr_in(rqstp);
struct nfs4_client *conf, *unconf;
nfs4_verifier confirm = setclientid_confirm->sc_confirm;
clientid_t * clid = &setclientid_confirm->sc_clientid;
@@ -900,9 +900,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
unconf = find_unconfirmed_client(clid);
status = nfserr_clid_inuse;
- if (conf && conf->cl_addr != ip_addr)
+ if (conf && conf->cl_addr != sin->sin_addr.s_addr)
goto out;
- if (unconf && unconf->cl_addr != ip_addr)
+ if (unconf && unconf->cl_addr != sin->sin_addr.s_addr)
goto out;
if ((conf && unconf) &&
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 18aa9440df14..5d090f11f2be 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -199,24 +199,22 @@ defer_free(struct nfsd4_compoundargs *argp,
static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
{
- void *new = NULL;
if (p == argp->tmp) {
- new = kmalloc(nbytes, GFP_KERNEL);
- if (!new) return NULL;
- p = new;
+ p = kmalloc(nbytes, GFP_KERNEL);
+ if (!p)
+ return NULL;
memcpy(p, argp->tmp, nbytes);
} else {
BUG_ON(p != argp->tmpp);
argp->tmpp = NULL;
}
if (defer_free(argp, kfree, p)) {
- kfree(new);
+ kfree(p);
return NULL;
} else
return (char *)p;
}
-
static __be32
nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
{
@@ -255,7 +253,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
return status;
/*
- * According to spec, unsupported attributes return ERR_NOTSUPP;
+ * According to spec, unsupported attributes return ERR_ATTRNOTSUPP;
* read-only attributes return ERR_INVAL.
*/
if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1))
@@ -273,42 +271,42 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
iattr->ia_valid |= ATTR_SIZE;
}
if (bmval[0] & FATTR4_WORD0_ACL) {
- int nace, i;
- struct nfs4_ace ace;
+ int nace;
+ struct nfs4_ace *ace;
READ_BUF(4); len += 4;
READ32(nace);
- *acl = nfs4_acl_new();
+ if (nace > NFS4_ACL_MAX)
+ return nfserr_resource;
+
+ *acl = nfs4_acl_new(nace);
if (*acl == NULL) {
host_err = -ENOMEM;
goto out_nfserr;
}
- defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl);
+ defer_free(argp, kfree, *acl);
- for (i = 0; i < nace; i++) {
+ (*acl)->naces = nace;
+ for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
READ_BUF(16); len += 16;
- READ32(ace.type);
- READ32(ace.flag);
- READ32(ace.access_mask);
+ READ32(ace->type);
+ READ32(ace->flag);
+ READ32(ace->access_mask);
READ32(dummy32);
READ_BUF(dummy32);
len += XDR_QUADLEN(dummy32) << 2;
READMEM(buf, dummy32);
- ace.whotype = nfs4_acl_get_whotype(buf, dummy32);
+ ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
host_err = 0;
- if (ace.whotype != NFS4_ACL_WHO_NAMED)
- ace.who = 0;
- else if (ace.flag & NFS4_ACE_IDENTIFIER_GROUP)
+ if (ace->whotype != NFS4_ACL_WHO_NAMED)
+ ace->who = 0;
+ else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
host_err = nfsd_map_name_to_gid(argp->rqstp,
- buf, dummy32, &ace.who);
+ buf, dummy32, &ace->who);
else
host_err = nfsd_map_name_to_uid(argp->rqstp,
- buf, dummy32, &ace.who);
- if (host_err)
- goto out_nfserr;
- host_err = nfs4_acl_add_ace(*acl, ace.type, ace.flag,
- ace.access_mask, ace.whotype, ace.who);
+ buf, dummy32, &ace->who);
if (host_err)
goto out_nfserr;
}
@@ -1563,14 +1561,20 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (exp->ex_fslocs.migrated) {
WRITE64(NFS4_REFERRAL_FSID_MAJOR);
WRITE64(NFS4_REFERRAL_FSID_MINOR);
- } else if (is_fsid(fhp, rqstp->rq_reffh)) {
+ } else switch(fsid_source(fhp)) {
+ case FSIDSOURCE_FSID:
WRITE64((u64)exp->ex_fsid);
WRITE64((u64)0);
- } else {
+ break;
+ case FSIDSOURCE_DEV:
WRITE32(0);
WRITE32(MAJOR(stat.dev));
WRITE32(0);
WRITE32(MINOR(stat.dev));
+ break;
+ case FSIDSOURCE_UUID:
+ WRITEMEM(exp->ex_uuid, 16);
+ break;
}
}
if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
@@ -1590,7 +1594,6 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
}
if (bmval0 & FATTR4_WORD0_ACL) {
struct nfs4_ace *ace;
- struct list_head *h;
if (acl == NULL) {
if ((buflen -= 4) < 0)
@@ -1603,9 +1606,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
goto out_resource;
WRITE32(acl->naces);
- list_for_each(h, &acl->ace_head) {
- ace = list_entry(h, struct nfs4_ace, l_ace);
-
+ for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) {
if ((buflen -= 4*3) < 0)
goto out_resource;
WRITE32(ace->type);
@@ -1815,7 +1816,7 @@ out_acl:
status = nfs_ok;
out:
- nfs4_acl_free(acl);
+ kfree(acl);
if (fhp == &tempfh)
fh_put(&tempfh);
return status;
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index f90d70475854..578f2c9d56be 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -185,7 +185,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
rp->c_state = RC_INPROG;
rp->c_xid = xid;
rp->c_proc = proc;
- rp->c_addr = rqstp->rq_addr;
+ memcpy(&rp->c_addr, svc_addr_in(rqstp), sizeof(rp->c_addr));
rp->c_prot = proto;
rp->c_vers = vers;
rp->c_timestamp = jiffies;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index eedf2e3990a9..71c686dc7257 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -123,7 +123,7 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu
return PTR_ERR(data);
rv = write_op[ino](file, data, size);
- if (rv>0) {
+ if (rv >= 0) {
simple_transaction_set(file, rv);
rv = size;
}
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c59d6fbb7a6b..c2660cbfcd96 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -9,7 +9,6 @@
* ... and again Southern-Winter 2001 to support export_operations
*/
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
#include <linux/fs.h>
@@ -20,6 +19,7 @@
#include <linux/mount.h>
#include <asm/pgtable.h>
+#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
@@ -118,9 +118,6 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
- /* keep this filehandle for possible reference when encoding attributes */
- rqstp->rq_reffh = fh;
-
if (!fhp->fh_dentry) {
__u32 *datap=NULL;
__u32 tfh[3]; /* filehandle fragment for oldstyle filehandles */
@@ -145,10 +142,10 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
}
len = key_len(fh->fh_fsid_type) / 4;
if (len == 0) goto out;
- if (fh->fh_fsid_type == 2) {
+ if (fh->fh_fsid_type == FSID_MAJOR_MINOR) {
/* deprecated, convert to type 3 */
- len = 3;
- fh->fh_fsid_type = 3;
+ len = key_len(FSID_ENCODE_DEV)/4;
+ fh->fh_fsid_type = FSID_ENCODE_DEV;
fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1])));
fh->fh_fsid[1] = fh->fh_fsid[2];
}
@@ -163,8 +160,9 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
/* assume old filehandle format */
xdev = old_decode_dev(fh->ofh_xdev);
xino = u32_to_ino_t(fh->ofh_xino);
- mk_fsid_v0(tfh, xdev, xino);
- exp = exp_find(rqstp->rq_client, 0, tfh, &rqstp->rq_chandle);
+ mk_fsid(FSID_DEV, tfh, xdev, xino, 0, NULL);
+ exp = exp_find(rqstp->rq_client, FSID_DEV, tfh,
+ &rqstp->rq_chandle);
}
if (IS_ERR(exp) && (PTR_ERR(exp) == -EAGAIN
@@ -180,10 +178,10 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
/* Check if the request originated from a secure port. */
error = nfserr_perm;
if (!rqstp->rq_secure && EX_SECURE(exp)) {
+ char buf[RPC_MAX_ADDRBUFLEN];
printk(KERN_WARNING
- "nfsd: request from insecure port (%u.%u.%u.%u:%d)!\n",
- NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
- ntohs(rqstp->rq_addr.sin_port));
+ "nfsd: request from insecure port %s!\n",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
goto out;
}
@@ -211,7 +209,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
fileid_type = 2;
} else
fileid_type = fh->fh_fileid_type;
-
+
if (fileid_type == 0)
dentry = dget(exp->ex_dentry);
else {
@@ -291,7 +289,7 @@ static inline int _fh_update(struct dentry *dentry, struct svc_export *exp,
__u32 *datap, int *maxsize)
{
struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op;
-
+
if (dentry == exp->ex_dentry) {
*maxsize = 0;
return 0;
@@ -316,7 +314,8 @@ static inline void _fh_update_old(struct dentry *dentry,
}
__be32
-fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh)
+fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
+ struct svc_fh *ref_fh)
{
/* ref_fh is a reference file handle.
* if it is non-null and for the same filesystem, then we should compose
@@ -326,12 +325,13 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st
*
*/
- u8 ref_fh_version = 0;
- u8 ref_fh_fsid_type = 0;
+ u8 version = 1;
+ u8 fsid_type = 0;
struct inode * inode = dentry->d_inode;
struct dentry *parent = dentry->d_parent;
__u32 *datap;
dev_t ex_dev = exp->ex_dentry->d_inode->i_sb->s_dev;
+ int root_export = (exp->ex_dentry == exp->ex_dentry->d_sb->s_root);
dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
MAJOR(ex_dev), MINOR(ex_dev),
@@ -339,57 +339,64 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st
parent->d_name.name, dentry->d_name.name,
(inode ? inode->i_ino : 0));
+ /* Choose filehandle version and fsid type based on
+ * the reference filehandle (if it is in the same export)
+ * or the export options.
+ */
if (ref_fh && ref_fh->fh_export == exp) {
- ref_fh_version = ref_fh->fh_handle.fh_version;
- if (ref_fh_version == 0xca)
- ref_fh_fsid_type = 0;
+ version = ref_fh->fh_handle.fh_version;
+ if (version == 0xca)
+ fsid_type = FSID_DEV;
else
- ref_fh_fsid_type = ref_fh->fh_handle.fh_fsid_type;
- if (ref_fh_fsid_type > 3)
- ref_fh_fsid_type = 0;
-
- /* make sure ref_fh type works for given export */
- if (ref_fh_fsid_type == 1 &&
- !(exp->ex_flags & NFSEXP_FSID)) {
- /* if we don't have an fsid, we cannot provide one... */
- ref_fh_fsid_type = 0;
+ fsid_type = ref_fh->fh_handle.fh_fsid_type;
+ /* We know this version/type works for this export
+ * so there is no need for further checks.
+ */
+ } else if (exp->ex_uuid) {
+ if (fhp->fh_maxsize >= 64) {
+ if (root_export)
+ fsid_type = FSID_UUID16;
+ else
+ fsid_type = FSID_UUID16_INUM;
+ } else {
+ if (root_export)
+ fsid_type = FSID_UUID8;
+ else
+ fsid_type = FSID_UUID4_INUM;
}
} else if (exp->ex_flags & NFSEXP_FSID)
- ref_fh_fsid_type = 1;
-
- if (!old_valid_dev(ex_dev) && ref_fh_fsid_type == 0) {
+ fsid_type = FSID_NUM;
+ else if (!old_valid_dev(ex_dev))
/* for newer device numbers, we must use a newer fsid format */
- ref_fh_version = 1;
- ref_fh_fsid_type = 3;
- }
- if (old_valid_dev(ex_dev) &&
- (ref_fh_fsid_type == 2 || ref_fh_fsid_type == 3))
- /* must use type1 for smaller device numbers */
- ref_fh_fsid_type = 0;
+ fsid_type = FSID_ENCODE_DEV;
+ else
+ fsid_type = FSID_DEV;
if (ref_fh == fhp)
fh_put(ref_fh);
if (fhp->fh_locked || fhp->fh_dentry) {
printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n",
- parent->d_name.name, dentry->d_name.name);
+ parent->d_name.name, dentry->d_name.name);
}
if (fhp->fh_maxsize < NFS_FHSIZE)
printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n",
- fhp->fh_maxsize, parent->d_name.name, dentry->d_name.name);
+ fhp->fh_maxsize,
+ parent->d_name.name, dentry->d_name.name);
fhp->fh_dentry = dget(dentry); /* our internal copy */
fhp->fh_export = exp;
cache_get(&exp->h);
- if (ref_fh_version == 0xca) {
+ if (version == 0xca) {
/* old style filehandle please */
memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
fhp->fh_handle.fh_size = NFS_FHSIZE;
fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev);
fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
- fhp->fh_handle.ofh_xino = ino_t_to_u32(exp->ex_dentry->d_inode->i_ino);
+ fhp->fh_handle.ofh_xino =
+ ino_t_to_u32(exp->ex_dentry->d_inode->i_ino);
fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
if (inode)
_fh_update_old(dentry, exp, &fhp->fh_handle);
@@ -398,38 +405,12 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st
fhp->fh_handle.fh_version = 1;
fhp->fh_handle.fh_auth_type = 0;
datap = fhp->fh_handle.fh_auth+0;
- fhp->fh_handle.fh_fsid_type = ref_fh_fsid_type;
- switch (ref_fh_fsid_type) {
- case 0:
- /*
- * fsid_type 0:
- * 2byte major, 2byte minor, 4byte inode
- */
- mk_fsid_v0(datap, ex_dev,
- exp->ex_dentry->d_inode->i_ino);
- break;
- case 1:
- /* fsid_type 1 == 4 bytes filesystem id */
- mk_fsid_v1(datap, exp->ex_fsid);
- break;
- case 2:
- /*
- * fsid_type 2:
- * 4byte major, 4byte minor, 4byte inode
- */
- mk_fsid_v2(datap, ex_dev,
- exp->ex_dentry->d_inode->i_ino);
- break;
- case 3:
- /*
- * fsid_type 3:
- * 4byte devicenumber, 4byte inode
- */
- mk_fsid_v3(datap, ex_dev,
- exp->ex_dentry->d_inode->i_ino);
- break;
- }
- len = key_len(ref_fh_fsid_type);
+ fhp->fh_handle.fh_fsid_type = fsid_type;
+ mk_fsid(fsid_type, datap, ex_dev,
+ exp->ex_dentry->d_inode->i_ino,
+ exp->ex_fsid, exp->ex_uuid);
+
+ len = key_len(fsid_type);
datap += len/4;
fhp->fh_handle.fh_size = 4 + len;
@@ -456,7 +437,7 @@ fh_update(struct svc_fh *fhp)
{
struct dentry *dentry;
__u32 *datap;
-
+
if (!fhp->fh_dentry)
goto out_bad;
@@ -533,3 +514,22 @@ char * SVCFH_fmt(struct svc_fh *fhp)
fh->fh_base.fh_pad[5]);
return buf;
}
+
+enum fsid_source fsid_source(struct svc_fh *fhp)
+{
+ if (fhp->fh_handle.fh_version != 1)
+ return FSIDSOURCE_DEV;
+ switch(fhp->fh_handle.fh_fsid_type) {
+ case FSID_DEV:
+ case FSID_ENCODE_DEV:
+ case FSID_MAJOR_MINOR:
+ return FSIDSOURCE_DEV;
+ case FSID_NUM:
+ return FSIDSOURCE_FSID;
+ default:
+ if (fhp->fh_export->ex_flags & NFSEXP_FSID)
+ return FSIDSOURCE_FSID;
+ else
+ return FSIDSOURCE_UUID;
+ }
+}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index ec983b777680..5cc2eec981b8 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -19,6 +19,7 @@
#include <linux/unistd.h>
#include <linux/slab.h>
+#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/svc.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/cache.h>
@@ -147,10 +148,10 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
*/
if (NFSSVC_MAXBLKSIZE_V2 < argp->count) {
+ char buf[RPC_MAX_ADDRBUFLEN];
printk(KERN_NOTICE
- "oversized read request from %u.%u.%u.%u:%d (%d bytes)\n",
- NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
- ntohs(rqstp->rq_addr.sin_port),
+ "oversized read request from %s (%d bytes)\n",
+ svc_print_addr(rqstp, buf, sizeof(buf)),
argp->count);
argp->count = NFSSVC_MAXBLKSIZE_V2;
}
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index fbf5d51947ea..d7759ce6ed94 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -235,7 +235,8 @@ static int nfsd_init_socks(int port)
error = lockd_up(IPPROTO_UDP);
if (error >= 0) {
- error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
+ error = svc_makesock(nfsd_serv, IPPROTO_UDP, port,
+ SVC_SOCK_DEFAULTS);
if (error < 0)
lockd_down();
}
@@ -245,7 +246,8 @@ static int nfsd_init_socks(int port)
#ifdef CONFIG_NFSD_TCP
error = lockd_up(IPPROTO_TCP);
if (error >= 0) {
- error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
+ error = svc_makesock(nfsd_serv, IPPROTO_TCP, port,
+ SVC_SOCK_DEFAULTS);
if (error < 0)
lockd_down();
}
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 6555c50d9006..0c24b9e24fe8 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -153,6 +153,7 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
struct dentry *dentry = fhp->fh_dentry;
int type;
struct timespec time;
+ u32 f;
type = (stat->mode & S_IFMT);
@@ -173,10 +174,22 @@ encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp,
else
*p++ = htonl(0xffffffff);
*p++ = htonl((u32) stat->blocks);
- if (is_fsid(fhp, rqstp->rq_reffh))
- *p++ = htonl((u32) fhp->fh_export->ex_fsid);
- else
+ switch (fsid_source(fhp)) {
+ default:
+ case FSIDSOURCE_DEV:
*p++ = htonl(new_encode_dev(stat->dev));
+ break;
+ case FSIDSOURCE_FSID:
+ *p++ = htonl((u32) fhp->fh_export->ex_fsid);
+ break;
+ case FSIDSOURCE_UUID:
+ f = ((u32*)fhp->fh_export->ex_uuid)[0];
+ f ^= ((u32*)fhp->fh_export->ex_uuid)[1];
+ f ^= ((u32*)fhp->fh_export->ex_uuid)[2];
+ f ^= ((u32*)fhp->fh_export->ex_uuid)[3];
+ *p++ = htonl(f);
+ break;
+ }
*p++ = htonl((u32) stat->ino);
*p++ = htonl((u32) stat->atime.tv_sec);
*p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8283236c6a0f..7e6aa245b5d5 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -466,7 +466,10 @@ out:
posix_acl_release(dpacl);
return (error);
out_nfserr:
- error = nfserrno(host_error);
+ if (host_error == -EOPNOTSUPP)
+ error = nfserr_attrnotsupp;
+ else
+ error = nfserrno(host_error);
goto out;
}
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index c577d8e1bd95..7659cc192995 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -1921,7 +1921,7 @@ s64 ntfs_attr_extend_allocation(ntfs_inode *ni, s64 new_alloc_size,
u32 attr_len = 0; /* Silence stupid gcc warning. */
bool mp_rebuilt;
-#ifdef NTFS_DEBUG
+#ifdef DEBUG
read_lock_irqsave(&ni->size_lock, flags);
allocated_size = ni->allocated_size;
read_unlock_irqrestore(&ni->size_lock, flags);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 076c9420c257..d69c4595ccd0 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2328,7 +2328,7 @@ const struct file_operations ntfs_file_ops = {
the data source. */
};
-struct inode_operations ntfs_file_inode_ops = {
+const struct inode_operations ntfs_file_inode_ops = {
#ifdef NTFS_RW
.truncate = ntfs_truncate_vfs,
.setattr = ntfs_setattr,
@@ -2337,4 +2337,4 @@ struct inode_operations ntfs_file_inode_ops = {
const struct file_operations ntfs_empty_file_ops = {};
-struct inode_operations ntfs_empty_inode_ops = {};
+const struct inode_operations ntfs_empty_inode_ops = {};
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index eddb2247cec5..bff01a54675a 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -359,7 +359,7 @@ err_out:
/**
* Inode operations for directories.
*/
-struct inode_operations ntfs_dir_inode_ops = {
+const struct inode_operations ntfs_dir_inode_ops = {
.lookup = ntfs_lookup, /* VFS: Lookup directory. */
};
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index a12847ae467d..d73f5a9ac341 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -61,13 +61,13 @@ extern const struct address_space_operations ntfs_aops;
extern const struct address_space_operations ntfs_mst_aops;
extern const struct file_operations ntfs_file_ops;
-extern struct inode_operations ntfs_file_inode_ops;
+extern const struct inode_operations ntfs_file_inode_ops;
extern const struct file_operations ntfs_dir_ops;
-extern struct inode_operations ntfs_dir_inode_ops;
+extern const struct inode_operations ntfs_dir_inode_ops;
extern const struct file_operations ntfs_empty_file_ops;
-extern struct inode_operations ntfs_empty_inode_ops;
+extern const struct inode_operations ntfs_empty_inode_ops;
extern struct export_operations ntfs_export_ops;
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index babf94d90def..1594c90b7164 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2699,7 +2699,7 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
/**
* The complete super operations.
*/
-static struct super_operations ntfs_sops = {
+static const struct super_operations ntfs_sops = {
.alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */
.destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */
#ifdef NTFS_RW
diff --git a/fs/ntfs/sysctl.c b/fs/ntfs/sysctl.c
index 1c23138d00b3..4847fbfb0107 100644
--- a/fs/ntfs/sysctl.c
+++ b/fs/ntfs/sysctl.c
@@ -33,20 +33,28 @@
#include "sysctl.h"
#include "debug.h"
-#define FS_NTFS 1
-
/* Definition of the ntfs sysctl. */
static ctl_table ntfs_sysctls[] = {
- { FS_NTFS, "ntfs-debug", /* Binary and text IDs. */
- &debug_msgs,sizeof(debug_msgs), /* Data pointer and size. */
- 0644, NULL, &proc_dointvec }, /* Mode, child, proc handler. */
- { 0 }
+ {
+ .ctl_name = CTL_UNNUMBERED, /* Binary and text IDs. */
+ .procname = "ntfs-debug",
+ .data = &debug_msgs, /* Data pointer and size. */
+ .maxlen = sizeof(debug_msgs),
+ .mode = 0644, /* Mode, proc handler. */
+ .proc_handler = &proc_dointvec
+ },
+ {}
};
/* Define the parent directory /proc/sys/fs. */
static ctl_table sysctls_root[] = {
- { CTL_FS, "fs", NULL, 0, 0555, ntfs_sysctls },
- { 0 }
+ {
+ .ctl_name = CTL_FS,
+ .procname = "fs",
+ .mode = 0555,
+ .child = ntfs_sysctls
+ },
+ {}
};
/* Storage for the sysctls header. */
@@ -62,17 +70,9 @@ int ntfs_sysctl(int add)
{
if (add) {
BUG_ON(sysctls_root_table);
- sysctls_root_table = register_sysctl_table(sysctls_root, 0);
+ sysctls_root_table = register_sysctl_table(sysctls_root);
if (!sysctls_root_table)
return -ENOMEM;
-#ifdef CONFIG_PROC_FS
- /*
- * If the proc filesystem is in use and we are a module, need
- * to set the owner of our proc entry to our module. In the
- * non-modular case, THIS_MODULE is NULL, so this is ok.
- */
- ntfs_sysctls[0].de->owner = THIS_MODULE;
-#endif
} else {
BUG_ON(!sysctls_root_table);
unregister_sysctl_table(sysctls_root_table);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 277ca67a2ad6..5a9779bb9236 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -184,10 +184,9 @@ static void o2hb_disarm_write_timeout(struct o2hb_region *reg)
flush_scheduled_work();
}
-static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
- unsigned int num_ios)
+static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc)
{
- atomic_set(&wc->wc_num_reqs, num_ios);
+ atomic_set(&wc->wc_num_reqs, 1);
init_completion(&wc->wc_io_complete);
wc->wc_error = 0;
}
@@ -212,6 +211,7 @@ static void o2hb_wait_on_io(struct o2hb_region *reg,
struct address_space *mapping = reg->hr_bdev->bd_inode->i_mapping;
blk_run_address_space(mapping);
+ o2hb_bio_wait_dec(wc, 1);
wait_for_completion(&wc->wc_io_complete);
}
@@ -231,6 +231,7 @@ static int o2hb_bio_end_io(struct bio *bio,
return 1;
o2hb_bio_wait_dec(wc, 1);
+ bio_put(bio);
return 0;
}
@@ -238,23 +239,22 @@ static int o2hb_bio_end_io(struct bio *bio,
* start_slot. */
static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
struct o2hb_bio_wait_ctxt *wc,
- unsigned int start_slot,
- unsigned int num_slots)
+ unsigned int *current_slot,
+ unsigned int max_slots)
{
- int i, nr_vecs, len, first_page, last_page;
+ int len, current_page;
unsigned int vec_len, vec_start;
unsigned int bits = reg->hr_block_bits;
unsigned int spp = reg->hr_slots_per_page;
+ unsigned int cs = *current_slot;
struct bio *bio;
struct page *page;
- nr_vecs = (num_slots + spp - 1) / spp;
-
/* Testing has shown this allocation to take long enough under
* GFP_KERNEL that the local node can get fenced. It would be
* nicest if we could pre-allocate these bios and avoid this
* all together. */
- bio = bio_alloc(GFP_ATOMIC, nr_vecs);
+ bio = bio_alloc(GFP_ATOMIC, 16);
if (!bio) {
mlog(ML_ERROR, "Could not alloc slots BIO!\n");
bio = ERR_PTR(-ENOMEM);
@@ -262,137 +262,53 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
}
/* Must put everything in 512 byte sectors for the bio... */
- bio->bi_sector = (reg->hr_start_block + start_slot) << (bits - 9);
+ bio->bi_sector = (reg->hr_start_block + cs) << (bits - 9);
bio->bi_bdev = reg->hr_bdev;
bio->bi_private = wc;
bio->bi_end_io = o2hb_bio_end_io;
- first_page = start_slot / spp;
- last_page = first_page + nr_vecs;
- vec_start = (start_slot << bits) % PAGE_CACHE_SIZE;
- for(i = first_page; i < last_page; i++) {
- page = reg->hr_slot_data[i];
+ vec_start = (cs << bits) % PAGE_CACHE_SIZE;
+ while(cs < max_slots) {
+ current_page = cs / spp;
+ page = reg->hr_slot_data[current_page];
- vec_len = PAGE_CACHE_SIZE;
- /* last page might be short */
- if (((i + 1) * spp) > (start_slot + num_slots))
- vec_len = ((num_slots + start_slot) % spp) << bits;
- vec_len -= vec_start;
+ vec_len = min(PAGE_CACHE_SIZE,
+ (max_slots-cs) * (PAGE_CACHE_SIZE/spp) );
mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n",
- i, vec_len, vec_start);
+ current_page, vec_len, vec_start);
len = bio_add_page(bio, page, vec_len, vec_start);
- if (len != vec_len) {
- bio_put(bio);
- bio = ERR_PTR(-EIO);
-
- mlog(ML_ERROR, "Error adding page to bio i = %d, "
- "vec_len = %u, len = %d\n, start = %u\n",
- i, vec_len, len, vec_start);
- goto bail;
- }
+ if (len != vec_len) break;
+ cs += vec_len / (PAGE_CACHE_SIZE/spp);
vec_start = 0;
}
bail:
+ *current_slot = cs;
return bio;
}
-/*
- * Compute the maximum number of sectors the bdev can handle in one bio,
- * as a power of two.
- *
- * Stolen from oracleasm, thanks Joel!
- */
-static int compute_max_sectors(struct block_device *bdev)
-{
- int max_pages, max_sectors, pow_two_sectors;
-
- struct request_queue *q;
-
- q = bdev_get_queue(bdev);
- max_pages = q->max_sectors >> (PAGE_SHIFT - 9);
- if (max_pages > BIO_MAX_PAGES)
- max_pages = BIO_MAX_PAGES;
- if (max_pages > q->max_phys_segments)
- max_pages = q->max_phys_segments;
- if (max_pages > q->max_hw_segments)
- max_pages = q->max_hw_segments;
- max_pages--; /* Handle I/Os that straddle a page */
-
- if (max_pages) {
- max_sectors = max_pages << (PAGE_SHIFT - 9);
- } else {
- /* If BIO contains 1 or less than 1 page. */
- max_sectors = q->max_sectors;
- }
- /* Why is fls() 1-based???? */
- pow_two_sectors = 1 << (fls(max_sectors) - 1);
-
- return pow_two_sectors;
-}
-
-static inline void o2hb_compute_request_limits(struct o2hb_region *reg,
- unsigned int num_slots,
- unsigned int *num_bios,
- unsigned int *slots_per_bio)
-{
- unsigned int max_sectors, io_sectors;
-
- max_sectors = compute_max_sectors(reg->hr_bdev);
-
- io_sectors = num_slots << (reg->hr_block_bits - 9);
-
- *num_bios = (io_sectors + max_sectors - 1) / max_sectors;
- *slots_per_bio = max_sectors >> (reg->hr_block_bits - 9);
-
- mlog(ML_HB_BIO, "My io size is %u sectors for %u slots. This "
- "device can handle %u sectors of I/O\n", io_sectors, num_slots,
- max_sectors);
- mlog(ML_HB_BIO, "Will need %u bios holding %u slots each\n",
- *num_bios, *slots_per_bio);
-}
-
static int o2hb_read_slots(struct o2hb_region *reg,
unsigned int max_slots)
{
- unsigned int num_bios, slots_per_bio, start_slot, num_slots;
- int i, status;
+ unsigned int current_slot=0;
+ int status;
struct o2hb_bio_wait_ctxt wc;
- struct bio **bios;
struct bio *bio;
- o2hb_compute_request_limits(reg, max_slots, &num_bios, &slots_per_bio);
+ o2hb_bio_wait_init(&wc);
- bios = kcalloc(num_bios, sizeof(struct bio *), GFP_KERNEL);
- if (!bios) {
- status = -ENOMEM;
- mlog_errno(status);
- return status;
- }
-
- o2hb_bio_wait_init(&wc, num_bios);
-
- num_slots = slots_per_bio;
- for(i = 0; i < num_bios; i++) {
- start_slot = i * slots_per_bio;
-
- /* adjust num_slots at last bio */
- if (max_slots < (start_slot + num_slots))
- num_slots = max_slots - start_slot;
-
- bio = o2hb_setup_one_bio(reg, &wc, start_slot, num_slots);
+ while(current_slot < max_slots) {
+ bio = o2hb_setup_one_bio(reg, &wc, &current_slot, max_slots);
if (IS_ERR(bio)) {
- o2hb_bio_wait_dec(&wc, num_bios - i);
-
status = PTR_ERR(bio);
mlog_errno(status);
goto bail_and_wait;
}
- bios[i] = bio;
+ atomic_inc(&wc.wc_num_reqs);
submit_bio(READ, bio);
}
@@ -403,38 +319,30 @@ bail_and_wait:
if (wc.wc_error && !status)
status = wc.wc_error;
- if (bios) {
- for(i = 0; i < num_bios; i++)
- if (bios[i])
- bio_put(bios[i]);
- kfree(bios);
- }
-
return status;
}
static int o2hb_issue_node_write(struct o2hb_region *reg,
- struct bio **write_bio,
struct o2hb_bio_wait_ctxt *write_wc)
{
int status;
unsigned int slot;
struct bio *bio;
- o2hb_bio_wait_init(write_wc, 1);
+ o2hb_bio_wait_init(write_wc);
slot = o2nm_this_node();
- bio = o2hb_setup_one_bio(reg, write_wc, slot, 1);
+ bio = o2hb_setup_one_bio(reg, write_wc, &slot, slot+1);
if (IS_ERR(bio)) {
status = PTR_ERR(bio);
mlog_errno(status);
goto bail;
}
+ atomic_inc(&write_wc->wc_num_reqs);
submit_bio(WRITE, bio);
- *write_bio = bio;
status = 0;
bail:
return status;
@@ -826,7 +734,6 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
{
int i, ret, highest_node, change = 0;
unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
- struct bio *write_bio;
struct o2hb_bio_wait_ctxt write_wc;
ret = o2nm_configured_node_map(configured_nodes,
@@ -864,7 +771,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
/* And fire off the write. Note that we don't wait on this I/O
* until later. */
- ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
+ ret = o2hb_issue_node_write(reg, &write_wc);
if (ret < 0) {
mlog_errno(ret);
return ret;
@@ -882,7 +789,6 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
* people we find in our steady state have seen us.
*/
o2hb_wait_on_io(reg, &write_wc);
- bio_put(write_bio);
if (write_wc.wc_error) {
/* Do not re-arm the write timeout on I/O error - we
* can't be sure that the new block ever made it to
@@ -943,7 +849,6 @@ static int o2hb_thread(void *data)
{
int i, ret;
struct o2hb_region *reg = data;
- struct bio *write_bio;
struct o2hb_bio_wait_ctxt write_wc;
struct timeval before_hb, after_hb;
unsigned int elapsed_msec;
@@ -993,10 +898,9 @@ static int o2hb_thread(void *data)
*
* XXX: Should we skip this on unclean_stop? */
o2hb_prepare_block(reg, 0);
- ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
+ ret = o2hb_issue_node_write(reg, &write_wc);
if (ret == 0) {
o2hb_wait_on_io(reg, &write_wc);
- bio_put(write_bio);
} else {
mlog_errno(ret);
}
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index b17333a0606b..9f5ad0f01ce0 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -55,7 +55,7 @@ static ctl_table ocfs2_nm_table[] = {
static ctl_table ocfs2_mod_table[] = {
{
- .ctl_name = KERN_OCFS2_NM,
+ .ctl_name = FS_OCFS2_NM,
.procname = "nm",
.data = NULL,
.maxlen = 0,
@@ -67,7 +67,7 @@ static ctl_table ocfs2_mod_table[] = {
static ctl_table ocfs2_kern_table[] = {
{
- .ctl_name = KERN_OCFS2,
+ .ctl_name = FS_OCFS2,
.procname = "ocfs2",
.data = NULL,
.maxlen = 0,
@@ -922,7 +922,7 @@ static int __init init_o2nm(void)
o2hb_init();
o2net_init();
- ocfs2_table_header = register_sysctl_table(ocfs2_root_table, 0);
+ ocfs2_table_header = register_sysctl_table(ocfs2_root_table);
if (!ocfs2_table_header) {
printk(KERN_ERR "nodemanager: unable to register sysctl\n");
ret = -ENOMEM; /* or something. */
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index 8fb23cacc2f5..070522138ae2 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -33,8 +33,7 @@
#include <linux/configfs.h>
#include <linux/rbtree.h>
-#define KERN_OCFS2 988
-#define KERN_OCFS2_NM 1
+#define FS_OCFS2_NM 1
const char *o2nm_get_hb_ctl_path(void);
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index ae4ff4a6636b..1718215fc018 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -556,6 +556,8 @@ static void o2net_register_callbacks(struct sock *sk,
sk->sk_data_ready = o2net_data_ready;
sk->sk_state_change = o2net_state_change;
+ mutex_init(&sc->sc_send_lock);
+
write_unlock_bh(&sk->sk_callback_lock);
}
@@ -688,6 +690,7 @@ static void o2net_handler_put(struct o2net_msg_handler *nmh)
* be given to the handler if their payload is longer than the max. */
int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
o2net_msg_handler_func *func, void *data,
+ o2net_post_msg_handler_func *post_func,
struct list_head *unreg_list)
{
struct o2net_msg_handler *nmh = NULL;
@@ -722,6 +725,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
nmh->nh_func = func;
nmh->nh_func_data = data;
+ nmh->nh_post_func = post_func;
nmh->nh_msg_type = msg_type;
nmh->nh_max_len = max_len;
nmh->nh_key = key;
@@ -856,10 +860,12 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
ssize_t ret;
+ mutex_lock(&sc->sc_send_lock);
ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
virt_to_page(kmalloced_virt),
(long)kmalloced_virt & ~PAGE_MASK,
size, MSG_DONTWAIT);
+ mutex_unlock(&sc->sc_send_lock);
if (ret != size) {
mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
" failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
@@ -974,8 +980,10 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
/* finally, convert the message header to network byte-order
* and send */
+ mutex_lock(&sc->sc_send_lock);
ret = o2net_send_tcp_msg(sc->sc_sock, vec, veclen,
sizeof(struct o2net_msg) + caller_bytes);
+ mutex_unlock(&sc->sc_send_lock);
msglog(msg, "sending returned %d\n", ret);
if (ret < 0) {
mlog(0, "error returned from o2net_send_tcp_msg=%d\n", ret);
@@ -1049,6 +1057,7 @@ static int o2net_process_message(struct o2net_sock_container *sc,
int ret = 0, handler_status;
enum o2net_system_error syserr;
struct o2net_msg_handler *nmh = NULL;
+ void *ret_data = NULL;
msglog(hdr, "processing message\n");
@@ -1101,17 +1110,26 @@ static int o2net_process_message(struct o2net_sock_container *sc,
sc->sc_msg_type = be16_to_cpu(hdr->msg_type);
handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) +
be16_to_cpu(hdr->data_len),
- nmh->nh_func_data);
+ nmh->nh_func_data, &ret_data);
do_gettimeofday(&sc->sc_tv_func_stop);
out_respond:
/* this destroys the hdr, so don't use it after this */
+ mutex_lock(&sc->sc_send_lock);
ret = o2net_send_status_magic(sc->sc_sock, hdr, syserr,
handler_status);
+ mutex_unlock(&sc->sc_send_lock);
hdr = NULL;
mlog(0, "sending handler status %d, syserr %d returned %d\n",
handler_status, syserr, ret);
+ if (nmh) {
+ BUG_ON(ret_data != NULL && nmh->nh_post_func == NULL);
+ if (nmh->nh_post_func)
+ (nmh->nh_post_func)(handler_status, nmh->nh_func_data,
+ ret_data);
+ }
+
out:
if (nmh)
o2net_handler_put(nmh);
@@ -1795,13 +1813,13 @@ out:
ready(sk, bytes);
}
-static int o2net_open_listening_sock(__be16 port)
+static int o2net_open_listening_sock(__be32 addr, __be16 port)
{
struct socket *sock = NULL;
int ret;
struct sockaddr_in sin = {
.sin_family = PF_INET,
- .sin_addr = { .s_addr = (__force u32)htonl(INADDR_ANY) },
+ .sin_addr = { .s_addr = (__force u32)addr },
.sin_port = (__force u16)port,
};
@@ -1824,15 +1842,15 @@ static int o2net_open_listening_sock(__be16 port)
sock->sk->sk_reuse = 1;
ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin));
if (ret < 0) {
- mlog(ML_ERROR, "unable to bind socket to port %d, ret=%d\n",
- ntohs(port), ret);
+ mlog(ML_ERROR, "unable to bind socket at %u.%u.%u.%u:%u, "
+ "ret=%d\n", NIPQUAD(addr), ntohs(port), ret);
goto out;
}
ret = sock->ops->listen(sock, 64);
if (ret < 0) {
- mlog(ML_ERROR, "unable to listen on port %d, ret=%d\n",
- ntohs(port), ret);
+ mlog(ML_ERROR, "unable to listen on %u.%u.%u.%u:%u, ret=%d\n",
+ NIPQUAD(addr), ntohs(port), ret);
}
out:
@@ -1865,7 +1883,8 @@ int o2net_start_listening(struct o2nm_node *node)
return -ENOMEM; /* ? */
}
- ret = o2net_open_listening_sock(node->nd_ipv4_port);
+ ret = o2net_open_listening_sock(node->nd_ipv4_address,
+ node->nd_ipv4_port);
if (ret) {
destroy_workqueue(o2net_wq);
o2net_wq = NULL;
diff --git a/fs/ocfs2/cluster/tcp.h b/fs/ocfs2/cluster/tcp.h
index 21a4e43df836..da880fc215f0 100644
--- a/fs/ocfs2/cluster/tcp.h
+++ b/fs/ocfs2/cluster/tcp.h
@@ -50,7 +50,10 @@ struct o2net_msg
__u8 buf[0];
};
-typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data);
+typedef int (o2net_msg_handler_func)(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+typedef void (o2net_post_msg_handler_func)(int status, void *data,
+ void *ret_data);
#define O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(struct o2net_msg))
@@ -99,6 +102,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *vec,
int o2net_register_handler(u32 msg_type, u32 key, u32 max_len,
o2net_msg_handler_func *func, void *data,
+ o2net_post_msg_handler_func *post_func,
struct list_head *unreg_list);
void o2net_unregister_handler_list(struct list_head *list);
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index b700dc9624d1..4dae5df5e467 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -38,6 +38,12 @@
* locking semantics of the file system using the protocol. It should
* be somewhere else, I'm sure, but right now it isn't.
*
+ * New in version 7:
+ * - DLM join domain includes the live nodemap
+ *
+ * New in version 6:
+ * - DLM lockres remote refcount fixes.
+ *
* New in version 5:
* - Network timeout checking protocol
*
@@ -51,7 +57,7 @@
* - full 64 bit i_size in the metadata lock lvbs
* - introduction of "rw" lock and pushing meta/data locking down
*/
-#define O2NET_PROTOCOL_VERSION 5ULL
+#define O2NET_PROTOCOL_VERSION 7ULL
struct o2net_handshake {
__be64 protocol_version;
__be64 connector_id;
@@ -149,6 +155,8 @@ struct o2net_sock_container {
struct timeval sc_tv_func_stop;
u32 sc_msg_key;
u16 sc_msg_type;
+
+ struct mutex sc_send_lock;
};
struct o2net_msg_handler {
@@ -158,6 +166,8 @@ struct o2net_msg_handler {
u32 nh_key;
o2net_msg_handler_func *nh_func;
o2net_msg_handler_func *nh_func_data;
+ o2net_post_msg_handler_func
+ *nh_post_func;
struct kref nh_kref;
struct list_head nh_unregister_item;
};
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 681046d51393..241cad342a48 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -263,7 +263,8 @@ void dlm_do_local_bast(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
-int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
int ret;
unsigned int locklen;
@@ -311,8 +312,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
past->type != DLM_BAST) {
mlog(ML_ERROR, "Unknown ast type! %d, cookie=%u:%llu"
"name=%.*s\n", past->type,
- dlm_get_lock_cookie_node(cookie),
- dlm_get_lock_cookie_seq(cookie),
+ dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
locklen, name);
ret = DLM_IVLOCKID;
goto leave;
@@ -323,8 +324,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
mlog(0, "got %sast for unknown lockres! "
"cookie=%u:%llu, name=%.*s, namelen=%u\n",
past->type == DLM_AST ? "" : "b",
- dlm_get_lock_cookie_node(cookie),
- dlm_get_lock_cookie_seq(cookie),
+ dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
locklen, name, locklen);
ret = DLM_IVLOCKID;
goto leave;
@@ -369,7 +370,8 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data)
mlog(0, "got %sast for unknown lock! cookie=%u:%llu, "
"name=%.*s, namelen=%u\n", past->type == DLM_AST ? "" : "b",
- dlm_get_lock_cookie_node(cookie), dlm_get_lock_cookie_seq(cookie),
+ dlm_get_lock_cookie_node(be64_to_cpu(cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(cookie)),
locklen, name, locklen);
ret = DLM_NORMAL;
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 6b6ff76538c5..e90b92f9ece1 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -180,6 +180,11 @@ struct dlm_assert_master_priv
unsigned ignore_higher:1;
};
+struct dlm_deref_lockres_priv
+{
+ struct dlm_lock_resource *deref_res;
+ u8 deref_node;
+};
struct dlm_work_item
{
@@ -191,6 +196,7 @@ struct dlm_work_item
struct dlm_request_all_locks_priv ral;
struct dlm_mig_lockres_priv ml;
struct dlm_assert_master_priv am;
+ struct dlm_deref_lockres_priv dl;
} u;
};
@@ -222,6 +228,9 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
#define DLM_LOCK_RES_DIRTY 0x00000008
#define DLM_LOCK_RES_IN_PROGRESS 0x00000010
#define DLM_LOCK_RES_MIGRATING 0x00000020
+#define DLM_LOCK_RES_DROPPING_REF 0x00000040
+#define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000
+#define DLM_LOCK_RES_SETREF_INPROG 0x00002000
/* max milliseconds to wait to sync up a network failure with a node death */
#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
@@ -265,6 +274,8 @@ struct dlm_lock_resource
u8 owner; //node which owns the lock resource, or unknown
u16 state;
char lvb[DLM_LVB_LEN];
+ unsigned int inflight_locks;
+ unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
};
struct dlm_migratable_lock
@@ -367,7 +378,7 @@ enum {
DLM_CONVERT_LOCK_MSG, /* 504 */
DLM_PROXY_AST_MSG, /* 505 */
DLM_UNLOCK_LOCK_MSG, /* 506 */
- DLM_UNUSED_MSG2, /* 507 */
+ DLM_DEREF_LOCKRES_MSG, /* 507 */
DLM_MIGRATE_REQUEST_MSG, /* 508 */
DLM_MIG_LOCKRES_MSG, /* 509 */
DLM_QUERY_JOIN_MSG, /* 510 */
@@ -417,6 +428,9 @@ struct dlm_master_request
u8 name[O2NM_MAX_NAME_LEN];
};
+#define DLM_ASSERT_RESPONSE_REASSERT 0x00000001
+#define DLM_ASSERT_RESPONSE_MASTERY_REF 0x00000002
+
#define DLM_ASSERT_MASTER_MLE_CLEANUP 0x00000001
#define DLM_ASSERT_MASTER_REQUERY 0x00000002
#define DLM_ASSERT_MASTER_FINISH_MIGRATION 0x00000004
@@ -430,6 +444,8 @@ struct dlm_assert_master
u8 name[O2NM_MAX_NAME_LEN];
};
+#define DLM_MIGRATE_RESPONSE_MASTERY_REF 0x00000001
+
struct dlm_migrate_request
{
u8 master;
@@ -609,12 +625,16 @@ struct dlm_begin_reco
};
+#define BITS_PER_BYTE 8
+#define BITS_TO_BYTES(bits) (((bits)+BITS_PER_BYTE-1)/BITS_PER_BYTE)
+
struct dlm_query_join_request
{
u8 node_idx;
u8 pad1[2];
u8 name_len;
u8 domain[O2NM_MAX_NAME_LEN];
+ u8 node_map[BITS_TO_BYTES(O2NM_MAX_NODES)];
};
struct dlm_assert_joined
@@ -648,6 +668,16 @@ struct dlm_finalize_reco
__be32 pad2;
};
+struct dlm_deref_lockres
+{
+ u32 pad1;
+ u16 pad2;
+ u8 node_idx;
+ u8 namelen;
+
+ u8 name[O2NM_MAX_NAME_LEN];
+};
+
static inline enum dlm_status
__dlm_lockres_state_to_status(struct dlm_lock_resource *res)
{
@@ -688,16 +718,20 @@ void dlm_lock_put(struct dlm_lock *lock);
void dlm_lock_attach_lockres(struct dlm_lock *lock,
struct dlm_lock_resource *res);
-int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data);
+int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
void dlm_revert_pending_convert(struct dlm_lock_resource *res,
struct dlm_lock *lock);
void dlm_revert_pending_lock(struct dlm_lock_resource *res,
struct dlm_lock *lock);
-int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data);
+int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
void dlm_commit_pending_cancel(struct dlm_lock_resource *res,
struct dlm_lock *lock);
void dlm_commit_pending_unlock(struct dlm_lock_resource *res,
@@ -721,8 +755,6 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
-void dlm_purge_lockres(struct dlm_ctxt *dlm,
- struct dlm_lock_resource *lockres);
static inline void dlm_lockres_get(struct dlm_lock_resource *res)
{
/* This is called on every lookup, so it might be worth
@@ -733,6 +765,10 @@ void dlm_lockres_put(struct dlm_lock_resource *res);
void __dlm_unhash_lockres(struct dlm_lock_resource *res);
void __dlm_insert_lockres(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
+struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
+ const char *name,
+ unsigned int len,
+ unsigned int hash);
struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
const char *name,
unsigned int len,
@@ -753,6 +789,47 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
const char *name,
unsigned int namelen);
+#define dlm_lockres_set_refmap_bit(bit,res) \
+ __dlm_lockres_set_refmap_bit(bit,res,__FILE__,__LINE__)
+#define dlm_lockres_clear_refmap_bit(bit,res) \
+ __dlm_lockres_clear_refmap_bit(bit,res,__FILE__,__LINE__)
+
+static inline void __dlm_lockres_set_refmap_bit(int bit,
+ struct dlm_lock_resource *res,
+ const char *file,
+ int line)
+{
+ //printk("%s:%d:%.*s: setting bit %d\n", file, line,
+ // res->lockname.len, res->lockname.name, bit);
+ set_bit(bit, res->refmap);
+}
+
+static inline void __dlm_lockres_clear_refmap_bit(int bit,
+ struct dlm_lock_resource *res,
+ const char *file,
+ int line)
+{
+ //printk("%s:%d:%.*s: clearing bit %d\n", file, line,
+ // res->lockname.len, res->lockname.name, bit);
+ clear_bit(bit, res->refmap);
+}
+
+void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res,
+ const char *file,
+ int line);
+void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res,
+ int new_lockres,
+ const char *file,
+ int line);
+#define dlm_lockres_drop_inflight_ref(d,r) \
+ __dlm_lockres_drop_inflight_ref(d,r,__FILE__,__LINE__)
+#define dlm_lockres_grab_inflight_ref(d,r) \
+ __dlm_lockres_grab_inflight_ref(d,r,0,__FILE__,__LINE__)
+#define dlm_lockres_grab_inflight_ref_new(d,r) \
+ __dlm_lockres_grab_inflight_ref(d,r,1,__FILE__,__LINE__)
+
void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
void dlm_do_local_ast(struct dlm_ctxt *dlm,
@@ -801,10 +878,7 @@ int dlm_heartbeat_init(struct dlm_ctxt *dlm);
void dlm_hb_node_down_cb(struct o2nm_node *node, int idx, void *data);
void dlm_hb_node_up_cb(struct o2nm_node *node, int idx, void *data);
-int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
-int dlm_migrate_lockres(struct dlm_ctxt *dlm,
- struct dlm_lock_resource *res,
- u8 target);
+int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res);
int dlm_finish_migration(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
u8 old_master);
@@ -812,15 +886,27 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res);
void __dlm_lockres_reserve_ast(struct dlm_lock_resource *res);
-int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data);
-int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data);
+int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+void dlm_assert_master_post_handler(int status, void *data, void *ret_data);
+int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
u8 nodenum, u8 *real_master);
@@ -856,10 +942,12 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
int dlm_init_mle_cache(void);
void dlm_destroy_mle_cache(void);
void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up);
+int dlm_drop_lockres_ref(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res);
void dlm_clean_master_list(struct dlm_ctxt *dlm,
u8 dead_node);
int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock);
-
+int __dlm_lockres_has_locks(struct dlm_lock_resource *res);
int __dlm_lockres_unused(struct dlm_lock_resource *res);
static inline const char * dlm_lock_mode_name(int mode)
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index c764dc8e40a2..ecb4d997221e 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -286,8 +286,8 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
__dlm_print_one_lock_resource(res);
mlog(ML_ERROR, "converting a remote lock that is already "
"converting! (cookie=%u:%llu, conv=%d)\n",
- dlm_get_lock_cookie_node(lock->ml.cookie),
- dlm_get_lock_cookie_seq(lock->ml.cookie),
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
lock->ml.convert_type);
status = DLM_DENIED;
goto bail;
@@ -418,7 +418,8 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
* returns: DLM_NORMAL, DLM_IVLOCKID, DLM_BADARGS,
* status from __dlmconvert_master
*/
-int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf;
@@ -428,7 +429,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
struct dlm_lockstatus *lksb;
enum dlm_status status = DLM_NORMAL;
u32 flags;
- int call_ast = 0, kick_thread = 0, ast_reserved = 0;
+ int call_ast = 0, kick_thread = 0, ast_reserved = 0, wake = 0;
if (!dlm_grab(dlm)) {
dlm_error(DLM_REJECTED);
@@ -479,25 +480,14 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
}
lock = NULL;
}
- if (!lock) {
- __dlm_print_one_lock_resource(res);
- list_for_each(iter, &res->granted) {
- lock = list_entry(iter, struct dlm_lock, list);
- if (lock->ml.node == cnv->node_idx) {
- mlog(ML_ERROR, "There is something here "
- "for node %u, lock->ml.cookie=%llu, "
- "cnv->cookie=%llu\n", cnv->node_idx,
- (unsigned long long)lock->ml.cookie,
- (unsigned long long)cnv->cookie);
- break;
- }
- }
- lock = NULL;
- }
spin_unlock(&res->spinlock);
if (!lock) {
status = DLM_IVLOCKID;
- dlm_error(status);
+ mlog(ML_ERROR, "did not find lock to convert on grant queue! "
+ "cookie=%u:%llu\n",
+ dlm_get_lock_cookie_node(be64_to_cpu(cnv->cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(cnv->cookie)));
+ __dlm_print_one_lock_resource(res);
goto leave;
}
@@ -524,8 +514,11 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
cnv->requested_type,
&call_ast, &kick_thread);
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
+ wake = 1;
}
spin_unlock(&res->spinlock);
+ if (wake)
+ wake_up(&res->wq);
if (status != DLM_NORMAL) {
if (status != DLM_NOTQUEUED)
@@ -534,12 +527,7 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data)
}
leave:
- if (!lock)
- mlog(ML_ERROR, "did not find lock to convert on grant queue! "
- "cookie=%u:%llu\n",
- dlm_get_lock_cookie_node(cnv->cookie),
- dlm_get_lock_cookie_seq(cnv->cookie));
- else
+ if (lock)
dlm_lock_put(lock);
/* either queue the ast or release it, if reserved */
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 3f6c8d88f7af..64239b37e5d4 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -53,6 +53,23 @@ void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
spin_unlock(&res->spinlock);
}
+static void dlm_print_lockres_refmap(struct dlm_lock_resource *res)
+{
+ int bit;
+ assert_spin_locked(&res->spinlock);
+
+ mlog(ML_NOTICE, " refmap nodes: [ ");
+ bit = 0;
+ while (1) {
+ bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit);
+ if (bit >= O2NM_MAX_NODES)
+ break;
+ printk("%u ", bit);
+ bit++;
+ }
+ printk("], inflight=%u\n", res->inflight_locks);
+}
+
void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
{
struct list_head *iter2;
@@ -65,6 +82,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
res->owner, res->state);
mlog(ML_NOTICE, " last used: %lu, on purge list: %s\n",
res->last_used, list_empty(&res->purge) ? "no" : "yes");
+ dlm_print_lockres_refmap(res);
mlog(ML_NOTICE, " granted queue: \n");
list_for_each(iter2, &res->granted) {
lock = list_entry(iter2, struct dlm_lock, list);
@@ -72,8 +90,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
"cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
lock->ml.type, lock->ml.convert_type, lock->ml.node,
- dlm_get_lock_cookie_node(lock->ml.cookie),
- dlm_get_lock_cookie_seq(lock->ml.cookie),
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
list_empty(&lock->ast_list) ? 'y' : 'n',
lock->ast_pending ? 'y' : 'n',
list_empty(&lock->bast_list) ? 'y' : 'n',
@@ -87,8 +105,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
"cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
lock->ml.type, lock->ml.convert_type, lock->ml.node,
- dlm_get_lock_cookie_node(lock->ml.cookie),
- dlm_get_lock_cookie_seq(lock->ml.cookie),
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
list_empty(&lock->ast_list) ? 'y' : 'n',
lock->ast_pending ? 'y' : 'n',
list_empty(&lock->bast_list) ? 'y' : 'n',
@@ -102,8 +120,8 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
mlog(ML_NOTICE, " type=%d, conv=%d, node=%u, "
"cookie=%u:%llu, ast=(empty=%c,pend=%c), bast=(empty=%c,pend=%c)\n",
lock->ml.type, lock->ml.convert_type, lock->ml.node,
- dlm_get_lock_cookie_node(lock->ml.cookie),
- dlm_get_lock_cookie_seq(lock->ml.cookie),
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
list_empty(&lock->ast_list) ? 'y' : 'n',
lock->ast_pending ? 'y' : 'n',
list_empty(&lock->bast_list) ? 'y' : 'n',
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index f0b25f2dd205..6087c4749fee 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -48,6 +48,36 @@
#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
#include "cluster/masklog.h"
+/*
+ * ocfs2 node maps are arrays of long int, which cannot be sent freely
+ * across the wire due to endianness issues. To work around this, we
+ * convert the long ints to byte arrays. The following 3 routines are
+ * helper functions to set/test/copy bits within those arrays of bytes.
+ */
+static inline void byte_set_bit(u8 nr, u8 map[])
+{
+ map[nr >> 3] |= (1UL << (nr & 7));
+}
+
+static inline int byte_test_bit(u8 nr, u8 map[])
+{
+ return ((1UL << (nr & 7)) & (map[nr >> 3])) != 0;
+}
+
+static inline void byte_copymap(u8 dmap[], unsigned long smap[],
+ unsigned int sz)
+{
+ unsigned int nn;
+
+ if (!sz)
+ return;
+
+ memset(dmap, 0, ((sz + 7) >> 3));
+ for (nn = 0 ; nn < sz; nn++)
+ if (test_bit(nn, smap))
+ byte_set_bit(nn, dmap);
+}
+
static void dlm_free_pagevec(void **vec, int pages)
{
while (pages--)
@@ -95,10 +125,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
#define DLM_DOMAIN_BACKOFF_MS 200
-static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data);
-static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data);
-static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data);
-static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data);
+static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
+static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
static void dlm_unregister_domain_handlers(struct dlm_ctxt *dlm);
@@ -125,10 +159,10 @@ void __dlm_insert_lockres(struct dlm_ctxt *dlm,
hlist_add_head(&res->hash_node, bucket);
}
-struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
- const char *name,
- unsigned int len,
- unsigned int hash)
+struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
+ const char *name,
+ unsigned int len,
+ unsigned int hash)
{
struct hlist_head *bucket;
struct hlist_node *list;
@@ -154,6 +188,37 @@ struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
return NULL;
}
+/* intended to be called by functions which do not care about lock
+ * resources which are being purged (most net _handler functions).
+ * this will return NULL for any lock resource which is found but
+ * currently in the process of dropping its mastery reference.
+ * use __dlm_lookup_lockres_full when you need the lock resource
+ * regardless (e.g. dlm_get_lock_resource) */
+struct dlm_lock_resource * __dlm_lookup_lockres(struct dlm_ctxt *dlm,
+ const char *name,
+ unsigned int len,
+ unsigned int hash)
+{
+ struct dlm_lock_resource *res = NULL;
+
+ mlog_entry("%.*s\n", len, name);
+
+ assert_spin_locked(&dlm->spinlock);
+
+ res = __dlm_lookup_lockres_full(dlm, name, len, hash);
+ if (res) {
+ spin_lock(&res->spinlock);
+ if (res->state & DLM_LOCK_RES_DROPPING_REF) {
+ spin_unlock(&res->spinlock);
+ dlm_lockres_put(res);
+ return NULL;
+ }
+ spin_unlock(&res->spinlock);
+ }
+
+ return res;
+}
+
struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
const char *name,
unsigned int len)
@@ -330,43 +395,60 @@ static void dlm_complete_dlm_shutdown(struct dlm_ctxt *dlm)
wake_up(&dlm_domain_events);
}
-static void dlm_migrate_all_locks(struct dlm_ctxt *dlm)
+static int dlm_migrate_all_locks(struct dlm_ctxt *dlm)
{
- int i;
+ int i, num, n, ret = 0;
struct dlm_lock_resource *res;
+ struct hlist_node *iter;
+ struct hlist_head *bucket;
+ int dropped;
mlog(0, "Migrating locks from domain %s\n", dlm->name);
-restart:
+
+ num = 0;
spin_lock(&dlm->spinlock);
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
- while (!hlist_empty(dlm_lockres_hash(dlm, i))) {
- res = hlist_entry(dlm_lockres_hash(dlm, i)->first,
- struct dlm_lock_resource, hash_node);
- /* need reference when manually grabbing lockres */
+redo_bucket:
+ n = 0;
+ bucket = dlm_lockres_hash(dlm, i);
+ iter = bucket->first;
+ while (iter) {
+ n++;
+ res = hlist_entry(iter, struct dlm_lock_resource,
+ hash_node);
dlm_lockres_get(res);
- /* this should unhash the lockres
- * and exit with dlm->spinlock */
- mlog(0, "purging res=%p\n", res);
- if (dlm_lockres_is_dirty(dlm, res)) {
- /* HACK! this should absolutely go.
- * need to figure out why some empty
- * lockreses are still marked dirty */
- mlog(ML_ERROR, "lockres %.*s dirty!\n",
- res->lockname.len, res->lockname.name);
-
- spin_unlock(&dlm->spinlock);
- dlm_kick_thread(dlm, res);
- wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
- dlm_lockres_put(res);
- goto restart;
- }
- dlm_purge_lockres(dlm, res);
+ /* migrate, if necessary. this will drop the dlm
+ * spinlock and retake it if it does migration. */
+ dropped = dlm_empty_lockres(dlm, res);
+
+ spin_lock(&res->spinlock);
+ __dlm_lockres_calc_usage(dlm, res);
+ iter = res->hash_node.next;
+ spin_unlock(&res->spinlock);
+
dlm_lockres_put(res);
+
+ cond_resched_lock(&dlm->spinlock);
+
+ if (dropped)
+ goto redo_bucket;
}
+ num += n;
+ mlog(0, "%s: touched %d lockreses in bucket %d "
+ "(tot=%d)\n", dlm->name, n, i, num);
}
spin_unlock(&dlm->spinlock);
-
+ wake_up(&dlm->dlm_thread_wq);
+
+ /* let the dlm thread take care of purging, keep scanning until
+ * nothing remains in the hash */
+ if (num) {
+ mlog(0, "%s: %d lock resources in hash last pass\n",
+ dlm->name, num);
+ ret = -EAGAIN;
+ }
mlog(0, "DONE Migrating locks from domain %s\n", dlm->name);
+ return ret;
}
static int dlm_no_joining_node(struct dlm_ctxt *dlm)
@@ -418,7 +500,8 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm)
printk("\n");
}
-static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data)
+static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
unsigned int node;
@@ -571,7 +654,9 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
/* We changed dlm state, notify the thread */
dlm_kick_thread(dlm, NULL);
- dlm_migrate_all_locks(dlm);
+ while (dlm_migrate_all_locks(dlm)) {
+ mlog(0, "%s: more migration to do\n", dlm->name);
+ }
dlm_mark_domain_leaving(dlm);
dlm_leave_domain(dlm);
dlm_complete_dlm_shutdown(dlm);
@@ -580,11 +665,13 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm)
}
EXPORT_SYMBOL_GPL(dlm_unregister_domain);
-static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
+static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_query_join_request *query;
enum dlm_query_join_response response;
struct dlm_ctxt *dlm = NULL;
+ u8 nodenum;
query = (struct dlm_query_join_request *) msg->buf;
@@ -608,6 +695,28 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
spin_lock(&dlm_domain_lock);
dlm = __dlm_lookup_domain_full(query->domain, query->name_len);
+ if (!dlm)
+ goto unlock_respond;
+
+ /*
+ * There is a small window where the joining node may not see the
+ * node(s) that just left but are still part of the cluster. DISALLOW
+ * the join request if the joining node has a different node map.
+ */
+ nodenum=0;
+ while (nodenum < O2NM_MAX_NODES) {
+ if (test_bit(nodenum, dlm->domain_map)) {
+ if (!byte_test_bit(nodenum, query->node_map)) {
+ mlog(0, "disallow join as node %u does not "
+ "have node %u in its nodemap\n",
+ query->node_idx, nodenum);
+ response = JOIN_DISALLOW;
+ goto unlock_respond;
+ }
+ }
+ nodenum++;
+ }
+
/* Once the dlm ctxt is marked as leaving then we don't want
* to be put in someone's domain map.
* Also, explicitly disallow joining at certain troublesome
@@ -626,15 +735,15 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
/* Disallow parallel joins. */
response = JOIN_DISALLOW;
} else if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
- mlog(ML_NOTICE, "node %u trying to join, but recovery "
+ mlog(0, "node %u trying to join, but recovery "
"is ongoing.\n", bit);
response = JOIN_DISALLOW;
} else if (test_bit(bit, dlm->recovery_map)) {
- mlog(ML_NOTICE, "node %u trying to join, but it "
+ mlog(0, "node %u trying to join, but it "
"still needs recovery.\n", bit);
response = JOIN_DISALLOW;
} else if (test_bit(bit, dlm->domain_map)) {
- mlog(ML_NOTICE, "node %u trying to join, but it "
+ mlog(0, "node %u trying to join, but it "
"is still in the domain! needs recovery?\n",
bit);
response = JOIN_DISALLOW;
@@ -649,6 +758,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data)
spin_unlock(&dlm->spinlock);
}
+unlock_respond:
spin_unlock(&dlm_domain_lock);
respond:
@@ -657,7 +767,8 @@ respond:
return response;
}
-static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data)
+static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_assert_joined *assert;
struct dlm_ctxt *dlm = NULL;
@@ -694,7 +805,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data)
return 0;
}
-static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data)
+static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_cancel_join *cancel;
struct dlm_ctxt *dlm = NULL;
@@ -796,6 +908,9 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
join_msg.name_len = strlen(dlm->name);
memcpy(join_msg.domain, dlm->name, join_msg.name_len);
+ /* copy live node map to join message */
+ byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
+
status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
sizeof(join_msg), node, &retval);
if (status < 0 && status != -ENOPROTOOPT) {
@@ -1036,98 +1151,106 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
status = o2net_register_handler(DLM_MASTER_REQUEST_MSG, dlm->key,
sizeof(struct dlm_master_request),
dlm_master_request_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_ASSERT_MASTER_MSG, dlm->key,
sizeof(struct dlm_assert_master),
dlm_assert_master_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, dlm_assert_master_post_handler,
+ &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_CREATE_LOCK_MSG, dlm->key,
sizeof(struct dlm_create_lock),
dlm_create_lock_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_CONVERT_LOCK_MSG, dlm->key,
DLM_CONVERT_LOCK_MAX_LEN,
dlm_convert_lock_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_UNLOCK_LOCK_MSG, dlm->key,
DLM_UNLOCK_LOCK_MAX_LEN,
dlm_unlock_lock_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_PROXY_AST_MSG, dlm->key,
DLM_PROXY_AST_MAX_LEN,
dlm_proxy_ast_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_EXIT_DOMAIN_MSG, dlm->key,
sizeof(struct dlm_exit_domain),
dlm_exit_domain_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
+ if (status)
+ goto bail;
+
+ status = o2net_register_handler(DLM_DEREF_LOCKRES_MSG, dlm->key,
+ sizeof(struct dlm_deref_lockres),
+ dlm_deref_lockres_handler,
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_MIGRATE_REQUEST_MSG, dlm->key,
sizeof(struct dlm_migrate_request),
dlm_migrate_request_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_MIG_LOCKRES_MSG, dlm->key,
DLM_MIG_LOCKRES_MAX_LEN,
dlm_mig_lockres_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_MASTER_REQUERY_MSG, dlm->key,
sizeof(struct dlm_master_requery),
dlm_master_requery_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_LOCK_REQUEST_MSG, dlm->key,
sizeof(struct dlm_lock_request),
dlm_request_all_locks_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_RECO_DATA_DONE_MSG, dlm->key,
sizeof(struct dlm_reco_data_done),
dlm_reco_data_done_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_BEGIN_RECO_MSG, dlm->key,
sizeof(struct dlm_begin_reco),
dlm_begin_reco_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_FINALIZE_RECO_MSG, dlm->key,
sizeof(struct dlm_finalize_reco),
dlm_finalize_reco_handler,
- dlm, &dlm->dlm_domain_handlers);
+ dlm, NULL, &dlm->dlm_domain_handlers);
if (status)
goto bail;
@@ -1141,6 +1264,8 @@ bail:
static int dlm_join_domain(struct dlm_ctxt *dlm)
{
int status;
+ unsigned int backoff;
+ unsigned int total_backoff = 0;
BUG_ON(!dlm);
@@ -1172,18 +1297,27 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
}
do {
- unsigned int backoff;
status = dlm_try_to_join_domain(dlm);
/* If we're racing another node to the join, then we
* need to back off temporarily and let them
* complete. */
+#define DLM_JOIN_TIMEOUT_MSECS 90000
if (status == -EAGAIN) {
if (signal_pending(current)) {
status = -ERESTARTSYS;
goto bail;
}
+ if (total_backoff >
+ msecs_to_jiffies(DLM_JOIN_TIMEOUT_MSECS)) {
+ status = -ERESTARTSYS;
+ mlog(ML_NOTICE, "Timed out joining dlm domain "
+ "%s after %u msecs\n", dlm->name,
+ jiffies_to_msecs(total_backoff));
+ goto bail;
+ }
+
/*
* <chip> After you!
* <dale> No, after you!
@@ -1193,6 +1327,7 @@ static int dlm_join_domain(struct dlm_ctxt *dlm)
*/
backoff = (unsigned int)(jiffies & 0x3);
backoff *= DLM_DOMAIN_BACKOFF_MS;
+ total_backoff += backoff;
mlog(0, "backoff %d\n", backoff);
msleep(backoff);
}
@@ -1421,21 +1556,21 @@ static int dlm_register_net_handlers(void)
status = o2net_register_handler(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
sizeof(struct dlm_query_join_request),
dlm_query_join_handler,
- NULL, &dlm_join_handlers);
+ NULL, NULL, &dlm_join_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
sizeof(struct dlm_assert_joined),
dlm_assert_joined_handler,
- NULL, &dlm_join_handlers);
+ NULL, NULL, &dlm_join_handlers);
if (status)
goto bail;
status = o2net_register_handler(DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
sizeof(struct dlm_cancel_join),
dlm_cancel_join_handler,
- NULL, &dlm_join_handlers);
+ NULL, NULL, &dlm_join_handlers);
bail:
if (status < 0)
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index b7f0ba97a1a2..de952eba29a9 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -61,11 +61,11 @@
#define MLOG_MASK_PREFIX ML_DLMFS
#include "cluster/masklog.h"
-static struct super_operations dlmfs_ops;
-static struct file_operations dlmfs_file_operations;
-static struct inode_operations dlmfs_dir_inode_operations;
-static struct inode_operations dlmfs_root_inode_operations;
-static struct inode_operations dlmfs_file_inode_operations;
+static const struct super_operations dlmfs_ops;
+static const struct file_operations dlmfs_file_operations;
+static const struct inode_operations dlmfs_dir_inode_operations;
+static const struct inode_operations dlmfs_root_inode_operations;
+static const struct inode_operations dlmfs_file_inode_operations;
static struct kmem_cache *dlmfs_inode_cache;
struct workqueue_struct *user_dlm_worker;
@@ -540,27 +540,27 @@ static int dlmfs_fill_super(struct super_block * sb,
return 0;
}
-static struct file_operations dlmfs_file_operations = {
+static const struct file_operations dlmfs_file_operations = {
.open = dlmfs_file_open,
.release = dlmfs_file_release,
.read = dlmfs_file_read,
.write = dlmfs_file_write,
};
-static struct inode_operations dlmfs_dir_inode_operations = {
+static const struct inode_operations dlmfs_dir_inode_operations = {
.create = dlmfs_create,
.lookup = simple_lookup,
.unlink = dlmfs_unlink,
};
/* this way we can restrict mkdir to only the toplevel of the fs. */
-static struct inode_operations dlmfs_root_inode_operations = {
+static const struct inode_operations dlmfs_root_inode_operations = {
.lookup = simple_lookup,
.mkdir = dlmfs_mkdir,
.rmdir = simple_rmdir,
};
-static struct super_operations dlmfs_ops = {
+static const struct super_operations dlmfs_ops = {
.statfs = simple_statfs,
.alloc_inode = dlmfs_alloc_inode,
.destroy_inode = dlmfs_destroy_inode,
@@ -568,7 +568,7 @@ static struct super_operations dlmfs_ops = {
.drop_inode = generic_delete_inode,
};
-static struct inode_operations dlmfs_file_inode_operations = {
+static const struct inode_operations dlmfs_file_inode_operations = {
.getattr = simple_getattr,
};
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index e5ca3db197f6..52578d907d9a 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -163,6 +163,10 @@ static enum dlm_status dlmlock_master(struct dlm_ctxt *dlm,
kick_thread = 1;
}
}
+ /* reduce the inflight count, this may result in the lockres
+ * being purged below during calc_usage */
+ if (lock->ml.node == dlm->node_num)
+ dlm_lockres_drop_inflight_ref(dlm, res);
spin_unlock(&res->spinlock);
wake_up(&res->wq);
@@ -437,7 +441,8 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
* held on exit: none
* returns: DLM_NORMAL, DLM_SYSERR, DLM_IVLOCKID, DLM_NOTQUEUED
*/
-int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_create_lock_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_create_lock *create = (struct dlm_create_lock *)msg->buf;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 0ad872055cb3..77e4e6169a0d 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -99,9 +99,10 @@ static void dlm_mle_node_up(struct dlm_ctxt *dlm,
int idx);
static void dlm_assert_master_worker(struct dlm_work_item *item, void *data);
-static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname,
- unsigned int namelen, void *nodemap,
- u32 flags);
+static int dlm_do_assert_master(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res,
+ void *nodemap, u32 flags);
+static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data);
static inline int dlm_mle_equal(struct dlm_ctxt *dlm,
struct dlm_master_list_entry *mle,
@@ -237,7 +238,8 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
struct dlm_master_list_entry **mle,
char *name, unsigned int namelen);
-static int dlm_do_master_request(struct dlm_master_list_entry *mle, int to);
+static int dlm_do_master_request(struct dlm_lock_resource *res,
+ struct dlm_master_list_entry *mle, int to);
static int dlm_wait_for_lock_mastery(struct dlm_ctxt *dlm,
@@ -687,6 +689,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
INIT_LIST_HEAD(&res->purge);
atomic_set(&res->asts_reserved, 0);
res->migration_pending = 0;
+ res->inflight_locks = 0;
kref_init(&res->refs);
@@ -700,6 +703,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
res->last_used = 0;
memset(res->lvb, 0, DLM_LVB_LEN);
+ memset(res->refmap, 0, sizeof(res->refmap));
}
struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
@@ -722,6 +726,42 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
return res;
}
+/*
+ * Take an inflight-lock reference on a lock resource for the local node.
+ *
+ * If the local node's bit is not yet set in res->refmap it is set here,
+ * and in that case the inflight count must still be zero.  The inflight
+ * count is then incremented and logged.
+ *
+ * res->spinlock must be held unless new_lockres is nonzero.
+ * file and line are not used by this function.
+ */
+void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
+				     struct dlm_lock_resource *res,
+				     int new_lockres,
+				     const char *file,
+				     int line)
+{
+	/* the lock check is skipped when the caller flags the lockres as new */
+	if (new_lockres == 0)
+		assert_spin_locked(&res->spinlock);
+
+	if (test_bit(dlm->node_num, res->refmap) == 0) {
+		/* first local reference: nothing may be inflight yet */
+		BUG_ON(res->inflight_locks);
+		dlm_lockres_set_refmap_bit(dlm->node_num, res);
+	}
+
+	res->inflight_locks += 1;
+	mlog(0, "%s:%.*s: inflight++: now %u\n",
+	     dlm->name, res->lockname.len, res->lockname.name,
+	     res->inflight_locks);
+}
+
+/*
+ * Release one inflight-lock reference on a lock resource.
+ *
+ * res->spinlock must be held.  Dropping a reference when the inflight
+ * count is already zero is a BUG.  When the count reaches zero the local
+ * node's bit is cleared from res->refmap.  Waiters on res->wq are woken
+ * in every case.
+ *
+ * file and line are not used by this function.
+ */
+void __dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
+				     struct dlm_lock_resource *res,
+				     const char *file,
+				     int line)
+{
+	assert_spin_locked(&res->spinlock);
+
+	BUG_ON(!res->inflight_locks);
+	res->inflight_locks -= 1;
+	mlog(0, "%s:%.*s: inflight--: now %u\n",
+	     dlm->name, res->lockname.len, res->lockname.name,
+	     res->inflight_locks);
+
+	/* last inflight lock gone: the local node no longer pins the lockres */
+	if (!res->inflight_locks)
+		dlm_lockres_clear_refmap_bit(dlm->node_num, res);
+
+	wake_up(&res->wq);
+}
+
/*
* lookup a lock resource by name.
* may already exist in the hashtable.
@@ -752,6 +792,7 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
unsigned int hash;
int tries = 0;
int bit, wait_on_recovery = 0;
+ int drop_inflight_if_nonlocal = 0;
BUG_ON(!lockid);
@@ -761,9 +802,30 @@ struct dlm_lock_resource * dlm_get_lock_resource(struct dlm_ctxt *dlm,
lookup:
spin_lock(&dlm->spinlock);
- tmpres = __dlm_lookup_lockres(dlm, lockid, namelen, hash);
+ tmpres = __dlm_lookup_lockres_full(dlm, lockid, namelen, hash);
if (tmpres) {
+ int dropping_ref = 0;
+
+ spin_lock(&tmpres->spinlock);
+ if (tmpres->owner == dlm->node_num) {
+ BUG_ON(tmpres->state & DLM_LOCK_RES_DROPPING_REF);
+ dlm_lockres_grab_inflight_ref(dlm, tmpres);
+ } else if (tmpres->state & DLM_LOCK_RES_DROPPING_REF)
+ dropping_ref = 1;
+ spin_unlock(&tmpres->spinlock);
spin_unlock(&dlm->spinlock);
+
+ /* wait until done messaging the master, drop our ref to allow
+ * the lockres to be purged, start over. */
+ if (dropping_ref) {
+ spin_lock(&tmpres->spinlock);
+ __dlm_wait_on_lockres_flags(tmpres, DLM_LOCK_RES_DROPPING_REF);
+ spin_unlock(&tmpres->spinlock);
+ dlm_lockres_put(tmpres);
+ tmpres = NULL;
+ goto lookup;
+ }
+
mlog(0, "found in hash!\n");
if (res)
dlm_lockres_put(res);
@@ -793,6 +855,7 @@ lookup:
spin_lock(&res->spinlock);
dlm_change_lockres_owner(dlm, res, dlm->node_num);
__dlm_insert_lockres(dlm, res);
+ dlm_lockres_grab_inflight_ref(dlm, res);
spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock);
/* lockres still marked IN_PROGRESS */
@@ -805,29 +868,40 @@ lookup:
/* if we found a block, wait for lock to be mastered by another node */
blocked = dlm_find_mle(dlm, &mle, (char *)lockid, namelen);
if (blocked) {
+ int mig;
if (mle->type == DLM_MLE_MASTER) {
mlog(ML_ERROR, "master entry for nonexistent lock!\n");
BUG();
- } else if (mle->type == DLM_MLE_MIGRATION) {
- /* migration is in progress! */
- /* the good news is that we now know the
- * "current" master (mle->master). */
-
+ }
+ mig = (mle->type == DLM_MLE_MIGRATION);
+ /* if there is a migration in progress, let the migration
+ * finish before continuing. we can wait for the absence
+ * of the MIGRATION mle: either the migrate finished or
+ * one of the nodes died and the mle was cleaned up.
+ * if there is a BLOCK here, but it already has a master
+ * set, we are too late. the master does not have a ref
+ * for us in the refmap. detach the mle and drop it.
+ * either way, go back to the top and start over. */
+ if (mig || mle->master != O2NM_MAX_NODES) {
+ BUG_ON(mig && mle->master == dlm->node_num);
+ /* we arrived too late. the master does not
+ * have a ref for us. retry. */
+ mlog(0, "%s:%.*s: late on %s\n",
+ dlm->name, namelen, lockid,
+ mig ? "MIGRATION" : "BLOCK");
spin_unlock(&dlm->master_lock);
- assert_spin_locked(&dlm->spinlock);
-
- /* set the lockres owner and hash it */
- spin_lock(&res->spinlock);
- dlm_set_lockres_owner(dlm, res, mle->master);
- __dlm_insert_lockres(dlm, res);
- spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock);
/* master is known, detach */
- dlm_mle_detach_hb_events(dlm, mle);
+ if (!mig)
+ dlm_mle_detach_hb_events(dlm, mle);
dlm_put_mle(mle);
mle = NULL;
- goto wake_waiters;
+ /* this is lame, but we cant wait on either
+ * the mle or lockres waitqueue here */
+ if (mig)
+ msleep(100);
+ goto lookup;
}
} else {
/* go ahead and try to master lock on this node */
@@ -858,6 +932,13 @@ lookup:
/* finally add the lockres to its hash bucket */
__dlm_insert_lockres(dlm, res);
+ /* since this lockres is new it does not require the spinlock */
+ dlm_lockres_grab_inflight_ref_new(dlm, res);
+
+ /* if this node does not become the master make sure to drop
+ * this inflight reference below */
+ drop_inflight_if_nonlocal = 1;
+
/* get an extra ref on the mle in case this is a BLOCK
* if so, the creator of the BLOCK may try to put the last
* ref at this time in the assert master handler, so we
@@ -910,7 +991,7 @@ redo_request:
ret = -EINVAL;
dlm_node_iter_init(mle->vote_map, &iter);
while ((nodenum = dlm_node_iter_next(&iter)) >= 0) {
- ret = dlm_do_master_request(mle, nodenum);
+ ret = dlm_do_master_request(res, mle, nodenum);
if (ret < 0)
mlog_errno(ret);
if (mle->master != O2NM_MAX_NODES) {
@@ -960,6 +1041,8 @@ wait:
wake_waiters:
spin_lock(&res->spinlock);
+ if (res->owner != dlm->node_num && drop_inflight_if_nonlocal)
+ dlm_lockres_drop_inflight_ref(dlm, res);
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
spin_unlock(&res->spinlock);
wake_up(&res->wq);
@@ -998,7 +1081,7 @@ recheck:
/* this will cause the master to re-assert across
* the whole cluster, freeing up mles */
if (res->owner != dlm->node_num) {
- ret = dlm_do_master_request(mle, res->owner);
+ ret = dlm_do_master_request(res, mle, res->owner);
if (ret < 0) {
/* give recovery a chance to run */
mlog(ML_ERROR, "link to %u went down?: %d\n", res->owner, ret);
@@ -1062,6 +1145,8 @@ recheck:
* now tell other nodes that I am
* mastering this. */
mle->master = dlm->node_num;
+ /* ref was grabbed in get_lock_resource
+ * will be dropped in dlmlock_master */
assert = 1;
sleep = 0;
}
@@ -1087,7 +1172,8 @@ recheck:
(atomic_read(&mle->woken) == 1),
timeo);
if (res->owner == O2NM_MAX_NODES) {
- mlog(0, "waiting again\n");
+ mlog(0, "%s:%.*s: waiting again\n", dlm->name,
+ res->lockname.len, res->lockname.name);
goto recheck;
}
mlog(0, "done waiting, master is %u\n", res->owner);
@@ -1100,8 +1186,7 @@ recheck:
m = dlm->node_num;
mlog(0, "about to master %.*s here, this=%u\n",
res->lockname.len, res->lockname.name, m);
- ret = dlm_do_assert_master(dlm, res->lockname.name,
- res->lockname.len, mle->vote_map, 0);
+ ret = dlm_do_assert_master(dlm, res, mle->vote_map, 0);
if (ret) {
/* This is a failure in the network path,
* not in the response to the assert_master
@@ -1117,6 +1202,8 @@ recheck:
/* set the lockres owner */
spin_lock(&res->spinlock);
+ /* mastery reference obtained either during
+ * assert_master_handler or in get_lock_resource */
dlm_change_lockres_owner(dlm, res, m);
spin_unlock(&res->spinlock);
@@ -1283,7 +1370,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm,
*
*/
-static int dlm_do_master_request(struct dlm_master_list_entry *mle, int to)
+static int dlm_do_master_request(struct dlm_lock_resource *res,
+ struct dlm_master_list_entry *mle, int to)
{
struct dlm_ctxt *dlm = mle->dlm;
struct dlm_master_request request;
@@ -1339,6 +1427,9 @@ again:
case DLM_MASTER_RESP_YES:
set_bit(to, mle->response_map);
mlog(0, "node %u is the master, response=YES\n", to);
+ mlog(0, "%s:%.*s: master node %u now knows I have a "
+ "reference\n", dlm->name, res->lockname.len,
+ res->lockname.name, to);
mle->master = to;
break;
case DLM_MASTER_RESP_NO:
@@ -1379,7 +1470,8 @@ out:
*
* if possible, TRIM THIS DOWN!!!
*/
-int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
u8 response = DLM_MASTER_RESP_MAYBE;
struct dlm_ctxt *dlm = data;
@@ -1417,10 +1509,11 @@ way_up_top:
/* take care of the easy cases up front */
spin_lock(&res->spinlock);
- if (res->state & DLM_LOCK_RES_RECOVERING) {
+ if (res->state & (DLM_LOCK_RES_RECOVERING|
+ DLM_LOCK_RES_MIGRATING)) {
spin_unlock(&res->spinlock);
mlog(0, "returning DLM_MASTER_RESP_ERROR since res is "
- "being recovered\n");
+ "being recovered/migrated\n");
response = DLM_MASTER_RESP_ERROR;
if (mle)
kmem_cache_free(dlm_mle_cache, mle);
@@ -1428,8 +1521,10 @@ way_up_top:
}
if (res->owner == dlm->node_num) {
+ mlog(0, "%s:%.*s: setting bit %u in refmap\n",
+ dlm->name, namelen, name, request->node_idx);
+ dlm_lockres_set_refmap_bit(request->node_idx, res);
spin_unlock(&res->spinlock);
- // mlog(0, "this node is the master\n");
response = DLM_MASTER_RESP_YES;
if (mle)
kmem_cache_free(dlm_mle_cache, mle);
@@ -1477,7 +1572,6 @@ way_up_top:
mlog(0, "node %u is master, but trying to migrate to "
"node %u.\n", tmpmle->master, tmpmle->new_master);
if (tmpmle->master == dlm->node_num) {
- response = DLM_MASTER_RESP_YES;
mlog(ML_ERROR, "no owner on lockres, but this "
"node is trying to migrate it to %u?!\n",
tmpmle->new_master);
@@ -1494,6 +1588,10 @@ way_up_top:
* go back and clean the mles on any
* other nodes */
dispatch_assert = 1;
+ dlm_lockres_set_refmap_bit(request->node_idx, res);
+ mlog(0, "%s:%.*s: setting bit %u in refmap\n",
+ dlm->name, namelen, name,
+ request->node_idx);
} else
response = DLM_MASTER_RESP_NO;
} else {
@@ -1607,17 +1705,24 @@ send_response:
* can periodically run all locks owned by this node
* and re-assert across the cluster...
*/
-static int dlm_do_assert_master(struct dlm_ctxt *dlm, const char *lockname,
- unsigned int namelen, void *nodemap,
- u32 flags)
+int dlm_do_assert_master(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res,
+ void *nodemap, u32 flags)
{
struct dlm_assert_master assert;
int to, tmpret;
struct dlm_node_iter iter;
int ret = 0;
int reassert;
+ const char *lockname = res->lockname.name;
+ unsigned int namelen = res->lockname.len;
BUG_ON(namelen > O2NM_MAX_NAME_LEN);
+
+ spin_lock(&res->spinlock);
+ res->state |= DLM_LOCK_RES_SETREF_INPROG;
+ spin_unlock(&res->spinlock);
+
again:
reassert = 0;
@@ -1647,6 +1752,7 @@ again:
mlog(0, "link to %d went down!\n", to);
/* any nonzero status return will do */
ret = tmpret;
+ r = 0;
} else if (r < 0) {
/* ok, something horribly messed. kill thyself. */
mlog(ML_ERROR,"during assert master of %.*s to %u, "
@@ -1661,17 +1767,39 @@ again:
spin_unlock(&dlm->master_lock);
spin_unlock(&dlm->spinlock);
BUG();
- } else if (r == EAGAIN) {
+ }
+
+ if (r & DLM_ASSERT_RESPONSE_REASSERT &&
+ !(r & DLM_ASSERT_RESPONSE_MASTERY_REF)) {
+ mlog(ML_ERROR, "%.*s: very strange, "
+ "master MLE but no lockres on %u\n",
+ namelen, lockname, to);
+ }
+
+ if (r & DLM_ASSERT_RESPONSE_REASSERT) {
mlog(0, "%.*s: node %u create mles on other "
"nodes and requests a re-assert\n",
namelen, lockname, to);
reassert = 1;
}
+ if (r & DLM_ASSERT_RESPONSE_MASTERY_REF) {
+ mlog(0, "%.*s: node %u has a reference to this "
+ "lockres, set the bit in the refmap\n",
+ namelen, lockname, to);
+ spin_lock(&res->spinlock);
+ dlm_lockres_set_refmap_bit(to, res);
+ spin_unlock(&res->spinlock);
+ }
}
if (reassert)
goto again;
+ spin_lock(&res->spinlock);
+ res->state &= ~DLM_LOCK_RES_SETREF_INPROG;
+ spin_unlock(&res->spinlock);
+ wake_up(&res->wq);
+
return ret;
}
@@ -1684,7 +1812,8 @@ again:
*
* if possible, TRIM THIS DOWN!!!
*/
-int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_master_list_entry *mle = NULL;
@@ -1693,7 +1822,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data)
char *name;
unsigned int namelen, hash;
u32 flags;
- int master_request = 0;
+ int master_request = 0, have_lockres_ref = 0;
int ret = 0;
if (!dlm_grab(dlm))
@@ -1851,6 +1980,7 @@ ok:
spin_unlock(&mle->spinlock);
if (res) {
+ int wake = 0;
spin_lock(&res->spinlock);
if (mle->type == DLM_MLE_MIGRATION) {
mlog(0, "finishing off migration of lockres %.*s, "
@@ -1858,12 +1988,16 @@ ok:
res->lockname.len, res->lockname.name,
dlm->node_num, mle->new_master);
res->state &= ~DLM_LOCK_RES_MIGRATING;
+ wake = 1;
dlm_change_lockres_owner(dlm, res, mle->new_master);
BUG_ON(res->state & DLM_LOCK_RES_DIRTY);
} else {
dlm_change_lockres_owner(dlm, res, mle->master);
}
spin_unlock(&res->spinlock);
+ have_lockres_ref = 1;
+ if (wake)
+ wake_up(&res->wq);
}
/* master is known, detach if not already detached.
@@ -1913,12 +2047,28 @@ ok:
done:
ret = 0;
- if (res)
- dlm_lockres_put(res);
+ if (res) {
+ spin_lock(&res->spinlock);
+ res->state |= DLM_LOCK_RES_SETREF_INPROG;
+ spin_unlock(&res->spinlock);
+ *ret_data = (void *)res;
+ }
dlm_put(dlm);
if (master_request) {
mlog(0, "need to tell master to reassert\n");
- ret = EAGAIN; // positive. negative would shoot down the node.
+ /* positive. negative would shoot down the node. */
+ ret |= DLM_ASSERT_RESPONSE_REASSERT;
+ if (!have_lockres_ref) {
+ mlog(ML_ERROR, "strange, got assert from %u, MASTER "
+ "mle present here for %s:%.*s, but no lockres!\n",
+ assert->node_idx, dlm->name, namelen, name);
+ }
+ }
+ if (have_lockres_ref) {
+ /* let the master know we have a reference to the lockres */
+ ret |= DLM_ASSERT_RESPONSE_MASTERY_REF;
+ mlog(0, "%s:%.*s: got assert from %u, need a ref\n",
+ dlm->name, namelen, name, assert->node_idx);
}
return ret;
@@ -1929,11 +2079,25 @@ kill:
__dlm_print_one_lock_resource(res);
spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock);
- dlm_lockres_put(res);
+ *ret_data = (void *)res;
dlm_put(dlm);
return -EINVAL;
}
+/*
+ * Post-processing hook paired with dlm_assert_master_handler().
+ *
+ * ret_data is the lock resource the handler stored in *ret_data (or NULL
+ * if it stored nothing).  For a non-NULL lockres this clears
+ * DLM_LOCK_RES_SETREF_INPROG, wakes waiters on res->wq and drops the
+ * lockres reference.  status and data are not used.
+ */
+void dlm_assert_master_post_handler(int status, void *data, void *ret_data)
+{
+	struct dlm_lock_resource *res = ret_data;
+
+	if (!res)
+		return;
+
+	spin_lock(&res->spinlock);
+	res->state &= ~DLM_LOCK_RES_SETREF_INPROG;
+	spin_unlock(&res->spinlock);
+
+	wake_up(&res->wq);
+	dlm_lockres_put(res);
+}
+
int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res,
int ignore_higher, u8 request_from, u32 flags)
@@ -2023,9 +2187,7 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
* even if one or more nodes die */
mlog(0, "worker about to master %.*s here, this=%u\n",
res->lockname.len, res->lockname.name, dlm->node_num);
- ret = dlm_do_assert_master(dlm, res->lockname.name,
- res->lockname.len,
- nodemap, flags);
+ ret = dlm_do_assert_master(dlm, res, nodemap, flags);
if (ret < 0) {
/* no need to restart, we are done */
if (!dlm_is_host_down(ret))
@@ -2097,14 +2259,180 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
return ret;
}
+/*
+ * DLM_DEREF_LOCKRES_MSG
+ */
+
+/*
+ * Send a DLM_DEREF_LOCKRES_MSG for this lock resource to res->owner,
+ * identifying the local node as the sender.
+ *
+ * Returns the status from o2net_send_message().  If the message was
+ * delivered but the remote side answered with a negative status, the
+ * lockres is printed and the kernel BUGs.
+ */
+int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
+{
+	const char *lockname = res->lockname.name;
+	unsigned int namelen = res->lockname.len;
+	struct dlm_deref_lockres deref;
+	int ret;
+	int remote_status;
+
+	BUG_ON(namelen > O2NM_MAX_NAME_LEN);
+
+	mlog(0, "%s:%.*s: sending deref to %d\n",
+	     dlm->name, namelen, lockname, res->owner);
+
+	/* build the request: who is dropping the ref, and on which lockres */
+	memset(&deref, 0, sizeof(deref));
+	deref.node_idx = dlm->node_num;
+	deref.namelen = namelen;
+	memcpy(deref.name, lockname, namelen);
+
+	ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
+				 &deref, sizeof(deref), res->owner,
+				 &remote_status);
+	if (ret < 0) {
+		mlog_errno(ret);
+		return ret;
+	}
+
+	if (remote_status < 0) {
+		/* BAD. other node says I did not have a ref. */
+		mlog(ML_ERROR,"while dropping ref on %s:%.*s "
+		     "(master=%u) got %d.\n", dlm->name, namelen,
+		     lockname, res->owner, remote_status);
+		dlm_print_one_lock_resource(res);
+		BUG();
+	}
+
+	return ret;
+}
+
+/*
+ * Handler for DLM_DEREF_LOCKRES_MSG: a remote node asks for its bit in
+ * res->refmap to be cleared on the named lock resource.
+ *
+ * If DLM_LOCK_RES_SETREF_INPROG is clear the bit is cleared right here.
+ * Otherwise the request is queued on dlm->work_list and completed later
+ * by dlm_deref_lockres_worker().
+ *
+ * Returns 0 when the request was handled or queued (and also when the
+ * domain could not be grabbed), -EINVAL for a bad name length, a bad
+ * node number or an unknown lockres, and -ENOMEM if the work item could
+ * not be allocated.  ret_data is not used.
+ */
+int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
+			      void **ret_data)
+{
+	struct dlm_ctxt *dlm = data;
+	struct dlm_deref_lockres *deref = (struct dlm_deref_lockres *)msg->buf;
+	struct dlm_lock_resource *res = NULL;
+	char *name;
+	unsigned int namelen;
+	int ret = -EINVAL;
+	u8 node;
+	unsigned int hash;
+	struct dlm_work_item *item;
+	int cleared = 0;
+	int dispatch = 0;
+
+	if (!dlm_grab(dlm))
+		return 0;
+
+	name = deref->name;
+	namelen = deref->namelen;
+	node = deref->node_idx;
+
+	/* validate the wire data before using it for lookup or as a bit index */
+	if (namelen > DLM_LOCKID_NAME_MAX) {
+		mlog(ML_ERROR, "Invalid name length!\n");
+		goto done;
+	}
+	if (node >= O2NM_MAX_NODES) {
+		mlog(ML_ERROR, "Invalid node number: %u\n", node);
+		goto done;
+	}
+
+	hash = dlm_lockid_hash(name, namelen);
+
+	spin_lock(&dlm->spinlock);
+	res = __dlm_lookup_lockres_full(dlm, name, namelen, hash);
+	if (!res) {
+		spin_unlock(&dlm->spinlock);
+		mlog(ML_ERROR, "%s:%.*s: bad lockres name\n",
+		     dlm->name, namelen, name);
+		goto done;
+	}
+	spin_unlock(&dlm->spinlock);
+
+	spin_lock(&res->spinlock);
+	if (res->state & DLM_LOCK_RES_SETREF_INPROG)
+		/* a refmap update is in flight: defer to the worker */
+		dispatch = 1;
+	else {
+		BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
+		if (test_bit(node, res->refmap)) {
+			dlm_lockres_clear_refmap_bit(node, res);
+			cleared = 1;
+		}
+	}
+	spin_unlock(&res->spinlock);
+
+	if (!dispatch) {
+		if (cleared)
+			dlm_lockres_calc_usage(dlm, res);
+		else {
+			mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
+			     "but it is already dropped!\n", dlm->name,
+			     res->lockname.len, res->lockname.name, node);
+			/* res->spinlock is no longer held here, so use the
+			 * variant that does not require the caller to hold
+			 * it (the __ variant is called under the lock
+			 * elsewhere in this file) */
+			dlm_print_one_lock_resource(res);
+		}
+		ret = 0;
+		goto done;
+	}
+
+	item = kzalloc(sizeof(*item), GFP_NOFS);
+	if (!item) {
+		ret = -ENOMEM;
+		mlog_errno(ret);
+		goto done;
+	}
+
+	dlm_init_work_item(dlm, item, dlm_deref_lockres_worker, NULL);
+	item->u.dl.deref_res = res;
+	item->u.dl.deref_node = node;
+
+	spin_lock(&dlm->work_lock);
+	list_add_tail(&item->list, &dlm->work_list);
+	spin_unlock(&dlm->work_lock);
+
+	queue_work(dlm->dlm_worker, &dlm->dispatched_work);
+	/* the lockres reference taken by the lookup travels with the work
+	 * item; dlm_deref_lockres_worker() puts it.
+	 * NOTE(review): dlm_put() is not called on this path -- presumably
+	 * the work dispatcher drops the dlm reference after running the
+	 * item; confirm. */
+	return 0;
+
+done:
+	if (res)
+		dlm_lockres_put(res);
+	dlm_put(dlm);
+
+	return ret;
+}
+
+/*
+ * Deferred half of dlm_deref_lockres_handler().
+ *
+ * Runs for a deref request that arrived while DLM_LOCK_RES_SETREF_INPROG
+ * was set on the lockres.  Waits for that flag to clear, then clears the
+ * requesting node's bit in res->refmap and recalculates lockres usage.
+ * Always drops the lockres reference carried by the work item.
+ *
+ * item->u.dl.deref_res and item->u.dl.deref_node carry the lockres and
+ * the requesting node.  data is not used.
+ */
+static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
+{
+	struct dlm_ctxt *dlm;
+	struct dlm_lock_resource *res;
+	u8 node;
+	u8 cleared = 0;
+
+	dlm = item->dlm;
+	res = item->u.dl.deref_res;
+	node = item->u.dl.deref_node;
+
+	spin_lock(&res->spinlock);
+	BUG_ON(res->state & DLM_LOCK_RES_DROPPING_REF);
+	/* wait for the in-flight refmap update to finish BEFORE looking at
+	 * the bit: the update may be what sets it, and testing first could
+	 * miss it and leave a stale bit behind */
+	__dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
+	if (test_bit(node, res->refmap)) {
+		dlm_lockres_clear_refmap_bit(node, res);
+		cleared = 1;
+	}
+	spin_unlock(&res->spinlock);
+
+	if (cleared) {
+		mlog(0, "%s:%.*s node %u ref dropped in dispatch\n",
+		     dlm->name, res->lockname.len, res->lockname.name, node);
+		dlm_lockres_calc_usage(dlm, res);
+	} else {
+		mlog(ML_ERROR, "%s:%.*s: node %u trying to drop ref "
+		     "but it is already dropped!\n", dlm->name,
+		     res->lockname.len, res->lockname.name, node);
+		/* res->spinlock was released above, so use the variant that
+		 * does not require the caller to hold it */
+		dlm_print_one_lock_resource(res);
+	}
+
+	dlm_lockres_put(res);
+}
+
/*
* DLM_MIGRATE_LOCKRES
*/
-int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
- u8 target)
+static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res,
+ u8 target)
{
struct dlm_master_list_entry *mle = NULL;
struct dlm_master_list_entry *oldmle = NULL;
@@ -2116,7 +2444,7 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
struct list_head *queue, *iter;
int i;
struct dlm_lock *lock;
- int empty = 1;
+ int empty = 1, wake = 0;
if (!dlm_grab(dlm))
return -EINVAL;
@@ -2241,6 +2569,7 @@ int dlm_migrate_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
res->lockname.name, target);
spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_MIGRATING;
+ wake = 1;
spin_unlock(&res->spinlock);
ret = -EINVAL;
}
@@ -2268,6 +2597,9 @@ fail:
* the lockres
*/
+ /* now that remote nodes are spinning on the MIGRATING flag,
+ * ensure that all assert_master work is flushed. */
+ flush_workqueue(dlm->dlm_worker);
/* get an extra reference on the mle.
* otherwise the assert_master from the new
@@ -2296,6 +2628,7 @@ fail:
dlm_put_mle_inuse(mle);
spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_MIGRATING;
+ wake = 1;
spin_unlock(&res->spinlock);
goto leave;
}
@@ -2322,7 +2655,8 @@ fail:
res->owner == target)
break;
- mlog(0, "timed out during migration\n");
+ mlog(0, "%s:%.*s: timed out during migration\n",
+ dlm->name, res->lockname.len, res->lockname.name);
/* avoid hang during shutdown when migrating lockres
* to a node which also goes down */
if (dlm_is_node_dead(dlm, target)) {
@@ -2330,20 +2664,20 @@ fail:
"target %u is no longer up, restarting\n",
dlm->name, res->lockname.len,
res->lockname.name, target);
- ret = -ERESTARTSYS;
+ ret = -EINVAL;
+ /* migration failed, detach and clean up mle */
+ dlm_mle_detach_hb_events(dlm, mle);
+ dlm_put_mle(mle);
+ dlm_put_mle_inuse(mle);
+ spin_lock(&res->spinlock);
+ res->state &= ~DLM_LOCK_RES_MIGRATING;
+ wake = 1;
+ spin_unlock(&res->spinlock);
+ goto leave;
}
- }
- if (ret == -ERESTARTSYS) {
- /* migration failed, detach and clean up mle */
- dlm_mle_detach_hb_events(dlm, mle);
- dlm_put_mle(mle);
- dlm_put_mle_inuse(mle);
- spin_lock(&res->spinlock);
- res->state &= ~DLM_LOCK_RES_MIGRATING;
- spin_unlock(&res->spinlock);
- goto leave;
- }
- /* TODO: if node died: stop, clean up, return error */
+ } else
+ mlog(0, "%s:%.*s: caught signal during migration\n",
+ dlm->name, res->lockname.len, res->lockname.name);
}
/* all done, set the owner, clear the flag */
@@ -2366,6 +2700,11 @@ leave:
if (ret < 0)
dlm_kick_thread(dlm, res);
+ /* wake up waiters if the MIGRATING flag got set
+ * but migration failed */
+ if (wake)
+ wake_up(&res->wq);
+
/* TODO: cleanup */
if (mres)
free_page((unsigned long)mres);
@@ -2376,6 +2715,53 @@ leave:
return ret;
}
+#define DLM_MIGRATION_RETRY_MS 100
+
+/* Should be called only after beginning the domain leave process.
+ * There should not be any remaining locks on nonlocal lock resources,
+ * and there should be no local locks left on locally mastered resources.
+ *
+ * Called with the dlm spinlock held, may drop it to do migration, but
+ * will re-acquire before exit.
+ *
+ * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */
+int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
+{
+ int ret;
+ int lock_dropped = 0;
+
+ if (res->owner != dlm->node_num) {
+ if (!__dlm_lockres_unused(res)) {
+ mlog(ML_ERROR, "%s:%.*s: this node is not master, "
+ "trying to free this but locks remain\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ }
+ goto leave;
+ }
+
+ /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
+ spin_unlock(&dlm->spinlock);
+ lock_dropped = 1;
+ while (1) {
+ ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES);
+ if (ret >= 0)
+ break;
+ if (ret == -ENOTEMPTY) {
+ mlog(ML_ERROR, "lockres %.*s still has local locks!\n",
+ res->lockname.len, res->lockname.name);
+ BUG();
+ }
+
+ mlog(0, "lockres %.*s: migrate failed, "
+ "retrying\n", res->lockname.len,
+ res->lockname.name);
+ msleep(DLM_MIGRATION_RETRY_MS);
+ }
+ spin_lock(&dlm->spinlock);
+leave:
+ return lock_dropped;
+}
+
int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock)
{
int ret;
@@ -2405,7 +2791,8 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
return can_proceed;
}
-int dlm_lockres_is_dirty(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
+static int dlm_lockres_is_dirty(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
{
int ret;
spin_lock(&res->spinlock);
@@ -2434,8 +2821,15 @@ static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm,
__dlm_lockres_reserve_ast(res);
spin_unlock(&res->spinlock);
- /* now flush all the pending asts.. hang out for a bit */
+ /* now flush all the pending asts */
dlm_kick_thread(dlm, res);
+ /* before waiting on DIRTY, block processes which may
+ * try to dirty the lockres before MIGRATING is set */
+ spin_lock(&res->spinlock);
+ BUG_ON(res->state & DLM_LOCK_RES_BLOCK_DIRTY);
+ res->state |= DLM_LOCK_RES_BLOCK_DIRTY;
+ spin_unlock(&res->spinlock);
+ /* now wait on any pending asts and the DIRTY state */
wait_event(dlm->ast_wq, !dlm_lockres_is_dirty(dlm, res));
dlm_lockres_release_ast(dlm, res);
@@ -2461,6 +2855,13 @@ again:
mlog(0, "trying again...\n");
goto again;
}
+ /* now that we are sure the MIGRATING state is there, drop
+ * the unneeded state which blocked threads trying to DIRTY */
+ spin_lock(&res->spinlock);
+ BUG_ON(!(res->state & DLM_LOCK_RES_BLOCK_DIRTY));
+ BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING));
+ res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY;
+ spin_unlock(&res->spinlock);
/* did the target go down or die? */
spin_lock(&dlm->spinlock);
@@ -2490,7 +2891,7 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
{
struct list_head *iter, *iter2;
struct list_head *queue = &res->granted;
- int i;
+ int i, bit;
struct dlm_lock *lock;
assert_spin_locked(&res->spinlock);
@@ -2508,12 +2909,28 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
BUG_ON(!list_empty(&lock->bast_list));
BUG_ON(lock->ast_pending);
BUG_ON(lock->bast_pending);
+ dlm_lockres_clear_refmap_bit(lock->ml.node, res);
list_del_init(&lock->list);
dlm_lock_put(lock);
}
}
queue++;
}
+ bit = 0;
+ while (1) {
+ bit = find_next_bit(res->refmap, O2NM_MAX_NODES, bit);
+ if (bit >= O2NM_MAX_NODES)
+ break;
+ /* do not clear the local node reference, if there is a
+ * process holding this, let it drop the ref itself */
+ if (bit != dlm->node_num) {
+ mlog(0, "%s:%.*s: node %u had a ref to this "
+ "migrating lockres, clearing\n", dlm->name,
+ res->lockname.len, res->lockname.name, bit);
+ dlm_lockres_clear_refmap_bit(bit, res);
+ }
+ bit++;
+ }
}
/* for now this is not too intelligent. we will
@@ -2601,6 +3018,16 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
mlog(0, "migrate request (node %u) returned %d!\n",
nodenum, status);
ret = status;
+ } else if (status == DLM_MIGRATE_RESPONSE_MASTERY_REF) {
+ /* during the migration request we short-circuited
+ * the mastery of the lockres. make sure we have
+ * a mastery ref for nodenum */
+ mlog(0, "%s:%.*s: need ref for node %u\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ nodenum);
+ spin_lock(&res->spinlock);
+ dlm_lockres_set_refmap_bit(nodenum, res);
+ spin_unlock(&res->spinlock);
}
}
@@ -2619,7 +3046,8 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
* we will have no mle in the list to start with. now we can add an mle for
* the migration and this should be the only one found for those scanning the
* list. */
-int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_lock_resource *res = NULL;
@@ -2745,7 +3173,13 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
/* remove it from the list so that only one
* mle will be found */
list_del_init(&tmp->list);
- __dlm_mle_detach_hb_events(dlm, mle);
+ /* this was obviously WRONG. mle is uninitialized here. should be tmp. */
+ __dlm_mle_detach_hb_events(dlm, tmp);
+ ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
+ mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
+ "telling master to get ref for cleared out mle "
+ "during migration\n", dlm->name, namelen, name,
+ master, new_master);
}
spin_unlock(&tmp->spinlock);
}
@@ -2753,6 +3187,8 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
/* now add a migration mle to the tail of the list */
dlm_init_mle(mle, DLM_MLE_MIGRATION, dlm, res, name, namelen);
mle->new_master = new_master;
+ /* the new master will be sending an assert master for this.
+ * at that point we will get the refmap reference */
mle->master = master;
/* do this for consistency with other mle types */
set_bit(new_master, mle->maybe_map);
@@ -2902,6 +3338,13 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
clear_bit(dlm->node_num, iter.node_map);
spin_unlock(&dlm->spinlock);
+ /* ownership of the lockres is changing. account for the
+ * mastery reference here since old_master will briefly have
+ * a reference after the migration completes */
+ spin_lock(&res->spinlock);
+ dlm_lockres_set_refmap_bit(old_master, res);
+ spin_unlock(&res->spinlock);
+
mlog(0, "now time to do a migrate request to other nodes\n");
ret = dlm_do_migrate_request(dlm, res, old_master,
dlm->node_num, &iter);
@@ -2914,8 +3357,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
res->lockname.len, res->lockname.name);
/* this call now finishes out the nodemap
* even if one or more nodes die */
- ret = dlm_do_assert_master(dlm, res->lockname.name,
- res->lockname.len, iter.node_map,
+ ret = dlm_do_assert_master(dlm, res, iter.node_map,
DLM_ASSERT_MASTER_FINISH_MIGRATION);
if (ret < 0) {
/* no longer need to retry. all living nodes contacted. */
@@ -2927,8 +3369,7 @@ int dlm_finish_migration(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
set_bit(old_master, iter.node_map);
mlog(0, "doing assert master of %.*s back to %u\n",
res->lockname.len, res->lockname.name, old_master);
- ret = dlm_do_assert_master(dlm, res->lockname.name,
- res->lockname.len, iter.node_map,
+ ret = dlm_do_assert_master(dlm, res, iter.node_map,
DLM_ASSERT_MASTER_FINISH_MIGRATION);
if (ret < 0) {
mlog(0, "assert master to original master failed "
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 367a11e9e2ed..6d4a83d50152 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -163,9 +163,6 @@ void dlm_dispatch_work(struct work_struct *work)
dlm_workfunc_t *workfunc;
int tot=0;
- if (!dlm_joined(dlm))
- return;
-
spin_lock(&dlm->work_lock);
list_splice_init(&dlm->work_list, &tmp_list);
spin_unlock(&dlm->work_lock);
@@ -821,7 +818,8 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
}
-int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_request_all_locks_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_lock_request *lr = (struct dlm_lock_request *)msg->buf;
@@ -978,7 +976,8 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
}
-int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_reco_data_done_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_reco_data_done *done = (struct dlm_reco_data_done *)msg->buf;
@@ -1129,6 +1128,11 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
if (total_locks == mres_total_locks)
mres->flags |= DLM_MRES_ALL_DONE;
+ mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery",
+ send_to);
+
/* send it */
ret = o2net_send_message(DLM_MIG_LOCKRES_MSG, dlm->key, mres,
sz, send_to, &status);
@@ -1213,6 +1217,34 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
return 0;
}
+static void dlm_add_dummy_lock(struct dlm_ctxt *dlm,
+ struct dlm_migratable_lockres *mres)
+{
+ struct dlm_lock dummy;
+ memset(&dummy, 0, sizeof(dummy));
+ dummy.ml.cookie = 0;
+ dummy.ml.type = LKM_IVMODE;
+ dummy.ml.convert_type = LKM_IVMODE;
+ dummy.ml.highest_blocked = LKM_IVMODE;
+ dummy.lksb = NULL;
+ dummy.ml.node = dlm->node_num;
+ dlm_add_lock_to_array(&dummy, mres, DLM_BLOCKED_LIST);
+}
+
+static inline int dlm_is_dummy_lock(struct dlm_ctxt *dlm,
+ struct dlm_migratable_lock *ml,
+ u8 *nodenum)
+{
+ if (unlikely(ml->cookie == 0 &&
+ ml->type == LKM_IVMODE &&
+ ml->convert_type == LKM_IVMODE &&
+ ml->highest_blocked == LKM_IVMODE &&
+ ml->list == DLM_BLOCKED_LIST)) {
+ *nodenum = ml->node;
+ return 1;
+ }
+ return 0;
+}
int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
struct dlm_migratable_lockres *mres,
@@ -1260,6 +1292,14 @@ int dlm_send_one_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
goto error;
}
}
+ if (total_locks == 0) {
+ /* send a dummy lock to indicate a mastery reference only */
+ mlog(0, "%s:%.*s: sending dummy lock to %u, %s\n",
+ dlm->name, res->lockname.len, res->lockname.name,
+ send_to, flags & DLM_MRES_RECOVERY ? "recovery" :
+ "migration");
+ dlm_add_dummy_lock(dlm, mres);
+ }
/* flush any remaining locks */
ret = dlm_send_mig_lockres_msg(dlm, mres, send_to, res, total_locks);
if (ret < 0)
@@ -1293,7 +1333,8 @@ error:
* do we spin? returning an error only delays the problem really
*/
-int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_migratable_lockres *mres =
@@ -1382,17 +1423,21 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
spin_unlock(&res->spinlock);
+ wake_up(&res->wq);
/* add an extra ref for just-allocated lockres
* otherwise the lockres will be purged immediately */
dlm_lockres_get(res);
-
}
/* at this point we have allocated everything we need,
* and we have a hashed lockres with an extra ref and
* the proper res->state flags. */
ret = 0;
+ spin_lock(&res->spinlock);
+ /* drop this either when master requery finds a different master
+ * or when a lock is added by the recovery worker */
+ dlm_lockres_grab_inflight_ref(dlm, res);
if (mres->master == DLM_LOCK_RES_OWNER_UNKNOWN) {
/* migration cannot have an unknown master */
BUG_ON(!(mres->flags & DLM_MRES_RECOVERY));
@@ -1400,10 +1445,11 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data)
"unknown owner.. will need to requery: "
"%.*s\n", mres->lockname_len, mres->lockname);
} else {
- spin_lock(&res->spinlock);
+ /* take a reference now to pin the lockres, drop it
+ * when locks are added in the worker */
dlm_change_lockres_owner(dlm, res, dlm->node_num);
- spin_unlock(&res->spinlock);
}
+ spin_unlock(&res->spinlock);
/* queue up work for dlm_mig_lockres_worker */
dlm_grab(dlm); /* get an extra ref for the work item */
@@ -1459,6 +1505,9 @@ again:
"this node will take it.\n",
res->lockname.len, res->lockname.name);
} else {
+ spin_lock(&res->spinlock);
+ dlm_lockres_drop_inflight_ref(dlm, res);
+ spin_unlock(&res->spinlock);
mlog(0, "master needs to respond to sender "
"that node %u still owns %.*s\n",
real_master, res->lockname.len,
@@ -1578,7 +1627,8 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
/* this function cannot error, so unless the sending
* or receiving of the message failed, the owner can
* be trusted */
-int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_master_requery *req = (struct dlm_master_requery *)msg->buf;
@@ -1660,21 +1710,38 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
{
struct dlm_migratable_lock *ml;
struct list_head *queue;
+ struct list_head *tmpq = NULL;
struct dlm_lock *newlock = NULL;
struct dlm_lockstatus *lksb = NULL;
int ret = 0;
- int i, bad;
+ int i, j, bad;
struct list_head *iter;
struct dlm_lock *lock = NULL;
+ u8 from = O2NM_MAX_NODES;
+ unsigned int added = 0;
mlog(0, "running %d locks for this lockres\n", mres->num_locks);
for (i=0; i<mres->num_locks; i++) {
ml = &(mres->ml[i]);
+
+ if (dlm_is_dummy_lock(dlm, ml, &from)) {
+ /* placeholder, just need to set the refmap bit */
+ BUG_ON(mres->num_locks != 1);
+ mlog(0, "%s:%.*s: dummy lock for %u\n",
+ dlm->name, mres->lockname_len, mres->lockname,
+ from);
+ spin_lock(&res->spinlock);
+ dlm_lockres_set_refmap_bit(from, res);
+ spin_unlock(&res->spinlock);
+ added++;
+ break;
+ }
BUG_ON(ml->highest_blocked != LKM_IVMODE);
newlock = NULL;
lksb = NULL;
queue = dlm_list_num_to_pointer(res, ml->list);
+ tmpq = NULL;
/* if the lock is for the local node it needs to
* be moved to the proper location within the queue.
@@ -1684,11 +1751,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
spin_lock(&res->spinlock);
- list_for_each(iter, queue) {
- lock = list_entry (iter, struct dlm_lock, list);
- if (lock->ml.cookie != ml->cookie)
- lock = NULL;
- else
+ for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
+ tmpq = dlm_list_idx_to_ptr(res, j);
+ list_for_each(iter, tmpq) {
+ lock = list_entry (iter, struct dlm_lock, list);
+ if (lock->ml.cookie != ml->cookie)
+ lock = NULL;
+ else
+ break;
+ }
+ if (lock)
break;
}
@@ -1698,12 +1770,20 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
u64 c = ml->cookie;
mlog(ML_ERROR, "could not find local lock "
"with cookie %u:%llu!\n",
- dlm_get_lock_cookie_node(c),
- dlm_get_lock_cookie_seq(c));
+ dlm_get_lock_cookie_node(be64_to_cpu(c)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(c)));
+ __dlm_print_one_lock_resource(res);
BUG();
}
BUG_ON(lock->ml.node != ml->node);
+ if (tmpq != queue) {
+ mlog(0, "lock was on %u instead of %u for %.*s\n",
+ j, ml->list, res->lockname.len, res->lockname.name);
+ spin_unlock(&res->spinlock);
+ continue;
+ }
+
/* see NOTE above about why we do not update
* to match the master here */
@@ -1711,6 +1791,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
/* do not alter lock refcount. switching lists. */
list_move_tail(&lock->list, queue);
spin_unlock(&res->spinlock);
+ added++;
mlog(0, "just reordered a local lock!\n");
continue;
@@ -1799,14 +1880,14 @@ skip_lvb:
mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
"exists on this lockres!\n", dlm->name,
res->lockname.len, res->lockname.name,
- dlm_get_lock_cookie_node(c),
- dlm_get_lock_cookie_seq(c));
+ dlm_get_lock_cookie_node(be64_to_cpu(c)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(c)));
mlog(ML_NOTICE, "sent lock: type=%d, conv=%d, "
"node=%u, cookie=%u:%llu, queue=%d\n",
ml->type, ml->convert_type, ml->node,
- dlm_get_lock_cookie_node(ml->cookie),
- dlm_get_lock_cookie_seq(ml->cookie),
+ dlm_get_lock_cookie_node(be64_to_cpu(ml->cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(ml->cookie)),
ml->list);
__dlm_print_one_lock_resource(res);
@@ -1817,12 +1898,22 @@ skip_lvb:
if (!bad) {
dlm_lock_get(newlock);
list_add_tail(&newlock->list, queue);
+ mlog(0, "%s:%.*s: added lock for node %u, "
+ "setting refmap bit\n", dlm->name,
+ res->lockname.len, res->lockname.name, ml->node);
+ dlm_lockres_set_refmap_bit(ml->node, res);
+ added++;
}
spin_unlock(&res->spinlock);
}
mlog(0, "done running all the locks\n");
leave:
+ /* balance the ref taken when the work was queued */
+ spin_lock(&res->spinlock);
+ dlm_lockres_drop_inflight_ref(dlm, res);
+ spin_unlock(&res->spinlock);
+
if (ret < 0) {
mlog_errno(ret);
if (newlock)
@@ -1935,9 +2026,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
if (res->owner == dead_node) {
list_del_init(&res->recovering);
spin_lock(&res->spinlock);
+ /* new_master has our reference from
+ * the lock state sent during recovery */
dlm_change_lockres_owner(dlm, res, new_master);
res->state &= ~DLM_LOCK_RES_RECOVERING;
- if (!__dlm_lockres_unused(res))
+ if (__dlm_lockres_has_locks(res))
__dlm_dirty_lockres(dlm, res);
spin_unlock(&res->spinlock);
wake_up(&res->wq);
@@ -1977,9 +2070,11 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
dlm_lockres_put(res);
}
spin_lock(&res->spinlock);
+ /* new_master has our reference from
+ * the lock state sent during recovery */
dlm_change_lockres_owner(dlm, res, new_master);
res->state &= ~DLM_LOCK_RES_RECOVERING;
- if (!__dlm_lockres_unused(res))
+ if (__dlm_lockres_has_locks(res))
__dlm_dirty_lockres(dlm, res);
spin_unlock(&res->spinlock);
wake_up(&res->wq);
@@ -2048,6 +2143,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
{
struct list_head *iter, *tmpiter;
struct dlm_lock *lock;
+ unsigned int freed = 0;
/* this node is the lockres master:
* 1) remove any stale locks for the dead node
@@ -2062,6 +2158,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
if (lock->ml.node == dead_node) {
list_del_init(&lock->list);
dlm_lock_put(lock);
+ freed++;
}
}
list_for_each_safe(iter, tmpiter, &res->converting) {
@@ -2069,6 +2166,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
if (lock->ml.node == dead_node) {
list_del_init(&lock->list);
dlm_lock_put(lock);
+ freed++;
}
}
list_for_each_safe(iter, tmpiter, &res->blocked) {
@@ -2076,9 +2174,23 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
if (lock->ml.node == dead_node) {
list_del_init(&lock->list);
dlm_lock_put(lock);
+ freed++;
}
}
+ if (freed) {
+ mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
+ "dropping ref from lockres\n", dlm->name,
+ res->lockname.len, res->lockname.name, freed, dead_node);
+ BUG_ON(!test_bit(dead_node, res->refmap));
+ dlm_lockres_clear_refmap_bit(dead_node, res);
+ } else if (test_bit(dead_node, res->refmap)) {
+ mlog(0, "%s:%.*s: dead node %u had a ref, but had "
+ "no locks and had not purged before dying\n", dlm->name,
+ res->lockname.len, res->lockname.name, dead_node);
+ dlm_lockres_clear_refmap_bit(dead_node, res);
+ }
+
/* do not kick thread yet */
__dlm_dirty_lockres(dlm, res);
}
@@ -2141,9 +2253,21 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
spin_lock(&res->spinlock);
/* zero the lvb if necessary */
dlm_revalidate_lvb(dlm, res, dead_node);
- if (res->owner == dead_node)
+ if (res->owner == dead_node) {
+ if (res->state & DLM_LOCK_RES_DROPPING_REF)
+ mlog(0, "%s:%.*s: owned by "
+ "dead node %u, this node was "
+ "dropping its ref when it died. "
+ "continue, dropping the flag.\n",
+ dlm->name, res->lockname.len,
+ res->lockname.name, dead_node);
+
+ /* the wake_up for this will happen when the
+ * RECOVERING flag is dropped later */
+ res->state &= ~DLM_LOCK_RES_DROPPING_REF;
+
dlm_move_lockres_to_recovery_list(dlm, res);
- else if (res->owner == dlm->node_num) {
+ } else if (res->owner == dlm->node_num) {
dlm_free_dead_locks(dlm, res, dead_node);
__dlm_lockres_calc_usage(dlm, res);
}
@@ -2480,7 +2604,8 @@ retry:
return ret;
}
-int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_begin_reco *br = (struct dlm_begin_reco *)msg->buf;
@@ -2608,7 +2733,8 @@ stage2:
return ret;
}
-int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_finalize_reco *fr = (struct dlm_finalize_reco *)msg->buf;
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 0c822f3ffb05..8ffa0916eb86 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -54,9 +54,6 @@
#include "cluster/masklog.h"
static int dlm_thread(void *data);
-static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
- struct dlm_lock_resource *lockres);
-
static void dlm_flush_asts(struct dlm_ctxt *dlm);
#define dlm_lock_is_remote(dlm, lock) ((lock)->ml.node != (dlm)->node_num)
@@ -82,14 +79,33 @@ repeat:
current->state = TASK_RUNNING;
}
-
-int __dlm_lockres_unused(struct dlm_lock_resource *res)
+int __dlm_lockres_has_locks(struct dlm_lock_resource *res)
{
if (list_empty(&res->granted) &&
list_empty(&res->converting) &&
- list_empty(&res->blocked) &&
- list_empty(&res->dirty))
- return 1;
+ list_empty(&res->blocked))
+ return 0;
+ return 1;
+}
+
+/* "unused": the lockres has no locks, is not on the dirty list,
+ * has no inflight locks (in the gap between mastery and acquiring
+ * the first lock), and has no bits in its refmap.
+ * truly ready to be freed. */
+int __dlm_lockres_unused(struct dlm_lock_resource *res)
+{
+ if (!__dlm_lockres_has_locks(res) &&
+ (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) {
+ /* try not to scan the bitmap unless the first two
+ * conditions are already true */
+ int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+ if (bit >= O2NM_MAX_NODES) {
+ /* since the bit for dlm->node_num is not
+ * set, inflight_locks better be zero */
+ BUG_ON(res->inflight_locks != 0);
+ return 1;
+ }
+ }
return 0;
}
@@ -106,46 +122,21 @@ void __dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
assert_spin_locked(&res->spinlock);
if (__dlm_lockres_unused(res)){
- /* For now, just keep any resource we master */
- if (res->owner == dlm->node_num)
- {
- if (!list_empty(&res->purge)) {
- mlog(0, "we master %s:%.*s, but it is on "
- "the purge list. Removing\n",
- dlm->name, res->lockname.len,
- res->lockname.name);
- list_del_init(&res->purge);
- dlm->purge_count--;
- }
- return;
- }
-
if (list_empty(&res->purge)) {
- mlog(0, "putting lockres %.*s from purge list\n",
- res->lockname.len, res->lockname.name);
+ mlog(0, "putting lockres %.*s:%p onto purge list\n",
+ res->lockname.len, res->lockname.name, res);
res->last_used = jiffies;
+ dlm_lockres_get(res);
list_add_tail(&res->purge, &dlm->purge_list);
dlm->purge_count++;
-
- /* if this node is not the owner, there is
- * no way to keep track of who the owner could be.
- * unhash it to avoid serious problems. */
- if (res->owner != dlm->node_num) {
- mlog(0, "%s:%.*s: doing immediate "
- "purge of lockres owned by %u\n",
- dlm->name, res->lockname.len,
- res->lockname.name, res->owner);
-
- dlm_purge_lockres_now(dlm, res);
- }
}
} else if (!list_empty(&res->purge)) {
- mlog(0, "removing lockres %.*s from purge list, "
- "owner=%u\n", res->lockname.len, res->lockname.name,
- res->owner);
+ mlog(0, "removing lockres %.*s:%p from purge list, owner=%u\n",
+ res->lockname.len, res->lockname.name, res, res->owner);
list_del_init(&res->purge);
+ dlm_lockres_put(res);
dlm->purge_count--;
}
}
@@ -163,68 +154,65 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm,
spin_unlock(&dlm->spinlock);
}
-/* TODO: Eventual API: Called with the dlm spinlock held, may drop it
- * to do migration, but will re-acquire before exit. */
-void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *lockres)
+static int dlm_purge_lockres(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
{
int master;
- int ret;
-
- spin_lock(&lockres->spinlock);
- master = lockres->owner == dlm->node_num;
- spin_unlock(&lockres->spinlock);
+ int ret = 0;
- mlog(0, "purging lockres %.*s, master = %d\n", lockres->lockname.len,
- lockres->lockname.name, master);
-
- /* Non master is the easy case -- no migration required, just
- * quit. */
+ spin_lock(&res->spinlock);
+ if (!__dlm_lockres_unused(res)) {
+ spin_unlock(&res->spinlock);
+ mlog(0, "%s:%.*s: tried to purge but not unused\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ return -ENOTEMPTY;
+ }
+ master = (res->owner == dlm->node_num);
if (!master)
- goto finish;
-
- /* Wheee! Migrate lockres here! */
- spin_unlock(&dlm->spinlock);
-again:
+ res->state |= DLM_LOCK_RES_DROPPING_REF;
+ spin_unlock(&res->spinlock);
- ret = dlm_migrate_lockres(dlm, lockres, O2NM_MAX_NODES);
- if (ret == -ENOTEMPTY) {
- mlog(ML_ERROR, "lockres %.*s still has local locks!\n",
- lockres->lockname.len, lockres->lockname.name);
+ mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len,
+ res->lockname.name, master);
- BUG();
- } else if (ret < 0) {
- mlog(ML_NOTICE, "lockres %.*s: migrate failed, retrying\n",
- lockres->lockname.len, lockres->lockname.name);
- msleep(100);
- goto again;
+ if (!master) {
+ spin_lock(&res->spinlock);
+ /* This ensures that clear refmap is sent after the set */
+ __dlm_wait_on_lockres_flags(res, DLM_LOCK_RES_SETREF_INPROG);
+ spin_unlock(&res->spinlock);
+ /* drop spinlock to do messaging, retake below */
+ spin_unlock(&dlm->spinlock);
+ /* clear our bit from the master's refmap, ignore errors */
+ ret = dlm_drop_lockres_ref(dlm, res);
+ if (ret < 0) {
+ mlog_errno(ret);
+ if (!dlm_is_host_down(ret))
+ BUG();
+ }
+ mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n",
+ dlm->name, res->lockname.len, res->lockname.name, ret);
+ spin_lock(&dlm->spinlock);
}
- spin_lock(&dlm->spinlock);
-
-finish:
- if (!list_empty(&lockres->purge)) {
- list_del_init(&lockres->purge);
+ if (!list_empty(&res->purge)) {
+ mlog(0, "removing lockres %.*s:%p from purgelist, "
+ "master = %d\n", res->lockname.len, res->lockname.name,
+ res, master);
+ list_del_init(&res->purge);
+ dlm_lockres_put(res);
dlm->purge_count--;
}
- __dlm_unhash_lockres(lockres);
-}
-
-/* make an unused lockres go away immediately.
- * as soon as the dlm spinlock is dropped, this lockres
- * will not be found. kfree still happens on last put. */
-static void dlm_purge_lockres_now(struct dlm_ctxt *dlm,
- struct dlm_lock_resource *lockres)
-{
- assert_spin_locked(&dlm->spinlock);
- assert_spin_locked(&lockres->spinlock);
+ __dlm_unhash_lockres(res);
- BUG_ON(!__dlm_lockres_unused(lockres));
-
- if (!list_empty(&lockres->purge)) {
- list_del_init(&lockres->purge);
- dlm->purge_count--;
+ /* lockres is not in the hash now. drop the flag and wake up
+ * any processes waiting in dlm_get_lock_resource. */
+ if (!master) {
+ spin_lock(&res->spinlock);
+ res->state &= ~DLM_LOCK_RES_DROPPING_REF;
+ spin_unlock(&res->spinlock);
+ wake_up(&res->wq);
}
- __dlm_unhash_lockres(lockres);
+ return 0;
}
static void dlm_run_purge_list(struct dlm_ctxt *dlm,
@@ -268,13 +256,17 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
break;
}
+ mlog(0, "removing lockres %.*s:%p from purgelist\n",
+ lockres->lockname.len, lockres->lockname.name, lockres);
list_del_init(&lockres->purge);
+ dlm_lockres_put(lockres);
dlm->purge_count--;
/* This may drop and reacquire the dlm spinlock if it
* has to do migration. */
mlog(0, "calling dlm_purge_lockres!\n");
- dlm_purge_lockres(dlm, lockres);
+ if (dlm_purge_lockres(dlm, lockres))
+ BUG();
mlog(0, "DONE calling dlm_purge_lockres!\n");
/* Avoid adding any scheduling latencies */
@@ -467,12 +459,17 @@ void __dlm_dirty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
assert_spin_locked(&res->spinlock);
/* don't shuffle secondary queues */
- if ((res->owner == dlm->node_num) &&
- !(res->state & DLM_LOCK_RES_DIRTY)) {
- /* ref for dirty_list */
- dlm_lockres_get(res);
- list_add_tail(&res->dirty, &dlm->dirty_list);
- res->state |= DLM_LOCK_RES_DIRTY;
+ if ((res->owner == dlm->node_num)) {
+ if (res->state & (DLM_LOCK_RES_MIGRATING |
+ DLM_LOCK_RES_BLOCK_DIRTY))
+ return;
+
+ if (list_empty(&res->dirty)) {
+ /* ref for dirty_list */
+ dlm_lockres_get(res);
+ list_add_tail(&res->dirty, &dlm->dirty_list);
+ res->state |= DLM_LOCK_RES_DIRTY;
+ }
}
}
@@ -651,7 +648,7 @@ static int dlm_thread(void *data)
dlm_lockres_get(res);
spin_lock(&res->spinlock);
- res->state &= ~DLM_LOCK_RES_DIRTY;
+ /* We clear the DLM_LOCK_RES_DIRTY state once we shuffle lists below */
list_del_init(&res->dirty);
spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock);
@@ -675,10 +672,11 @@ static int dlm_thread(void *data)
/* it is now ok to move lockreses in these states
* to the dirty list, assuming that they will only be
* dirty for a short while. */
+ BUG_ON(res->state & DLM_LOCK_RES_MIGRATING);
if (res->state & (DLM_LOCK_RES_IN_PROGRESS |
- DLM_LOCK_RES_MIGRATING |
DLM_LOCK_RES_RECOVERING)) {
/* move it to the tail and keep going */
+ res->state &= ~DLM_LOCK_RES_DIRTY;
spin_unlock(&res->spinlock);
mlog(0, "delaying list shuffling for in-"
"progress lockres %.*s, state=%d\n",
@@ -699,6 +697,7 @@ static int dlm_thread(void *data)
/* called while holding lockres lock */
dlm_shuffle_lists(dlm, res);
+ res->state &= ~DLM_LOCK_RES_DIRTY;
spin_unlock(&res->spinlock);
dlm_lockres_calc_usage(dlm, res);
@@ -709,11 +708,8 @@ in_progress:
/* if the lock was in-progress, stick
* it on the back of the list */
if (delay) {
- /* ref for dirty_list */
- dlm_lockres_get(res);
spin_lock(&res->spinlock);
- list_add_tail(&res->dirty, &dlm->dirty_list);
- res->state |= DLM_LOCK_RES_DIRTY;
+ __dlm_dirty_lockres(dlm, res);
spin_unlock(&res->spinlock);
}
dlm_lockres_put(res);
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 37be4b2e0d4a..86ca085ef324 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -147,6 +147,10 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
goto leave;
}
+ if (res->state & DLM_LOCK_RES_MIGRATING) {
+ status = DLM_MIGRATING;
+ goto leave;
+ }
/* see above for what the spec says about
* LKM_CANCEL and the lock queue state */
@@ -244,8 +248,8 @@ leave:
/* this should always be coupled with list removal */
BUG_ON(!(actions & DLM_UNLOCK_REMOVE_LOCK));
mlog(0, "lock %u:%llu should be gone now! refs=%d\n",
- dlm_get_lock_cookie_node(lock->ml.cookie),
- dlm_get_lock_cookie_seq(lock->ml.cookie),
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
atomic_read(&lock->lock_refs.refcount)-1);
dlm_lock_put(lock);
}
@@ -379,7 +383,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
* returns: DLM_NORMAL, DLM_BADARGS, DLM_IVLOCKID,
* return value from dlmunlock_master
*/
-int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data)
+int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
{
struct dlm_ctxt *dlm = data;
struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf;
@@ -502,8 +507,8 @@ not_found:
if (!found)
mlog(ML_ERROR, "failed to find lock to unlock! "
"cookie=%u:%llu\n",
- dlm_get_lock_cookie_node(unlock->cookie),
- dlm_get_lock_cookie_seq(unlock->cookie));
+ dlm_get_lock_cookie_node(be64_to_cpu(unlock->cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(unlock->cookie)));
else
dlm_lock_put(lock);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 10953a508f2f..f2cd3bf9efb2 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1365,13 +1365,13 @@ bail:
return ret;
}
-struct inode_operations ocfs2_file_iops = {
+const struct inode_operations ocfs2_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
.permission = ocfs2_permission,
};
-struct inode_operations ocfs2_special_file_iops = {
+const struct inode_operations ocfs2_special_file_iops = {
.setattr = ocfs2_setattr,
.getattr = ocfs2_getattr,
.permission = ocfs2_permission,
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 601a453f18a8..cc973f01f6ce 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -28,8 +28,8 @@
extern const struct file_operations ocfs2_fops;
extern const struct file_operations ocfs2_dops;
-extern struct inode_operations ocfs2_file_iops;
-extern struct inode_operations ocfs2_special_file_iops;
+extern const struct inode_operations ocfs2_file_iops;
+extern const struct inode_operations ocfs2_special_file_iops;
struct ocfs2_alloc_context;
enum ocfs2_alloc_restarted {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index e1216364d191..d026b4f27757 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -306,8 +306,8 @@ int ocfs2_journal_dirty_data(handle_t *handle,
* for the dinode, one for the new block. */
#define OCFS2_SIMPLE_DIR_EXTEND_CREDITS (2)
-/* file update (nlink, etc) + dir entry block */
-#define OCFS2_LINK_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
+/* file update (nlink, etc) + directory mtime/ctime + dir entry block */
+#define OCFS2_LINK_CREDITS (2*OCFS2_INODE_UPDATE_CREDITS + 1)
/* inode + dir inode (if we unlink a dir), + dir entry block + orphan
* dir inode link */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f3d7803b4b46..28dd757ff67d 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1098,7 +1098,7 @@ static int ocfs2_rename(struct inode *old_dir,
BUG();
}
- /* Assume a directory heirarchy thusly:
+ /* Assume a directory hierarchy thusly:
* a/b/c
* a/d
* a,b,c, and d are all directories.
@@ -2306,7 +2306,7 @@ leave:
return status;
}
-struct inode_operations ocfs2_dir_iops = {
+const struct inode_operations ocfs2_dir_iops = {
.create = ocfs2_create,
.lookup = ocfs2_lookup,
.link = ocfs2_link,
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index 8425944fcccd..0975c7b7212b 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -26,7 +26,7 @@
#ifndef OCFS2_NAMEI_H
#define OCFS2_NAMEI_H
-extern struct inode_operations ocfs2_dir_iops;
+extern const struct inode_operations ocfs2_dir_iops;
struct dentry *ocfs2_get_parent(struct dentry *child);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 6e300a88a47e..6534f92424dd 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -116,7 +116,7 @@ static void ocfs2_destroy_inode(struct inode *inode);
static unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
-static struct super_operations ocfs2_sops = {
+static const struct super_operations ocfs2_sops = {
.statfs = ocfs2_statfs,
.alloc_inode = ocfs2_alloc_inode,
.destroy_inode = ocfs2_destroy_inode,
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 03b0191534d5..40dc1a51f4a9 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -170,12 +170,12 @@ bail:
return ERR_PTR(status);
}
-struct inode_operations ocfs2_symlink_inode_operations = {
+const struct inode_operations ocfs2_symlink_inode_operations = {
.readlink = page_readlink,
.follow_link = ocfs2_follow_link,
.getattr = ocfs2_getattr,
};
-struct inode_operations ocfs2_fast_symlink_inode_operations = {
+const struct inode_operations ocfs2_fast_symlink_inode_operations = {
.readlink = ocfs2_readlink,
.follow_link = ocfs2_follow_link,
.getattr = ocfs2_getattr,
diff --git a/fs/ocfs2/symlink.h b/fs/ocfs2/symlink.h
index 1ea9e4d9e9eb..65a6c9c6ad51 100644
--- a/fs/ocfs2/symlink.h
+++ b/fs/ocfs2/symlink.h
@@ -26,8 +26,8 @@
#ifndef OCFS2_SYMLINK_H
#define OCFS2_SYMLINK_H
-extern struct inode_operations ocfs2_symlink_inode_operations;
-extern struct inode_operations ocfs2_fast_symlink_inode_operations;
+extern const struct inode_operations ocfs2_symlink_inode_operations;
+extern const struct inode_operations ocfs2_fast_symlink_inode_operations;
/*
* Test whether an inode is a fast symlink.
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c
index 0afd8b9af70f..f30e63b9910c 100644
--- a/fs/ocfs2/vote.c
+++ b/fs/ocfs2/vote.c
@@ -887,7 +887,7 @@ static inline int ocfs2_translate_response(int response)
static int ocfs2_handle_response_message(struct o2net_msg *msg,
u32 len,
- void *data)
+ void *data, void **ret_data)
{
unsigned int response_id, node_num;
int response_status;
@@ -943,7 +943,7 @@ bail:
static int ocfs2_handle_vote_message(struct o2net_msg *msg,
u32 len,
- void *data)
+ void *data, void **ret_data)
{
int status;
struct ocfs2_super *osb = data;
@@ -1007,7 +1007,7 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb)
osb->net_key,
sizeof(struct ocfs2_response_msg),
ocfs2_handle_response_message,
- osb, &osb->osb_net_handlers);
+ osb, NULL, &osb->osb_net_handlers);
if (status) {
mlog_errno(status);
goto bail;
@@ -1017,7 +1017,7 @@ int ocfs2_register_net_handlers(struct ocfs2_super *osb)
osb->net_key,
sizeof(struct ocfs2_vote_msg),
ocfs2_handle_vote_message,
- osb, &osb->osb_net_handlers);
+ osb, NULL, &osb->osb_net_handlers);
if (status) {
mlog_errno(status);
goto bail;
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 99c0bc37ba09..bde1c164417d 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -169,7 +169,7 @@ static const struct file_operations openprom_operations = {
static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
-static struct inode_operations openprom_inode_operations = {
+static const struct inode_operations openprom_inode_operations = {
.lookup = openpromfs_lookup,
};
@@ -364,7 +364,7 @@ static int openprom_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static struct super_operations openprom_sops = {
+static const struct super_operations openprom_sops = {
.alloc_inode = openprom_alloc_inode,
.destroy_inode = openprom_destroy_inode,
.read_inode = openprom_read_inode,
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 3d73d94d93a7..22d38ffc9ef0 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -358,14 +358,13 @@ void delete_partition(struct gendisk *disk, int part)
p->ios[0] = p->ios[1] = 0;
p->sectors[0] = p->sectors[1] = 0;
sysfs_remove_link(&p->kobj, "subsystem");
- if (p->holder_dir)
- kobject_unregister(p->holder_dir);
+ kobject_unregister(p->holder_dir);
kobject_uevent(&p->kobj, KOBJ_REMOVE);
kobject_del(&p->kobj);
kobject_put(&p->kobj);
}
-void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
+void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
{
struct hd_struct *p;
@@ -390,6 +389,15 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len)
if (!disk->part_uevent_suppress)
kobject_uevent(&p->kobj, KOBJ_ADD);
sysfs_create_link(&p->kobj, &block_subsys.kset.kobj, "subsystem");
+ if (flags & ADDPART_FLAG_WHOLEDISK) {
+ static struct attribute addpartattr = {
+ .name = "whole_disk",
+ .mode = S_IRUSR | S_IRGRP | S_IROTH,
+ .owner = THIS_MODULE,
+ };
+
+ sysfs_create_file(&p->kobj, &addpartattr);
+ }
partition_sysfs_add_subdir(p);
disk->part[part-1] = p;
}
@@ -543,9 +551,9 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
printk(" %s: p%d exceeds device capacity\n",
disk->disk_name, p);
}
- add_partition(disk, p, from, size);
+ add_partition(disk, p, from, size, state->parts[p].flags);
#ifdef CONFIG_BLK_DEV_MD
- if (state->parts[p].flags)
+ if (state->parts[p].flags & ADDPART_FLAG_RAID)
md_autodetect_dev(bdev->bd_dev+p);
#endif
}
@@ -594,10 +602,8 @@ void del_gendisk(struct gendisk *disk)
disk->stamp = 0;
kobject_uevent(&disk->kobj, KOBJ_REMOVE);
- if (disk->holder_dir)
- kobject_unregister(disk->holder_dir);
- if (disk->slave_dir)
- kobject_unregister(disk->slave_dir);
+ kobject_unregister(disk->holder_dir);
+ kobject_unregister(disk->slave_dir);
if (disk->driverfs_dev) {
char *disk_name = make_block_name(disk);
sysfs_remove_link(&disk->kobj, "device");
diff --git a/fs/partitions/msdos.c b/fs/partitions/msdos.c
index 8c7af1777819..4ccec4cd1367 100644
--- a/fs/partitions/msdos.c
+++ b/fs/partitions/msdos.c
@@ -63,15 +63,25 @@ msdos_magic_present(unsigned char *p)
#define AIX_LABEL_MAGIC4 0xC1
static int aix_magic_present(unsigned char *p, struct block_device *bdev)
{
+ struct partition *pt = (struct partition *) (p + 0x1be);
Sector sect;
unsigned char *d;
- int ret = 0;
+ int slot, ret = 0;
- if (p[0] != AIX_LABEL_MAGIC1 &&
- p[1] != AIX_LABEL_MAGIC2 &&
- p[2] != AIX_LABEL_MAGIC3 &&
- p[3] != AIX_LABEL_MAGIC4)
+ if (!(p[0] == AIX_LABEL_MAGIC1 &&
+ p[1] == AIX_LABEL_MAGIC2 &&
+ p[2] == AIX_LABEL_MAGIC3 &&
+ p[3] == AIX_LABEL_MAGIC4))
return 0;
+ /* Assume the partition table is valid if Linux partitions exist */
+ for (slot = 1; slot <= 4; slot++, pt++) {
+ if (pt->sys_ind == LINUX_SWAP_PARTITION ||
+ pt->sys_ind == LINUX_RAID_PARTITION ||
+ pt->sys_ind == LINUX_DATA_PARTITION ||
+ pt->sys_ind == LINUX_LVM_PARTITION ||
+ is_extended_partition(pt))
+ return 0;
+ }
d = read_dev_sector(bdev, 7, &sect);
if (d) {
if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M')
@@ -155,7 +165,7 @@ parse_extended(struct parsed_partitions *state, struct block_device *bdev,
put_partition(state, state->next, next, size);
if (SYS_IND(p) == LINUX_RAID_PARTITION)
- state->parts[state->next].flags = 1;
+ state->parts[state->next].flags = ADDPART_FLAG_RAID;
loopct = 0;
if (++state->next == state->limit)
goto done;
diff --git a/fs/partitions/sgi.c b/fs/partitions/sgi.c
index 6fa4ff895104..ed5ac83fe83a 100644
--- a/fs/partitions/sgi.c
+++ b/fs/partitions/sgi.c
@@ -72,7 +72,7 @@ int sgi_partition(struct parsed_partitions *state, struct block_device *bdev)
if (blocks) {
put_partition(state, slot, start, blocks);
if (be32_to_cpu(p->type) == LINUX_RAID_PARTITION)
- state->parts[slot].flags = 1;
+ state->parts[slot].flags = ADDPART_FLAG_RAID;
}
slot++;
}
diff --git a/fs/partitions/sun.c b/fs/partitions/sun.c
index 0a5927c806ca..123f8b46c8ba 100644
--- a/fs/partitions/sun.c
+++ b/fs/partitions/sun.c
@@ -80,8 +80,11 @@ int sun_partition(struct parsed_partitions *state, struct block_device *bdev)
num_sectors = be32_to_cpu(p->num_sectors);
if (num_sectors) {
put_partition(state, slot, st_sector, num_sectors);
+ state->parts[slot].flags = 0;
if (label->infos[i].id == LINUX_RAID_PARTITION)
- state->parts[slot].flags = 1;
+ state->parts[slot].flags |= ADDPART_FLAG_RAID;
+ if (label->infos[i].id == SUN_WHOLE_DISK)
+ state->parts[slot].flags |= ADDPART_FLAG_WHOLEDISK;
}
slot++;
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 68090e84f589..ebafde7d6aba 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -16,6 +16,7 @@
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
+#include <linux/audit.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
@@ -985,6 +986,10 @@ int do_pipe(int *fd)
goto err_fdr;
fdw = error;
+ error = audit_fd_pair(fdr, fdw);
+ if (error < 0)
+ goto err_fdw;
+
fd_install(fdr, fr);
fd_install(fdw, fw);
fd[0] = fdr;
@@ -992,6 +997,8 @@ int do_pipe(int *fd)
return 0;
+ err_fdw:
+ put_unused_fd(fdw);
err_fdr:
put_unused_fd(fdr);
err_read_pipe:
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index f6c776272572..a6b3a8f878f0 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -8,7 +8,7 @@ proc-y := nommu.o task_nommu.o
proc-$(CONFIG_MMU) := mmu.o task_mmu.o
proc-y += inode.o root.o base.o generic.o array.o \
- proc_tty.o proc_misc.o
+ proc_tty.o proc_misc.o proc_sysctl.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
proc-$(CONFIG_PROC_VMCORE) += vmcore.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 70e4fab117b1..07c9cdbcdcac 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -351,7 +351,7 @@ static int do_task_stat(struct task_struct *task, char * buffer, int whole)
struct signal_struct *sig = task->signal;
if (sig->tty) {
- tty_pgrp = sig->tty->pgrp;
+ tty_pgrp = pid_nr(sig->tty->pgrp);
tty_nr = new_encode_dev(tty_devnum(sig->tty));
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 1a979ea3b379..4f5745af8c19 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -93,8 +93,8 @@ struct pid_entry {
int len;
char *name;
mode_t mode;
- struct inode_operations *iop;
- struct file_operations *fop;
+ const struct inode_operations *iop;
+ const struct file_operations *fop;
union proc_op op;
};
@@ -352,7 +352,7 @@ static int proc_setattr(struct dentry *dentry, struct iattr *attr)
return error;
}
-static struct inode_operations proc_def_inode_operations = {
+static const struct inode_operations proc_def_inode_operations = {
.setattr = proc_setattr,
};
@@ -424,7 +424,7 @@ static unsigned mounts_poll(struct file *file, poll_table *wait)
return res;
}
-static struct file_operations proc_mounts_operations = {
+static const struct file_operations proc_mounts_operations = {
.open = mounts_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -462,7 +462,7 @@ static int mountstats_open(struct inode *inode, struct file *file)
return ret;
}
-static struct file_operations proc_mountstats_operations = {
+static const struct file_operations proc_mountstats_operations = {
.open = mountstats_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -501,7 +501,7 @@ out_no_task:
return length;
}
-static struct file_operations proc_info_file_operations = {
+static const struct file_operations proc_info_file_operations = {
.read = proc_info_read,
};
@@ -646,7 +646,7 @@ static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
return file->f_pos;
}
-static struct file_operations proc_mem_operations = {
+static const struct file_operations proc_mem_operations = {
.llseek = mem_lseek,
.read = mem_read,
.write = mem_write,
@@ -710,7 +710,7 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
return end - buffer;
}
-static struct file_operations proc_oom_adjust_operations = {
+static const struct file_operations proc_oom_adjust_operations = {
.read = oom_adjust_read,
.write = oom_adjust_write,
};
@@ -777,7 +777,7 @@ out_free_page:
return length;
}
-static struct file_operations proc_loginuid_operations = {
+static const struct file_operations proc_loginuid_operations = {
.read = proc_loginuid_read,
.write = proc_loginuid_write,
};
@@ -849,7 +849,7 @@ out_no_task:
return result;
}
-static struct file_operations proc_seccomp_operations = {
+static const struct file_operations proc_seccomp_operations = {
.read = seccomp_read,
.write = seccomp_write,
};
@@ -908,7 +908,7 @@ static ssize_t proc_fault_inject_write(struct file * file,
return end - buffer;
}
-static struct file_operations proc_fault_inject_operations = {
+static const struct file_operations proc_fault_inject_operations = {
.read = proc_fault_inject_read,
.write = proc_fault_inject_write,
};
@@ -980,7 +980,7 @@ out:
return error;
}
-static struct inode_operations proc_pid_link_inode_operations = {
+static const struct inode_operations proc_pid_link_inode_operations = {
.readlink = proc_pid_readlink,
.follow_link = proc_pid_follow_link,
.setattr = proc_setattr,
@@ -1408,7 +1408,7 @@ out_no_task:
return retval;
}
-static struct file_operations proc_fd_operations = {
+static const struct file_operations proc_fd_operations = {
.read = generic_read_dir,
.readdir = proc_readfd,
};
@@ -1416,7 +1416,7 @@ static struct file_operations proc_fd_operations = {
/*
* proc directories can do almost nothing..
*/
-static struct inode_operations proc_fd_inode_operations = {
+static const struct inode_operations proc_fd_inode_operations = {
.lookup = proc_lookupfd,
.setattr = proc_setattr,
};
@@ -1623,7 +1623,7 @@ out_no_task:
return length;
}
-static struct file_operations proc_pid_attr_operations = {
+static const struct file_operations proc_pid_attr_operations = {
.read = proc_pid_attr_read,
.write = proc_pid_attr_write,
};
@@ -1644,7 +1644,7 @@ static int proc_attr_dir_readdir(struct file * filp,
attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
}
-static struct file_operations proc_attr_dir_operations = {
+static const struct file_operations proc_attr_dir_operations = {
.read = generic_read_dir,
.readdir = proc_attr_dir_readdir,
};
@@ -1656,7 +1656,7 @@ static struct dentry *proc_attr_dir_lookup(struct inode *dir,
attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
}
-static struct inode_operations proc_attr_dir_inode_operations = {
+static const struct inode_operations proc_attr_dir_inode_operations = {
.lookup = proc_attr_dir_lookup,
.getattr = pid_getattr,
.setattr = proc_setattr,
@@ -1682,7 +1682,7 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
return ERR_PTR(vfs_follow_link(nd,tmp));
}
-static struct inode_operations proc_self_inode_operations = {
+static const struct inode_operations proc_self_inode_operations = {
.readlink = proc_self_readlink,
.follow_link = proc_self_follow_link,
};
@@ -1810,17 +1810,21 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, filldir_t filld
static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
{
return sprintf(buffer,
+#ifdef CONFIG_TASK_XACCT
"rchar: %llu\n"
"wchar: %llu\n"
"syscr: %llu\n"
"syscw: %llu\n"
+#endif
"read_bytes: %llu\n"
"write_bytes: %llu\n"
"cancelled_write_bytes: %llu\n",
+#ifdef CONFIG_TASK_XACCT
(unsigned long long)task->rchar,
(unsigned long long)task->wchar,
(unsigned long long)task->syscr,
(unsigned long long)task->syscw,
+#endif
(unsigned long long)task->ioac.read_bytes,
(unsigned long long)task->ioac.write_bytes,
(unsigned long long)task->ioac.cancelled_write_bytes);
@@ -1830,8 +1834,8 @@ static int proc_pid_io_accounting(struct task_struct *task, char *buffer)
/*
* Thread groups
*/
-static struct file_operations proc_task_operations;
-static struct inode_operations proc_task_inode_operations;
+static const struct file_operations proc_task_operations;
+static const struct inode_operations proc_task_inode_operations;
static struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, task),
@@ -1890,7 +1894,7 @@ static int proc_tgid_base_readdir(struct file * filp,
tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
}
-static struct file_operations proc_tgid_base_operations = {
+static const struct file_operations proc_tgid_base_operations = {
.read = generic_read_dir,
.readdir = proc_tgid_base_readdir,
};
@@ -1900,7 +1904,7 @@ static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *de
tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
}
-static struct inode_operations proc_tgid_base_inode_operations = {
+static const struct inode_operations proc_tgid_base_inode_operations = {
.lookup = proc_tgid_base_lookup,
.getattr = pid_getattr,
.setattr = proc_setattr,
@@ -2173,12 +2177,12 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den
tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
}
-static struct file_operations proc_tid_base_operations = {
+static const struct file_operations proc_tid_base_operations = {
.read = generic_read_dir,
.readdir = proc_tid_base_readdir,
};
-static struct inode_operations proc_tid_base_inode_operations = {
+static const struct inode_operations proc_tid_base_inode_operations = {
.lookup = proc_tid_base_lookup,
.getattr = pid_getattr,
.setattr = proc_setattr,
@@ -2404,13 +2408,13 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct
return 0;
}
-static struct inode_operations proc_task_inode_operations = {
+static const struct inode_operations proc_task_inode_operations = {
.lookup = proc_task_lookup,
.getattr = proc_task_getattr,
.setattr = proc_setattr,
};
-static struct file_operations proc_task_operations = {
+static const struct file_operations proc_task_operations = {
.read = generic_read_dir,
.readdir = proc_task_readdir,
};
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 853cb877d5f3..775fb21294d8 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -32,14 +32,14 @@ static loff_t proc_file_lseek(struct file *, loff_t, int);
DEFINE_SPINLOCK(proc_subdir_lock);
-int proc_match(int len, const char *name, struct proc_dir_entry *de)
+static int proc_match(int len, const char *name, struct proc_dir_entry *de)
{
if (de->namelen != len)
return 0;
return !memcmp(name, de->name, len);
}
-static struct file_operations proc_file_operations = {
+static const struct file_operations proc_file_operations = {
.llseek = proc_file_lseek,
.read = proc_file_read,
.write = proc_file_write,
@@ -265,7 +265,7 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
return 0;
}
-static struct inode_operations proc_file_inode_operations = {
+static const struct inode_operations proc_file_inode_operations = {
.setattr = proc_notify_change,
};
@@ -357,7 +357,7 @@ static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-static struct inode_operations proc_link_inode_operations = {
+static const struct inode_operations proc_link_inode_operations = {
.readlink = generic_readlink,
.follow_link = proc_follow_link,
};
@@ -497,7 +497,7 @@ out: unlock_kernel();
* use the in-memory "struct proc_dir_entry" tree to parse
* the /proc directory.
*/
-static struct file_operations proc_dir_operations = {
+static const struct file_operations proc_dir_operations = {
.read = generic_read_dir,
.readdir = proc_readdir,
};
@@ -505,7 +505,7 @@ static struct file_operations proc_dir_operations = {
/*
* proc directories can do almost nothing..
*/
-static struct inode_operations proc_dir_inode_operations = {
+static const struct inode_operations proc_dir_inode_operations = {
.lookup = proc_lookup,
.getattr = proc_getattr,
.setattr = proc_notify_change,
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index e26945ba685b..c372eb151a3a 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -132,7 +132,7 @@ static int proc_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static struct super_operations proc_sops = {
+static const struct super_operations proc_sops = {
.alloc_inode = proc_alloc_inode,
.destroy_inode = proc_destroy_inode,
.read_inode = proc_read_inode,
@@ -161,6 +161,7 @@ struct inode *proc_get_inode(struct super_block *sb, unsigned int ino,
if (!inode)
goto out_ino;
+ PROC_I(inode)->fd = 0;
PROC_I(inode)->pde = de;
if (de) {
if (de->mode) {
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 987c773dbb20..c932aa65e198 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,6 +11,8 @@
#include <linux/proc_fs.h>
+extern int proc_sys_init(void);
+
struct vmalloc_info {
unsigned long used;
unsigned long largest_chunk;
@@ -38,13 +40,13 @@ extern int proc_tgid_stat(struct task_struct *, char *);
extern int proc_pid_status(struct task_struct *, char *);
extern int proc_pid_statm(struct task_struct *, char *);
-extern struct file_operations proc_maps_operations;
-extern struct file_operations proc_numa_maps_operations;
-extern struct file_operations proc_smaps_operations;
+extern const struct file_operations proc_maps_operations;
+extern const struct file_operations proc_numa_maps_operations;
+extern const struct file_operations proc_smaps_operations;
-extern struct file_operations proc_maps_operations;
-extern struct file_operations proc_numa_maps_operations;
-extern struct file_operations proc_smaps_operations;
+extern const struct file_operations proc_maps_operations;
+extern const struct file_operations proc_numa_maps_operations;
+extern const struct file_operations proc_smaps_operations;
void free_proc_entry(struct proc_dir_entry *de);
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
index 5ec67257e5f9..22f789de3909 100644
--- a/fs/proc/nommu.c
+++ b/fs/proc/nommu.c
@@ -128,7 +128,7 @@ static int proc_nommu_vma_list_open(struct inode *inode, struct file *file)
return seq_open(file, &proc_nommu_vma_list_seqop);
}
-static struct file_operations proc_nommu_vma_list_operations = {
+static const struct file_operations proc_nommu_vma_list_operations = {
.open = proc_nommu_vma_list_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index b37ce33f67ea..e2c4c0a5c90d 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -121,16 +121,11 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
{
struct sysinfo i;
int len;
- unsigned long inactive;
- unsigned long active;
- unsigned long free;
unsigned long committed;
unsigned long allowed;
struct vmalloc_info vmi;
long cached;
- get_zone_counts(&active, &inactive, &free);
-
/*
* display in kilobytes.
*/
@@ -187,8 +182,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
K(i.bufferram),
K(cached),
K(total_swapcache_pages),
- K(active),
- K(inactive),
+ K(global_page_state(NR_ACTIVE)),
+ K(global_page_state(NR_INACTIVE)),
#ifdef CONFIG_HIGHMEM
K(i.totalhigh),
K(i.freehigh),
@@ -228,7 +223,7 @@ static int fragmentation_open(struct inode *inode, struct file *file)
return seq_open(file, &fragmentation_op);
}
-static struct file_operations fragmentation_file_operations = {
+static const struct file_operations fragmentation_file_operations = {
.open = fragmentation_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -241,7 +236,7 @@ static int zoneinfo_open(struct inode *inode, struct file *file)
return seq_open(file, &zoneinfo_op);
}
-static struct file_operations proc_zoneinfo_file_operations = {
+static const struct file_operations proc_zoneinfo_file_operations = {
.open = zoneinfo_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -266,7 +261,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
return seq_open(file, &cpuinfo_op);
}
-static struct file_operations proc_cpuinfo_operations = {
+static const struct file_operations proc_cpuinfo_operations = {
.open = cpuinfo_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -325,7 +320,7 @@ static int devinfo_open(struct inode *inode, struct file *filp)
return seq_open(filp, &devinfo_ops);
}
-static struct file_operations proc_devinfo_operations = {
+static const struct file_operations proc_devinfo_operations = {
.open = devinfo_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -337,7 +332,7 @@ static int vmstat_open(struct inode *inode, struct file *file)
{
return seq_open(file, &vmstat_op);
}
-static struct file_operations proc_vmstat_file_operations = {
+static const struct file_operations proc_vmstat_file_operations = {
.open = vmstat_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -368,7 +363,7 @@ static int partitions_open(struct inode *inode, struct file *file)
{
return seq_open(file, &partitions_op);
}
-static struct file_operations proc_partitions_operations = {
+static const struct file_operations proc_partitions_operations = {
.open = partitions_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -380,7 +375,7 @@ static int diskstats_open(struct inode *inode, struct file *file)
{
return seq_open(file, &diskstats_op);
}
-static struct file_operations proc_diskstats_operations = {
+static const struct file_operations proc_diskstats_operations = {
.open = diskstats_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -394,7 +389,7 @@ static int modules_open(struct inode *inode, struct file *file)
{
return seq_open(file, &modules_op);
}
-static struct file_operations proc_modules_operations = {
+static const struct file_operations proc_modules_operations = {
.open = modules_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -409,7 +404,7 @@ static int slabinfo_open(struct inode *inode, struct file *file)
{
return seq_open(file, &slabinfo_op);
}
-static struct file_operations proc_slabinfo_operations = {
+static const struct file_operations proc_slabinfo_operations = {
.open = slabinfo_open,
.read = seq_read,
.write = slabinfo_write,
@@ -443,7 +438,7 @@ static int slabstats_release(struct inode *inode, struct file *file)
return seq_release(inode, file);
}
-static struct file_operations proc_slabstats_operations = {
+static const struct file_operations proc_slabstats_operations = {
.open = slabstats_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -556,7 +551,7 @@ static int stat_open(struct inode *inode, struct file *file)
kfree(buf);
return res;
}
-static struct file_operations proc_stat_operations = {
+static const struct file_operations proc_stat_operations = {
.open = stat_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -598,7 +593,7 @@ static int interrupts_open(struct inode *inode, struct file *filp)
return seq_open(filp, &int_seq_ops);
}
-static struct file_operations proc_interrupts_operations = {
+static const struct file_operations proc_interrupts_operations = {
.open = interrupts_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -655,7 +650,7 @@ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
return count;
}
-static struct file_operations proc_sysrq_trigger_operations = {
+static const struct file_operations proc_sysrq_trigger_operations = {
.write = write_sysrq_trigger,
};
#endif
@@ -672,7 +667,6 @@ void create_seq_entry(char *name, mode_t mode, const struct file_operations *f)
void __init proc_misc_init(void)
{
- struct proc_dir_entry *entry;
static struct {
char *name;
int (*read_proc)(char*,char**,off_t,int,int*,void*);
@@ -700,9 +694,12 @@ void __init proc_misc_init(void)
/* And now for trickier ones */
#ifdef CONFIG_PRINTK
- entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
- if (entry)
- entry->proc_fops = &proc_kmsg_operations;
+ {
+ struct proc_dir_entry *entry;
+ entry = create_proc_entry("kmsg", S_IRUSR, &proc_root);
+ if (entry)
+ entry->proc_fops = &proc_kmsg_operations;
+ }
#endif
create_seq_entry("devices", 0, &proc_devinfo_operations);
create_seq_entry("cpuinfo", 0, &proc_cpuinfo_operations);
@@ -743,8 +740,11 @@ void __init proc_misc_init(void)
proc_vmcore->proc_fops = &proc_vmcore_operations;
#endif
#ifdef CONFIG_MAGIC_SYSRQ
- entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
- if (entry)
- entry->proc_fops = &proc_sysrq_trigger_operations;
+ {
+ struct proc_dir_entry *entry;
+ entry = create_proc_entry("sysrq-trigger", S_IWUSR, NULL);
+ if (entry)
+ entry->proc_fops = &proc_sysrq_trigger_operations;
+ }
#endif
}
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
new file mode 100644
index 000000000000..20e8cbb34364
--- /dev/null
+++ b/fs/proc/proc_sysctl.c
@@ -0,0 +1,479 @@
+/*
+ * /proc/sys support
+ */
+
+#include <linux/sysctl.h>
+#include <linux/proc_fs.h>
+#include <linux/security.h>
+#include "internal.h"
+
+static struct dentry_operations proc_sys_dentry_operations;
+static const struct file_operations proc_sys_file_operations;
+static struct inode_operations proc_sys_inode_operations;
+
+static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
+{
+ /* Refresh the cached information bits in the inode */
+ if (table) {
+ inode->i_uid = 0;
+ inode->i_gid = 0;
+ inode->i_mode = table->mode;
+ if (table->proc_handler) {
+ inode->i_mode |= S_IFREG;
+ inode->i_nlink = 1;
+ } else {
+ inode->i_mode |= S_IFDIR;
+ inode->i_nlink = 0; /* It is too hard to figure out */
+ }
+ }
+}
+
+static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
+{
+ struct inode *inode;
+ struct proc_inode *dir_ei, *ei;
+ int depth;
+
+ inode = new_inode(dir->i_sb);
+ if (!inode)
+ goto out;
+
+ /* A directory is always one deeper than it's parent */
+ dir_ei = PROC_I(dir);
+ depth = dir_ei->fd + 1;
+
+ ei = PROC_I(inode);
+ ei->fd = depth;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+ inode->i_op = &proc_sys_inode_operations;
+ inode->i_fop = &proc_sys_file_operations;
+ inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
+ proc_sys_refresh_inode(inode, table);
+out:
+ return inode;
+}
+
+static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth)
+{
+ for (;;) {
+ struct proc_inode *ei;
+
+ ei = PROC_I(dentry->d_inode);
+ if (ei->fd == depth)
+ break; /* found */
+
+ dentry = dentry->d_parent;
+ }
+ return dentry;
+}
+
+static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
+ struct qstr *name)
+{
+ int len;
+ for ( ; table->ctl_name || table->procname; table++) {
+
+ if (!table->procname)
+ continue;
+
+ len = strlen(table->procname);
+ if (len != name->len)
+ continue;
+
+ if (memcmp(table->procname, name->name, len) != 0)
+ continue;
+
+ /* I have a match */
+ return table;
+ }
+ return NULL;
+}
+
+static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry,
+ struct ctl_table *table)
+{
+ struct dentry *ancestor;
+ struct proc_inode *ei;
+ int depth, i;
+
+ ei = PROC_I(dentry->d_inode);
+ depth = ei->fd;
+
+ if (depth == 0)
+ return table;
+
+ for (i = 1; table && (i <= depth); i++) {
+ ancestor = proc_sys_ancestor(dentry, i);
+ table = proc_sys_lookup_table_one(table, &ancestor->d_name);
+ if (table)
+ table = table->child;
+ }
+ return table;
+
+}
+static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
+ struct qstr *name,
+ struct ctl_table *table)
+{
+ table = proc_sys_lookup_table(dparent, table);
+ if (table)
+ table = proc_sys_lookup_table_one(table, name);
+ return table;
+}
+
+static struct ctl_table *do_proc_sys_lookup(struct dentry *parent,
+ struct qstr *name,
+ struct ctl_table_header **ptr)
+{
+ struct ctl_table_header *head;
+ struct ctl_table *table = NULL;
+
+ for (head = sysctl_head_next(NULL); head;
+ head = sysctl_head_next(head)) {
+ table = proc_sys_lookup_entry(parent, name, head->ctl_table);
+ if (table)
+ break;
+ }
+ *ptr = head;
+ return table;
+}
+
+static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
+ struct nameidata *nd)
+{
+ struct ctl_table_header *head;
+ struct inode *inode;
+ struct dentry *err;
+ struct ctl_table *table;
+
+ err = ERR_PTR(-ENOENT);
+ table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
+ if (!table)
+ goto out;
+
+ err = ERR_PTR(-ENOMEM);
+ inode = proc_sys_make_inode(dir, table);
+ if (!inode)
+ goto out;
+
+ err = NULL;
+ dentry->d_op = &proc_sys_dentry_operations;
+ d_add(dentry, inode);
+
+out:
+ sysctl_head_finish(head);
+ return err;
+}
+
+static ssize_t proc_sys_read(struct file *filp, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct dentry *dentry = filp->f_dentry;
+ struct ctl_table_header *head;
+ struct ctl_table *table;
+ ssize_t error, res;
+
+ table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
+ /* Has the sysctl entry disappeared on us? */
+ error = -ENOENT;
+ if (!table)
+ goto out;
+
+ /* Has the sysctl entry been replaced by a directory? */
+ error = -EISDIR;
+ if (!table->proc_handler)
+ goto out;
+
+ /*
+ * At this point we know that the sysctl was not unregistered
+ * and won't be until we finish.
+ */
+ error = -EPERM;
+ if (sysctl_perm(table, MAY_READ))
+ goto out;
+
+ /* careful: calling conventions are nasty here */
+ res = count;
+ error = table->proc_handler(table, 0, filp, buf, &res, ppos);
+ if (!error)
+ error = res;
+out:
+ sysctl_head_finish(head);
+
+ return error;
+}
+
+static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct dentry *dentry = filp->f_dentry;
+ struct ctl_table_header *head;
+ struct ctl_table *table;
+ ssize_t error, res;
+
+ table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
+ /* Has the sysctl entry disappeared on us? */
+ error = -ENOENT;
+ if (!table)
+ goto out;
+
+ /* Has the sysctl entry been replaced by a directory? */
+ error = -EISDIR;
+ if (!table->proc_handler)
+ goto out;
+
+ /*
+ * At this point we know that the sysctl was not unregistered
+ * and won't be until we finish.
+ */
+ error = -EPERM;
+ if (sysctl_perm(table, MAY_WRITE))
+ goto out;
+
+ /* careful: calling conventions are nasty here */
+ res = count;
+ error = table->proc_handler(table, 1, filp, (char __user *)buf,
+ &res, ppos);
+ if (!error)
+ error = res;
+out:
+ sysctl_head_finish(head);
+
+ return error;
+}
+
+
+static int proc_sys_fill_cache(struct file *filp, void *dirent,
+ filldir_t filldir, struct ctl_table *table)
+{
+ struct ctl_table_header *head;
+ struct ctl_table *child_table = NULL;
+ struct dentry *child, *dir = filp->f_path.dentry;
+ struct inode *inode;
+ struct qstr qname;
+ ino_t ino = 0;
+ unsigned type = DT_UNKNOWN;
+ int ret;
+
+ qname.name = table->procname;
+ qname.len = strlen(table->procname);
+ qname.hash = full_name_hash(qname.name, qname.len);
+
+ /* Suppress duplicates.
+ * Only fill a directory entry if it is the value that
+ * an ordinary lookup of that name returns. Hide all
+ * others.
+ *
+ * If we ever cache this translation in the dcache
+ * I should do a dcache lookup first. But for now
+ * it is just simpler not to.
+ */
+ ret = 0;
+ child_table = do_proc_sys_lookup(dir, &qname, &head);
+ sysctl_head_finish(head);
+ if (child_table != table)
+ return 0;
+
+ child = d_lookup(dir, &qname);
+ if (!child) {
+ struct dentry *new;
+ new = d_alloc(dir, &qname);
+ if (new) {
+ inode = proc_sys_make_inode(dir->d_inode, table);
+ if (!inode)
+ child = ERR_PTR(-ENOMEM);
+ else {
+ new->d_op = &proc_sys_dentry_operations;
+ d_add(new, inode);
+ }
+ if (child)
+ dput(new);
+ else
+ child = new;
+ }
+ }
+ if (!child || IS_ERR(child) || !child->d_inode)
+ goto end_instantiate;
+ inode = child->d_inode;
+ if (inode) {
+ ino = inode->i_ino;
+ type = inode->i_mode >> 12;
+ }
+ dput(child);
+end_instantiate:
+ if (!ino)
+ ino= find_inode_number(dir, &qname);
+ if (!ino)
+ ino = 1;
+ return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
+}
+
+static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
+{
+ struct dentry *dentry = filp->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct ctl_table_header *head = NULL;
+ struct ctl_table *table;
+ unsigned long pos;
+ int ret;
+
+ ret = -ENOTDIR;
+ if (!S_ISDIR(inode->i_mode))
+ goto out;
+
+ ret = 0;
+ /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
+ if (filp->f_pos == 0) {
+ if (filldir(dirent, ".", 1, filp->f_pos,
+ inode->i_ino, DT_DIR) < 0)
+ goto out;
+ filp->f_pos++;
+ }
+ if (filp->f_pos == 1) {
+ if (filldir(dirent, "..", 2, filp->f_pos,
+ parent_ino(dentry), DT_DIR) < 0)
+ goto out;
+ filp->f_pos++;
+ }
+ pos = 2;
+
+ /* - Find each instance of the directory
+ * - Read all entries in each instance
+ * - Before returning an entry to user space lookup the entry
+ * by name and if I find a different entry don't return
+ * this one because it means it is a buried dup.
+ * For sysctl this should only happen for directory entries.
+ */
+ for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
+ table = proc_sys_lookup_table(dentry, head->ctl_table);
+
+ if (!table)
+ continue;
+
+ for (; table->ctl_name || table->procname; table++, pos++) {
+ /* Can't do anything without a proc name */
+ if (!table->procname)
+ continue;
+
+ if (pos < filp->f_pos)
+ continue;
+
+ if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
+ goto out;
+ filp->f_pos = pos + 1;
+ }
+ }
+ ret = 1;
+out:
+ sysctl_head_finish(head);
+ return ret;
+}
+
+static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd)
+{
+ /*
+ * sysctl entries that are not writeable,
+ * are _NOT_ writeable, capabilities or not.
+ */
+ struct ctl_table_header *head;
+ struct ctl_table *table;
+ struct dentry *dentry;
+ int mode;
+ int depth;
+ int error;
+
+ head = NULL;
+ depth = PROC_I(inode)->fd;
+
+ /* First check the cached permissions, in case we don't have
+ * enough information to lookup the sysctl table entry.
+ */
+ error = -EACCES;
+ mode = inode->i_mode;
+
+ if (current->euid == 0)
+ mode >>= 6;
+ else if (in_group_p(0))
+ mode >>= 3;
+
+ if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
+ error = 0;
+
+ /* If we can't get a sysctl table entry the permission
+ * checks on the cached mode will have to be enough.
+ */
+ if (!nd || !depth)
+ goto out;
+
+ dentry = nd->dentry;
+ table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
+
+ /* If the entry does not exist deny permission */
+ error = -EACCES;
+ if (!table)
+ goto out;
+
+ /* Use the permissions on the sysctl table entry */
+ error = sysctl_perm(table, mask);
+out:
+ sysctl_head_finish(head);
+ return error;
+}
+
+static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ struct inode *inode = dentry->d_inode;
+ int error;
+
+ if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
+ return -EPERM;
+
+ error = inode_change_ok(inode, attr);
+ if (!error) {
+ error = security_inode_setattr(dentry, attr);
+ if (!error)
+ error = inode_setattr(inode, attr);
+ }
+
+ return error;
+}
+
+/* I'm lazy and don't distinguish between files and directories,
+ * until access time.
+ */
+static const struct file_operations proc_sys_file_operations = {
+ .read = proc_sys_read,
+ .write = proc_sys_write,
+ .readdir = proc_sys_readdir,
+};
+
+static struct inode_operations proc_sys_inode_operations = {
+ .lookup = proc_sys_lookup,
+ .permission = proc_sys_permission,
+ .setattr = proc_sys_setattr,
+};
+
+static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ struct ctl_table_header *head;
+ struct ctl_table *table;
+ table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
+ proc_sys_refresh_inode(dentry->d_inode, table);
+ sysctl_head_finish(head);
+ return !!table;
+}
+
+static struct dentry_operations proc_sys_dentry_operations = {
+ .d_revalidate = proc_sys_revalidate,
+};
+
+static struct proc_dir_entry *proc_sys_root;
+
+int proc_sys_init(void)
+{
+ proc_sys_root = proc_mkdir("sys", NULL);
+ proc_sys_root->proc_iops = &proc_sys_inode_operations;
+ proc_sys_root->proc_fops = &proc_sys_file_operations;
+ proc_sys_root->nlink = 0;
+ return 0;
+}
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 15c4455b09eb..c1bbfbeb035e 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -138,7 +138,7 @@ static int tty_drivers_open(struct inode *inode, struct file *file)
return seq_open(file, &tty_drivers_op);
}
-static struct file_operations proc_tty_drivers_operations = {
+static const struct file_operations proc_tty_drivers_operations = {
.open = tty_drivers_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 64d242b6dcfa..5834a744c2a9 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -23,10 +23,6 @@
struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc_root_driver;
-#ifdef CONFIG_SYSCTL
-struct proc_dir_entry *proc_sys_root;
-#endif
-
static int proc_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
{
@@ -71,13 +67,6 @@ void __init proc_root_init(void)
#ifdef CONFIG_SYSVIPC
proc_mkdir("sysvipc", NULL);
#endif
-#ifdef CONFIG_SYSCTL
- proc_sys_root = proc_mkdir("sys", NULL);
-#endif
-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
- proc_mkdir("sys/fs", NULL);
- proc_mkdir("sys/fs/binfmt_misc", NULL);
-#endif
proc_root_fs = proc_mkdir("fs", NULL);
proc_root_driver = proc_mkdir("driver", NULL);
proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
@@ -90,6 +79,9 @@ void __init proc_root_init(void)
proc_device_tree_init();
#endif
proc_bus = proc_mkdir("bus", NULL);
+#ifdef CONFIG_SYSCTL
+ proc_sys_init();
+#endif
}
static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
@@ -136,7 +128,7 @@ static int proc_root_readdir(struct file * filp,
* <pid> directories. Thus we don't use the generic
* directory handling functions for that..
*/
-static struct file_operations proc_root_operations = {
+static const struct file_operations proc_root_operations = {
.read = generic_read_dir,
.readdir = proc_root_readdir,
};
@@ -144,7 +136,7 @@ static struct file_operations proc_root_operations = {
/*
* proc root can do almost nothing..
*/
-static struct inode_operations proc_root_inode_operations = {
+static const struct inode_operations proc_root_inode_operations = {
.lookup = proc_root_lookup,
.getattr = proc_root_getattr,
};
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 55ade0d15621..7445980c8022 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -434,7 +434,7 @@ static int maps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_maps_op);
}
-struct file_operations proc_maps_operations = {
+const struct file_operations proc_maps_operations = {
.open = maps_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -456,7 +456,7 @@ static int numa_maps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_numa_maps_op);
}
-struct file_operations proc_numa_maps_operations = {
+const struct file_operations proc_numa_maps_operations = {
.open = numa_maps_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -469,7 +469,7 @@ static int smaps_open(struct inode *inode, struct file *file)
return do_maps_open(inode, file, &proc_pid_smaps_op);
}
-struct file_operations proc_smaps_operations = {
+const struct file_operations proc_smaps_operations = {
.open = smaps_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index fcc5caf93f55..7cddf6b8635a 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -220,7 +220,7 @@ static int maps_open(struct inode *inode, struct file *file)
return ret;
}
-struct file_operations proc_maps_operations = {
+const struct file_operations proc_maps_operations = {
.open = maps_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index c94db1db7a71..ea9ffefb48ad 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -87,7 +87,7 @@ const struct file_operations qnx4_dir_operations =
.fsync = file_fsync,
};
-struct inode_operations qnx4_dir_inode_operations =
+const struct inode_operations qnx4_dir_inode_operations =
{
.lookup = qnx4_lookup,
#ifdef CONFIG_QNX4FS_RW
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 467e5ac7280e..44649981bbc8 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -33,7 +33,7 @@ const struct file_operations qnx4_file_operations =
#endif
};
-struct inode_operations qnx4_file_inode_operations =
+const struct inode_operations qnx4_file_inode_operations =
{
#ifdef CONFIG_QNX4FS_RW
.truncate = qnx4_truncate,
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index c047dc654d5c..83bc8e7824cd 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -30,7 +30,7 @@
#define QNX4_VERSION 4
#define QNX4_BMNAME ".bitmap"
-static struct super_operations qnx4_sops;
+static const struct super_operations qnx4_sops;
#ifdef CONFIG_QNX4FS_RW
@@ -129,7 +129,7 @@ static void qnx4_read_inode(struct inode *);
static int qnx4_remount(struct super_block *sb, int *flags, char *data);
static int qnx4_statfs(struct dentry *, struct kstatfs *);
-static struct super_operations qnx4_sops =
+static const struct super_operations qnx4_sops =
{
.alloc_inode = qnx4_alloc_inode,
.destroy_inode = qnx4_destroy_inode,
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 54ebbc84207f..2f14774a124f 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -31,7 +31,7 @@ const struct address_space_operations ramfs_aops = {
.readpage = simple_readpage,
.prepare_write = simple_prepare_write,
.commit_write = simple_commit_write,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = __set_page_dirty_no_writeback,
};
const struct file_operations ramfs_file_operations = {
@@ -45,6 +45,6 @@ const struct file_operations ramfs_file_operations = {
.llseek = generic_file_llseek,
};
-struct inode_operations ramfs_file_inode_operations = {
+const struct inode_operations ramfs_file_inode_operations = {
.getattr = simple_getattr,
};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index e9d6c4733282..d3fd7c6732d2 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -32,7 +32,7 @@ const struct address_space_operations ramfs_aops = {
.readpage = simple_readpage,
.prepare_write = simple_prepare_write,
.commit_write = simple_commit_write,
- .set_page_dirty = __set_page_dirty_nobuffers,
+ .set_page_dirty = __set_page_dirty_no_writeback,
};
const struct file_operations ramfs_file_operations = {
@@ -47,7 +47,7 @@ const struct file_operations ramfs_file_operations = {
.llseek = generic_file_llseek,
};
-struct inode_operations ramfs_file_inode_operations = {
+const struct inode_operations ramfs_file_inode_operations = {
.setattr = ramfs_nommu_setattr,
.getattr = simple_getattr,
};
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 2faf4cdf61b0..ff1f7639707b 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -40,8 +40,8 @@
/* some random number */
#define RAMFS_MAGIC 0x858458f6
-static struct super_operations ramfs_ops;
-static struct inode_operations ramfs_dir_inode_operations;
+static const struct super_operations ramfs_ops;
+static const struct inode_operations ramfs_dir_inode_operations;
static struct backing_dev_info ramfs_backing_dev_info = {
.ra_pages = 0, /* No readahead */
@@ -143,7 +143,7 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
return error;
}
-static struct inode_operations ramfs_dir_inode_operations = {
+static const struct inode_operations ramfs_dir_inode_operations = {
.create = ramfs_create,
.lookup = simple_lookup,
.link = simple_link,
@@ -155,7 +155,7 @@ static struct inode_operations ramfs_dir_inode_operations = {
.rename = simple_rename,
};
-static struct super_operations ramfs_ops = {
+static const struct super_operations ramfs_ops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
};
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index c2bb58e74653..af7cc074a476 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -12,4 +12,4 @@
extern const struct address_space_operations ramfs_aops;
extern const struct file_operations ramfs_file_operations;
-extern struct inode_operations ramfs_file_inode_operations;
+extern const struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/read_write.c b/fs/read_write.c
index 707ac21700d3..1f8dc373ede7 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -197,13 +197,13 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
struct inode *inode;
loff_t pos;
+ inode = file->f_path.dentry->d_inode;
if (unlikely((ssize_t) count < 0))
goto Einval;
pos = *ppos;
if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
goto Einval;
- inode = file->f_path.dentry->d_inode;
if (unlikely(inode->i_flock && MANDATORY_LOCK(inode))) {
int retval = locks_mandatory_area(
read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
@@ -274,9 +274,9 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
ret = do_sync_read(file, buf, count, pos);
if (ret > 0) {
fsnotify_access(file->f_path.dentry);
- current->rchar += ret;
+ add_rchar(current, ret);
}
- current->syscr++;
+ inc_syscr(current);
}
}
@@ -332,9 +332,9 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
ret = do_sync_write(file, buf, count, pos);
if (ret > 0) {
fsnotify_modify(file->f_path.dentry);
- current->wchar += ret;
+ add_wchar(current, ret);
}
- current->syscw++;
+ inc_syscw(current);
}
}
@@ -675,8 +675,8 @@ sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
}
if (ret > 0)
- current->rchar += ret;
- current->syscr++;
+ add_rchar(current, ret);
+ inc_syscr(current);
return ret;
}
@@ -696,8 +696,8 @@ sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen)
}
if (ret > 0)
- current->wchar += ret;
- current->syscw++;
+ add_wchar(current, ret);
+ inc_syscw(current);
return ret;
}
@@ -779,12 +779,12 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file);
if (retval > 0) {
- current->rchar += retval;
- current->wchar += retval;
+ add_rchar(current, retval);
+ add_wchar(current, retval);
}
- current->syscr++;
- current->syscw++;
+ inc_syscr(current);
+ inc_syscw(current);
if (*ppos > max)
retval = -EOVERFLOW;
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index fba304e64de8..f85c5cf4934c 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -19,6 +19,7 @@
#include <linux/time.h>
#include <linux/reiserfs_fs.h>
#include <linux/buffer_head.h>
+#include <linux/kernel.h>
#ifdef CONFIG_REISERFS_CHECK
@@ -1756,7 +1757,7 @@ static void store_thrown(struct tree_balance *tb, struct buffer_head *bh)
if (buffer_dirty(bh))
reiserfs_warning(tb->tb_sb,
"store_thrown deals with dirty buffer");
- for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++)
+ for (i = 0; i < ARRAY_SIZE(tb->thrown); i++)
if (!tb->thrown[i]) {
tb->thrown[i] = bh;
get_bh(bh); /* free_thrown puts this */
@@ -1769,7 +1770,7 @@ static void free_thrown(struct tree_balance *tb)
{
int i;
b_blocknr_t blocknr;
- for (i = 0; i < sizeof(tb->thrown) / sizeof(tb->thrown[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tb->thrown); i++) {
if (tb->thrown[i]) {
blocknr = tb->thrown[i]->b_blocknr;
if (buffer_dirty(tb->thrown[i]))
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 5109f1d5e7ff..abfada2f52db 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1556,7 +1556,7 @@ const struct file_operations reiserfs_file_operations = {
.splice_write = generic_file_splice_write,
};
-struct inode_operations reiserfs_file_inode_operations = {
+const struct inode_operations reiserfs_file_inode_operations = {
.truncate = reiserfs_vfs_truncate_file,
.setattr = reiserfs_setattr,
.setxattr = reiserfs_setxattr,
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 23f5cd5bbf56..a2161840bc7c 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1525,7 +1525,7 @@ static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/*
* directories can handle most operations...
*/
-struct inode_operations reiserfs_dir_inode_operations = {
+const struct inode_operations reiserfs_dir_inode_operations = {
//&reiserfs_dir_operations, /* default_file_ops */
.create = reiserfs_create,
.lookup = reiserfs_lookup,
@@ -1548,7 +1548,7 @@ struct inode_operations reiserfs_dir_inode_operations = {
* symlink operations.. same as page_symlink_inode_operations, with xattr
* stuff added
*/
-struct inode_operations reiserfs_symlink_inode_operations = {
+const struct inode_operations reiserfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
@@ -1564,7 +1564,7 @@ struct inode_operations reiserfs_symlink_inode_operations = {
/*
* special file operations.. just xattr/acl stuff
*/
-struct inode_operations reiserfs_special_inode_operations = {
+const struct inode_operations reiserfs_special_inode_operations = {
.setattr = reiserfs_setattr,
.setxattr = reiserfs_setxattr,
.getxattr = reiserfs_getxattr,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 58ad4551a7c1..f13a7f164dc6 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -593,7 +593,7 @@ static ssize_t reiserfs_quota_read(struct super_block *, int, char *, size_t,
loff_t);
#endif
-static struct super_operations reiserfs_sops = {
+static const struct super_operations reiserfs_sops = {
.alloc_inode = reiserfs_alloc_inode,
.destroy_inode = reiserfs_destroy_inode,
.write_inode = reiserfs_write_inode,
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index d3e243a6f609..fd601014813e 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -110,7 +110,7 @@ romfs_checksum(void *data, int size)
return sum;
}
-static struct super_operations romfs_ops;
+static const struct super_operations romfs_ops;
static int romfs_fill_super(struct super_block *s, void *data, int silent)
{
@@ -468,7 +468,7 @@ static const struct file_operations romfs_dir_operations = {
.readdir = romfs_readdir,
};
-static struct inode_operations romfs_dir_inode_operations = {
+static const struct inode_operations romfs_dir_inode_operations = {
.lookup = romfs_lookup,
};
@@ -598,7 +598,7 @@ static int romfs_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static struct super_operations romfs_ops = {
+static const struct super_operations romfs_ops = {
.alloc_inode = romfs_alloc_inode,
.destroy_inode = romfs_destroy_inode,
.read_inode = romfs_read_inode,
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
index b1e58d1ac9ca..50136b1a3eca 100644
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -42,7 +42,7 @@ const struct file_operations smb_dir_operations =
.open = smb_dir_open,
};
-struct inode_operations smb_dir_inode_operations =
+const struct inode_operations smb_dir_inode_operations =
{
.create = smb_create,
.lookup = smb_lookup,
@@ -54,7 +54,7 @@ struct inode_operations smb_dir_inode_operations =
.setattr = smb_notify_change,
};
-struct inode_operations smb_dir_inode_operations_unix =
+const struct inode_operations smb_dir_inode_operations_unix =
{
.create = smb_create,
.lookup = smb_lookup,
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index e50533a79517..f161797160c4 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -418,7 +418,7 @@ const struct file_operations smb_file_operations =
.sendfile = smb_file_sendfile,
};
-struct inode_operations smb_file_inode_operations =
+const struct inode_operations smb_file_inode_operations =
{
.permission = smb_file_permission,
.getattr = smb_getattr,
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 84dfe3f3482e..5faba4f1c9ab 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -98,7 +98,7 @@ static int smb_remount(struct super_block *sb, int *flags, char *data)
return 0;
}
-static struct super_operations smb_sops =
+static const struct super_operations smb_sops =
{
.alloc_inode = smb_alloc_inode,
.destroy_inode = smb_destroy_inode,
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
index 34fb462b2379..03f456c1b7d4 100644
--- a/fs/smbfs/proto.h
+++ b/fs/smbfs/proto.h
@@ -36,8 +36,8 @@ extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, stru
extern void smb_install_null_ops(struct smb_ops *ops);
/* dir.c */
extern const struct file_operations smb_dir_operations;
-extern struct inode_operations smb_dir_inode_operations;
-extern struct inode_operations smb_dir_inode_operations_unix;
+extern const struct inode_operations smb_dir_inode_operations;
+extern const struct inode_operations smb_dir_inode_operations_unix;
extern void smb_new_dentry(struct dentry *dentry);
extern void smb_renew_times(struct dentry *dentry);
/* cache.c */
@@ -65,7 +65,7 @@ extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
/* file.c */
extern const struct address_space_operations smb_file_aops;
extern const struct file_operations smb_file_operations;
-extern struct inode_operations smb_file_inode_operations;
+extern const struct inode_operations smb_file_inode_operations;
/* ioctl.c */
extern int smb_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg);
/* smbiod.c */
@@ -84,4 +84,4 @@ extern int smb_request_send_server(struct smb_sb_info *server);
extern int smb_request_recv(struct smb_sb_info *server);
/* symlink.c */
extern int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname);
-extern struct inode_operations smb_link_inode_operations;
+extern const struct inode_operations smb_link_inode_operations;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
index a4bcae8a9aff..42261dbdf60f 100644
--- a/fs/smbfs/request.c
+++ b/fs/smbfs/request.c
@@ -61,7 +61,7 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
struct smb_request *req;
unsigned char *buf = NULL;
- req = kmem_cache_alloc(req_cachep, GFP_KERNEL);
+ req = kmem_cache_zalloc(req_cachep, GFP_KERNEL);
VERBOSE("allocating request: %p\n", req);
if (!req)
goto out;
@@ -74,7 +74,6 @@ static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
}
}
- memset(req, 0, sizeof(struct smb_request));
req->rq_buffer = buf;
req->rq_bufsize = bufsize;
req->rq_server = server;
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
index cdc53c4fb381..fea20ceb8a5f 100644
--- a/fs/smbfs/symlink.c
+++ b/fs/smbfs/symlink.c
@@ -6,7 +6,6 @@
* Please add a note about your changes to smbfs in the ChangeLog file.
*/
-#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
@@ -62,7 +61,7 @@ static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
__putname(s);
}
-struct inode_operations smb_link_inode_operations =
+const struct inode_operations smb_link_inode_operations =
{
.readlink = generic_readlink,
.follow_link = smb_follow_link,
diff --git a/fs/stack.c b/fs/stack.c
index 8ffb880d2f46..67716f6a1a4a 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -20,11 +20,6 @@ EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
int (*get_nlinks)(struct inode *))
{
- if (!get_nlinks)
- dest->i_nlink = src->i_nlink;
- else
- dest->i_nlink = (*get_nlinks)(dest);
-
dest->i_mode = src->i_mode;
dest->i_uid = src->i_uid;
dest->i_gid = src->i_gid;
@@ -34,5 +29,14 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
dest->i_ctime = src->i_ctime;
dest->i_blkbits = src->i_blkbits;
dest->i_flags = src->i_flags;
+
+ /*
+ * Update the nlinks AFTER updating the above fields, because the
+ * get_links callback may depend on them.
+ */
+ if (!get_nlinks)
+ dest->i_nlink = src->i_nlink;
+ else
+ dest->i_nlink = (*get_nlinks)(dest);
}
EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/super.c b/fs/super.c
index 3e7458c2bb76..60b1e50cbf53 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -285,7 +285,7 @@ int fsync_super(struct super_block *sb)
*/
void generic_shutdown_super(struct super_block *sb)
{
- struct super_operations *sop = sb->s_op;
+ const struct super_operations *sop = sb->s_op;
if (sb->s_root) {
shrink_dcache_for_umount(sb);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index e8f540d38d48..d3b9f5f07db1 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -16,6 +16,7 @@
#include <linux/slab.h>
#include <asm/uaccess.h>
+#include <asm/semaphore.h>
#include "sysfs.h"
@@ -146,7 +147,7 @@ static int open(struct inode * inode, struct file * file)
Error:
module_put(attr->attr.owner);
Done:
- if (error && kobj)
+ if (error)
kobject_put(kobj);
return error;
}
@@ -157,8 +158,7 @@ static int release(struct inode * inode, struct file * file)
struct bin_attribute * attr = to_bin_attr(file->f_path.dentry);
u8 * buffer = file->private_data;
- if (kobj)
- kobject_put(kobj);
+ kobject_put(kobj);
module_put(attr->attr.owner);
kfree(buffer);
return 0;
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 511edef8b321..8813990304fe 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/kobject.h>
#include <linux/namei.h>
+#include <asm/semaphore.h>
#include "sysfs.h"
DECLARE_RWSEM(sysfs_rename_sem);
@@ -32,25 +33,39 @@ static struct dentry_operations sysfs_dentry_ops = {
/*
* Allocates a new sysfs_dirent and links it to the parent sysfs_dirent
*/
-static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent * parent_sd,
- void * element)
+static struct sysfs_dirent * __sysfs_new_dirent(void * element)
{
struct sysfs_dirent * sd;
- sd = kmem_cache_alloc(sysfs_dir_cachep, GFP_KERNEL);
+ sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
if (!sd)
return NULL;
- memset(sd, 0, sizeof(*sd));
atomic_set(&sd->s_count, 1);
atomic_set(&sd->s_event, 1);
INIT_LIST_HEAD(&sd->s_children);
- list_add(&sd->s_sibling, &parent_sd->s_children);
+ INIT_LIST_HEAD(&sd->s_sibling);
sd->s_element = element;
return sd;
}
+static void __sysfs_list_dirent(struct sysfs_dirent *parent_sd,
+ struct sysfs_dirent *sd)
+{
+ if (sd)
+ list_add(&sd->s_sibling, &parent_sd->s_children);
+}
+
+static struct sysfs_dirent * sysfs_new_dirent(struct sysfs_dirent *parent_sd,
+ void * element)
+{
+ struct sysfs_dirent *sd;
+ sd = __sysfs_new_dirent(element);
+ __sysfs_list_dirent(parent_sd, sd);
+ return sd;
+}
+
/*
*
* Return -EEXIST if there is already a sysfs element with the same name for
@@ -77,14 +92,14 @@ int sysfs_dirent_exist(struct sysfs_dirent *parent_sd,
}
-int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
- void * element, umode_t mode, int type)
+static struct sysfs_dirent *
+__sysfs_make_dirent(struct dentry *dentry, void *element, mode_t mode, int type)
{
struct sysfs_dirent * sd;
- sd = sysfs_new_dirent(parent_sd, element);
+ sd = __sysfs_new_dirent(element);
if (!sd)
- return -ENOMEM;
+ goto out;
sd->s_mode = mode;
sd->s_type = type;
@@ -94,7 +109,19 @@ int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
dentry->d_op = &sysfs_dentry_ops;
}
- return 0;
+out:
+ return sd;
+}
+
+int sysfs_make_dirent(struct sysfs_dirent * parent_sd, struct dentry * dentry,
+ void * element, umode_t mode, int type)
+{
+ struct sysfs_dirent *sd;
+
+ sd = __sysfs_make_dirent(dentry, element, mode, type);
+ __sysfs_list_dirent(parent_sd, sd);
+
+ return sd ? 0 : -ENOMEM;
}
static int init_dir(struct inode * inode)
@@ -165,11 +192,11 @@ int sysfs_create_subdir(struct kobject * k, const char * n, struct dentry ** d)
/**
* sysfs_create_dir - create a directory for an object.
- * @parent: parent parent object.
* @kobj: object we're creating directory for.
+ * @shadow_parent: parent dentry to create under; if NULL, the parent is derived from @kobj.
*/
-int sysfs_create_dir(struct kobject * kobj)
+int sysfs_create_dir(struct kobject * kobj, struct dentry *shadow_parent)
{
struct dentry * dentry = NULL;
struct dentry * parent;
@@ -177,7 +204,9 @@ int sysfs_create_dir(struct kobject * kobj)
BUG_ON(!kobj);
- if (kobj->parent)
+ if (shadow_parent)
+ parent = shadow_parent;
+ else if (kobj->parent)
parent = kobj->parent->dentry;
else if (sysfs_mount && sysfs_mount->mnt_sb)
parent = sysfs_mount->mnt_sb->s_root;
@@ -267,7 +296,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(err);
}
-struct inode_operations sysfs_dir_inode_operations = {
+const struct inode_operations sysfs_dir_inode_operations = {
.lookup = sysfs_lookup,
.setattr = sysfs_setattr,
};
@@ -298,21 +327,12 @@ void sysfs_remove_subdir(struct dentry * d)
}
-/**
- * sysfs_remove_dir - remove an object's directory.
- * @kobj: object.
- *
- * The only thing special about this is that we remove any files in
- * the directory before we remove the directory, and we've inlined
- * what used to be sysfs_rmdir() below, instead of calling separately.
- */
-
-void sysfs_remove_dir(struct kobject * kobj)
+static void __sysfs_remove_dir(struct dentry *dentry)
{
- struct dentry * dentry = dget(kobj->dentry);
struct sysfs_dirent * parent_sd;
struct sysfs_dirent * sd, * tmp;
+ dget(dentry);
if (!dentry)
return;
@@ -333,32 +353,60 @@ void sysfs_remove_dir(struct kobject * kobj)
* Drop reference from dget() on entrance.
*/
dput(dentry);
+}
+
+/**
+ * sysfs_remove_dir - remove an object's directory.
+ * @kobj: object.
+ *
+ * The only thing special about this is that we remove any files in
+ * the directory before we remove the directory, and we've inlined
+ * what used to be sysfs_rmdir() below, instead of calling separately.
+ */
+
+void sysfs_remove_dir(struct kobject * kobj)
+{
+ __sysfs_remove_dir(kobj->dentry);
kobj->dentry = NULL;
}
-int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
+int sysfs_rename_dir(struct kobject * kobj, struct dentry *new_parent,
+ const char *new_name)
{
int error = 0;
- struct dentry * new_dentry, * parent;
-
- if (!strcmp(kobject_name(kobj), new_name))
- return -EINVAL;
+ struct dentry * new_dentry;
- if (!kobj->parent)
- return -EINVAL;
+ if (!new_parent)
+ return -EFAULT;
down_write(&sysfs_rename_sem);
- parent = kobj->parent->dentry;
-
- mutex_lock(&parent->d_inode->i_mutex);
+ mutex_lock(&new_parent->d_inode->i_mutex);
- new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
+ new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
if (!IS_ERR(new_dentry)) {
- if (!new_dentry->d_inode) {
+ /* By allowing two different directories with the
+ * same d_parent we allow this routine to move
+ * between different shadows of the same directory
+ */
+ if (kobj->dentry->d_parent->d_inode != new_parent->d_inode)
+ return -EINVAL;
+ else if (new_dentry->d_parent->d_inode != new_parent->d_inode)
+ error = -EINVAL;
+ else if (new_dentry == kobj->dentry)
+ error = -EINVAL;
+ else if (!new_dentry->d_inode) {
error = kobject_set_name(kobj, "%s", new_name);
if (!error) {
+ struct sysfs_dirent *sd, *parent_sd;
+
d_add(new_dentry, NULL);
d_move(kobj->dentry, new_dentry);
+
+ sd = kobj->dentry->d_fsdata;
+ parent_sd = new_parent->d_fsdata;
+
+ list_del_init(&sd->s_sibling);
+ list_add(&sd->s_sibling, &parent_sd->s_children);
}
else
d_drop(new_dentry);
@@ -366,7 +414,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
error = -EEXIST;
dput(new_dentry);
}
- mutex_unlock(&parent->d_inode->i_mutex);
+ mutex_unlock(&new_parent->d_inode->i_mutex);
up_write(&sysfs_rename_sem);
return error;
@@ -378,12 +426,10 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
struct sysfs_dirent *new_parent_sd, *sd;
int error;
- if (!new_parent)
- return -EINVAL;
-
old_parent_dentry = kobj->parent ?
kobj->parent->dentry : sysfs_mount->mnt_sb->s_root;
- new_parent_dentry = new_parent->dentry;
+ new_parent_dentry = new_parent ?
+ new_parent->dentry : sysfs_mount->mnt_sb->s_root;
again:
mutex_lock(&old_parent_dentry->d_inode->i_mutex);
@@ -547,6 +593,95 @@ static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
return offset;
}
+
+/**
+ * sysfs_make_shadowed_dir - Setup so a directory can be shadowed
+ * @kobj: object we're creating shadow of.
+ */
+
+int sysfs_make_shadowed_dir(struct kobject *kobj,
+ void * (*follow_link)(struct dentry *, struct nameidata *))
+{
+ struct inode *inode;
+ struct inode_operations *i_op;
+
+ inode = kobj->dentry->d_inode;
+ if (inode->i_op != &sysfs_dir_inode_operations)
+ return -EINVAL;
+
+ i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
+ if (!i_op)
+ return -ENOMEM;
+
+ memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
+ i_op->follow_link = follow_link;
+
+ /* Locking of inode->i_op?
+ * Since setting i_op is a single word write and they
+ * are atomic we should be ok here.
+ */
+ inode->i_op = i_op;
+ return 0;
+}
+
+/**
+ * sysfs_create_shadow_dir - create a shadow directory for an object.
+ * @kobj: object we're creating directory for.
+ *
+ * sysfs_make_shadowed_dir must already have been called on this
+ * directory.
+ */
+
+struct dentry *sysfs_create_shadow_dir(struct kobject *kobj)
+{
+ struct sysfs_dirent *sd;
+ struct dentry *parent, *dir, *shadow;
+ struct inode *inode;
+
+ dir = kobj->dentry;
+ inode = dir->d_inode;
+ parent = dir->d_parent;
+ shadow = ERR_PTR(-EINVAL);
+ if (!sysfs_is_shadowed_inode(inode))
+ goto out;
+
+ shadow = d_alloc(parent, &dir->d_name);
+ if (!shadow)
+ goto nomem;
+
+ sd = __sysfs_make_dirent(shadow, kobj, inode->i_mode, SYSFS_DIR);
+ if (!sd)
+ goto nomem;
+
+ d_instantiate(shadow, igrab(inode));
+ inc_nlink(inode);
+ inc_nlink(parent->d_inode);
+ shadow->d_op = &sysfs_dentry_ops;
+
+ dget(shadow); /* Extra count - pin the dentry in core */
+
+out:
+ return shadow;
+nomem:
+ dput(shadow);
+ shadow = ERR_PTR(-ENOMEM);
+ goto out;
+}
+
+/**
+ * sysfs_remove_shadow_dir - remove a shadow directory.
+ * @shadow: dentry of shadow directory
+ *
+ * The only thing special about this is that we remove any files in
+ * the directory before we remove the directory, and we've inlined
+ * what used to be sysfs_rmdir() below, instead of calling separately.
+ */
+
+void sysfs_remove_shadow_dir(struct dentry *shadow)
+{
+ __sysfs_remove_dir(shadow);
+}
+
const struct file_operations sysfs_dir_operations = {
.open = sysfs_dir_open,
.release = sysfs_dir_close,
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 9cfe53e1e00d..98b0910ad80c 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -7,6 +7,7 @@
#include <linux/kobject.h>
#include <linux/namei.h>
#include <linux/poll.h>
+#include <linux/list.h>
#include <asm/uaccess.h>
#include <asm/semaphore.h>
@@ -50,17 +51,29 @@ static struct sysfs_ops subsys_sysfs_ops = {
.store = subsys_attr_store,
};
+/**
+ * add_to_collection - add buffer to a collection
+ * @buffer: buffer to be added
+ * @node: inode of set to add to
+ */
-struct sysfs_buffer {
- size_t count;
- loff_t pos;
- char * page;
- struct sysfs_ops * ops;
- struct semaphore sem;
- int needs_read_fill;
- int event;
-};
+static inline void
+add_to_collection(struct sysfs_buffer *buffer, struct inode *node)
+{
+ struct sysfs_buffer_collection *set = node->i_private;
+ mutex_lock(&node->i_mutex);
+ list_add(&buffer->associates, &set->associates);
+ mutex_unlock(&node->i_mutex);
+}
+
+static inline void
+remove_from_collection(struct sysfs_buffer *buffer, struct inode *node)
+{
+ mutex_lock(&node->i_mutex);
+ list_del(&buffer->associates);
+ mutex_unlock(&node->i_mutex);
+}
/**
* fill_read_buffer - allocate and fill buffer from object.
@@ -70,7 +83,8 @@ struct sysfs_buffer {
* Allocate @buffer->page, if it hasn't been already, then call the
* kobject's show() method to fill the buffer with this attribute's
* data.
- * This is called only once, on the file's first read.
+ * This is called only once, on the file's first read unless an error
+ * is returned.
*/
static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer)
{
@@ -88,12 +102,13 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
buffer->event = atomic_read(&sd->s_event);
count = ops->show(kobj,attr,buffer->page);
- buffer->needs_read_fill = 0;
BUG_ON(count > (ssize_t)PAGE_SIZE);
- if (count >= 0)
+ if (count >= 0) {
+ buffer->needs_read_fill = 0;
buffer->count = count;
- else
+ } else {
ret = count;
+ }
return ret;
}
@@ -153,6 +168,10 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
ssize_t retval = 0;
down(&buffer->sem);
+ if (buffer->orphaned) {
+ retval = -ENODEV;
+ goto out;
+ }
if (buffer->needs_read_fill) {
if ((retval = fill_read_buffer(file->f_path.dentry,buffer)))
goto out;
@@ -165,7 +184,6 @@ out:
return retval;
}
-
/**
* fill_write_buffer - copy buffer from userspace.
* @buffer: data buffer for file.
@@ -243,19 +261,25 @@ sysfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t
ssize_t len;
down(&buffer->sem);
+ if (buffer->orphaned) {
+ len = -ENODEV;
+ goto out;
+ }
len = fill_write_buffer(buffer, buf, count);
if (len > 0)
len = flush_write_buffer(file->f_path.dentry, buffer, len);
if (len > 0)
*ppos += len;
+out:
up(&buffer->sem);
return len;
}
-static int check_perm(struct inode * inode, struct file * file)
+static int sysfs_open_file(struct inode *inode, struct file *file)
{
struct kobject *kobj = sysfs_get_kobject(file->f_path.dentry->d_parent);
struct attribute * attr = to_attr(file->f_path.dentry);
+ struct sysfs_buffer_collection *set;
struct sysfs_buffer * buffer;
struct sysfs_ops * ops = NULL;
int error = 0;
@@ -285,6 +309,18 @@ static int check_perm(struct inode * inode, struct file * file)
if (!ops)
goto Eaccess;
+ /* make sure we have a collection to add our buffers to */
+ mutex_lock(&inode->i_mutex);
+ if (!(set = inode->i_private)) {
+ if (!(set = inode->i_private = kmalloc(sizeof(struct sysfs_buffer_collection), GFP_KERNEL))) {
+ error = -ENOMEM;
+ goto Done;
+ } else {
+ INIT_LIST_HEAD(&set->associates);
+ }
+ }
+ mutex_unlock(&inode->i_mutex);
+
/* File needs write support.
* The inode's perms must say it's ok,
* and we must have a store method.
@@ -310,9 +346,11 @@ static int check_perm(struct inode * inode, struct file * file)
*/
buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL);
if (buffer) {
+ INIT_LIST_HEAD(&buffer->associates);
init_MUTEX(&buffer->sem);
buffer->needs_read_fill = 1;
buffer->ops = ops;
+ add_to_collection(buffer, inode);
file->private_data = buffer;
} else
error = -ENOMEM;
@@ -325,16 +363,11 @@ static int check_perm(struct inode * inode, struct file * file)
error = -EACCES;
module_put(attr->owner);
Done:
- if (error && kobj)
+ if (error)
kobject_put(kobj);
return error;
}
-static int sysfs_open_file(struct inode * inode, struct file * filp)
-{
- return check_perm(inode,filp);
-}
-
static int sysfs_release(struct inode * inode, struct file * filp)
{
struct kobject * kobj = to_kobj(filp->f_path.dentry->d_parent);
@@ -342,8 +375,9 @@ static int sysfs_release(struct inode * inode, struct file * filp)
struct module * owner = attr->owner;
struct sysfs_buffer * buffer = filp->private_data;
- if (kobj)
- kobject_put(kobj);
+ if (buffer)
+ remove_from_collection(buffer, inode);
+ kobject_put(kobj);
/* After this point, attr should not be accessed. */
module_put(owner);
@@ -548,7 +582,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
{
- sysfs_hash_and_remove(kobj->dentry,attr->name);
+ sysfs_hash_and_remove(kobj->dentry, attr->name);
}
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 122145b0895c..b20951c93761 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -13,6 +13,8 @@
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/err.h>
+#include <linux/fs.h>
+#include <asm/semaphore.h>
#include "sysfs.h"
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index e79e38d52c00..dd1344b007f5 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -13,6 +13,7 @@
#include <linux/backing-dev.h>
#include <linux/capability.h>
#include <linux/errno.h>
+#include <asm/semaphore.h>
#include "sysfs.h"
extern struct super_block * sysfs_sb;
@@ -28,10 +29,20 @@ static struct backing_dev_info sysfs_backing_dev_info = {
.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
};
-static struct inode_operations sysfs_inode_operations ={
+static const struct inode_operations sysfs_inode_operations ={
.setattr = sysfs_setattr,
};
+void sysfs_delete_inode(struct inode *inode)
+{
+ /* Free the shadowed directory inode operations */
+ if (sysfs_is_shadowed_inode(inode)) {
+ kfree(inode->i_op);
+ inode->i_op = NULL;
+ }
+ return generic_delete_inode(inode);
+}
+
int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
{
struct inode * inode = dentry->d_inode;
@@ -209,6 +220,22 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
return NULL;
}
+static inline void orphan_all_buffers(struct inode *node)
+{
+ struct sysfs_buffer_collection *set = node->i_private;
+ struct sysfs_buffer *buf;
+
+ mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
+ if (node->i_private) {
+ list_for_each_entry(buf, &set->associates, associates) {
+ down(&buf->sem);
+ buf->orphaned = 1;
+ up(&buf->sem);
+ }
+ }
+ mutex_unlock(&node->i_mutex);
+}
+
/*
* Unhashes the dentry corresponding to given sysfs_dirent
@@ -217,16 +244,23 @@ const unsigned char * sysfs_get_name(struct sysfs_dirent *sd)
void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
{
struct dentry * dentry = sd->s_dentry;
+ struct inode *inode;
if (dentry) {
spin_lock(&dcache_lock);
spin_lock(&dentry->d_lock);
if (!(d_unhashed(dentry) && dentry->d_inode)) {
+ inode = dentry->d_inode;
+ spin_lock(&inode->i_lock);
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
dget_locked(dentry);
__d_drop(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
simple_unlink(parent->d_inode, dentry);
+ orphan_all_buffers(inode);
+ iput(inode);
} else {
spin_unlock(&dentry->d_lock);
spin_unlock(&dcache_lock);
@@ -248,7 +282,7 @@ int sysfs_hash_and_remove(struct dentry * dir, const char * name)
return -ENOENT;
parent_sd = dir->d_fsdata;
- mutex_lock(&dir->d_inode->i_mutex);
+ mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
if (!sd->s_element)
continue;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index e503f858fba8..23a48a38e6af 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -8,6 +8,7 @@
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/init.h>
+#include <asm/semaphore.h>
#include "sysfs.h"
@@ -18,9 +19,12 @@ struct vfsmount *sysfs_mount;
struct super_block * sysfs_sb = NULL;
struct kmem_cache *sysfs_dir_cachep;
-static struct super_operations sysfs_ops = {
+static void sysfs_clear_inode(struct inode *inode);
+
+static const struct super_operations sysfs_ops = {
.statfs = simple_statfs,
- .drop_inode = generic_delete_inode,
+ .drop_inode = sysfs_delete_inode,
+ .clear_inode = sysfs_clear_inode,
};
static struct sysfs_dirent sysfs_root = {
@@ -31,6 +35,11 @@ static struct sysfs_dirent sysfs_root = {
.s_iattr = NULL,
};
+static void sysfs_clear_inode(struct inode *inode)
+{
+ kfree(inode->i_private);
+}
+
static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
{
struct inode *inode;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index f50e3cc2ded8..7b9c5bfde920 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/kobject.h>
#include <linux/namei.h>
+#include <asm/semaphore.h>
#include "sysfs.h"
@@ -180,7 +181,7 @@ static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *co
free_page((unsigned long)page);
}
-struct inode_operations sysfs_symlink_inode_operations = {
+const struct inode_operations sysfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = sysfs_follow_link,
.put_link = sysfs_put_link,
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index bd7cec295dab..d976b0005549 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -2,6 +2,7 @@
extern struct vfsmount * sysfs_mount;
extern struct kmem_cache *sysfs_dir_cachep;
+extern void sysfs_delete_inode(struct inode *inode);
extern struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent *);
extern int sysfs_create(struct dentry *, int mode, int (*init)(struct inode *));
@@ -25,14 +26,30 @@ extern struct super_block * sysfs_sb;
extern const struct file_operations sysfs_dir_operations;
extern const struct file_operations sysfs_file_operations;
extern const struct file_operations bin_fops;
-extern struct inode_operations sysfs_dir_inode_operations;
-extern struct inode_operations sysfs_symlink_inode_operations;
+extern const struct inode_operations sysfs_dir_inode_operations;
+extern const struct inode_operations sysfs_symlink_inode_operations;
struct sysfs_symlink {
char * link_name;
struct kobject * target_kobj;
};
+struct sysfs_buffer {
+ struct list_head associates;
+ size_t count;
+ loff_t pos;
+ char * page;
+ struct sysfs_ops * ops;
+ struct semaphore sem;
+ int orphaned;
+ int needs_read_fill;
+ int event;
+};
+
+struct sysfs_buffer_collection {
+ struct list_head associates;
+};
+
static inline struct kobject * to_kobj(struct dentry * dentry)
{
struct sysfs_dirent * sd = dentry->d_fsdata;
@@ -96,3 +113,7 @@ static inline void sysfs_put(struct sysfs_dirent * sd)
release_sysfs_dirent(sd);
}
+static inline int sysfs_is_shadowed_inode(struct inode *inode)
+{
+ return S_ISDIR(inode->i_mode) && inode->i_op->follow_link;
+}
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 47a4b728f15b..0732ddb9020b 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -30,7 +30,7 @@ const struct file_operations sysv_file_operations = {
.sendfile = generic_file_sendfile,
};
-struct inode_operations sysv_file_inode_operations = {
+const struct inode_operations sysv_file_inode_operations = {
.truncate = sysv_truncate,
.getattr = sysv_getattr,
};
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index ead9864567e3..9311cac186fe 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -142,7 +142,7 @@ static inline void write3byte(struct sysv_sb_info *sbi,
}
}
-static struct inode_operations sysv_symlink_inode_operations = {
+static const struct inode_operations sysv_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
@@ -327,7 +327,7 @@ static void init_once(void *p, struct kmem_cache *cachep, unsigned long flags)
inode_init_once(&si->vfs_inode);
}
-struct super_operations sysv_sops = {
+const struct super_operations sysv_sops = {
.alloc_inode = sysv_alloc_inode,
.destroy_inode = sysv_destroy_inode,
.read_inode = sysv_read_inode,
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index f7c08db8e34c..4e48abbd2b5d 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -292,7 +292,7 @@ out:
/*
* directories can handle most operations...
*/
-struct inode_operations sysv_dir_inode_operations = {
+const struct inode_operations sysv_dir_inode_operations = {
.create = sysv_create,
.lookup = sysv_lookup,
.link = sysv_link,
diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c
index b85ce61d635c..00d2f8a43e4e 100644
--- a/fs/sysv/symlink.c
+++ b/fs/sysv/symlink.c
@@ -14,7 +14,7 @@ static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-struct inode_operations sysv_fast_symlink_inode_operations = {
+const struct inode_operations sysv_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = sysv_follow_link,
};
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index dcb18b2171fe..5b4fedf17cc4 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -159,13 +159,13 @@ extern struct sysv_dir_entry *sysv_dotdot(struct inode *, struct page **);
extern ino_t sysv_inode_by_name(struct dentry *);
-extern struct inode_operations sysv_file_inode_operations;
-extern struct inode_operations sysv_dir_inode_operations;
-extern struct inode_operations sysv_fast_symlink_inode_operations;
+extern const struct inode_operations sysv_file_inode_operations;
+extern const struct inode_operations sysv_dir_inode_operations;
+extern const struct inode_operations sysv_fast_symlink_inode_operations;
extern const struct file_operations sysv_file_operations;
extern const struct file_operations sysv_dir_operations;
extern const struct address_space_operations sysv_aops;
-extern struct super_operations sysv_sops;
+extern const struct super_operations sysv_sops;
extern struct dentry_operations sysv_dentry_operations;
diff --git a/fs/udf/file.c b/fs/udf/file.c
index d81f2db7b0e3..40d5047defea 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -263,6 +263,6 @@ const struct file_operations udf_file_operations = {
.sendfile = generic_file_sendfile,
};
-struct inode_operations udf_file_inode_operations = {
+const struct inode_operations udf_file_inode_operations = {
.truncate = udf_truncate,
};
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 73163325e5ec..fe361cd19a98 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1308,7 +1308,7 @@ end_rename:
return retval;
}
-struct inode_operations udf_dir_inode_operations = {
+const struct inode_operations udf_dir_inode_operations = {
.lookup = udf_lookup,
.create = udf_create,
.link = udf_link,
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 1dbc2955f02e..8672b88f7ff2 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -160,7 +160,7 @@ static void destroy_inodecache(void)
}
/* Superblock operations */
-static struct super_operations udf_sb_ops = {
+static const struct super_operations udf_sb_ops = {
.alloc_inode = udf_alloc_inode,
.destroy_inode = udf_destroy_inode,
.write_inode = udf_write_inode,
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index 1033b7cf2939..ee1dece1f6f5 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -42,9 +42,9 @@ struct task_struct;
struct buffer_head;
struct super_block;
-extern struct inode_operations udf_dir_inode_operations;
+extern const struct inode_operations udf_dir_inode_operations;
extern const struct file_operations udf_dir_operations;
-extern struct inode_operations udf_file_inode_operations;
+extern const struct inode_operations udf_file_inode_operations;
extern const struct file_operations udf_file_operations;
extern const struct address_space_operations udf_aops;
extern const struct address_space_operations udf_adinicb_aops;
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 638f4c585e89..bcc44084e004 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -4,6 +4,8 @@
* Copyright (C) 1998
* Daniel Pirkl <daniel.pirkl@email.cz>
* Charles University, Faculty of Mathematics and Physics
+ *
+ * UFS2 write support Evgeniy Dushistov <dushistov@mail.ru>, 2007
*/
#include <linux/fs.h>
@@ -14,45 +16,48 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/capability.h>
-#include <linux/sched.h>
#include <linux/bitops.h>
#include <asm/byteorder.h>
#include "swab.h"
#include "util.h"
-static unsigned ufs_add_fragments (struct inode *, unsigned, unsigned, unsigned, int *);
-static unsigned ufs_alloc_fragments (struct inode *, unsigned, unsigned, unsigned, int *);
-static unsigned ufs_alloccg_block (struct inode *, struct ufs_cg_private_info *, unsigned, int *);
-static unsigned ufs_bitmap_search (struct super_block *, struct ufs_cg_private_info *, unsigned, unsigned);
+#define INVBLOCK ((u64)-1L)
+
+static u64 ufs_add_fragments(struct inode *, u64, unsigned, unsigned, int *);
+static u64 ufs_alloc_fragments(struct inode *, unsigned, u64, unsigned, int *);
+static u64 ufs_alloccg_block(struct inode *, struct ufs_cg_private_info *, u64, int *);
+static u64 ufs_bitmap_search (struct super_block *, struct ufs_cg_private_info *, u64, unsigned);
static unsigned char ufs_fragtable_8fpb[], ufs_fragtable_other[];
static void ufs_clusteracct(struct super_block *, struct ufs_cg_private_info *, unsigned, int);
/*
* Free 'count' fragments from fragment number 'fragment'
*/
-void ufs_free_fragments(struct inode *inode, unsigned fragment, unsigned count)
+void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
- unsigned cgno, bit, end_bit, bbase, blkmap, i, blkno, cylno;
+ unsigned cgno, bit, end_bit, bbase, blkmap, i;
+ u64 blkno;
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
usb1 = ubh_get_usb_first(uspi);
- UFSD("ENTER, fragment %u, count %u\n", fragment, count);
+ UFSD("ENTER, fragment %llu, count %u\n",
+ (unsigned long long)fragment, count);
if (ufs_fragnum(fragment) + count > uspi->s_fpg)
ufs_error (sb, "ufs_free_fragments", "internal error");
lock_super(sb);
- cgno = ufs_dtog(fragment);
- bit = ufs_dtogd(fragment);
+ cgno = ufs_dtog(uspi, fragment);
+ bit = ufs_dtogd(uspi, fragment);
if (cgno >= uspi->s_ncg) {
ufs_panic (sb, "ufs_free_fragments", "freeing blocks are outside device");
goto failed;
@@ -101,9 +106,13 @@ void ufs_free_fragments(struct inode *inode, unsigned fragment, unsigned count)
fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree++;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1);
- cylno = ufs_cbtocylno (bbase);
- fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(bbase)), 1);
- fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
+ if (uspi->fs_magic != UFS2_MAGIC) {
+ unsigned cylno = ufs_cbtocylno (bbase);
+
+ fs16_add(sb, &ubh_cg_blks(ucpi, cylno,
+ ufs_cbtorpos(bbase)), 1);
+ fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
+ }
}
ubh_mark_buffer_dirty (USPI_UBH(uspi));
@@ -127,24 +136,27 @@ failed:
/*
* Free 'count' fragments from fragment number 'fragment' (free whole blocks)
*/
-void ufs_free_blocks(struct inode *inode, unsigned fragment, unsigned count)
+void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
- unsigned overflow, cgno, bit, end_bit, blkno, i, cylno;
+ unsigned overflow, cgno, bit, end_bit, i;
+ u64 blkno;
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
usb1 = ubh_get_usb_first(uspi);
- UFSD("ENTER, fragment %u, count %u\n", fragment, count);
+ UFSD("ENTER, fragment %llu, count %u\n",
+ (unsigned long long)fragment, count);
if ((fragment & uspi->s_fpbmask) || (count & uspi->s_fpbmask)) {
ufs_error (sb, "ufs_free_blocks", "internal error, "
- "fragment %u, count %u\n", fragment, count);
+ "fragment %llu, count %u\n",
+ (unsigned long long)fragment, count);
goto failed;
}
@@ -152,8 +164,8 @@ void ufs_free_blocks(struct inode *inode, unsigned fragment, unsigned count)
do_more:
overflow = 0;
- cgno = ufs_dtog (fragment);
- bit = ufs_dtogd (fragment);
+ cgno = ufs_dtog(uspi, fragment);
+ bit = ufs_dtogd(uspi, fragment);
if (cgno >= uspi->s_ncg) {
ufs_panic (sb, "ufs_free_blocks", "freeing blocks are outside device");
goto failed_unlock;
@@ -187,9 +199,14 @@ do_more:
fs32_add(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree++;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nbfree, 1);
- cylno = ufs_cbtocylno(i);
- fs16_add(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(i)), 1);
- fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
+
+ if (uspi->fs_magic != UFS2_MAGIC) {
+ unsigned cylno = ufs_cbtocylno(i);
+
+ fs16_add(sb, &ubh_cg_blks(ucpi, cylno,
+ ufs_cbtorpos(i)), 1);
+ fs32_add(sb, &ubh_cg_blktot(ucpi, cylno), 1);
+ }
}
ubh_mark_buffer_dirty (USPI_UBH(uspi));
@@ -308,15 +325,19 @@ static void ufs_clear_frags(struct inode *inode, sector_t beg, unsigned int n,
}
}
-unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
- unsigned goal, unsigned count, int * err, struct page *locked_page)
+u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
+ u64 goal, unsigned count, int *err,
+ struct page *locked_page)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
- unsigned cgno, oldcount, newcount, tmp, request, result;
+ unsigned cgno, oldcount, newcount;
+ u64 tmp, request, result;
- UFSD("ENTER, ino %lu, fragment %u, goal %u, count %u\n", inode->i_ino, fragment, goal, count);
+ UFSD("ENTER, ino %lu, fragment %llu, goal %llu, count %u\n",
+ inode->i_ino, (unsigned long long)fragment,
+ (unsigned long long)goal, count);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
@@ -324,11 +345,12 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
*err = -ENOSPC;
lock_super (sb);
-
- tmp = fs32_to_cpu(sb, *p);
+ tmp = ufs_data_ptr_to_cpu(sb, p);
+
if (count + ufs_fragnum(fragment) > uspi->s_fpb) {
- ufs_warning (sb, "ufs_new_fragments", "internal warning"
- " fragment %u, count %u", fragment, count);
+ ufs_warning(sb, "ufs_new_fragments", "internal warning"
+ " fragment %llu, count %u",
+ (unsigned long long)fragment, count);
count = uspi->s_fpb - ufs_fragnum(fragment);
}
oldcount = ufs_fragnum (fragment);
@@ -339,10 +361,12 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
*/
if (oldcount) {
if (!tmp) {
- ufs_error (sb, "ufs_new_fragments", "internal error, "
- "fragment %u, tmp %u\n", fragment, tmp);
- unlock_super (sb);
- return (unsigned)-1;
+ ufs_error(sb, "ufs_new_fragments", "internal error, "
+ "fragment %llu, tmp %llu\n",
+ (unsigned long long)fragment,
+ (unsigned long long)tmp);
+ unlock_super(sb);
+ return INVBLOCK;
}
if (fragment < UFS_I(inode)->i_lastfrag) {
UFSD("EXIT (ALREADY ALLOCATED)\n");
@@ -372,7 +396,7 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
if (goal == 0)
cgno = ufs_inotocg (inode->i_ino);
else
- cgno = ufs_dtog (goal);
+ cgno = ufs_dtog(uspi, goal);
/*
* allocate new fragment
@@ -380,14 +404,16 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
if (oldcount == 0) {
result = ufs_alloc_fragments (inode, cgno, goal, count, err);
if (result) {
- *p = cpu_to_fs32(sb, result);
+ ufs_cpu_to_data_ptr(sb, p, result);
*err = 0;
- UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
- ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
- locked_page != NULL);
+ UFS_I(inode)->i_lastfrag =
+ max_t(u32, UFS_I(inode)->i_lastfrag,
+ fragment + count);
+ ufs_clear_frags(inode, result + oldcount,
+ newcount - oldcount, locked_page != NULL);
}
unlock_super(sb);
- UFSD("EXIT, result %u\n", result);
+ UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
@@ -401,7 +427,7 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
unlock_super(sb);
- UFSD("EXIT, result %u\n", result);
+ UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
@@ -433,15 +459,14 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
locked_page != NULL);
ufs_change_blocknr(inode, fragment - oldcount, oldcount, tmp,
result, locked_page);
-
- *p = cpu_to_fs32(sb, result);
+ ufs_cpu_to_data_ptr(sb, p, result);
*err = 0;
UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
unlock_super(sb);
if (newcount < request)
ufs_free_fragments (inode, result + newcount, request - newcount);
ufs_free_fragments (inode, tmp, oldcount);
- UFSD("EXIT, result %u\n", result);
+ UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
@@ -450,9 +475,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
return 0;
}
-static unsigned
-ufs_add_fragments (struct inode * inode, unsigned fragment,
- unsigned oldcount, unsigned newcount, int * err)
+static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
+ unsigned oldcount, unsigned newcount, int *err)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
@@ -461,14 +485,15 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
struct ufs_cylinder_group * ucg;
unsigned cgno, fragno, fragoff, count, fragsize, i;
- UFSD("ENTER, fragment %u, oldcount %u, newcount %u\n", fragment, oldcount, newcount);
+ UFSD("ENTER, fragment %llu, oldcount %u, newcount %u\n",
+ (unsigned long long)fragment, oldcount, newcount);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
usb1 = ubh_get_usb_first (uspi);
count = newcount - oldcount;
- cgno = ufs_dtog(fragment);
+ cgno = ufs_dtog(uspi, fragment);
if (fs32_to_cpu(sb, UFS_SB(sb)->fs_cs(cgno).cs_nffree) < count)
return 0;
if ((ufs_fragnum (fragment) + newcount) > uspi->s_fpb)
@@ -483,7 +508,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
return 0;
}
- fragno = ufs_dtogd (fragment);
+ fragno = ufs_dtogd(uspi, fragment);
fragoff = ufs_fragnum (fragno);
for (i = oldcount; i < newcount; i++)
if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
@@ -521,7 +546,7 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
}
sb->s_dirt = 1;
- UFSD("EXIT, fragment %u\n", fragment);
+ UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
return fragment;
}
@@ -534,17 +559,19 @@ ufs_add_fragments (struct inode * inode, unsigned fragment,
if (fs32_to_cpu(sb, ucg->cg_frsum[k])) \
goto cg_found;
-static unsigned ufs_alloc_fragments (struct inode * inode, unsigned cgno,
- unsigned goal, unsigned count, int * err)
+static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno,
+ u64 goal, unsigned count, int *err)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
struct ufs_cg_private_info * ucpi;
struct ufs_cylinder_group * ucg;
- unsigned oldcg, i, j, k, result, allocsize;
+ unsigned oldcg, i, j, k, allocsize;
+ u64 result;
- UFSD("ENTER, ino %lu, cgno %u, goal %u, count %u\n", inode->i_ino, cgno, goal, count);
+ UFSD("ENTER, ino %lu, cgno %u, goal %llu, count %u\n",
+ inode->i_ino, cgno, (unsigned long long)goal, count);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
@@ -593,7 +620,7 @@ cg_found:
if (count == uspi->s_fpb) {
result = ufs_alloccg_block (inode, ucpi, goal, err);
- if (result == (unsigned)-1)
+ if (result == INVBLOCK)
return 0;
goto succed;
}
@@ -604,9 +631,9 @@ cg_found:
if (allocsize == uspi->s_fpb) {
result = ufs_alloccg_block (inode, ucpi, goal, err);
- if (result == (unsigned)-1)
+ if (result == INVBLOCK)
return 0;
- goal = ufs_dtogd (result);
+ goal = ufs_dtogd(uspi, result);
for (i = count; i < uspi->s_fpb; i++)
ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
i = uspi->s_fpb - count;
@@ -620,7 +647,7 @@ cg_found:
}
result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
- if (result == (unsigned)-1)
+ if (result == INVBLOCK)
return 0;
if(DQUOT_ALLOC_BLOCK(inode, count)) {
*err = -EDQUOT;
@@ -647,20 +674,21 @@ succed:
sb->s_dirt = 1;
result += cgno * uspi->s_fpg;
- UFSD("EXIT3, result %u\n", result);
+ UFSD("EXIT3, result %llu\n", (unsigned long long)result);
return result;
}
-static unsigned ufs_alloccg_block (struct inode * inode,
- struct ufs_cg_private_info * ucpi, unsigned goal, int * err)
+static u64 ufs_alloccg_block(struct inode *inode,
+ struct ufs_cg_private_info *ucpi,
+ u64 goal, int *err)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
struct ufs_cylinder_group * ucg;
- unsigned result, cylno, blkno;
+ u64 result, blkno;
- UFSD("ENTER, goal %u\n", goal);
+ UFSD("ENTER, goal %llu\n", (unsigned long long)goal);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
@@ -672,7 +700,7 @@ static unsigned ufs_alloccg_block (struct inode * inode,
goto norot;
}
goal = ufs_blknum (goal);
- goal = ufs_dtogd (goal);
+ goal = ufs_dtogd(uspi, goal);
/*
* If the requested block is available, use it.
@@ -684,8 +712,8 @@ static unsigned ufs_alloccg_block (struct inode * inode,
norot:
result = ufs_bitmap_search (sb, ucpi, goal, uspi->s_fpb);
- if (result == (unsigned)-1)
- return (unsigned)-1;
+ if (result == INVBLOCK)
+ return INVBLOCK;
ucpi->c_rotor = result;
gotit:
blkno = ufs_fragstoblks(result);
@@ -694,17 +722,22 @@ gotit:
ufs_clusteracct (sb, ucpi, blkno, -1);
if(DQUOT_ALLOC_BLOCK(inode, uspi->s_fpb)) {
*err = -EDQUOT;
- return (unsigned)-1;
+ return INVBLOCK;
}
fs32_sub(sb, &ucg->cg_cs.cs_nbfree, 1);
uspi->cs_total.cs_nbfree--;
fs32_sub(sb, &UFS_SB(sb)->fs_cs(ucpi->c_cgx).cs_nbfree, 1);
- cylno = ufs_cbtocylno(result);
- fs16_sub(sb, &ubh_cg_blks(ucpi, cylno, ufs_cbtorpos(result)), 1);
- fs32_sub(sb, &ubh_cg_blktot(ucpi, cylno), 1);
+
+ if (uspi->fs_magic != UFS2_MAGIC) {
+ unsigned cylno = ufs_cbtocylno((unsigned)result);
+
+ fs16_sub(sb, &ubh_cg_blks(ucpi, cylno,
+ ufs_cbtorpos((unsigned)result)), 1);
+ fs32_sub(sb, &ubh_cg_blktot(ucpi, cylno), 1);
+ }
- UFSD("EXIT, result %u\n", result);
+ UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
@@ -744,9 +777,9 @@ static unsigned ubh_scanc(struct ufs_sb_private_info *uspi,
* @goal: near which block we want find new one
* @count: specified size
*/
-static unsigned ufs_bitmap_search(struct super_block *sb,
- struct ufs_cg_private_info *ucpi,
- unsigned goal, unsigned count)
+static u64 ufs_bitmap_search(struct super_block *sb,
+ struct ufs_cg_private_info *ucpi,
+ u64 goal, unsigned count)
{
/*
* Bit patterns for identifying fragments in the block map
@@ -761,16 +794,18 @@ static unsigned ufs_bitmap_search(struct super_block *sb,
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct ufs_super_block_first *usb1;
struct ufs_cylinder_group *ucg;
- unsigned start, length, loc, result;
+ unsigned start, length, loc;
unsigned pos, want, blockmap, mask, end;
+ u64 result;
- UFSD("ENTER, cg %u, goal %u, count %u\n", ucpi->c_cgx, goal, count);
+ UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx,
+ (unsigned long long)goal, count);
usb1 = ubh_get_usb_first (uspi);
ucg = ubh_get_ucg(UCPI_UBH(ucpi));
if (goal)
- start = ufs_dtogd(goal) >> 3;
+ start = ufs_dtogd(uspi, goal) >> 3;
else
start = ucpi->c_frotor >> 3;
@@ -790,7 +825,7 @@ static unsigned ufs_bitmap_search(struct super_block *sb,
" length %u, count %u, freeoff %u\n",
ucpi->c_cgx, start, length, count,
ucpi->c_freeoff);
- return (unsigned)-1;
+ return INVBLOCK;
}
start = 0;
}
@@ -808,7 +843,8 @@ static unsigned ufs_bitmap_search(struct super_block *sb,
want = want_arr[count];
for (pos = 0; pos <= uspi->s_fpb - count; pos++) {
if ((blockmap & mask) == want) {
- UFSD("EXIT, result %u\n", result);
+ UFSD("EXIT, result %llu\n",
+ (unsigned long long)result);
return result + pos;
}
mask <<= 1;
@@ -819,7 +855,7 @@ static unsigned ufs_bitmap_search(struct super_block *sb,
ufs_error(sb, "ufs_bitmap_search", "block not in map on cg %u\n",
ucpi->c_cgx);
UFSD("EXIT (FAILED)\n");
- return (unsigned)-1;
+ return INVBLOCK;
}
static void ufs_clusteracct(struct super_block * sb,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 433b6f68403a..4890ddf1518e 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -20,7 +20,6 @@
#include <linux/fs.h>
#include <linux/ufs_fs.h>
#include <linux/smp_lock.h>
-#include <linux/sched.h>
#include "swab.h"
#include "util.h"
@@ -106,12 +105,13 @@ static void ufs_check_page(struct page *page)
char *kaddr = page_address(page);
unsigned offs, rec_len;
unsigned limit = PAGE_CACHE_SIZE;
+ const unsigned chunk_mask = UFS_SB(sb)->s_uspi->s_dirblksize - 1;
struct ufs_dir_entry *p;
char *error;
if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
limit = dir->i_size & ~PAGE_CACHE_MASK;
- if (limit & (UFS_SECTOR_SIZE - 1))
+ if (limit & chunk_mask)
goto Ebadsize;
if (!limit)
goto out;
@@ -126,7 +126,7 @@ static void ufs_check_page(struct page *page)
goto Ealign;
if (rec_len < UFS_DIR_REC_LEN(ufs_get_de_namlen(sb, p)))
goto Enamelen;
- if (((offs + rec_len - 1) ^ offs) & ~(UFS_SECTOR_SIZE-1))
+ if (((offs + rec_len - 1) ^ offs) & ~chunk_mask)
goto Espan;
if (fs32_to_cpu(sb, p->d_ino) > (UFS_SB(sb)->s_uspi->s_ipg *
UFS_SB(sb)->s_uspi->s_ncg))
@@ -310,6 +310,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
int namelen = dentry->d_name.len;
struct super_block *sb = dir->i_sb;
unsigned reclen = UFS_DIR_REC_LEN(namelen);
+ const unsigned int chunk_size = UFS_SB(sb)->s_uspi->s_dirblksize;
unsigned short rec_len, name_len;
struct page *page = NULL;
struct ufs_dir_entry *de;
@@ -342,8 +343,8 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
if ((char *)de == dir_end) {
/* We hit i_size */
name_len = 0;
- rec_len = UFS_SECTOR_SIZE;
- de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE);
+ rec_len = chunk_size;
+ de->d_reclen = cpu_to_fs16(sb, chunk_size);
de->d_ino = 0;
goto got_it;
}
@@ -431,7 +432,7 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir)
unsigned int offset = pos & ~PAGE_CACHE_MASK;
unsigned long n = pos >> PAGE_CACHE_SHIFT;
unsigned long npages = ufs_dir_pages(inode);
- unsigned chunk_mask = ~(UFS_SECTOR_SIZE - 1);
+ unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
int need_revalidate = filp->f_version != inode->i_version;
unsigned flags = UFS_SB(sb)->s_flags;
@@ -511,7 +512,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
struct super_block *sb = inode->i_sb;
struct address_space *mapping = page->mapping;
char *kaddr = page_address(page);
- unsigned from = ((char*)dir - kaddr) & ~(UFS_SECTOR_SIZE - 1);
+ unsigned from = ((char*)dir - kaddr) & ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
unsigned to = ((char*)dir - kaddr) + fs16_to_cpu(sb, dir->d_reclen);
struct ufs_dir_entry *pde = NULL;
struct ufs_dir_entry *de = (struct ufs_dir_entry *) (kaddr + from);
@@ -556,6 +557,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
struct super_block * sb = dir->i_sb;
struct address_space *mapping = inode->i_mapping;
struct page *page = grab_cache_page(mapping, 0);
+ const unsigned int chunk_size = UFS_SB(sb)->s_uspi->s_dirblksize;
struct ufs_dir_entry * de;
char *base;
int err;
@@ -563,7 +565,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
if (!page)
return -ENOMEM;
kmap(page);
- err = mapping->a_ops->prepare_write(NULL, page, 0, UFS_SECTOR_SIZE);
+ err = mapping->a_ops->prepare_write(NULL, page, 0, chunk_size);
if (err) {
unlock_page(page);
goto fail;
@@ -584,11 +586,11 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
((char *)de + fs16_to_cpu(sb, de->d_reclen));
de->d_ino = cpu_to_fs32(sb, dir->i_ino);
ufs_set_de_type(sb, de, dir->i_mode);
- de->d_reclen = cpu_to_fs16(sb, UFS_SECTOR_SIZE - UFS_DIR_REC_LEN(1));
+ de->d_reclen = cpu_to_fs16(sb, chunk_size - UFS_DIR_REC_LEN(1));
ufs_set_de_namlen(sb, de, 2);
strcpy (de->d_name, "..");
- err = ufs_commit_chunk(page, 0, UFS_SECTOR_SIZE);
+ err = ufs_commit_chunk(page, 0, chunk_size);
fail:
kunmap(page);
page_cache_release(page);
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 2ad1259c6eca..b868878009b6 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -18,6 +18,9 @@
* Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
* Big-endian to little-endian byte-swapping/bitmaps by
* David S. Miller (davem@caip.rutgers.edu), 1995
+ *
+ * UFS2 write support added by
+ * Evgeniy Dushistov <dushistov@mail.ru>, 2007
*/
#include <linux/fs.h>
@@ -126,6 +129,47 @@ void ufs_free_inode (struct inode * inode)
}
/*
+ * Nullify new chunk of inodes,
+ * BSD people also set ui_gen field of inode
+ * during nullification, but we not care about
+ * that because of linux ufs do not support NFS
+ */
+static void ufs2_init_inodes_chunk(struct super_block *sb,
+ struct ufs_cg_private_info *ucpi,
+ struct ufs_cylinder_group *ucg)
+{
+ struct buffer_head *bh;
+ struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
+ sector_t beg = uspi->s_sbbase +
+ ufs_inotofsba(ucpi->c_cgx * uspi->s_ipg +
+ fs32_to_cpu(sb, ucg->cg_u.cg_u2.cg_initediblk));
+ sector_t end = beg + uspi->s_fpb;
+
+ UFSD("ENTER cgno %d\n", ucpi->c_cgx);
+
+ for (; beg < end; ++beg) {
+ bh = sb_getblk(sb, beg);
+ lock_buffer(bh);
+ memset(bh->b_data, 0, sb->s_blocksize);
+ set_buffer_uptodate(bh);
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ sync_dirty_buffer(bh);
+ brelse(bh);
+ }
+
+ fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb);
+ ubh_mark_buffer_dirty(UCPI_UBH(ucpi));
+ if (sb->s_flags & MS_SYNCHRONOUS) {
+ ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
+ ubh_wait_on_buffer(UCPI_UBH(ucpi));
+ }
+
+ UFSD("EXIT\n");
+}
+
+/*
* There are two policies for allocating an inode. If the new inode is
* a directory, then a forward search is made for a block group with both
* free space and a low directory-to-inode ratio; if that fails, then of
@@ -146,6 +190,7 @@ struct inode * ufs_new_inode(struct inode * dir, int mode)
struct inode * inode;
unsigned cg, bit, i, j, start;
struct ufs_inode_info *ufsi;
+ int err = -ENOSPC;
UFSD("ENTER\n");
@@ -198,13 +243,15 @@ struct inode * ufs_new_inode(struct inode * dir, int mode)
goto cg_found;
}
}
-
+
goto failed;
cg_found:
ucpi = ufs_load_cylinder (sb, cg);
- if (!ucpi)
+ if (!ucpi) {
+ err = -EIO;
goto failed;
+ }
ucg = ubh_get_ucg(UCPI_UBH(ucpi));
if (!ufs_cg_chkmagic(sb, ucg))
ufs_panic (sb, "ufs_new_inode", "internal error, bad cg magic number");
@@ -216,6 +263,7 @@ cg_found:
if (!(bit < start)) {
ufs_error (sb, "ufs_new_inode",
"cylinder group %u corrupted - error in inode bitmap\n", cg);
+ err = -EIO;
goto failed;
}
}
@@ -224,9 +272,18 @@ cg_found:
ubh_setbit (UCPI_UBH(ucpi), ucpi->c_iusedoff, bit);
else {
ufs_panic (sb, "ufs_new_inode", "internal error");
+ err = -EIO;
goto failed;
}
-
+
+ if (uspi->fs_magic == UFS2_MAGIC) {
+ u32 initediblk = fs32_to_cpu(sb, ucg->cg_u.cg_u2.cg_initediblk);
+
+ if (bit + uspi->s_inopb > initediblk &&
+ initediblk < fs32_to_cpu(sb, ucg->cg_u.cg_u2.cg_niblk))
+ ufs2_init_inodes_chunk(sb, ucpi, ucg);
+ }
+
fs32_sub(sb, &ucg->cg_cs.cs_nifree, 1);
uspi->cs_total.cs_nifree--;
fs32_sub(sb, &sbi->fs_cs(cg).cs_nifree, 1);
@@ -236,7 +293,6 @@ cg_found:
uspi->cs_total.cs_ndir++;
fs32_add(sb, &sbi->fs_cs(cg).cs_ndir, 1);
}
-
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
if (sb->s_flags & MS_SYNCHRONOUS) {
@@ -245,6 +301,7 @@ cg_found:
}
sb->s_dirt = 1;
+ inode->i_ino = cg * uspi->s_ipg + bit;
inode->i_mode = mode;
inode->i_uid = current->fsuid;
if (dir->i_mode & S_ISGID) {
@@ -254,39 +311,72 @@ cg_found:
} else
inode->i_gid = current->fsgid;
- inode->i_ino = cg * uspi->s_ipg + bit;
inode->i_blocks = 0;
+ inode->i_generation = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
ufsi->i_flags = UFS_I(dir)->i_flags;
ufsi->i_lastfrag = 0;
- ufsi->i_gen = 0;
ufsi->i_shadow = 0;
ufsi->i_osync = 0;
ufsi->i_oeftflag = 0;
ufsi->i_dir_start_lookup = 0;
memset(&ufsi->i_u1, 0, sizeof(ufsi->i_u1));
-
insert_inode_hash(inode);
mark_inode_dirty(inode);
+ if (uspi->fs_magic == UFS2_MAGIC) {
+ struct buffer_head *bh;
+ struct ufs2_inode *ufs2_inode;
+
+ /*
+ * setup birth date, we do it here because of there is no sense
+ * to hold it in struct ufs_inode_info, and lose 64 bit
+ */
+ bh = sb_bread(sb, uspi->s_sbbase + ufs_inotofsba(inode->i_ino));
+ if (!bh) {
+ ufs_warning(sb, "ufs_read_inode",
+ "unable to read inode %lu\n",
+ inode->i_ino);
+ err = -EIO;
+ goto fail_remove_inode;
+ }
+ lock_buffer(bh);
+ ufs2_inode = (struct ufs2_inode *)bh->b_data;
+ ufs2_inode += ufs_inotofsbo(inode->i_ino);
+ ufs2_inode->ui_birthtime.tv_sec =
+ cpu_to_fs32(sb, CURRENT_TIME_SEC.tv_sec);
+ ufs2_inode->ui_birthtime.tv_usec = 0;
+ mark_buffer_dirty(bh);
+ unlock_buffer(bh);
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ sync_dirty_buffer(bh);
+ brelse(bh);
+ }
+
unlock_super (sb);
if (DQUOT_ALLOC_INODE(inode)) {
DQUOT_DROP(inode);
- inode->i_flags |= S_NOQUOTA;
- inode->i_nlink = 0;
- iput(inode);
- return ERR_PTR(-EDQUOT);
+ err = -EDQUOT;
+ goto fail_without_unlock;
}
UFSD("allocating inode %lu\n", inode->i_ino);
UFSD("EXIT\n");
return inode;
+fail_remove_inode:
+ unlock_super(sb);
+fail_without_unlock:
+ inode->i_flags |= S_NOQUOTA;
+ inode->i_nlink = 0;
+ iput(inode);
+ UFSD("EXIT (FAILED): err %d\n", err);
+ return ERR_PTR(err);
failed:
unlock_super (sb);
make_bad_inode(inode);
iput (inode);
- UFSD("EXIT (FAILED)\n");
- return ERR_PTR(-ENOSPC);
+ UFSD("EXIT (FAILED): err %d\n", err);
+ return ERR_PTR(err);
}
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index 4295ca91cf85..fb34ad03e224 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -170,7 +170,7 @@ out:
* @locked_page - for ufs_new_fragments()
*/
static struct buffer_head *
-ufs_inode_getfrag(struct inode *inode, unsigned int fragment,
+ufs_inode_getfrag(struct inode *inode, u64 fragment,
sector_t new_fragment, unsigned int required, int *err,
long *phys, int *new, struct page *locked_page)
{
@@ -178,12 +178,12 @@ ufs_inode_getfrag(struct inode *inode, unsigned int fragment,
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct buffer_head * result;
- unsigned block, blockoff, lastfrag, lastblock, lastblockoff;
- unsigned tmp, goal;
- __fs32 * p, * p2;
+ unsigned blockoff, lastblockoff;
+ u64 tmp, goal, lastfrag, block, lastblock;
+ void *p, *p2;
- UFSD("ENTER, ino %lu, fragment %u, new_fragment %llu, required %u, "
- "metadata %d\n", inode->i_ino, fragment,
+ UFSD("ENTER, ino %lu, fragment %llu, new_fragment %llu, required %u, "
+ "metadata %d\n", inode->i_ino, (unsigned long long)fragment,
(unsigned long long)new_fragment, required, !phys);
/* TODO : to be done for write support
@@ -193,17 +193,20 @@ ufs_inode_getfrag(struct inode *inode, unsigned int fragment,
block = ufs_fragstoblks (fragment);
blockoff = ufs_fragnum (fragment);
- p = ufsi->i_u1.i_data + block;
+ p = ufs_get_direct_data_ptr(uspi, ufsi, block);
+
goal = 0;
repeat:
- tmp = fs32_to_cpu(sb, *p);
+ tmp = ufs_data_ptr_to_cpu(sb, p);
+
lastfrag = ufsi->i_lastfrag;
if (tmp && fragment < lastfrag) {
if (!phys) {
result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
- if (tmp == fs32_to_cpu(sb, *p)) {
- UFSD("EXIT, result %u\n", tmp + blockoff);
+ if (tmp == ufs_data_ptr_to_cpu(sb, p)) {
+ UFSD("EXIT, result %llu\n",
+ (unsigned long long)tmp + blockoff);
return result;
}
brelse (result);
@@ -224,10 +227,11 @@ repeat:
* We must reallocate last allocated block
*/
if (lastblockoff) {
- p2 = ufsi->i_u1.i_data + lastblock;
- tmp = ufs_new_fragments (inode, p2, lastfrag,
- fs32_to_cpu(sb, *p2), uspi->s_fpb - lastblockoff,
- err, locked_page);
+ p2 = ufs_get_direct_data_ptr(uspi, ufsi, lastblock);
+ tmp = ufs_new_fragments(inode, p2, lastfrag,
+ ufs_data_ptr_to_cpu(sb, p2),
+ uspi->s_fpb - lastblockoff,
+ err, locked_page);
if (!tmp) {
if (lastfrag != ufsi->i_lastfrag)
goto repeat;
@@ -237,27 +241,31 @@ repeat:
lastfrag = ufsi->i_lastfrag;
}
- tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[lastblock]);
+ tmp = ufs_data_ptr_to_cpu(sb,
+ ufs_get_direct_data_ptr(uspi, ufsi,
+ lastblock));
if (tmp)
goal = tmp + uspi->s_fpb;
tmp = ufs_new_fragments (inode, p, fragment - blockoff,
goal, required + blockoff,
err,
phys != NULL ? locked_page : NULL);
- }
+ } else if (lastblock == block) {
/*
* We will extend last allocated block
*/
- else if (lastblock == block) {
- tmp = ufs_new_fragments(inode, p, fragment - (blockoff - lastblockoff),
- fs32_to_cpu(sb, *p), required + (blockoff - lastblockoff),
+ tmp = ufs_new_fragments(inode, p, fragment -
+ (blockoff - lastblockoff),
+ ufs_data_ptr_to_cpu(sb, p),
+ required + (blockoff - lastblockoff),
err, phys != NULL ? locked_page : NULL);
} else /* (lastblock > block) */ {
/*
* We will allocate new block before last allocated block
*/
if (block) {
- tmp = fs32_to_cpu(sb, ufsi->i_u1.i_data[block-1]);
+ tmp = ufs_data_ptr_to_cpu(sb,
+ ufs_get_direct_data_ptr(uspi, ufsi, block - 1));
if (tmp)
goal = tmp + uspi->s_fpb;
}
@@ -266,7 +274,7 @@ repeat:
phys != NULL ? locked_page : NULL);
}
if (!tmp) {
- if ((!blockoff && *p) ||
+ if ((!blockoff && ufs_data_ptr_to_cpu(sb, p)) ||
(blockoff && lastfrag != ufsi->i_lastfrag))
goto repeat;
*err = -ENOSPC;
@@ -286,7 +294,7 @@ repeat:
if (IS_SYNC(inode))
ufs_sync_inode (inode);
mark_inode_dirty(inode);
- UFSD("EXIT, result %u\n", tmp + blockoff);
+ UFSD("EXIT, result %llu\n", (unsigned long long)tmp + blockoff);
return result;
/* This part : To be implemented ....
@@ -320,20 +328,22 @@ repeat2:
*/
static struct buffer_head *
ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
- unsigned int fragment, sector_t new_fragment, int *err,
+ u64 fragment, sector_t new_fragment, int *err,
long *phys, int *new, struct page *locked_page)
{
struct super_block *sb = inode->i_sb;
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct buffer_head * result;
- unsigned tmp, goal, block, blockoff;
- __fs32 * p;
+ unsigned blockoff;
+ u64 tmp, goal, block;
+ void *p;
block = ufs_fragstoblks (fragment);
blockoff = ufs_fragnum (fragment);
- UFSD("ENTER, ino %lu, fragment %u, new_fragment %llu, metadata %d\n",
- inode->i_ino, fragment, (unsigned long long)new_fragment, !phys);
+ UFSD("ENTER, ino %lu, fragment %llu, new_fragment %llu, metadata %d\n",
+ inode->i_ino, (unsigned long long)fragment,
+ (unsigned long long)new_fragment, !phys);
result = NULL;
if (!bh)
@@ -344,14 +354,16 @@ ufs_inode_getblock(struct inode *inode, struct buffer_head *bh,
if (!buffer_uptodate(bh))
goto out;
}
-
- p = (__fs32 *) bh->b_data + block;
+ if (uspi->fs_magic == UFS2_MAGIC)
+ p = (__fs64 *)bh->b_data + block;
+ else
+ p = (__fs32 *)bh->b_data + block;
repeat:
- tmp = fs32_to_cpu(sb, *p);
+ tmp = ufs_data_ptr_to_cpu(sb, p);
if (tmp) {
if (!phys) {
result = sb_getblk(sb, uspi->s_sbbase + tmp + blockoff);
- if (tmp == fs32_to_cpu(sb, *p))
+ if (tmp == ufs_data_ptr_to_cpu(sb, p))
goto out;
brelse (result);
goto repeat;
@@ -361,14 +373,16 @@ repeat:
}
}
- if (block && (tmp = fs32_to_cpu(sb, ((__fs32*)bh->b_data)[block-1])))
+ if (block && (uspi->fs_magic == UFS2_MAGIC ?
+ (tmp = fs64_to_cpu(sb, ((__fs64 *)bh->b_data)[block-1])) :
+ (tmp = fs32_to_cpu(sb, ((__fs32 *)bh->b_data)[block-1]))))
goal = tmp + uspi->s_fpb;
else
goal = bh->b_blocknr + uspi->s_fpb;
tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment), goal,
uspi->s_fpb, err, locked_page);
if (!tmp) {
- if (fs32_to_cpu(sb, *p))
+ if (ufs_data_ptr_to_cpu(sb, p))
goto repeat;
goto out;
}
@@ -386,7 +400,7 @@ repeat:
sync_dirty_buffer(bh);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
- UFSD("result %u\n", tmp + blockoff);
+ UFSD("result %llu\n", (unsigned long long)tmp + blockoff);
out:
brelse (bh);
UFSD("EXIT\n");
@@ -616,8 +630,8 @@ static void ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
inode->i_atime.tv_nsec = 0;
inode->i_ctime.tv_nsec = 0;
inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks);
+ inode->i_generation = fs32_to_cpu(sb, ufs_inode->ui_gen);
ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags);
- ufsi->i_gen = fs32_to_cpu(sb, ufs_inode->ui_gen);
ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
@@ -661,8 +675,8 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
inode->i_atime.tv_nsec = 0;
inode->i_ctime.tv_nsec = 0;
inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
+ inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen);
ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags);
- ufsi->i_gen = fs32_to_cpu(sb, ufs2_inode->ui_gen);
/*
ufsi->i_shadow = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_shadow);
ufsi->i_oeftflag = fs32_to_cpu(sb, ufs_inode->ui_u3.ui_sun.ui_oeftflag);
@@ -731,34 +745,11 @@ bad_inode:
make_bad_inode(inode);
}
-static int ufs_update_inode(struct inode * inode, int do_sync)
+static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
{
- struct ufs_inode_info *ufsi = UFS_I(inode);
- struct super_block * sb;
- struct ufs_sb_private_info * uspi;
- struct buffer_head * bh;
- struct ufs_inode * ufs_inode;
- unsigned i;
- unsigned flags;
-
- UFSD("ENTER, ino %lu\n", inode->i_ino);
-
- sb = inode->i_sb;
- uspi = UFS_SB(sb)->s_uspi;
- flags = UFS_SB(sb)->s_flags;
-
- if (inode->i_ino < UFS_ROOTINO ||
- inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
- ufs_warning (sb, "ufs_read_inode", "bad inode number (%lu)\n", inode->i_ino);
- return -1;
- }
-
- bh = sb_bread(sb, ufs_inotofsba(inode->i_ino));
- if (!bh) {
- ufs_warning (sb, "ufs_read_inode", "unable to read inode %lu\n", inode->i_ino);
- return -1;
- }
- ufs_inode = (struct ufs_inode *) (bh->b_data + ufs_inotofsbo(inode->i_ino) * sizeof(struct ufs_inode));
+ struct super_block *sb = inode->i_sb;
+ struct ufs_inode_info *ufsi = UFS_I(inode);
+ unsigned i;
ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode);
ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink);
@@ -775,9 +766,9 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
ufs_inode->ui_mtime.tv_usec = 0;
ufs_inode->ui_blocks = cpu_to_fs32(sb, inode->i_blocks);
ufs_inode->ui_flags = cpu_to_fs32(sb, ufsi->i_flags);
- ufs_inode->ui_gen = cpu_to_fs32(sb, ufsi->i_gen);
+ ufs_inode->ui_gen = cpu_to_fs32(sb, inode->i_generation);
- if ((flags & UFS_UID_MASK) == UFS_UID_EFT) {
+ if ((UFS_SB(sb)->s_flags & UFS_UID_MASK) == UFS_UID_EFT) {
ufs_inode->ui_u3.ui_sun.ui_shadow = cpu_to_fs32(sb, ufsi->i_shadow);
ufs_inode->ui_u3.ui_sun.ui_oeftflag = cpu_to_fs32(sb, ufsi->i_oeftflag);
}
@@ -796,6 +787,78 @@ static int ufs_update_inode(struct inode * inode, int do_sync)
if (!inode->i_nlink)
memset (ufs_inode, 0, sizeof(struct ufs_inode));
+}
+
+static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode)
+{
+ struct super_block *sb = inode->i_sb;
+ struct ufs_inode_info *ufsi = UFS_I(inode);
+ unsigned i;
+
+ UFSD("ENTER\n");
+ ufs_inode->ui_mode = cpu_to_fs16(sb, inode->i_mode);
+ ufs_inode->ui_nlink = cpu_to_fs16(sb, inode->i_nlink);
+
+ ufs_inode->ui_uid = cpu_to_fs32(sb, inode->i_uid);
+ ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid);
+
+ ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
+ ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
+ ufs_inode->ui_atime.tv_usec = 0;
+ ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec);
+ ufs_inode->ui_ctime.tv_usec = 0;
+ ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec);
+ ufs_inode->ui_mtime.tv_usec = 0;
+
+ ufs_inode->ui_blocks = cpu_to_fs64(sb, inode->i_blocks);
+ ufs_inode->ui_flags = cpu_to_fs32(sb, ufsi->i_flags);
+ ufs_inode->ui_gen = cpu_to_fs32(sb, inode->i_generation);
+
+ if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
+ /* ufs_inode->ui_u2.ui_addr.ui_db[0] = cpu_to_fs32(sb, inode->i_rdev); */
+ ufs_inode->ui_u2.ui_addr.ui_db[0] = ufsi->i_u1.u2_i_data[0];
+ } else if (inode->i_blocks) {
+ for (i = 0; i < (UFS_NDADDR + UFS_NINDIR); i++)
+ ufs_inode->ui_u2.ui_addr.ui_db[i] = ufsi->i_u1.u2_i_data[i];
+ } else {
+ for (i = 0; i < (UFS_NDADDR + UFS_NINDIR) * 4; i++)
+ ufs_inode->ui_u2.ui_symlink[i] = ufsi->i_u1.i_symlink[i];
+ }
+
+ if (!inode->i_nlink)
+ memset (ufs_inode, 0, sizeof(struct ufs2_inode));
+ UFSD("EXIT\n");
+}
+
+static int ufs_update_inode(struct inode * inode, int do_sync)
+{
+ struct super_block *sb = inode->i_sb;
+ struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
+ struct buffer_head * bh;
+
+ UFSD("ENTER, ino %lu\n", inode->i_ino);
+
+ if (inode->i_ino < UFS_ROOTINO ||
+ inode->i_ino > (uspi->s_ncg * uspi->s_ipg)) {
+ ufs_warning (sb, "ufs_read_inode", "bad inode number (%lu)\n", inode->i_ino);
+ return -1;
+ }
+
+ bh = sb_bread(sb, ufs_inotofsba(inode->i_ino));
+ if (!bh) {
+ ufs_warning (sb, "ufs_read_inode", "unable to read inode %lu\n", inode->i_ino);
+ return -1;
+ }
+ if (uspi->fs_magic == UFS2_MAGIC) {
+ struct ufs2_inode *ufs2_inode = (struct ufs2_inode *)bh->b_data;
+
+ ufs2_update_inode(inode,
+ ufs2_inode + ufs_inotofsbo(inode->i_ino));
+ } else {
+ struct ufs_inode *ufs_inode = (struct ufs_inode *) bh->b_data;
+
+ ufs1_update_inode(inode, ufs_inode + ufs_inotofsbo(inode->i_ino));
+ }
mark_buffer_dirty(bh);
if (do_sync)
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index e84c0ecf0730..a059ccd064ea 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -355,7 +355,7 @@ out:
return err;
}
-struct inode_operations ufs_dir_inode_operations = {
+const struct inode_operations ufs_dir_inode_operations = {
.create = ufs_create,
.lookup = ufs_lookup,
.link = ufs_link,
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 8a8e9382ec09..b5a6461ec66b 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -61,6 +61,8 @@
* UFS2 (of FreeBSD 5.x) support added by
* Niraj Kumar <niraj17@iitbombay.org>, Jan 2004
*
+ * UFS2 write support added by
+ * Evgeniy Dushistov <dushistov@mail.ru>, 2007
*/
@@ -93,14 +95,16 @@
/*
* Print contents of ufs_super_block, useful for debugging
*/
-static void ufs_print_super_stuff(struct super_block *sb, unsigned flags,
+static void ufs_print_super_stuff(struct super_block *sb,
struct ufs_super_block_first *usb1,
struct ufs_super_block_second *usb2,
struct ufs_super_block_third *usb3)
{
+ u32 magic = fs32_to_cpu(sb, usb3->fs_magic);
+
printk("ufs_print_super_stuff\n");
- printk(" magic: 0x%x\n", fs32_to_cpu(sb, usb3->fs_magic));
- if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) {
+ printk(" magic: 0x%x\n", magic);
+ if (fs32_to_cpu(sb, usb3->fs_magic) == UFS2_MAGIC) {
printk(" fs_size: %llu\n", (unsigned long long)
fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_size));
printk(" fs_dsize: %llu\n", (unsigned long long)
@@ -117,6 +121,12 @@ static void ufs_print_super_stuff(struct super_block *sb, unsigned flags,
printk(" cs_nbfree(No of free blocks): %llu\n",
(unsigned long long)
fs64_to_cpu(sb, usb2->fs_un.fs_u2.cs_nbfree));
+ printk(KERN_INFO" cs_nifree(Num of free inodes): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nifree));
+ printk(KERN_INFO" cs_nffree(Num of free frags): %llu\n",
+ (unsigned long long)
+ fs64_to_cpu(sb, usb3->fs_un1.fs_u2.cs_nffree));
} else {
printk(" sblkno: %u\n", fs32_to_cpu(sb, usb1->fs_sblkno));
printk(" cblkno: %u\n", fs32_to_cpu(sb, usb1->fs_cblkno));
@@ -199,11 +209,11 @@ static void ufs_print_cylinder_stuff(struct super_block *sb,
printk("\n");
}
#else
-# define ufs_print_super_stuff(sb, flags, usb1, usb2, usb3) /**/
+# define ufs_print_super_stuff(sb, usb1, usb2, usb3) /**/
# define ufs_print_cylinder_stuff(sb, cg) /**/
#endif /* CONFIG_UFS_DEBUG */
-static struct super_operations ufs_super_ops;
+static const struct super_operations ufs_super_ops;
static char error_buf[1024];
@@ -422,7 +432,6 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
{
struct ufs_sb_info *sbi = UFS_SB(sb);
struct ufs_sb_private_info *uspi = sbi->s_uspi;
- unsigned flags = sbi->s_flags;
struct ufs_buffer_head * ubh;
unsigned char * base, * space;
unsigned size, blks, i;
@@ -446,11 +455,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb)
if (i + uspi->s_fpb > blks)
size = (blks - i) * uspi->s_fsize;
- if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2)
- ubh = ubh_bread(sb,
- fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_csaddr) + i, size);
- else
- ubh = ubh_bread(sb, uspi->s_csaddr + i, size);
+ ubh = ubh_bread(sb, uspi->s_csaddr + i, size);
if (!ubh)
goto failed;
@@ -545,6 +550,7 @@ static void ufs_put_cstotal(struct super_block *sb)
cpu_to_fs32(sb, uspi->cs_total.cs_nffree);
}
ubh_mark_buffer_dirty(USPI_UBH(uspi));
+ ufs_print_super_stuff(sb, usb1, usb2, usb3);
UFSD("EXIT\n");
}
@@ -572,7 +578,9 @@ static void ufs_put_super_internal(struct super_block *sb)
size = uspi->s_bsize;
if (i + uspi->s_fpb > blks)
size = (blks - i) * uspi->s_fsize;
+
ubh = ubh_bread(sb, uspi->s_csaddr + i, size);
+
ubh_memcpyubh (ubh, space, size);
space += size;
ubh_mark_buffer_uptodate (ubh, 1);
@@ -649,7 +657,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
kmalloc (sizeof(struct ufs_sb_private_info), GFP_KERNEL);
if (!uspi)
goto failed;
-
+ uspi->s_dirblksize = UFS_SECTOR_SIZE;
super_block_offset=UFS_SBLOCK;
/* Keep 2Gig file limit. Some UFS variants need to override
@@ -674,10 +682,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
uspi->s_sbsize = super_block_size = 1536;
uspi->s_sbbase = 0;
flags |= UFS_TYPE_UFS2 | UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
- if (!(sb->s_flags & MS_RDONLY)) {
- printk(KERN_INFO "ufstype=ufs2 is supported read-only\n");
- sb->s_flags |= MS_RDONLY;
- }
break;
case UFS_MOUNT_UFSTYPE_SUN:
@@ -718,6 +722,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
break;
case UFS_MOUNT_UFSTYPE_NEXTSTEP:
+ /* TODO: check whether we need to set a special dir block size */
UFSD("ufstype=nextstep\n");
uspi->s_fsize = block_size = 1024;
uspi->s_fmask = ~(1024 - 1);
@@ -733,6 +738,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
break;
case UFS_MOUNT_UFSTYPE_NEXTSTEP_CD:
+ /* TODO: check whether we need to set a special dir block size */
UFSD("ufstype=nextstep-cd\n");
uspi->s_fsize = block_size = 2048;
uspi->s_fmask = ~(2048 - 1);
@@ -754,6 +760,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
uspi->s_fshift = 10;
uspi->s_sbsize = super_block_size = 2048;
uspi->s_sbbase = 0;
+ uspi->s_dirblksize = 1024;
flags |= UFS_DE_44BSD | UFS_UID_44BSD | UFS_ST_44BSD | UFS_CG_44BSD;
if (!(sb->s_flags & MS_RDONLY)) {
if (!silent)
@@ -887,7 +894,7 @@ magic_found:
}
- ufs_print_super_stuff(sb, flags, usb1, usb2, usb3);
+ ufs_print_super_stuff(sb, usb1, usb2, usb3);
/*
* Check, if file system was correctly unmounted.
@@ -970,7 +977,12 @@ magic_found:
uspi->s_npsect = ufs_get_fs_npsect(sb, usb1, usb3);
uspi->s_interleave = fs32_to_cpu(sb, usb1->fs_interleave);
uspi->s_trackskew = fs32_to_cpu(sb, usb1->fs_trackskew);
- uspi->s_csaddr = fs32_to_cpu(sb, usb1->fs_csaddr);
+
+ if (uspi->fs_magic == UFS2_MAGIC)
+ uspi->s_csaddr = fs64_to_cpu(sb, usb3->fs_un1.fs_u2.fs_csaddr);
+ else
+ uspi->s_csaddr = fs32_to_cpu(sb, usb1->fs_csaddr);
+
uspi->s_cssize = fs32_to_cpu(sb, usb1->fs_cssize);
uspi->s_cgsize = fs32_to_cpu(sb, usb1->fs_cgsize);
uspi->s_ntrak = fs32_to_cpu(sb, usb1->fs_ntrak);
@@ -1057,7 +1069,6 @@ static void ufs_write_super(struct super_block *sb)
unsigned flags;
lock_kernel();
-
UFSD("ENTER\n");
flags = UFS_SB(sb)->s_flags;
uspi = UFS_SB(sb)->s_uspi;
@@ -1153,7 +1164,8 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
#else
if (ufstype != UFS_MOUNT_UFSTYPE_SUN &&
ufstype != UFS_MOUNT_UFSTYPE_44BSD &&
- ufstype != UFS_MOUNT_UFSTYPE_SUNx86) {
+ ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
+ ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
printk("this ufstype is read-only supported\n");
return -EINVAL;
}
@@ -1252,7 +1264,7 @@ static ssize_t ufs_quota_read(struct super_block *, int, char *,size_t, loff_t);
static ssize_t ufs_quota_write(struct super_block *, int, const char *, size_t, loff_t);
#endif
-static struct super_operations ufs_super_ops = {
+static const struct super_operations ufs_super_ops = {
.alloc_inode = ufs_alloc_inode,
.destroy_inode = ufs_destroy_inode,
.read_inode = ufs_read_inode,
diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c
index 337512ed5781..d8549f807e80 100644
--- a/fs/ufs/symlink.c
+++ b/fs/ufs/symlink.c
@@ -36,7 +36,7 @@ static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-struct inode_operations ufs_fast_symlink_inode_operations = {
+const struct inode_operations ufs_fast_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = ufs_follow_link,
};
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 0437b0a6fe97..749581fa7729 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -30,8 +30,8 @@
*/
/*
- * Modified to avoid infinite loop on 2006 by
- * Evgeniy Dushistov <dushistov@mail.ru>
+ * Adaptation to use page cache and UFS2 write support by
+ * Evgeniy Dushistov <dushistov@mail.ru>, 2006-2007
*/
#include <linux/errno.h>
@@ -63,13 +63,13 @@
#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift)
-static int ufs_trunc_direct (struct inode * inode)
+static int ufs_trunc_direct(struct inode *inode)
{
struct ufs_inode_info *ufsi = UFS_I(inode);
struct super_block * sb;
struct ufs_sb_private_info * uspi;
- __fs32 * p;
- unsigned frag1, frag2, frag3, frag4, block1, block2;
+ void *p;
+ u64 frag1, frag2, frag3, frag4, block1, block2;
unsigned frag_to_free, free_count;
unsigned i, tmp;
int retry;
@@ -91,13 +91,16 @@ static int ufs_trunc_direct (struct inode * inode)
if (frag2 > frag3) {
frag2 = frag4;
frag3 = frag4 = 0;
- }
- else if (frag2 < frag3) {
+ } else if (frag2 < frag3) {
block1 = ufs_fragstoblks (frag2);
block2 = ufs_fragstoblks (frag3);
}
- UFSD("frag1 %u, frag2 %u, block1 %u, block2 %u, frag3 %u, frag4 %u\n", frag1, frag2, block1, block2, frag3, frag4);
+ UFSD("frag1 %llu, frag2 %llu, block1 %llu, block2 %llu, frag3 %llu,"
+ " frag4 %llu\n",
+ (unsigned long long)frag1, (unsigned long long)frag2,
+ (unsigned long long)block1, (unsigned long long)block2,
+ (unsigned long long)frag3, (unsigned long long)frag4);
if (frag1 >= frag2)
goto next1;
@@ -105,8 +108,8 @@ static int ufs_trunc_direct (struct inode * inode)
/*
* Free first free fragments
*/
- p = ufsi->i_u1.i_data + ufs_fragstoblks (frag1);
- tmp = fs32_to_cpu(sb, *p);
+ p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag1));
+ tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp )
ufs_panic (sb, "ufs_trunc_direct", "internal error");
frag2 -= frag1;
@@ -121,12 +124,11 @@ next1:
* Free whole blocks
*/
for (i = block1 ; i < block2; i++) {
- p = ufsi->i_u1.i_data + i;
- tmp = fs32_to_cpu(sb, *p);
+ p = ufs_get_direct_data_ptr(uspi, ufsi, i);
+ tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp)
continue;
-
- *p = 0;
+ ufs_data_ptr_clear(uspi, p);
if (free_count == 0) {
frag_to_free = tmp;
@@ -150,13 +152,12 @@ next1:
/*
* Free last free fragments
*/
- p = ufsi->i_u1.i_data + ufs_fragstoblks (frag3);
- tmp = fs32_to_cpu(sb, *p);
+ p = ufs_get_direct_data_ptr(uspi, ufsi, ufs_fragstoblks(frag3));
+ tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp )
ufs_panic(sb, "ufs_truncate_direct", "internal error");
frag4 = ufs_fragnum (frag4);
-
- *p = 0;
+ ufs_data_ptr_clear(uspi, p);
ufs_free_fragments (inode, tmp, frag4);
mark_inode_dirty(inode);
@@ -167,17 +168,20 @@ next1:
}
-static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
+static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
struct ufs_buffer_head * ind_ubh;
- __fs32 * ind;
- unsigned indirect_block, i, tmp;
- unsigned frag_to_free, free_count;
+ void *ind;
+ u64 tmp, indirect_block, i, frag_to_free;
+ unsigned free_count;
int retry;
- UFSD("ENTER\n");
+ UFSD("ENTER: ino %lu, offset %llu, p: %p\n",
+ inode->i_ino, (unsigned long long)offset, p);
+
+ BUG_ON(!p);
sb = inode->i_sb;
uspi = UFS_SB(sb)->s_uspi;
@@ -186,27 +190,27 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
free_count = 0;
retry = 0;
- tmp = fs32_to_cpu(sb, *p);
+ tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp)
return 0;
ind_ubh = ubh_bread(sb, tmp, uspi->s_bsize);
- if (tmp != fs32_to_cpu(sb, *p)) {
+ if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
ubh_brelse (ind_ubh);
return 1;
}
if (!ind_ubh) {
- *p = 0;
+ ufs_data_ptr_clear(uspi, p);
return 0;
}
indirect_block = (DIRECT_BLOCK > offset) ? (DIRECT_BLOCK - offset) : 0;
for (i = indirect_block; i < uspi->s_apb; i++) {
- ind = ubh_get_addr32 (ind_ubh, i);
- tmp = fs32_to_cpu(sb, *ind);
+ ind = ubh_get_data_ptr(uspi, ind_ubh, i);
+ tmp = ufs_data_ptr_to_cpu(sb, ind);
if (!tmp)
continue;
- *ind = 0;
+ ufs_data_ptr_clear(uspi, ind);
ubh_mark_buffer_dirty(ind_ubh);
if (free_count == 0) {
frag_to_free = tmp;
@@ -226,11 +230,12 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
ufs_free_blocks (inode, frag_to_free, free_count);
}
for (i = 0; i < uspi->s_apb; i++)
- if (*ubh_get_addr32(ind_ubh,i))
+ if (!ufs_is_data_ptr_zero(uspi,
+ ubh_get_data_ptr(uspi, ind_ubh, i)))
break;
if (i >= uspi->s_apb) {
- tmp = fs32_to_cpu(sb, *p);
- *p = 0;
+ tmp = ufs_data_ptr_to_cpu(sb, p);
+ ufs_data_ptr_clear(uspi, p);
ufs_free_blocks (inode, tmp, uspi->s_fpb);
mark_inode_dirty(inode);
@@ -248,13 +253,13 @@ static int ufs_trunc_indirect (struct inode * inode, unsigned offset, __fs32 *p)
return retry;
}
-static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
+static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
{
struct super_block * sb;
struct ufs_sb_private_info * uspi;
- struct ufs_buffer_head * dind_bh;
- unsigned i, tmp, dindirect_block;
- __fs32 * dind;
+ struct ufs_buffer_head *dind_bh;
+ u64 i, tmp, dindirect_block;
+ void *dind;
int retry = 0;
UFSD("ENTER\n");
@@ -266,22 +271,22 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
? ((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0;
retry = 0;
- tmp = fs32_to_cpu(sb, *p);
+ tmp = ufs_data_ptr_to_cpu(sb, p);
if (!tmp)
return 0;
dind_bh = ubh_bread(sb, tmp, uspi->s_bsize);
- if (tmp != fs32_to_cpu(sb, *p)) {
+ if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
ubh_brelse (dind_bh);
return 1;
}
if (!dind_bh) {
- *p = 0;
+ ufs_data_ptr_clear(uspi, p);
return 0;
}
for (i = dindirect_block ; i < uspi->s_apb ; i++) {
- dind = ubh_get_addr32 (dind_bh, i);
- tmp = fs32_to_cpu(sb, *dind);
+ dind = ubh_get_data_ptr(uspi, dind_bh, i);
+ tmp = ufs_data_ptr_to_cpu(sb, dind);
if (!tmp)
continue;
retry |= ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind);
@@ -289,11 +294,12 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
}
for (i = 0; i < uspi->s_apb; i++)
- if (*ubh_get_addr32 (dind_bh, i))
+ if (!ufs_is_data_ptr_zero(uspi,
+ ubh_get_data_ptr(uspi, dind_bh, i)))
break;
if (i >= uspi->s_apb) {
- tmp = fs32_to_cpu(sb, *p);
- *p = 0;
+ tmp = ufs_data_ptr_to_cpu(sb, p);
+ ufs_data_ptr_clear(uspi, p);
ufs_free_blocks(inode, tmp, uspi->s_fpb);
mark_inode_dirty(inode);
@@ -311,34 +317,33 @@ static int ufs_trunc_dindirect (struct inode *inode, unsigned offset, __fs32 *p)
return retry;
}
-static int ufs_trunc_tindirect (struct inode * inode)
+static int ufs_trunc_tindirect(struct inode *inode)
{
+ struct super_block *sb = inode->i_sb;
+ struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
struct ufs_inode_info *ufsi = UFS_I(inode);
- struct super_block * sb;
- struct ufs_sb_private_info * uspi;
struct ufs_buffer_head * tind_bh;
- unsigned tindirect_block, tmp, i;
- __fs32 * tind, * p;
+ u64 tindirect_block, tmp, i;
+ void *tind, *p;
int retry;
UFSD("ENTER\n");
- sb = inode->i_sb;
- uspi = UFS_SB(sb)->s_uspi;
retry = 0;
tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb))
? ((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0;
- p = ufsi->i_u1.i_data + UFS_TIND_BLOCK;
- if (!(tmp = fs32_to_cpu(sb, *p)))
+
+ p = ufs_get_direct_data_ptr(uspi, ufsi, UFS_TIND_BLOCK);
+ if (!(tmp = ufs_data_ptr_to_cpu(sb, p)))
return 0;
tind_bh = ubh_bread (sb, tmp, uspi->s_bsize);
- if (tmp != fs32_to_cpu(sb, *p)) {
+ if (tmp != ufs_data_ptr_to_cpu(sb, p)) {
ubh_brelse (tind_bh);
return 1;
}
if (!tind_bh) {
- *p = 0;
+ ufs_data_ptr_clear(uspi, p);
return 0;
}
@@ -349,11 +354,12 @@ static int ufs_trunc_tindirect (struct inode * inode)
ubh_mark_buffer_dirty(tind_bh);
}
for (i = 0; i < uspi->s_apb; i++)
- if (*ubh_get_addr32 (tind_bh, i))
+ if (!ufs_is_data_ptr_zero(uspi,
+ ubh_get_data_ptr(uspi, tind_bh, i)))
break;
if (i >= uspi->s_apb) {
- tmp = fs32_to_cpu(sb, *p);
- *p = 0;
+ tmp = ufs_data_ptr_to_cpu(sb, p);
+ ufs_data_ptr_clear(uspi, p);
ufs_free_blocks(inode, tmp, uspi->s_fpb);
mark_inode_dirty(inode);
@@ -375,7 +381,8 @@ static int ufs_alloc_lastblock(struct inode *inode)
int err = 0;
struct address_space *mapping = inode->i_mapping;
struct ufs_sb_private_info *uspi = UFS_SB(inode->i_sb)->s_uspi;
- unsigned lastfrag, i, end;
+ unsigned i, end;
+ sector_t lastfrag;
struct page *lastpage;
struct buffer_head *bh;
@@ -430,7 +437,9 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
int retry, err = 0;
- UFSD("ENTER\n");
+ UFSD("ENTER: ino %lu, i_size: %llu, old_i_size: %llu\n",
+ inode->i_ino, (unsigned long long)i_size_read(inode),
+ (unsigned long long)old_i_size);
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
@@ -450,10 +459,12 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size)
lock_kernel();
while (1) {
retry = ufs_trunc_direct(inode);
- retry |= ufs_trunc_indirect (inode, UFS_IND_BLOCK,
- (__fs32 *) &ufsi->i_u1.i_data[UFS_IND_BLOCK]);
- retry |= ufs_trunc_dindirect (inode, UFS_IND_BLOCK + uspi->s_apb,
- (__fs32 *) &ufsi->i_u1.i_data[UFS_DIND_BLOCK]);
+ retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK,
+ ufs_get_direct_data_ptr(uspi, ufsi,
+ UFS_IND_BLOCK));
+ retry |= ufs_trunc_dindirect(inode, UFS_IND_BLOCK + uspi->s_apb,
+ ufs_get_direct_data_ptr(uspi, ufsi,
+ UFS_DIND_BLOCK));
retry |= ufs_trunc_tindirect (inode);
if (!retry)
break;
@@ -502,6 +513,6 @@ static int ufs_setattr(struct dentry *dentry, struct iattr *attr)
return inode_setattr(inode, attr);
}
-struct inode_operations ufs_file_inode_operations = {
+const struct inode_operations ufs_file_inode_operations = {
.setattr = ufs_setattr,
};
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 7dd12bb1d62b..06d344839c42 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -305,8 +305,22 @@ static inline void *get_usb_offset(struct ufs_sb_private_info *uspi,
(((__fs32*)((ubh)->bh[(begin) >> (uspi->s_fshift-2)]->b_data)) + \
((begin) & ((uspi->s_fsize>>2) - 1)))
+#define ubh_get_addr64(ubh,begin) \
+ (((__fs64*)((ubh)->bh[(begin) >> (uspi->s_fshift-3)]->b_data)) + \
+ ((begin) & ((uspi->s_fsize>>3) - 1)))
+
#define ubh_get_addr ubh_get_addr8
+static inline void *ubh_get_data_ptr(struct ufs_sb_private_info *uspi,
+ struct ufs_buffer_head *ubh,
+ u64 blk)
+{
+ if (uspi->fs_magic == UFS2_MAGIC)
+ return ubh_get_addr64(ubh, blk);
+ else
+ return ubh_get_addr32(ubh, blk);
+}
+
#define ubh_blkmap(ubh,begin,bit) \
((*ubh_get_addr(ubh, (begin) + ((bit) >> 3)) >> ((bit) & 7)) & (0xff >> (UFS_MAXFRAG - uspi->s_fpb)))
@@ -507,3 +521,46 @@ static inline void ufs_fragacct (struct super_block * sb, unsigned blockmap,
if (fragsize > 0 && fragsize < uspi->s_fpb)
fs32_add(sb, &fraglist[fragsize], cnt);
}
+
+static inline void *ufs_get_direct_data_ptr(struct ufs_sb_private_info *uspi,
+ struct ufs_inode_info *ufsi,
+ unsigned blk)
+{
+ BUG_ON(blk > UFS_TIND_BLOCK);
+ return uspi->fs_magic == UFS2_MAGIC ?
+ (void *)&ufsi->i_u1.u2_i_data[blk] :
+ (void *)&ufsi->i_u1.i_data[blk];
+}
+
+static inline u64 ufs_data_ptr_to_cpu(struct super_block *sb, void *p)
+{
+ return UFS_SB(sb)->s_uspi->fs_magic == UFS2_MAGIC ?
+ fs64_to_cpu(sb, *(__fs64 *)p) :
+ fs32_to_cpu(sb, *(__fs32 *)p);
+}
+
+static inline void ufs_cpu_to_data_ptr(struct super_block *sb, void *p, u64 val)
+{
+ if (UFS_SB(sb)->s_uspi->fs_magic == UFS2_MAGIC)
+ *(__fs64 *)p = cpu_to_fs64(sb, val);
+ else
+ *(__fs32 *)p = cpu_to_fs32(sb, val);
+}
+
+static inline void ufs_data_ptr_clear(struct ufs_sb_private_info *uspi,
+ void *p)
+{
+ if (uspi->fs_magic == UFS2_MAGIC)
+ *(__fs64 *)p = 0;
+ else
+ *(__fs32 *)p = 0;
+}
+
+static inline int ufs_is_data_ptr_zero(struct ufs_sb_private_info *uspi,
+ void *p)
+{
+ if (uspi->fs_magic == UFS2_MAGIC)
+ return *(__fs64 *)p == 0;
+ else
+ return *(__fs32 *)p == 0;
+}
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index 0afd745a37cd..c28add2fbe95 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -996,7 +996,7 @@ error_inode:
goto out;
}
-static struct inode_operations vfat_dir_inode_operations = {
+static const struct inode_operations vfat_dir_inode_operations = {
.create = vfat_create,
.lookup = vfat_lookup,
.unlink = vfat_unlink,
diff --git a/fs/xattr_acl.c b/fs/xattr_acl.c
index 789a2559bd54..c6ad7c7e3ee9 100644
--- a/fs/xattr_acl.c
+++ b/fs/xattr_acl.c
@@ -6,7 +6,6 @@
*/
#include <linux/module.h>
-#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/posix_acl_xattr.h>
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 004baf600611..ed2b16dff914 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -15,7 +15,6 @@
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index 32e1ce0f04c9..af168a1a98c1 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -31,15 +31,13 @@ typedef struct {
do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
#define mrfree(mrp) do { } while (0)
-#define mraccess(mrp) mraccessf(mrp, 0)
-#define mrupdate(mrp) mrupdatef(mrp, 0)
-static inline void mraccessf(mrlock_t *mrp, int flags)
+static inline void mraccess(mrlock_t *mrp)
{
down_read(&mrp->mr_lock);
}
-static inline void mrupdatef(mrlock_t *mrp, int flags)
+static inline void mrupdate(mrlock_t *mrp)
{
down_write(&mrp->mr_lock);
mrp->mr_writer = 1;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 7b54461695e2..143ffc851c9d 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -56,8 +56,6 @@ xfs_count_page_state(
do {
if (buffer_uptodate(bh) && !buffer_mapped(bh))
(*unmapped) = 1;
- else if (buffer_unwritten(bh) && !buffer_delay(bh))
- clear_buffer_unwritten(bh);
else if (buffer_unwritten(bh))
(*unwritten) = 1;
else if (buffer_delay(bh))
@@ -249,7 +247,7 @@ xfs_map_blocks(
return -error;
}
-STATIC inline int
+STATIC_INLINE int
xfs_iomap_valid(
xfs_iomap_t *iomapp,
loff_t offset)
@@ -1272,7 +1270,6 @@ __xfs_get_blocks(
if (direct)
bh_result->b_private = inode;
set_buffer_unwritten(bh_result);
- set_buffer_delay(bh_result);
}
}
@@ -1283,13 +1280,18 @@ __xfs_get_blocks(
bh_result->b_bdev = iomap.iomap_target->bt_bdev;
/*
- * If we previously allocated a block out beyond eof and we are
- * now coming back to use it then we will need to flag it as new
- * even if it has a disk address.
+ * If we previously allocated a block out beyond eof and we are now
+ * coming back to use it then we will need to flag it as new even if it
+ * has a disk address.
+ *
+ * With sub-block writes into unwritten extents we also need to mark
+ * the buffer as new so that the unwritten parts of the buffer get
+ * correctly zeroed.
*/
if (create &&
((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
- (offset >= i_size_read(inode)) || (iomap.iomap_flags & IOMAP_NEW)))
+ (offset >= i_size_read(inode)) ||
+ (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
set_buffer_new(bh_result);
if (iomap.iomap_flags & IOMAP_DELAY) {
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 4fb01ffdfd1a..e2bea6a661f0 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -34,13 +34,13 @@
#include <linux/backing-dev.h>
#include <linux/freezer.h>
-STATIC kmem_zone_t *xfs_buf_zone;
-STATIC kmem_shaker_t xfs_buf_shake;
+static kmem_zone_t *xfs_buf_zone;
+static kmem_shaker_t xfs_buf_shake;
STATIC int xfsbufd(void *);
STATIC int xfsbufd_wakeup(int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-STATIC struct workqueue_struct *xfslogd_workqueue;
+static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
#ifdef XFS_BUF_TRACE
@@ -139,7 +139,7 @@ page_region_mask(
return mask;
}
-STATIC inline void
+STATIC_INLINE void
set_page_region(
struct page *page,
size_t offset,
@@ -151,7 +151,7 @@ set_page_region(
SetPageUptodate(page);
}
-STATIC inline int
+STATIC_INLINE int
test_page_region(
struct page *page,
size_t offset,
@@ -171,9 +171,9 @@ typedef struct a_list {
struct a_list *next;
} a_list_t;
-STATIC a_list_t *as_free_head;
-STATIC int as_list_len;
-STATIC DEFINE_SPINLOCK(as_lock);
+static a_list_t *as_free_head;
+static int as_list_len;
+static DEFINE_SPINLOCK(as_lock);
/*
* Try to batch vunmaps because they are costly.
@@ -1085,7 +1085,7 @@ xfs_buf_iostart(
return status;
}
-STATIC __inline__ int
+STATIC_INLINE int
_xfs_buf_iolocked(
xfs_buf_t *bp)
{
@@ -1095,7 +1095,7 @@ _xfs_buf_iolocked(
return 0;
}
-STATIC __inline__ void
+STATIC_INLINE void
_xfs_buf_ioend(
xfs_buf_t *bp,
int schedule)
@@ -1426,8 +1426,8 @@ xfs_free_bufhash(
/*
* buftarg list for delwrite queue processing
*/
-STATIC LIST_HEAD(xfs_buftarg_list);
-STATIC DEFINE_SPINLOCK(xfs_buftarg_lock);
+LIST_HEAD(xfs_buftarg_list);
+static DEFINE_SPINLOCK(xfs_buftarg_lock);
STATIC void
xfs_register_buftarg(
@@ -1679,21 +1679,60 @@ xfsbufd_wakeup(
return 0;
}
+/*
+ * Move as many buffers as specified to the supplied list,
+ * indicating if we skipped any buffers to prevent deadlocks.
+ */
+STATIC int
+xfs_buf_delwri_split(
+ xfs_buftarg_t *target,
+ struct list_head *list,
+ unsigned long age)
+{
+ xfs_buf_t *bp, *n;
+ struct list_head *dwq = &target->bt_delwrite_queue;
+ spinlock_t *dwlk = &target->bt_delwrite_lock;
+ int skipped = 0;
+ int force;
+
+ force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+ INIT_LIST_HEAD(list);
+ spin_lock(dwlk);
+ list_for_each_entry_safe(bp, n, dwq, b_list) {
+ XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
+ ASSERT(bp->b_flags & XBF_DELWRI);
+
+ if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
+ if (!force &&
+ time_before(jiffies, bp->b_queuetime + age)) {
+ xfs_buf_unlock(bp);
+ break;
+ }
+
+ bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
+ _XBF_RUN_QUEUES);
+ bp->b_flags |= XBF_WRITE;
+ list_move_tail(&bp->b_list, list);
+ } else
+ skipped++;
+ }
+ spin_unlock(dwlk);
+
+ return skipped;
+
+}
+
STATIC int
xfsbufd(
- void *data)
+ void *data)
{
- struct list_head tmp;
- unsigned long age;
- xfs_buftarg_t *target = (xfs_buftarg_t *)data;
- xfs_buf_t *bp, *n;
- struct list_head *dwq = &target->bt_delwrite_queue;
- spinlock_t *dwlk = &target->bt_delwrite_lock;
- int count;
+ struct list_head tmp;
+ xfs_buftarg_t *target = (xfs_buftarg_t *)data;
+ int count;
+ xfs_buf_t *bp;
current->flags |= PF_MEMALLOC;
- INIT_LIST_HEAD(&tmp);
do {
if (unlikely(freezing(current))) {
set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
@@ -1705,37 +1744,17 @@ xfsbufd(
schedule_timeout_interruptible(
xfs_buf_timer_centisecs * msecs_to_jiffies(10));
- count = 0;
- age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
- spin_lock(dwlk);
- list_for_each_entry_safe(bp, n, dwq, b_list) {
- XB_TRACE(bp, "walkq1", (long)xfs_buf_ispin(bp));
- ASSERT(bp->b_flags & XBF_DELWRI);
-
- if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
- if (!test_bit(XBT_FORCE_FLUSH,
- &target->bt_flags) &&
- time_before(jiffies,
- bp->b_queuetime + age)) {
- xfs_buf_unlock(bp);
- break;
- }
-
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
- _XBF_RUN_QUEUES);
- bp->b_flags |= XBF_WRITE;
- list_move_tail(&bp->b_list, &tmp);
- count++;
- }
- }
- spin_unlock(dwlk);
+ xfs_buf_delwri_split(target, &tmp,
+ xfs_buf_age_centisecs * msecs_to_jiffies(10));
+ count = 0;
while (!list_empty(&tmp)) {
bp = list_entry(tmp.next, xfs_buf_t, b_list);
ASSERT(target == bp->b_target);
list_del_init(&bp->b_list);
xfs_buf_iostrategy(bp);
+ count++;
}
if (as_list_len > 0)
@@ -1743,7 +1762,6 @@ xfsbufd(
if (count)
blk_run_address_space(target->bt_mapping);
- clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
} while (!kthread_should_stop());
return 0;
@@ -1756,40 +1774,24 @@ xfsbufd(
*/
int
xfs_flush_buftarg(
- xfs_buftarg_t *target,
- int wait)
+ xfs_buftarg_t *target,
+ int wait)
{
- struct list_head tmp;
- xfs_buf_t *bp, *n;
- int pincount = 0;
- struct list_head *dwq = &target->bt_delwrite_queue;
- spinlock_t *dwlk = &target->bt_delwrite_lock;
+ struct list_head tmp;
+ xfs_buf_t *bp, *n;
+ int pincount = 0;
xfs_buf_runall_queues(xfsdatad_workqueue);
xfs_buf_runall_queues(xfslogd_workqueue);
- INIT_LIST_HEAD(&tmp);
- spin_lock(dwlk);
- list_for_each_entry_safe(bp, n, dwq, b_list) {
- ASSERT(bp->b_target == target);
- ASSERT(bp->b_flags & (XBF_DELWRI | _XBF_DELWRI_Q));
- XB_TRACE(bp, "walkq2", (long)xfs_buf_ispin(bp));
- if (xfs_buf_ispin(bp)) {
- pincount++;
- continue;
- }
-
- list_move_tail(&bp->b_list, &tmp);
- }
- spin_unlock(dwlk);
+ set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
+ pincount = xfs_buf_delwri_split(target, &tmp, 0);
/*
* Dropped the delayed write list lock, now walk the temporary list
*/
list_for_each_entry_safe(bp, n, &tmp, b_list) {
- xfs_buf_lock(bp);
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|_XBF_RUN_QUEUES);
- bp->b_flags |= XBF_WRITE;
+ ASSERT(target == bp->b_target);
if (wait)
bp->b_flags &= ~XBF_ASYNC;
else
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 9dd235cb0107..9e8ef8fef39f 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -69,8 +69,8 @@ typedef enum {
} xfs_buf_flags_t;
typedef enum {
- XBT_FORCE_SLEEP = (0 << 1),
- XBT_FORCE_FLUSH = (1 << 1),
+ XBT_FORCE_SLEEP = 0,
+ XBT_FORCE_FLUSH = 1,
} xfs_buftarg_flags_t;
typedef struct xfs_bufhash {
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 5fb75d9151f2..e3a5fedac1ba 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -24,7 +24,7 @@
#include "xfs_mount.h"
#include "xfs_export.h"
-STATIC struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, };
+static struct dentry dotdot = { .d_name.name = "..", .d_name.len = 2, };
/*
* XFS encodes and decodes the fileid portion of NFS filehandles
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index d26f5cd2ba70..cb51dc961355 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -46,7 +46,7 @@ static struct vm_operations_struct xfs_file_vm_ops;
static struct vm_operations_struct xfs_dmapi_file_vm_ops;
#endif
-STATIC inline ssize_t
+STATIC_INLINE ssize_t
__xfs_file_read(
struct kiocb *iocb,
const struct iovec *iov,
@@ -84,7 +84,7 @@ xfs_file_aio_read_invis(
return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos);
}
-STATIC inline ssize_t
+STATIC_INLINE ssize_t
__xfs_file_write(
struct kiocb *iocb,
const struct iovec *iov,
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index f011c9cd0d62..ff5c41ff8d40 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -41,8 +41,6 @@
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_buf_item.h"
@@ -355,7 +353,6 @@ STATIC int
xfs_readlink_by_handle(
xfs_mount_t *mp,
void __user *arg,
- struct file *parfilp,
struct inode *parinode)
{
int error;
@@ -388,7 +385,7 @@ xfs_readlink_by_handle(
aiov.iov_len = olen;
aiov.iov_base = hreq.ohandle;
- auio.uio_iov = &aiov;
+ auio.uio_iov = (struct kvec *)&aiov;
auio.uio_iovcnt = 1;
auio.uio_offset = 0;
auio.uio_segflg = UIO_USERSPACE;
@@ -406,7 +403,6 @@ STATIC int
xfs_fssetdm_by_handle(
xfs_mount_t *mp,
void __user *arg,
- struct file *parfilp,
struct inode *parinode)
{
int error;
@@ -448,7 +444,6 @@ STATIC int
xfs_attrlist_by_handle(
xfs_mount_t *mp,
void __user *arg,
- struct file *parfilp,
struct inode *parinode)
{
int error;
@@ -569,7 +564,6 @@ STATIC int
xfs_attrmulti_by_handle(
xfs_mount_t *mp,
void __user *arg,
- struct file *parfilp,
struct inode *parinode)
{
int error;
@@ -689,7 +683,6 @@ xfs_ioc_xattr(
STATIC int
xfs_ioc_getbmap(
bhv_desc_t *bdp,
- struct file *filp,
int flags,
unsigned int cmd,
void __user *arg);
@@ -788,7 +781,7 @@ xfs_ioctl(
case XFS_IOC_GETBMAP:
case XFS_IOC_GETBMAPA:
- return xfs_ioc_getbmap(bdp, filp, ioflags, cmd, arg);
+ return xfs_ioc_getbmap(bdp, ioflags, cmd, arg);
case XFS_IOC_GETBMAPX:
return xfs_ioc_getbmapx(bdp, arg);
@@ -802,16 +795,16 @@ xfs_ioctl(
return xfs_open_by_handle(mp, arg, filp, inode);
case XFS_IOC_FSSETDM_BY_HANDLE:
- return xfs_fssetdm_by_handle(mp, arg, filp, inode);
+ return xfs_fssetdm_by_handle(mp, arg, inode);
case XFS_IOC_READLINK_BY_HANDLE:
- return xfs_readlink_by_handle(mp, arg, filp, inode);
+ return xfs_readlink_by_handle(mp, arg, inode);
case XFS_IOC_ATTRLIST_BY_HANDLE:
- return xfs_attrlist_by_handle(mp, arg, filp, inode);
+ return xfs_attrlist_by_handle(mp, arg, inode);
case XFS_IOC_ATTRMULTI_BY_HANDLE:
- return xfs_attrmulti_by_handle(mp, arg, filp, inode);
+ return xfs_attrmulti_by_handle(mp, arg, inode);
case XFS_IOC_SWAPEXT: {
error = xfs_swapext((struct xfs_swapext __user *)arg);
@@ -1095,11 +1088,6 @@ xfs_ioc_fsgeometry(
/*
* Linux extended inode flags interface.
*/
-#define LINUX_XFLAG_SYNC 0x00000008 /* Synchronous updates */
-#define LINUX_XFLAG_IMMUTABLE 0x00000010 /* Immutable file */
-#define LINUX_XFLAG_APPEND 0x00000020 /* writes to file may only append */
-#define LINUX_XFLAG_NODUMP 0x00000040 /* do not dump file */
-#define LINUX_XFLAG_NOATIME 0x00000080 /* do not update atime */
STATIC unsigned int
xfs_merge_ioc_xflags(
@@ -1108,23 +1096,23 @@ xfs_merge_ioc_xflags(
{
unsigned int xflags = start;
- if (flags & LINUX_XFLAG_IMMUTABLE)
+ if (flags & FS_IMMUTABLE_FL)
xflags |= XFS_XFLAG_IMMUTABLE;
else
xflags &= ~XFS_XFLAG_IMMUTABLE;
- if (flags & LINUX_XFLAG_APPEND)
+ if (flags & FS_APPEND_FL)
xflags |= XFS_XFLAG_APPEND;
else
xflags &= ~XFS_XFLAG_APPEND;
- if (flags & LINUX_XFLAG_SYNC)
+ if (flags & FS_SYNC_FL)
xflags |= XFS_XFLAG_SYNC;
else
xflags &= ~XFS_XFLAG_SYNC;
- if (flags & LINUX_XFLAG_NOATIME)
+ if (flags & FS_NOATIME_FL)
xflags |= XFS_XFLAG_NOATIME;
else
xflags &= ~XFS_XFLAG_NOATIME;
- if (flags & LINUX_XFLAG_NODUMP)
+ if (flags & FS_NODUMP_FL)
xflags |= XFS_XFLAG_NODUMP;
else
xflags &= ~XFS_XFLAG_NODUMP;
@@ -1139,15 +1127,15 @@ xfs_di2lxflags(
unsigned int flags = 0;
if (di_flags & XFS_DIFLAG_IMMUTABLE)
- flags |= LINUX_XFLAG_IMMUTABLE;
+ flags |= FS_IMMUTABLE_FL;
if (di_flags & XFS_DIFLAG_APPEND)
- flags |= LINUX_XFLAG_APPEND;
+ flags |= FS_APPEND_FL;
if (di_flags & XFS_DIFLAG_SYNC)
- flags |= LINUX_XFLAG_SYNC;
+ flags |= FS_SYNC_FL;
if (di_flags & XFS_DIFLAG_NOATIME)
- flags |= LINUX_XFLAG_NOATIME;
+ flags |= FS_NOATIME_FL;
if (di_flags & XFS_DIFLAG_NODUMP)
- flags |= LINUX_XFLAG_NODUMP;
+ flags |= FS_NODUMP_FL;
return flags;
}
@@ -1247,9 +1235,9 @@ xfs_ioc_xattr(
break;
}
- if (flags & ~(LINUX_XFLAG_IMMUTABLE | LINUX_XFLAG_APPEND | \
- LINUX_XFLAG_NOATIME | LINUX_XFLAG_NODUMP | \
- LINUX_XFLAG_SYNC)) {
+ if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
+ FS_NOATIME_FL | FS_NODUMP_FL | \
+ FS_SYNC_FL)) {
error = -EOPNOTSUPP;
break;
}
@@ -1281,7 +1269,6 @@ xfs_ioc_xattr(
STATIC int
xfs_ioc_getbmap(
bhv_desc_t *bdp,
- struct file *filp,
int ioflags,
unsigned int cmd,
void __user *arg)
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 3ba814ae3bba..0b5fa124bef2 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -43,8 +43,6 @@
#include "xfs_itable.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -250,13 +248,13 @@ xfs_init_security(
*
* XXX(hch): nfsd is broken, better fix it instead.
*/
-STATIC inline int
+STATIC_INLINE int
xfs_has_fs_struct(struct task_struct *task)
{
return (task->fs != init_task.fs);
}
-STATIC inline void
+STATIC void
xfs_cleanup_inode(
bhv_vnode_t *dvp,
bhv_vnode_t *vp,
@@ -815,7 +813,7 @@ xfs_vn_removexattr(
}
-struct inode_operations xfs_inode_operations = {
+const struct inode_operations xfs_inode_operations = {
.permission = xfs_vn_permission,
.truncate = xfs_vn_truncate,
.getattr = xfs_vn_getattr,
@@ -826,7 +824,7 @@ struct inode_operations xfs_inode_operations = {
.removexattr = xfs_vn_removexattr,
};
-struct inode_operations xfs_dir_inode_operations = {
+const struct inode_operations xfs_dir_inode_operations = {
.create = xfs_vn_create,
.lookup = xfs_vn_lookup,
.link = xfs_vn_link,
@@ -845,7 +843,7 @@ struct inode_operations xfs_dir_inode_operations = {
.removexattr = xfs_vn_removexattr,
};
-struct inode_operations xfs_symlink_inode_operations = {
+const struct inode_operations xfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = xfs_vn_follow_link,
.put_link = xfs_vn_put_link,
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index ad6173da5678..95a69398fce0 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -18,9 +18,9 @@
#ifndef __XFS_IOPS_H__
#define __XFS_IOPS_H__
-extern struct inode_operations xfs_inode_operations;
-extern struct inode_operations xfs_dir_inode_operations;
-extern struct inode_operations xfs_symlink_inode_operations;
+extern const struct inode_operations xfs_inode_operations;
+extern const struct inode_operations xfs_dir_inode_operations;
+extern const struct inode_operations xfs_symlink_inode_operations;
extern const struct file_operations xfs_file_operations;
extern const struct file_operations xfs_dir_file_operations;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 2b0e0018738a..715adad7dd4d 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -109,16 +109,6 @@
#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
#endif
-/*
- * State flag for unwritten extent buffers.
- *
- * We need to be able to distinguish between these and delayed
- * allocate buffers within XFS. The generic IO path code does
- * not need to distinguish - we use the BH_Delay flag for both
- * delalloc and these ondisk-uninitialised buffers.
- */
-BUFFER_FNS(PrivateStart, unwritten);
-
#define restricted_chown xfs_params.restrict_chown.val
#define irix_sgid_inherit xfs_params.sgid_inherit.val
#define irix_symlink_mode xfs_params.symlink_mode.val
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 65e79b471d49..ff8d64eba9f8 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -43,8 +43,6 @@
#include "xfs_itable.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
@@ -134,13 +132,11 @@ STATIC int
xfs_iozero(
struct inode *ip, /* inode */
loff_t pos, /* offset in file */
- size_t count, /* size of data to zero */
- loff_t end_size) /* max file size to set */
+ size_t count) /* size of data to zero */
{
unsigned bytes;
struct page *page;
struct address_space *mapping;
- char *kaddr;
int status;
mapping = ip->i_mapping;
@@ -158,26 +154,21 @@ xfs_iozero(
if (!page)
break;
- kaddr = kmap(page);
status = mapping->a_ops->prepare_write(NULL, page, offset,
offset + bytes);
- if (status) {
+ if (status)
goto unlock;
- }
- memset((void *) (kaddr + offset), 0, bytes);
- flush_dcache_page(page);
+ memclear_highpage_flush(page, offset, bytes);
+
status = mapping->a_ops->commit_write(NULL, page, offset,
offset + bytes);
if (!status) {
pos += bytes;
count -= bytes;
- if (pos > i_size_read(ip))
- i_size_write(ip, pos < end_size ? pos : end_size);
}
unlock:
- kunmap(page);
unlock_page(page);
page_cache_release(page);
if (status)
@@ -449,8 +440,8 @@ STATIC int /* error (positive) */
xfs_zero_last_block(
struct inode *ip,
xfs_iocore_t *io,
- xfs_fsize_t isize,
- xfs_fsize_t end_size)
+ xfs_fsize_t offset,
+ xfs_fsize_t isize)
{
xfs_fileoff_t last_fsb;
xfs_mount_t *mp = io->io_mount;
@@ -459,7 +450,6 @@ xfs_zero_last_block(
int zero_len;
int error = 0;
xfs_bmbt_irec_t imap;
- loff_t loff;
ASSERT(ismrlocked(io->io_lock, MR_UPDATE) != 0);
@@ -494,9 +484,10 @@ xfs_zero_last_block(
*/
XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
- loff = XFS_FSB_TO_B(mp, last_fsb);
zero_len = mp->m_sb.sb_blocksize - zero_offset;
- error = xfs_iozero(ip, loff + zero_offset, zero_len, end_size);
+ if (isize + zero_len > offset)
+ zero_len = offset - isize;
+ error = xfs_iozero(ip, isize, zero_len);
XFS_ILOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
ASSERT(error >= 0);
@@ -519,14 +510,15 @@ xfs_zero_eof(
bhv_vnode_t *vp,
xfs_iocore_t *io,
xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize, /* current inode size */
- xfs_fsize_t end_size) /* terminal inode size */
+ xfs_fsize_t isize) /* current inode size */
{
struct inode *ip = vn_to_inode(vp);
xfs_fileoff_t start_zero_fsb;
xfs_fileoff_t end_zero_fsb;
xfs_fileoff_t zero_count_fsb;
xfs_fileoff_t last_fsb;
+ xfs_fileoff_t zero_off;
+ xfs_fsize_t zero_len;
xfs_mount_t *mp = io->io_mount;
int nimaps;
int error = 0;
@@ -540,7 +532,7 @@ xfs_zero_eof(
* First handle zeroing the block on which isize resides.
* We only zero a part of that block so it is handled specially.
*/
- error = xfs_zero_last_block(ip, io, isize, end_size);
+ error = xfs_zero_last_block(ip, io, offset, isize);
if (error) {
ASSERT(ismrlocked(io->io_lock, MR_UPDATE));
ASSERT(ismrlocked(io->io_iolock, MR_UPDATE));
@@ -601,10 +593,13 @@ xfs_zero_eof(
*/
XFS_IUNLOCK(mp, io, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
- error = xfs_iozero(ip,
- XFS_FSB_TO_B(mp, start_zero_fsb),
- XFS_FSB_TO_B(mp, imap.br_blockcount),
- end_size);
+ zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
+ zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
+
+ if ((zero_off + zero_len) > offset)
+ zero_len = offset - zero_off;
+
+ error = xfs_iozero(ip, zero_off, zero_len);
if (error) {
goto out_lock;
}
@@ -783,8 +778,7 @@ start:
*/
if (pos > isize) {
- error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos,
- isize, pos + count);
+ error = xfs_zero_eof(BHV_TO_VNODE(bdp), io, pos, isize);
if (error) {
xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
goto out_unlock_mutex;
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index c77e62efb742..7ac51b1d2161 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -83,7 +83,7 @@ extern int xfs_bdstrat_cb(struct xfs_buf *);
extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
extern int xfs_zero_eof(struct bhv_vnode *, struct xfs_iocore *, xfs_off_t,
- xfs_fsize_t, xfs_fsize_t);
+ xfs_fsize_t);
extern ssize_t xfs_read(struct bhv_desc *, struct kiocb *,
const struct iovec *, unsigned int,
loff_t *, int, struct cred *);
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index b93265b7c79c..1a4103ca593c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -43,8 +43,6 @@
#include "xfs_itable.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -58,10 +56,10 @@
#include <linux/kthread.h>
#include <linux/freezer.h>
-STATIC struct quotactl_ops xfs_quotactl_operations;
-STATIC struct super_operations xfs_super_operations;
-STATIC kmem_zone_t *xfs_vnode_zone;
-STATIC kmem_zone_t *xfs_ioend_zone;
+static struct quotactl_ops xfs_quotactl_operations;
+static struct super_operations xfs_super_operations;
+static kmem_zone_t *xfs_vnode_zone;
+static kmem_zone_t *xfs_ioend_zone;
mempool_t *xfs_ioend_pool;
STATIC struct xfs_mount_args *
@@ -121,7 +119,7 @@ xfs_max_file_offset(
return (((__uint64_t)pagefactor) << bitshift) - 1;
}
-STATIC __inline__ void
+STATIC_INLINE void
xfs_set_inodeops(
struct inode *inode)
{
@@ -147,7 +145,7 @@ xfs_set_inodeops(
}
}
-STATIC __inline__ void
+STATIC_INLINE void
xfs_revalidate_inode(
xfs_mount_t *mp,
bhv_vnode_t *vp,
@@ -553,7 +551,6 @@ vfs_sync_worker(
error = bhv_vfs_sync(vfsp, SYNC_FSDATA | SYNC_BDFLUSH | \
SYNC_ATTR | SYNC_REFCACHE, NULL);
vfsp->vfs_sync_seq++;
- wmb();
wake_up(&vfsp->vfs_wait_single_sync_task);
}
@@ -659,9 +656,17 @@ xfs_fs_sync_super(
int error;
int flags;
- if (unlikely(sb->s_frozen == SB_FREEZE_WRITE))
- flags = SYNC_QUIESCE;
- else
+ if (unlikely(sb->s_frozen == SB_FREEZE_WRITE)) {
+ /*
+ * First stage of freeze - no more writers will make progress
+ * now we are here, so we flush delwri and delalloc buffers
+ * here, then wait for all I/O to complete. Data is frozen at
+ * that point. Metadata is not frozen, transactions can still
+ * occur here so don't bother flushing the buftarg (i.e.
+ * SYNC_QUIESCE) because it'll just get dirty again.
+ */
+ flags = SYNC_FSDATA | SYNC_DELWRI | SYNC_WAIT | SYNC_IOWAIT;
+ } else
flags = SYNC_FSDATA | (wait ? SYNC_WAIT : 0);
error = bhv_vfs_sync(vfsp, flags, NULL);
@@ -873,7 +878,7 @@ xfs_fs_get_sb(
mnt);
}
-STATIC struct super_operations xfs_super_operations = {
+static struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
.write_inode = xfs_fs_write_inode,
@@ -887,7 +892,7 @@ STATIC struct super_operations xfs_super_operations = {
.show_options = xfs_fs_show_options,
};
-STATIC struct quotactl_ops xfs_quotactl_operations = {
+static struct quotactl_ops xfs_quotactl_operations = {
.quota_sync = xfs_fs_quotasync,
.get_xstate = xfs_fs_getxstate,
.set_xstate = xfs_fs_setxstate,
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index af246532fbfb..cd6eaa44aa2b 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -54,102 +54,204 @@ xfs_stats_clear_proc_handler(
}
#endif /* CONFIG_PROC_FS */
-STATIC ctl_table xfs_table[] = {
- {XFS_RESTRICT_CHOWN, "restrict_chown", &xfs_params.restrict_chown.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.restrict_chown.min, &xfs_params.restrict_chown.max},
-
- {XFS_SGID_INHERIT, "irix_sgid_inherit", &xfs_params.sgid_inherit.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.sgid_inherit.min, &xfs_params.sgid_inherit.max},
-
- {XFS_SYMLINK_MODE, "irix_symlink_mode", &xfs_params.symlink_mode.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.symlink_mode.min, &xfs_params.symlink_mode.max},
-
- {XFS_PANIC_MASK, "panic_mask", &xfs_params.panic_mask.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.panic_mask.min, &xfs_params.panic_mask.max},
-
- {XFS_ERRLEVEL, "error_level", &xfs_params.error_level.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.error_level.min, &xfs_params.error_level.max},
-
- {XFS_SYNCD_TIMER, "xfssyncd_centisecs", &xfs_params.syncd_timer.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.syncd_timer.min, &xfs_params.syncd_timer.max},
-
- {XFS_INHERIT_SYNC, "inherit_sync", &xfs_params.inherit_sync.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.inherit_sync.min, &xfs_params.inherit_sync.max},
-
- {XFS_INHERIT_NODUMP, "inherit_nodump", &xfs_params.inherit_nodump.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.inherit_nodump.min, &xfs_params.inherit_nodump.max},
-
- {XFS_INHERIT_NOATIME, "inherit_noatime", &xfs_params.inherit_noatim.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.inherit_noatim.min, &xfs_params.inherit_noatim.max},
-
- {XFS_BUF_TIMER, "xfsbufd_centisecs", &xfs_params.xfs_buf_timer.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.xfs_buf_timer.min, &xfs_params.xfs_buf_timer.max},
-
- {XFS_BUF_AGE, "age_buffer_centisecs", &xfs_params.xfs_buf_age.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.xfs_buf_age.min, &xfs_params.xfs_buf_age.max},
-
- {XFS_INHERIT_NOSYM, "inherit_nosymlinks", &xfs_params.inherit_nosym.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.inherit_nosym.min, &xfs_params.inherit_nosym.max},
-
- {XFS_ROTORSTEP, "rotorstep", &xfs_params.rotorstep.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.rotorstep.min, &xfs_params.rotorstep.max},
-
- {XFS_INHERIT_NODFRG, "inherit_nodefrag", &xfs_params.inherit_nodfrg.val,
- sizeof(int), 0644, NULL, &proc_dointvec_minmax,
- &sysctl_intvec, NULL,
- &xfs_params.inherit_nodfrg.min, &xfs_params.inherit_nodfrg.max},
+static ctl_table xfs_table[] = {
+ {
+ .ctl_name = XFS_RESTRICT_CHOWN,
+ .procname = "restrict_chown",
+ .data = &xfs_params.restrict_chown.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.restrict_chown.min,
+ .extra2 = &xfs_params.restrict_chown.max
+ },
+ {
+ .ctl_name = XFS_SGID_INHERIT,
+ .procname = "irix_sgid_inherit",
+ .data = &xfs_params.sgid_inherit.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.sgid_inherit.min,
+ .extra2 = &xfs_params.sgid_inherit.max
+ },
+ {
+ .ctl_name = XFS_SYMLINK_MODE,
+ .procname = "irix_symlink_mode",
+ .data = &xfs_params.symlink_mode.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.symlink_mode.min,
+ .extra2 = &xfs_params.symlink_mode.max
+ },
+ {
+ .ctl_name = XFS_PANIC_MASK,
+ .procname = "panic_mask",
+ .data = &xfs_params.panic_mask.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.panic_mask.min,
+ .extra2 = &xfs_params.panic_mask.max
+ },
+ {
+ .ctl_name = XFS_ERRLEVEL,
+ .procname = "error_level",
+ .data = &xfs_params.error_level.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.error_level.min,
+ .extra2 = &xfs_params.error_level.max
+ },
+ {
+ .ctl_name = XFS_SYNCD_TIMER,
+ .procname = "xfssyncd_centisecs",
+ .data = &xfs_params.syncd_timer.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.syncd_timer.min,
+ .extra2 = &xfs_params.syncd_timer.max
+ },
+ {
+ .ctl_name = XFS_INHERIT_SYNC,
+ .procname = "inherit_sync",
+ .data = &xfs_params.inherit_sync.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.inherit_sync.min,
+ .extra2 = &xfs_params.inherit_sync.max
+ },
+ {
+ .ctl_name = XFS_INHERIT_NODUMP,
+ .procname = "inherit_nodump",
+ .data = &xfs_params.inherit_nodump.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.inherit_nodump.min,
+ .extra2 = &xfs_params.inherit_nodump.max
+ },
+ {
+ .ctl_name = XFS_INHERIT_NOATIME,
+ .procname = "inherit_noatime",
+ .data = &xfs_params.inherit_noatim.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.inherit_noatim.min,
+ .extra2 = &xfs_params.inherit_noatim.max
+ },
+ {
+ .ctl_name = XFS_BUF_TIMER,
+ .procname = "xfsbufd_centisecs",
+ .data = &xfs_params.xfs_buf_timer.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.xfs_buf_timer.min,
+ .extra2 = &xfs_params.xfs_buf_timer.max
+ },
+ {
+ .ctl_name = XFS_BUF_AGE,
+ .procname = "age_buffer_centisecs",
+ .data = &xfs_params.xfs_buf_age.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.xfs_buf_age.min,
+ .extra2 = &xfs_params.xfs_buf_age.max
+ },
+ {
+ .ctl_name = XFS_INHERIT_NOSYM,
+ .procname = "inherit_nosymlinks",
+ .data = &xfs_params.inherit_nosym.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.inherit_nosym.min,
+ .extra2 = &xfs_params.inherit_nosym.max
+ },
+ {
+ .ctl_name = XFS_ROTORSTEP,
+ .procname = "rotorstep",
+ .data = &xfs_params.rotorstep.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.rotorstep.min,
+ .extra2 = &xfs_params.rotorstep.max
+ },
+ {
+ .ctl_name = XFS_INHERIT_NODFRG,
+ .procname = "inherit_nodefrag",
+ .data = &xfs_params.inherit_nodfrg.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec_minmax,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.inherit_nodfrg.min,
+ .extra2 = &xfs_params.inherit_nodfrg.max
+ },
/* please keep this the last entry */
#ifdef CONFIG_PROC_FS
- {XFS_STATS_CLEAR, "stats_clear", &xfs_params.stats_clear.val,
- sizeof(int), 0644, NULL, &xfs_stats_clear_proc_handler,
- &sysctl_intvec, NULL,
- &xfs_params.stats_clear.min, &xfs_params.stats_clear.max},
+ {
+ .ctl_name = XFS_STATS_CLEAR,
+ .procname = "stats_clear",
+ .data = &xfs_params.stats_clear.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &xfs_stats_clear_proc_handler,
+ .strategy = &sysctl_intvec,
+ .extra1 = &xfs_params.stats_clear.min,
+ .extra2 = &xfs_params.stats_clear.max
+ },
#endif /* CONFIG_PROC_FS */
- {0}
+ {}
};
-STATIC ctl_table xfs_dir_table[] = {
- {FS_XFS, "xfs", NULL, 0, 0555, xfs_table},
- {0}
+static ctl_table xfs_dir_table[] = {
+ {
+ .ctl_name = FS_XFS,
+ .procname = "xfs",
+ .mode = 0555,
+ .child = xfs_table
+ },
+ {}
};
-STATIC ctl_table xfs_root_table[] = {
- {CTL_FS, "fs", NULL, 0, 0555, xfs_dir_table},
- {0}
+static ctl_table xfs_root_table[] = {
+ {
+ .ctl_name = CTL_FS,
+ .procname = "fs",
+ .mode = 0555,
+ .child = xfs_dir_table
+ },
+ {}
};
void
xfs_sysctl_register(void)
{
- xfs_table_header = register_sysctl_table(xfs_root_table, 1);
+ xfs_table_header = register_sysctl_table(xfs_root_table);
}
void
diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h
index da255bdf5260..e2c2ce98ab5b 100644
--- a/fs/xfs/linux-2.6/xfs_vfs.h
+++ b/fs/xfs/linux-2.6/xfs_vfs.h
@@ -91,7 +91,7 @@ typedef enum {
#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
-#define SYNC_QUIESCE 0x0100 /* quiesce fileystem for a snapshot */
+#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */
#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */
#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index 553fa731ade5..ada24baf88de 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -26,7 +26,7 @@ DEFINE_SPINLOCK(vnumber_lock);
*/
#define NVSYNC 37
#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC])
-STATIC wait_queue_head_t vsync[NVSYNC];
+static wait_queue_head_t vsync[NVSYNC];
void
vn_init(void)
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 515f5fdea57a..b76118cf4897 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -489,14 +489,14 @@ static inline struct bhv_vnode *vn_grab(struct bhv_vnode *vp)
#define VN_LOCK(vp) mutex_spinlock(&(vp)->v_lock)
#define VN_UNLOCK(vp, s) mutex_spinunlock(&(vp)->v_lock, s)
-static __inline__ void vn_flagset(struct bhv_vnode *vp, uint flag)
+STATIC_INLINE void vn_flagset(struct bhv_vnode *vp, uint flag)
{
spin_lock(&vp->v_lock);
vp->v_flag |= flag;
spin_unlock(&vp->v_lock);
}
-static __inline__ uint vn_flagclr(struct bhv_vnode *vp, uint flag)
+STATIC_INLINE uint vn_flagclr(struct bhv_vnode *vp, uint flag)
{
uint cleared;
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 3aa771531856..4adaf13aac6f 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -43,8 +43,6 @@
#include "xfs_itable.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -484,7 +482,7 @@ xfs_qm_dqalloc(
xfs_trans_bhold(tp, bp);
- if ((error = xfs_bmap_finish(tpp, &flist, firstblock, &committed))) {
+ if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
goto error1;
}
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 33ad5af386e0..ddb61fe22a5c 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -43,8 +43,6 @@
#include "xfs_itable.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
@@ -399,7 +397,7 @@ xfs_qm_dquot_logitem_committing(
/*
* This is the ops vector for dquots
*/
-STATIC struct xfs_item_ops xfs_dquot_item_ops = {
+static struct xfs_item_ops xfs_dquot_item_ops = {
.iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_dquot_logitem_size,
.iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
xfs_qm_dquot_logitem_format,
@@ -606,7 +604,7 @@ xfs_qm_qoffend_logitem_committing(xfs_qoff_logitem_t *qip, xfs_lsn_t commit_lsn)
return;
}
-STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
+static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
.iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
.iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
xfs_qm_qoff_logitem_format,
@@ -628,7 +626,7 @@ STATIC struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
/*
* This is the ops vector shared by all quotaoff-start log items.
*/
-STATIC struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
+static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
.iop_size = (uint(*)(xfs_log_item_t*))xfs_qm_qoff_logitem_size,
.iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
xfs_qm_qoff_logitem_format,
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 7c6a3a50379e..1de2acdc7f70 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -44,8 +44,6 @@
#include "xfs_bmap.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -64,10 +62,10 @@ uint ndquot;
kmem_zone_t *qm_dqzone;
kmem_zone_t *qm_dqtrxzone;
-STATIC kmem_shaker_t xfs_qm_shaker;
+static kmem_shaker_t xfs_qm_shaker;
-STATIC cred_t xfs_zerocr;
-STATIC xfs_inode_t xfs_zeroino;
+static cred_t xfs_zerocr;
+static xfs_inode_t xfs_zeroino;
STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index db8872be8c87..d2cdb8a2aad6 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -44,8 +44,6 @@
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
@@ -384,7 +382,7 @@ xfs_qm_dqrele_null(
}
-STATIC struct xfs_qmops xfs_qmcore_xfs = {
+static struct xfs_qmops xfs_qmcore_xfs = {
.xfs_qminit = xfs_qm_newmount,
.xfs_qmdone = xfs_qm_unmount_quotadestroy,
.xfs_qmmount = xfs_qm_endmount,
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 6f858fb81a36..709f5f545cf5 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -43,8 +43,6 @@
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index ed620c4d1594..716f562aa8b2 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -46,8 +46,6 @@
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -134,7 +132,7 @@ xfs_qm_quotactl(
break;
case Q_XQUOTASYNC:
- return (xfs_sync_inodes(mp, SYNC_DELWRI, 0, NULL));
+ return (xfs_sync_inodes(mp, SYNC_DELWRI, NULL));
default:
break;
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 0242e9666e8e..d7491e7b1f3b 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -43,8 +43,6 @@
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
diff --git a/fs/xfs/support/debug.c b/fs/xfs/support/debug.c
index 4363512d2f90..08bbd3cb87ae 100644
--- a/fs/xfs/support/debug.c
+++ b/fs/xfs/support/debug.c
@@ -19,7 +19,7 @@
#include "debug.h"
#include "spin.h"
-static char message[256]; /* keep it off the stack */
+static char message[1024]; /* keep it off the stack */
static DEFINE_SPINLOCK(xfs_err_lock);
/* Translate from CE_FOO to KERN_FOO, err_level(CE_FOO) == KERN_FOO */
@@ -44,13 +44,14 @@ cmn_err(register int level, char *fmt, ...)
spin_lock_irqsave(&xfs_err_lock,flags);
va_start(ap, fmt);
if (*fmt == '!') fp++;
- len = vsprintf(message, fp, ap);
- if (level != CE_DEBUG && message[len-1] != '\n')
- strcat(message, "\n");
- printk("%s%s", err_level[level], message);
+ len = vsnprintf(message, sizeof(message), fp, ap);
+ if (len >= sizeof(message))
+ len = sizeof(message) - 1;
+ if (message[len-1] == '\n')
+ message[len-1] = 0;
+ printk("%s%s\n", err_level[level], message);
va_end(ap);
spin_unlock_irqrestore(&xfs_err_lock,flags);
-
BUG_ON(level == CE_PANIC);
}
@@ -64,11 +65,13 @@ icmn_err(register int level, char *fmt, va_list ap)
if(level > XFS_MAX_ERR_LEVEL)
level = XFS_MAX_ERR_LEVEL;
spin_lock_irqsave(&xfs_err_lock,flags);
- len = vsprintf(message, fmt, ap);
- if (level != CE_DEBUG && message[len-1] != '\n')
- strcat(message, "\n");
+ len = vsnprintf(message, sizeof(message), fmt, ap);
+ if (len >= sizeof(message))
+ len = sizeof(message) - 1;
+ if (message[len-1] == '\n')
+ message[len-1] = 0;
+ printk("%s%s\n", err_level[level], message);
spin_unlock_irqrestore(&xfs_err_lock,flags);
- printk("%s%s", err_level[level], message);
BUG_ON(level == CE_PANIC);
}
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 4f54dca662a8..2a70cc605ae3 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -38,13 +38,37 @@ extern void assfail(char *expr, char *f, int l);
#ifndef DEBUG
# define ASSERT(expr) ((void)0)
-#else
+
+#ifndef STATIC
+# define STATIC static noinline
+#endif
+
+#ifndef STATIC_INLINE
+# define STATIC_INLINE static inline
+#endif
+
+#else /* DEBUG */
+
# define ASSERT(expr) ASSERT_ALWAYS(expr)
extern unsigned long random(void);
-#endif
#ifndef STATIC
-# define STATIC static
+# define STATIC noinline
#endif
+/*
+ * We stop inlining of inline functions in debug mode.
+ * Unfortunately, this means static inline in header files
+ * get multiple definitions, so they need to remain static.
+ * This then gives tonnes of warnings about unused but defined
+ * functions, so we need to add the unused attribute to prevent
+ * these spurious warnings.
+ */
+#ifndef STATIC_INLINE
+# define STATIC_INLINE static __attribute__ ((unused)) noinline
+#endif
+
+#endif /* DEBUG */
+
+
#endif /* __XFS_SUPPORT_DEBUG_H__ */
diff --git a/fs/xfs/support/move.h b/fs/xfs/support/move.h
index 977879c24ff5..324e413deadd 100644
--- a/fs/xfs/support/move.h
+++ b/fs/xfs/support/move.h
@@ -55,7 +55,7 @@ enum uio_seg {
};
struct uio {
- struct iovec *uio_iov; /* pointer to array of iovecs */
+ struct kvec *uio_iov; /* pointer to array of iovecs */
int uio_iovcnt; /* number of iovecs in array */
xfs_off_t uio_offset; /* offset in file this uio corresponds to */
int uio_resid; /* residual i/o count */
@@ -63,7 +63,7 @@ struct uio {
};
typedef struct uio uio_t;
-typedef struct iovec iovec_t;
+typedef struct kvec iovec_t;
extern int xfs_uio_read (caddr_t, size_t, uio_t *);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 4b0cb474be4c..4ca4beb7bb54 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -31,7 +31,6 @@
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_acl.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include <linux/capability.h>
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index bce81c7a4fdc..5bd1a2c8bd07 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -58,7 +58,6 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t;
/*
* Real block structures have a size equal to the disk block size.
*/
-#define XFS_ALLOC_BLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
#define XFS_ALLOC_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_alloc_mxr[lev != 0])
#define XFS_ALLOC_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_alloc_mnr[lev != 0])
@@ -87,16 +86,13 @@ typedef struct xfs_btree_sblock xfs_alloc_block_t;
* Record, key, and pointer address macros for btree blocks.
*/
#define XFS_ALLOC_REC_ADDR(bb,i,cur) \
- XFS_BTREE_REC_ADDR(XFS_ALLOC_BLOCK_SIZE(0,cur), xfs_alloc, \
- bb, i, XFS_ALLOC_BLOCK_MAXRECS(0, cur))
+ XFS_BTREE_REC_ADDR(xfs_alloc, bb, i)
#define XFS_ALLOC_KEY_ADDR(bb,i,cur) \
- XFS_BTREE_KEY_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, \
- bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
+ XFS_BTREE_KEY_ADDR(xfs_alloc, bb, i)
#define XFS_ALLOC_PTR_ADDR(bb,i,cur) \
- XFS_BTREE_PTR_ADDR(XFS_ALLOC_BLOCK_SIZE(1,cur), xfs_alloc, \
- bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
+ XFS_BTREE_PTR_ADDR(xfs_alloc, bb, i, XFS_ALLOC_BLOCK_MAXRECS(1, cur))
/*
* Decrement cursor by one record at the level.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 9ada7bdbae52..9d358ffce4e5 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -57,9 +57,9 @@
*/
#define ATTR_SYSCOUNT 2
-STATIC struct attrnames posix_acl_access;
-STATIC struct attrnames posix_acl_default;
-STATIC struct attrnames *attr_system_names[ATTR_SYSCOUNT];
+static struct attrnames posix_acl_access;
+static struct attrnames posix_acl_default;
+static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
/*========================================================================
* Function prototypes for the kernel.
@@ -199,18 +199,14 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
return (error);
/*
- * Determine space new attribute will use, and if it would be
- * "local" or "remote" (note: local != inline).
- */
- size = xfs_attr_leaf_newentsize(namelen, valuelen,
- mp->m_sb.sb_blocksize, &local);
-
- /*
* If the inode doesn't have an attribute fork, add one.
* (inode must not be locked when we call this routine)
*/
if (XFS_IFORK_Q(dp) == 0) {
- if ((error = xfs_bmap_add_attrfork(dp, size, rsvd)))
+ int sf_size = sizeof(xfs_attr_sf_hdr_t) +
+ XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen);
+
+ if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
return(error);
}
@@ -231,6 +227,13 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
args.addname = 1;
args.oknoent = 1;
+ /*
+ * Determine space new attribute will use, and if it would be
+ * "local" or "remote" (note: local != inline).
+ */
+ size = xfs_attr_leaf_newentsize(namelen, valuelen,
+ mp->m_sb.sb_blocksize, &local);
+
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
if (local) {
if (size > (mp->m_sb.sb_blocksize >> 1)) {
@@ -346,7 +349,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
error = xfs_attr_shortform_to_leaf(&args);
if (!error) {
error = xfs_bmap_finish(&args.trans, args.flist,
- *args.firstblock, &committed);
+ &committed);
}
if (error) {
ASSERT(committed);
@@ -973,7 +976,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
error = xfs_attr_leaf_to_node(args);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- *args->firstblock, &committed);
+ &committed);
}
if (error) {
ASSERT(committed);
@@ -1074,7 +1077,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
if (!error) {
error = xfs_bmap_finish(&args->trans,
args->flist,
- *args->firstblock,
&committed);
}
if (error) {
@@ -1152,7 +1154,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
/* bp is gone due to xfs_da_shrink_inode */
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- *args->firstblock, &committed);
+ &committed);
}
if (error) {
ASSERT(committed);
@@ -1307,7 +1309,6 @@ restart:
if (!error) {
error = xfs_bmap_finish(&args->trans,
args->flist,
- *args->firstblock,
&committed);
}
if (error) {
@@ -1347,7 +1348,7 @@ restart:
error = xfs_da_split(state);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- *args->firstblock, &committed);
+ &committed);
}
if (error) {
ASSERT(committed);
@@ -1459,7 +1460,6 @@ restart:
if (!error) {
error = xfs_bmap_finish(&args->trans,
args->flist,
- *args->firstblock,
&committed);
}
if (error) {
@@ -1594,7 +1594,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
error = xfs_da_join(state);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- *args->firstblock, &committed);
+ &committed);
}
if (error) {
ASSERT(committed);
@@ -1646,7 +1646,6 @@ xfs_attr_node_removename(xfs_da_args_t *args)
if (!error) {
error = xfs_bmap_finish(&args->trans,
args->flist,
- *args->firstblock,
&committed);
}
if (error) {
@@ -2090,7 +2089,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
args->flist, NULL);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- *args->firstblock, &committed);
+ &committed);
}
if (error) {
ASSERT(committed);
@@ -2246,7 +2245,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
NULL, &done);
if (!error) {
error = xfs_bmap_finish(&args->trans, args->flist,
- *args->firstblock, &committed);
+ &committed);
}
if (error) {
ASSERT(committed);
@@ -2477,7 +2476,7 @@ posix_acl_default_exists(
return xfs_acl_vhasacl_default(vp);
}
-STATIC struct attrnames posix_acl_access = {
+static struct attrnames posix_acl_access = {
.attr_name = "posix_acl_access",
.attr_namelen = sizeof("posix_acl_access") - 1,
.attr_get = posix_acl_access_get,
@@ -2486,7 +2485,7 @@ STATIC struct attrnames posix_acl_access = {
.attr_exists = posix_acl_access_exists,
};
-STATIC struct attrnames posix_acl_default = {
+static struct attrnames posix_acl_default = {
.attr_name = "posix_acl_default",
.attr_namelen = sizeof("posix_acl_default") - 1,
.attr_get = posix_acl_default_get,
@@ -2495,7 +2494,7 @@ STATIC struct attrnames posix_acl_default = {
.attr_exists = posix_acl_default_exists,
};
-STATIC struct attrnames *attr_system_names[] =
+static struct attrnames *attr_system_names[] =
{ &posix_acl_access, &posix_acl_default };
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 9719bbef122c..8eab73e8340a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -94,7 +94,7 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
* Namespace helper routines
*========================================================================*/
-STATIC inline attrnames_t *
+STATIC_INLINE attrnames_t *
xfs_attr_flags_namesp(int flags)
{
return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
@@ -105,7 +105,7 @@ xfs_attr_flags_namesp(int flags)
* If namespace bits don't match return 0.
* If all match then return 1.
*/
-STATIC inline int
+STATIC_INLINE int
xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
{
return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
@@ -116,7 +116,7 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
* then return 0.
* If all match or are overridable then return 1.
*/
-STATIC inline int
+STATIC_INLINE int
xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
{
if (((arg_flags & ATTR_SECURE) == 0) !=
@@ -150,6 +150,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
int offset;
int minforkoff; /* lower limit on valid forkoff locations */
int maxforkoff; /* upper limit on valid forkoff locations */
+ int dsize;
xfs_mount_t *mp = dp->i_mount;
offset = (XFS_LITINO(mp) - bytes) >> 3; /* rounded down */
@@ -169,8 +170,43 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
return 0;
}
- /* data fork btree root can have at least this many key/ptr pairs */
- minforkoff = MAX(dp->i_df.if_bytes, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
+ dsize = dp->i_df.if_bytes;
+
+ switch (dp->i_d.di_format) {
+ case XFS_DINODE_FMT_EXTENTS:
+ /*
+ * If there is no attr fork and the data fork is extents,
+ * determine if creating the default attr fork will result
+ * in the extents form migrating to btree. If so, the
+ * minimum offset only needs to be the space required for
+ * the btree root.
+ */
+ if (!dp->i_d.di_forkoff && dp->i_df.if_bytes > mp->m_attroffset)
+ dsize = XFS_BMDR_SPACE_CALC(MINDBTPTRS);
+ break;
+
+ case XFS_DINODE_FMT_BTREE:
+ /*
+ * If have data btree then keep forkoff if we have one,
+ * otherwise we are adding a new attr, so then we set
+ * minforkoff to where the btree root can finish so we have
+ * plenty of room for attrs
+ */
+ if (dp->i_d.di_forkoff) {
+ if (offset < dp->i_d.di_forkoff)
+ return 0;
+ else
+ return dp->i_d.di_forkoff;
+ } else
+ dsize = XFS_BMAP_BROOT_SPACE(dp->i_df.if_broot);
+ break;
+ }
+
+ /*
+ * A data fork btree root must have space for at least
+ * MINDBTPTRS key/ptr pairs if the data fork is small or empty.
+ */
+ minforkoff = MAX(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
minforkoff = roundup(minforkoff, 8) >> 3;
/* attr fork btree root can have at least this many key/ptr pairs */
@@ -336,7 +372,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
*/
totsize -= size;
if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname &&
- (mp->m_flags & XFS_MOUNT_ATTR2)) {
+ (mp->m_flags & XFS_MOUNT_ATTR2) &&
+ (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
/*
* Last attribute now removed, revert to original
* inode format making all literal area available
@@ -355,7 +392,8 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
ASSERT(dp->i_d.di_forkoff);
ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname ||
- !(mp->m_flags & XFS_MOUNT_ATTR2));
+ !(mp->m_flags & XFS_MOUNT_ATTR2) ||
+ dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
dp->i_afp->if_ext_max =
XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
dp->i_df.if_ext_max =
@@ -748,6 +786,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
+ be16_to_cpu(name_loc->valuelen);
}
if ((dp->i_mount->m_flags & XFS_MOUNT_ATTR2) &&
+ (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
(bytes == sizeof(struct xfs_attr_sf_hdr)))
return(-1);
return(xfs_attr_shortform_bytesfit(dp, bytes));
@@ -786,6 +825,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
if (forkoff == -1) {
ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2);
+ ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE);
/*
* Last attribute was removed, revert to original
diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c
index 43be6a7e47c6..1afe07f67e3b 100644
--- a/fs/xfs/xfs_bit.c
+++ b/fs/xfs/xfs_bit.c
@@ -29,7 +29,7 @@
/*
* Index of high bit number in byte, -1 for none set, 0..7 otherwise.
*/
-STATIC const char xfs_highbit[256] = {
+static const char xfs_highbit[256] = {
-1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */
3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */
4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 498ad50d1f45..87795188cedf 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -185,16 +185,6 @@ xfs_bmap_btree_to_extents(
int *logflagsp, /* inode logging flags */
int whichfork); /* data or attr fork */
-#ifdef DEBUG
-/*
- * Check that the extents list for the inode ip is in the right order.
- */
-STATIC void
-xfs_bmap_check_extents(
- xfs_inode_t *ip, /* incore inode pointer */
- int whichfork); /* data or attr fork */
-#endif
-
/*
* Called by xfs_bmapi to update file extent records and the btree
* after removing space (or undoing a delayed allocation).
@@ -410,7 +400,6 @@ xfs_bmap_count_leaves(
STATIC int
xfs_bmap_disk_count_leaves(
xfs_ifork_t *ifp,
- xfs_mount_t *mp,
xfs_extnum_t idx,
xfs_bmbt_block_t *block,
int numrecs,
@@ -684,7 +673,7 @@ xfs_bmap_add_extent(
ASSERT(nblks <= da_old);
if (nblks < da_old)
xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
- (int)(da_old - nblks), rsvd);
+ (int64_t)(da_old - nblks), rsvd);
}
/*
* Clear out the allocated field, done with it now in any case.
@@ -1209,7 +1198,7 @@ xfs_bmap_add_extent_delay_real(
diff = (int)(temp + temp2 - STARTBLOCKVAL(PREV.br_startblock) -
(cur ? cur->bc_private.b.allocated : 0));
if (diff > 0 &&
- xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -diff, rsvd)) {
+ xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) {
/*
* Ick gross gag me with a spoon.
*/
@@ -1220,7 +1209,7 @@ xfs_bmap_add_extent_delay_real(
diff--;
if (!diff ||
!xfs_mod_incore_sb(ip->i_mount,
- XFS_SBS_FDBLOCKS, -diff, rsvd))
+ XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd))
break;
}
if (temp2) {
@@ -1228,7 +1217,7 @@ xfs_bmap_add_extent_delay_real(
diff--;
if (!diff ||
!xfs_mod_incore_sb(ip->i_mount,
- XFS_SBS_FDBLOCKS, -diff, rsvd))
+ XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd))
break;
}
}
@@ -2015,7 +2004,7 @@ xfs_bmap_add_extent_hole_delay(
if (oldlen != newlen) {
ASSERT(oldlen > newlen);
xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS,
- (int)(oldlen - newlen), rsvd);
+ (int64_t)(oldlen - newlen), rsvd);
/*
* Nothing to do for disk quota accounting here.
*/
@@ -3359,7 +3348,7 @@ xfs_bmap_del_extent(
*/
ASSERT(da_old >= da_new);
if (da_old > da_new)
- xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int)(da_old - da_new),
+ xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new),
rsvd);
if (delta) {
/* DELTA: report the original extent. */
@@ -3543,6 +3532,7 @@ xfs_bmap_forkoff_reset(
if (whichfork == XFS_ATTR_FORK &&
(ip->i_d.di_format != XFS_DINODE_FMT_DEV) &&
(ip->i_d.di_format != XFS_DINODE_FMT_UUID) &&
+ (ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
((mp->m_attroffset >> 3) > ip->i_d.di_forkoff)) {
ip->i_d.di_forkoff = mp->m_attroffset >> 3;
ip->i_df.if_ext_max = XFS_IFORK_DSIZE(ip) /
@@ -4079,7 +4069,7 @@ xfs_bmap_add_attrfork(
} else
XFS_SB_UNLOCK(mp, s);
}
- if ((error = xfs_bmap_finish(&tp, &flist, firstblock, &committed)))
+ if ((error = xfs_bmap_finish(&tp, &flist, &committed)))
goto error2;
error = xfs_trans_commit(tp, XFS_TRANS_PERM_LOG_RES, NULL);
ASSERT(ip->i_df.if_ext_max ==
@@ -4212,7 +4202,6 @@ int /* error */
xfs_bmap_finish(
xfs_trans_t **tp, /* transaction pointer addr */
xfs_bmap_free_t *flist, /* i/o: list extents to free */
- xfs_fsblock_t firstblock, /* controlled ag for allocs */
int *committed) /* xact committed or not */
{
xfs_efd_log_item_t *efd; /* extent free data */
@@ -4533,8 +4522,7 @@ xfs_bmap_read_extents(
error0);
if (level == 0)
break;
- pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
- 1, mp->m_bmap_dmxr[1]);
+ pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
xfs_trans_brelse(tp, bp);
@@ -4577,8 +4565,7 @@ xfs_bmap_read_extents(
/*
* Copy records into the extent records.
*/
- frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
- block, 1, mp->m_bmap_dmxr[0]);
+ frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
start = i;
for (j = 0; j < num_recs; j++, i++, frp++) {
trp = xfs_iext_get_ext(ifp, i);
@@ -4929,28 +4916,28 @@ xfs_bmapi(
if (rt) {
error = xfs_mod_incore_sb(mp,
XFS_SBS_FREXTENTS,
- -(extsz), (flags &
+ -((int64_t)extsz), (flags &
XFS_BMAPI_RSVBLOCKS));
} else {
error = xfs_mod_incore_sb(mp,
XFS_SBS_FDBLOCKS,
- -(alen), (flags &
+ -((int64_t)alen), (flags &
XFS_BMAPI_RSVBLOCKS));
}
if (!error) {
error = xfs_mod_incore_sb(mp,
XFS_SBS_FDBLOCKS,
- -(indlen), (flags &
+ -((int64_t)indlen), (flags &
XFS_BMAPI_RSVBLOCKS));
if (error && rt)
xfs_mod_incore_sb(mp,
XFS_SBS_FREXTENTS,
- extsz, (flags &
+ (int64_t)extsz, (flags &
XFS_BMAPI_RSVBLOCKS));
else if (error)
xfs_mod_incore_sb(mp,
XFS_SBS_FDBLOCKS,
- alen, (flags &
+ (int64_t)alen, (flags &
XFS_BMAPI_RSVBLOCKS));
}
@@ -5616,13 +5603,13 @@ xfs_bunmapi(
rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
- (int)rtexts, rsvd);
+ (int64_t)rtexts, rsvd);
(void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
NULL, ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
- (int)del.br_blockcount, rsvd);
+ (int64_t)del.br_blockcount, rsvd);
(void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
NULL, ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
@@ -6048,32 +6035,6 @@ xfs_bmap_eof(
}
#ifdef DEBUG
-/*
- * Check that the extents list for the inode ip is in the right order.
- */
-STATIC void
-xfs_bmap_check_extents(
- xfs_inode_t *ip, /* incore inode pointer */
- int whichfork) /* data or attr fork */
-{
- xfs_bmbt_rec_t *ep; /* current extent entry */
- xfs_extnum_t idx; /* extent record index */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_extnum_t nextents; /* number of extents in list */
- xfs_bmbt_rec_t *nextp; /* next extent entry */
-
- ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT(ifp->if_flags & XFS_IFEXTENTS);
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
- ep = xfs_iext_get_ext(ifp, 0);
- for (idx = 0; idx < nextents - 1; idx++) {
- nextp = xfs_iext_get_ext(ifp, idx + 1);
- xfs_btree_check_rec(XFS_BTNUM_BMAP, (void *)ep,
- (void *)(nextp));
- ep = nextp;
- }
-}
-
STATIC
xfs_buf_t *
xfs_bmap_get_bp(
@@ -6156,8 +6117,7 @@ xfs_check_block(
if (root) {
keyp = XFS_BMAP_BROOT_KEY_ADDR(block, i, sz);
} else {
- keyp = XFS_BTREE_KEY_ADDR(mp->m_sb.sb_blocksize,
- xfs_bmbt, block, i, dmxr);
+ keyp = XFS_BTREE_KEY_ADDR(xfs_bmbt, block, i);
}
if (prevp) {
@@ -6172,15 +6132,14 @@ xfs_check_block(
if (root) {
pp = XFS_BMAP_BROOT_PTR_ADDR(block, i, sz);
} else {
- pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
- xfs_bmbt, block, i, dmxr);
+ pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, i, dmxr);
}
for (j = i+1; j <= be16_to_cpu(block->bb_numrecs); j++) {
if (root) {
thispa = XFS_BMAP_BROOT_PTR_ADDR(block, j, sz);
} else {
- thispa = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
- xfs_bmbt, block, j, dmxr);
+ thispa = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, j,
+ dmxr);
}
if (*thispa == *pp) {
cmn_err(CE_WARN, "%s: thispa(%d) == pp(%d) %Ld",
@@ -6267,8 +6226,7 @@ xfs_bmap_check_leaf_extents(
*/
xfs_check_block(block, mp, 0, 0);
- pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt, block,
- 1, mp->m_bmap_dmxr[1]);
+ pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
XFS_WANT_CORRUPTED_GOTO(XFS_FSB_SANITY_CHECK(mp, bno), error0);
if (bp_release) {
@@ -6305,11 +6263,9 @@ xfs_bmap_check_leaf_extents(
* conform with the first entry in this one.
*/
- ep = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
- block, 1, mp->m_bmap_dmxr[0]);
+ ep = XFS_BTREE_REC_ADDR(xfs_bmbt, block, 1);
for (j = 1; j < num_recs; j++) {
- nextp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_bmbt,
- block, j + 1, mp->m_bmap_dmxr[0]);
+ nextp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, j + 1);
if (lastp) {
xfs_btree_check_rec(XFS_BTNUM_BMAP,
(void *)lastp, (void *)ep);
@@ -6454,8 +6410,7 @@ xfs_bmap_count_tree(
}
/* Dive to the next level */
- pp = XFS_BTREE_PTR_ADDR(mp->m_sb.sb_blocksize,
- xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
+ pp = XFS_BTREE_PTR_ADDR(xfs_bmbt, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
if (unlikely((error =
xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
@@ -6470,7 +6425,7 @@ xfs_bmap_count_tree(
for (;;) {
nextbno = be64_to_cpu(block->bb_rightsib);
numrecs = be16_to_cpu(block->bb_numrecs);
- if (unlikely(xfs_bmap_disk_count_leaves(ifp, mp,
+ if (unlikely(xfs_bmap_disk_count_leaves(ifp,
0, block, numrecs, count) < 0)) {
xfs_trans_brelse(tp, bp);
XFS_ERROR_REPORT("xfs_bmap_count_tree(2)",
@@ -6518,7 +6473,6 @@ xfs_bmap_count_leaves(
int
xfs_bmap_disk_count_leaves(
xfs_ifork_t *ifp,
- xfs_mount_t *mp,
xfs_extnum_t idx,
xfs_bmbt_block_t *block,
int numrecs,
@@ -6528,8 +6482,7 @@ xfs_bmap_disk_count_leaves(
xfs_bmbt_rec_t *frp;
for (b = 1; b <= numrecs; b++) {
- frp = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize,
- xfs_bmbt, block, idx + b, mp->m_bmap_dmxr[0]);
+ frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b);
*count += xfs_bmbt_disk_get_blockcount(frp);
}
return 0;
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 80e93409b78d..4f24c7e39b31 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -202,7 +202,6 @@ int /* error */
xfs_bmap_finish(
struct xfs_trans **tp, /* transaction pointer addr */
xfs_bmap_free_t *flist, /* i/o: list extents to free */
- xfs_fsblock_t firstblock, /* controlled a.g. for allocs */
int *committed); /* xact committed or not */
/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index a7b835bf870a..0bf192fea3eb 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -678,47 +678,6 @@ error0:
return error;
}
-#ifdef DEBUG
-/*
- * Get the data from the pointed-to record.
- */
-int
-xfs_bmbt_get_rec(
- xfs_btree_cur_t *cur,
- xfs_fileoff_t *off,
- xfs_fsblock_t *bno,
- xfs_filblks_t *len,
- xfs_exntst_t *state,
- int *stat)
-{
- xfs_bmbt_block_t *block;
- xfs_buf_t *bp;
-#ifdef DEBUG
- int error;
-#endif
- int ptr;
- xfs_bmbt_rec_t *rp;
-
- block = xfs_bmbt_get_block(cur, 0, &bp);
- ptr = cur->bc_ptrs[0];
-#ifdef DEBUG
- if ((error = xfs_btree_check_lblock(cur, block, 0, bp)))
- return error;
-#endif
- if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
- *stat = 0;
- return 0;
- }
- rp = XFS_BMAP_REC_IADDR(block, ptr, cur);
- *off = xfs_bmbt_disk_get_startoff(rp);
- *bno = xfs_bmbt_disk_get_startblock(rp);
- *len = xfs_bmbt_disk_get_blockcount(rp);
- *state = xfs_bmbt_disk_get_state(rp);
- *stat = 1;
- return 0;
-}
-#endif
-
/*
* Insert one record/level. Return information to the caller
* allowing the next level up to proceed if necessary.
@@ -1731,9 +1690,9 @@ xfs_bmdr_to_bmbt(
rblock->bb_leftsib = cpu_to_be64(NULLDFSBNO);
rblock->bb_rightsib = cpu_to_be64(NULLDFSBNO);
dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
- fkp = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+ fkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1);
tkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
- fpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+ fpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr);
tpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
@@ -1862,7 +1821,7 @@ xfs_bmbt_delete(
* xfs_bmbt_get_startblock, xfs_bmbt_get_blockcount and xfs_bmbt_get_state.
*/
-STATIC __inline__ void
+STATIC_INLINE void
__xfs_bmbt_get_all(
__uint64_t l0,
__uint64_t l1,
@@ -2016,30 +1975,6 @@ xfs_bmbt_disk_get_blockcount(
}
/*
- * Extract the startblock field from an on disk bmap extent record.
- */
-xfs_fsblock_t
-xfs_bmbt_disk_get_startblock(
- xfs_bmbt_rec_t *r)
-{
-#if XFS_BIG_BLKNOS
- return (((xfs_fsblock_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
- (((xfs_fsblock_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
-#else
-#ifdef DEBUG
- xfs_dfsbno_t b;
-
- b = (((xfs_dfsbno_t)INT_GET(r->l0, ARCH_CONVERT) & XFS_MASK64LO(9)) << 43) |
- (((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
- ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b));
- return (xfs_fsblock_t)b;
-#else /* !DEBUG */
- return (xfs_fsblock_t)(((xfs_dfsbno_t)INT_GET(r->l1, ARCH_CONVERT)) >> 21);
-#endif /* DEBUG */
-#endif /* XFS_BIG_BLKNOS */
-}
-
-/*
* Extract the startoff field from a disk format bmap extent record.
*/
xfs_fileoff_t
@@ -2049,17 +1984,6 @@ xfs_bmbt_disk_get_startoff(
return ((xfs_fileoff_t)INT_GET(r->l0, ARCH_CONVERT) &
XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9;
}
-
-xfs_exntst_t
-xfs_bmbt_disk_get_state(
- xfs_bmbt_rec_t *r)
-{
- int ext_flag;
-
- ext_flag = (int)((INT_GET(r->l0, ARCH_CONVERT)) >> (64 - BMBT_EXNTFLAG_BITLEN));
- return xfs_extent_state(xfs_bmbt_disk_get_blockcount(r),
- ext_flag);
-}
#endif /* XFS_NATIVE_HOST */
@@ -2684,9 +2608,9 @@ xfs_bmbt_to_bmdr(
dblock->bb_numrecs = rblock->bb_numrecs;
dmxr = (int)XFS_BTREE_BLOCK_MAXRECS(dblocklen, xfs_bmdr, 0);
fkp = XFS_BMAP_BROOT_KEY_ADDR(rblock, 1, rblocklen);
- tkp = XFS_BTREE_KEY_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+ tkp = XFS_BTREE_KEY_ADDR(xfs_bmdr, dblock, 1);
fpp = XFS_BMAP_BROOT_PTR_ADDR(rblock, 1, rblocklen);
- tpp = XFS_BTREE_PTR_ADDR(dblocklen, xfs_bmdr, dblock, 1, dmxr);
+ tpp = XFS_BTREE_PTR_ADDR(xfs_bmdr, dblock, 1, dmxr);
dmxr = be16_to_cpu(dblock->bb_numrecs);
memcpy(tkp, fkp, sizeof(*fkp) * dmxr);
memcpy(tpp, fpp, sizeof(*fpp) * dmxr);
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 49539de9525b..a77b1b753d0c 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -175,19 +175,11 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
#define XFS_BUF_TO_BMBT_BLOCK(bp) ((xfs_bmbt_block_t *)XFS_BUF_PTR(bp))
-#define XFS_BMAP_IBLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
#define XFS_BMAP_RBLOCK_DSIZE(lev,cur) ((cur)->bc_private.b.forksize)
#define XFS_BMAP_RBLOCK_ISIZE(lev,cur) \
((int)XFS_IFORK_PTR((cur)->bc_private.b.ip, \
(cur)->bc_private.b.whichfork)->if_broot_bytes)
-#define XFS_BMAP_BLOCK_DSIZE(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BMAP_RBLOCK_DSIZE(lev,cur) : XFS_BMAP_IBLOCK_SIZE(lev,cur)))
-#define XFS_BMAP_BLOCK_ISIZE(lev,cur) \
- (((lev) == (cur)->bc_nlevels - 1 ? \
- XFS_BMAP_RBLOCK_ISIZE(lev,cur) : XFS_BMAP_IBLOCK_SIZE(lev,cur)))
-
#define XFS_BMAP_BLOCK_DMAXRECS(lev,cur) \
(((lev) == (cur)->bc_nlevels - 1 ? \
XFS_BTREE_BLOCK_MAXRECS(XFS_BMAP_RBLOCK_DSIZE(lev,cur), \
@@ -210,37 +202,21 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
xfs_bmbt, (lev) == 0) : \
((cur)->bc_mp->m_bmap_dmnr[(lev) != 0])))
-#define XFS_BMAP_REC_DADDR(bb,i,cur) \
- (XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_DSIZE( \
- be16_to_cpu((bb)->bb_level), cur), \
- xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
- be16_to_cpu((bb)->bb_level), cur)))
-#define XFS_BMAP_REC_IADDR(bb,i,cur) \
- (XFS_BTREE_REC_ADDR(XFS_BMAP_BLOCK_ISIZE( \
- be16_to_cpu((bb)->bb_level), cur), \
- xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
- be16_to_cpu((bb)->bb_level), cur)))
+#define XFS_BMAP_REC_DADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i))
+
+#define XFS_BMAP_REC_IADDR(bb,i,cur) (XFS_BTREE_REC_ADDR(xfs_bmbt, bb, i))
#define XFS_BMAP_KEY_DADDR(bb,i,cur) \
- (XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_DSIZE( \
- be16_to_cpu((bb)->bb_level), cur), \
- xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
- be16_to_cpu((bb)->bb_level), cur)))
+ (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i))
+
#define XFS_BMAP_KEY_IADDR(bb,i,cur) \
- (XFS_BTREE_KEY_ADDR(XFS_BMAP_BLOCK_ISIZE( \
- be16_to_cpu((bb)->bb_level), cur), \
- xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
- be16_to_cpu((bb)->bb_level), cur)))
+ (XFS_BTREE_KEY_ADDR(xfs_bmbt, bb, i))
#define XFS_BMAP_PTR_DADDR(bb,i,cur) \
- (XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_DSIZE( \
- be16_to_cpu((bb)->bb_level), cur), \
- xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
+ (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_DMAXRECS( \
be16_to_cpu((bb)->bb_level), cur)))
#define XFS_BMAP_PTR_IADDR(bb,i,cur) \
- (XFS_BTREE_PTR_ADDR(XFS_BMAP_BLOCK_ISIZE( \
- be16_to_cpu((bb)->bb_level), cur), \
- xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
+ (XFS_BTREE_PTR_ADDR(xfs_bmbt, bb, i, XFS_BMAP_BLOCK_IMAXRECS( \
be16_to_cpu((bb)->bb_level), cur)))
/*
@@ -248,11 +224,11 @@ typedef struct xfs_btree_lblock xfs_bmbt_block_t;
* we don't have a cursor.
*/
#define XFS_BMAP_BROOT_REC_ADDR(bb,i,sz) \
- (XFS_BTREE_REC_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
+ (XFS_BTREE_REC_ADDR(xfs_bmbt,bb,i))
#define XFS_BMAP_BROOT_KEY_ADDR(bb,i,sz) \
- (XFS_BTREE_KEY_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
+ (XFS_BTREE_KEY_ADDR(xfs_bmbt,bb,i))
#define XFS_BMAP_BROOT_PTR_ADDR(bb,i,sz) \
- (XFS_BTREE_PTR_ADDR(sz,xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
+ (XFS_BTREE_PTR_ADDR(xfs_bmbt,bb,i,XFS_BMAP_BROOT_MAXRECS(sz)))
#define XFS_BMAP_BROOT_NUMRECS(bb) be16_to_cpu((bb)->bb_numrecs)
#define XFS_BMAP_BROOT_MAXRECS(sz) XFS_BTREE_BLOCK_MAXRECS(sz,xfs_bmbt,0)
@@ -315,15 +291,11 @@ extern xfs_exntst_t xfs_bmbt_get_state(xfs_bmbt_rec_t *r);
#ifndef XFS_NATIVE_HOST
extern void xfs_bmbt_disk_get_all(xfs_bmbt_rec_t *r, xfs_bmbt_irec_t *s);
-extern xfs_exntst_t xfs_bmbt_disk_get_state(xfs_bmbt_rec_t *r);
extern xfs_filblks_t xfs_bmbt_disk_get_blockcount(xfs_bmbt_rec_t *r);
-extern xfs_fsblock_t xfs_bmbt_disk_get_startblock(xfs_bmbt_rec_t *r);
extern xfs_fileoff_t xfs_bmbt_disk_get_startoff(xfs_bmbt_rec_t *r);
#else
#define xfs_bmbt_disk_get_all(r, s) xfs_bmbt_get_all(r, s)
-#define xfs_bmbt_disk_get_state(r) xfs_bmbt_get_state(r)
#define xfs_bmbt_disk_get_blockcount(r) xfs_bmbt_get_blockcount(r)
-#define xfs_bmbt_disk_get_startblock(r) xfs_bmbt_get_blockcount(r)
#define xfs_bmbt_disk_get_startoff(r) xfs_bmbt_get_startoff(r)
#endif /* XFS_NATIVE_HOST */
@@ -364,15 +336,6 @@ extern void xfs_bmbt_to_bmdr(xfs_bmbt_block_t *, int, xfs_bmdr_block_t *, int);
extern int xfs_bmbt_update(struct xfs_btree_cur *, xfs_fileoff_t,
xfs_fsblock_t, xfs_filblks_t, xfs_exntst_t);
-#ifdef DEBUG
-/*
- * Get the data from the pointed-to record.
- */
-extern int xfs_bmbt_get_rec(struct xfs_btree_cur *, xfs_fileoff_t *,
- xfs_fsblock_t *, xfs_filblks_t *,
- xfs_exntst_t *, int *);
-#endif
-
#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 892b06c54263..4e27d55a1e73 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -122,13 +122,13 @@ extern const __uint32_t xfs_magics[];
* Given block size, type prefix, block pointer, and index of requested entry
* (first entry numbered 1).
*/
-#define XFS_BTREE_REC_ADDR(bsz,t,bb,i,mxr) \
+#define XFS_BTREE_REC_ADDR(t,bb,i) \
((t ## _rec_t *)((char *)(bb) + sizeof(t ## _block_t) + \
((i) - 1) * sizeof(t ## _rec_t)))
-#define XFS_BTREE_KEY_ADDR(bsz,t,bb,i,mxr) \
+#define XFS_BTREE_KEY_ADDR(t,bb,i) \
((t ## _key_t *)((char *)(bb) + sizeof(t ## _block_t) + \
((i) - 1) * sizeof(t ## _key_t)))
-#define XFS_BTREE_PTR_ADDR(bsz,t,bb,i,mxr) \
+#define XFS_BTREE_PTR_ADDR(t,bb,i,mxr) \
((t ## _ptr_t *)((char *)(bb) + sizeof(t ## _block_t) + \
(mxr) * sizeof(t ## _key_t) + ((i) - 1) * sizeof(t ## _ptr_t)))
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7a55c248ea70..6c1bddc04e31 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -660,7 +660,7 @@ xfs_buf_item_committing(xfs_buf_log_item_t *bip, xfs_lsn_t commit_lsn)
/*
* This is the ops vector shared by all buf log items.
*/
-STATIC struct xfs_item_ops xfs_buf_item_ops = {
+static struct xfs_item_ops xfs_buf_item_ops = {
.iop_size = (uint(*)(xfs_log_item_t*))xfs_buf_item_size,
.iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
xfs_buf_item_format,
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 07c708c2b529..d7e136143066 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -21,23 +21,7 @@
/*
* This is the structure used to lay out a buf log item in the
* log. The data map describes which 128 byte chunks of the buffer
- * have been logged. This structure works only on buffers that
- * reside up to the first TB in the filesystem. These buffers are
- * generated only by pre-6.2 systems and are known as XFS_LI_6_1_BUF.
- */
-typedef struct xfs_buf_log_format_v1 {
- unsigned short blf_type; /* buf log item type indicator */
- unsigned short blf_size; /* size of this item */
- __int32_t blf_blkno; /* starting blkno of this buf */
- ushort blf_flags; /* misc state */
- ushort blf_len; /* number of blocks in this buf */
- unsigned int blf_map_size; /* size of data bitmap in words */
- unsigned int blf_data_map[1];/* variable size bitmap of */
- /* regions of buffer in this item */
-} xfs_buf_log_format_v1_t;
-
-/*
- * This is a form of the above structure with a 64 bit blkno field.
+ * have been logged.
* For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything.
*/
typedef struct xfs_buf_log_format_t {
diff --git a/fs/xfs/xfs_cap.h b/fs/xfs/xfs_cap.h
deleted file mode 100644
index 7a0e482dd436..000000000000
--- a/fs/xfs/xfs_cap.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_CAP_H__
-#define __XFS_CAP_H__
-
-/*
- * Capabilities
- */
-typedef __uint64_t xfs_cap_value_t;
-
-typedef struct xfs_cap_set {
- xfs_cap_value_t cap_effective; /* use in capability checks */
- xfs_cap_value_t cap_permitted; /* combined with file attrs */
- xfs_cap_value_t cap_inheritable;/* pass through exec */
-} xfs_cap_set_t;
-
-/* On-disk XFS extended attribute names */
-#define SGI_CAP_FILE "SGI_CAP_FILE"
-#define SGI_CAP_FILE_SIZE (sizeof(SGI_CAP_FILE)-1)
-#define SGI_CAP_LINUX "SGI_CAP_LINUX"
-#define SGI_CAP_LINUX_SIZE (sizeof(SGI_CAP_LINUX)-1)
-
-/*
- * For Linux, we take the bitfields directly from capability.h
- * and no longer attempt to keep this attribute ondisk compatible
- * with IRIX. Since this attribute is only set on executables,
- * it just doesn't make much sense to try. We do use a different
- * named attribute though, to avoid confusion.
- */
-
-#ifdef __KERNEL__
-
-#ifdef CONFIG_FS_POSIX_CAP
-
-#include <linux/posix_cap_xattr.h>
-
-struct bhv_vnode;
-
-extern int xfs_cap_vhascap(struct bhv_vnode *);
-extern int xfs_cap_vset(struct bhv_vnode *, void *, size_t);
-extern int xfs_cap_vget(struct bhv_vnode *, void *, size_t);
-extern int xfs_cap_vremove(struct bhv_vnode *);
-
-#define _CAP_EXISTS xfs_cap_vhascap
-
-#else
-#define xfs_cap_vset(v,p,sz) (-EOPNOTSUPP)
-#define xfs_cap_vget(v,p,sz) (-EOPNOTSUPP)
-#define xfs_cap_vremove(v) (-EOPNOTSUPP)
-#define _CAP_EXISTS (NULL)
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif /* __XFS_CAP_H__ */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index a68bc1f1a313..aea37df4aa62 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1090,8 +1090,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
if (blk->magic == XFS_DA_NODE_MAGIC) {
node = blk->bp->data;
max = be16_to_cpu(node->hdr.count);
- btreehashval = node->btree[max-1].hashval;
- blk->hashval = be32_to_cpu(btreehashval);
+ blk->hashval = be32_to_cpu(node->btree[max-1].hashval);
/*
* Binary search. (note: small blocks will skip loop)
@@ -2166,21 +2165,6 @@ xfs_da_reada_buf(
return rval;
}
-/*
- * Calculate the number of bits needed to hold i different values.
- */
-uint
-xfs_da_log2_roundup(uint i)
-{
- uint rval;
-
- for (rval = 0; rval < NBBY * sizeof(i); rval++) {
- if ((1 << rval) >= i)
- break;
- }
- return(rval);
-}
-
kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 4ab865ec8b82..44dabf02f2a3 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -249,7 +249,6 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
xfs_dabuf_t *dead_buf);
uint xfs_da_hashname(const uchar_t *name_string, int name_length);
-uint xfs_da_log2_roundup(uint i);
xfs_da_state_t *xfs_da_state_alloc(void);
void xfs_da_state_free(xfs_da_state_t *state);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 50d0faea371d..b847e6a7a3f0 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -41,7 +41,6 @@
#include "xfs_itable.h"
#include "xfs_dfrag.h"
#include "xfs_error.h"
-#include "xfs_mac.h"
#include "xfs_rw.h"
/*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index b95681b03d81..b1af54464f00 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -132,32 +132,6 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
}
int
-xfs_errortag_clear(int error_tag, xfs_mount_t *mp)
-{
- int i;
- int64_t fsid;
-
- memcpy(&fsid, mp->m_fixedfsid, sizeof(xfs_fsid_t));
-
- for (i = 0; i < XFS_NUM_INJECT_ERROR; i++) {
- if (xfs_etest_fsid[i] == fsid && xfs_etest[i] == error_tag) {
- xfs_etest[i] = 0;
- xfs_etest_fsid[i] = 0LL;
- kmem_free(xfs_etest_fsname[i],
- strlen(xfs_etest_fsname[i]) + 1);
- xfs_etest_fsname[i] = NULL;
- cmn_err(CE_WARN, "Cleared XFS error tag #%d",
- error_tag);
- return 0;
- }
- }
-
- cmn_err(CE_WARN, "XFS error tag %d not on", error_tag);
-
- return 1;
-}
-
-int
xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud)
{
int i;
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 0893e16b7d83..5599ada456a1 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -144,7 +144,6 @@ extern void xfs_error_test_init(void);
#endif /* __ANSI_CPP__ */
extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
-extern int xfs_errortag_clear(int error_tag, xfs_mount_t *mp);
extern int xfs_errortag_clearall(xfs_mount_t *mp);
extern int xfs_errortag_clearall_umount(int64_t fsid, char *fsname, int loud);
#else
@@ -180,6 +179,6 @@ extern void xfs_fs_cmn_err(int level, struct xfs_mount *mp, char *fmt, ...);
xfs_fs_cmn_err(level, mp, fmt " Unmount and run xfs_repair.", ## args)
#define xfs_fs_mount_cmn_err(f, fmt, args...) \
- ((f & XFS_MFSI_QUIET)? cmn_err(CE_WARN, "XFS: " fmt, ## args) : (void)0)
+ ((f & XFS_MFSI_QUIET)? (void)0 : cmn_err(CE_WARN, "XFS: " fmt, ## args))
#endif /* __XFS_ERROR_H__ */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6dba78199faf..3b14427ee123 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -227,7 +227,7 @@ xfs_efi_item_committing(xfs_efi_log_item_t *efip, xfs_lsn_t lsn)
/*
* This is the ops vector shared by all efi log items.
*/
-STATIC struct xfs_item_ops xfs_efi_item_ops = {
+static struct xfs_item_ops xfs_efi_item_ops = {
.iop_size = (uint(*)(xfs_log_item_t*))xfs_efi_item_size,
.iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
xfs_efi_item_format,
@@ -525,7 +525,7 @@ xfs_efd_item_committing(xfs_efd_log_item_t *efip, xfs_lsn_t lsn)
/*
* This is the ops vector shared by all efd log items.
*/
-STATIC struct xfs_item_ops xfs_efd_item_ops = {
+static struct xfs_item_ops xfs_efd_item_ops = {
.iop_size = (uint(*)(xfs_log_item_t*))xfs_efd_item_size,
.iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
xfs_efd_item_format,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index c064e72ada9e..32c37c1c47ab 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -250,8 +250,7 @@ xfs_growfs_data_private(
block->bb_numrecs = cpu_to_be16(1);
block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- arec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc,
- block, 1, mp->m_alloc_mxr[0]);
+ arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1);
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
@@ -272,8 +271,7 @@ xfs_growfs_data_private(
block->bb_numrecs = cpu_to_be16(1);
block->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
block->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- arec = XFS_BTREE_REC_ADDR(mp->m_sb.sb_blocksize, xfs_alloc,
- block, 1, mp->m_alloc_mxr[0]);
+ arec = XFS_BTREE_REC_ADDR(xfs_alloc, block, 1);
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
@@ -460,7 +458,7 @@ xfs_fs_counts(
{
unsigned long s;
- xfs_icsb_sync_counters_lazy(mp);
+ xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
s = XFS_SB_LOCK(mp);
cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
cnt->freertx = mp->m_sb.sb_frextents;
@@ -491,7 +489,7 @@ xfs_reserve_blocks(
__uint64_t *inval,
xfs_fsop_resblks_t *outval)
{
- __int64_t lcounter, delta;
+ __int64_t lcounter, delta, fdblks_delta;
__uint64_t request;
unsigned long s;
@@ -504,17 +502,35 @@ xfs_reserve_blocks(
}
request = *inval;
+
+ /*
+ * With per-cpu counters, this becomes an interesting
+ * problem. We need to work out if we are freeing or allocating
+ * blocks first, then we can do the modification as necessary.
+ *
+ * We do this under the XFS_SB_LOCK so that if we are near
+ * ENOSPC, we will hold out any changes while we work out
+ * what to do. This means that the amount of free space can
+ * change while we do this, so we need to retry if we end up
+ * trying to reserve more space than is available.
+ *
+ * We also use the xfs_mod_incore_sb() interface so that we
+ * don't have to care about whether per-cpu counters are
+ * enabled, disabled or even compiled in....
+ */
+retry:
s = XFS_SB_LOCK(mp);
+ xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED);
/*
* If our previous reservation was larger than the current value,
* then move any unused blocks back to the free pool.
*/
-
+ fdblks_delta = 0;
if (mp->m_resblks > request) {
lcounter = mp->m_resblks_avail - request;
if (lcounter > 0) { /* release unused blocks */
- mp->m_sb.sb_fdblocks += lcounter;
+ fdblks_delta = lcounter;
mp->m_resblks_avail -= lcounter;
}
mp->m_resblks = request;
@@ -522,24 +538,50 @@ xfs_reserve_blocks(
__int64_t free;
free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
+ if (!free)
+ goto out; /* ENOSPC and fdblks_delta = 0 */
+
delta = request - mp->m_resblks;
lcounter = free - delta;
if (lcounter < 0) {
/* We can't satisfy the request, just get what we can */
mp->m_resblks += free;
mp->m_resblks_avail += free;
+ fdblks_delta = -free;
mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
} else {
+ fdblks_delta = -delta;
mp->m_sb.sb_fdblocks =
lcounter + XFS_ALLOC_SET_ASIDE(mp);
mp->m_resblks = request;
mp->m_resblks_avail += delta;
}
}
-
+out:
outval->resblks = mp->m_resblks;
outval->resblks_avail = mp->m_resblks_avail;
XFS_SB_UNLOCK(mp, s);
+
+ if (fdblks_delta) {
+ /*
+ * If we are putting blocks back here, m_resblks_avail is
+ * already at its max so this will put it in the free pool.
+ *
+ * If we need space, we'll either succeed in getting it
+ * from the free block count or we'll get an enospc. If
+ * we get a ENOSPC, it means things changed while we were
+ * calculating fdblks_delta and so we should try again to
+ * see if there is anything left to reserve.
+ *
+ * Don't set the reserved flag here - we don't want to reserve
+ * the extra reserve blocks from the reserve.....
+ */
+ int error;
+ error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0);
+ if (error == ENOSPC)
+ goto retry;
+ }
+
return 0;
}
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a446e5a115c6..b5feb3e77116 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -342,7 +342,7 @@ xfs_ialloc_ag_alloc(
return 0;
}
-STATIC __inline xfs_agnumber_t
+STATIC_INLINE xfs_agnumber_t
xfs_ialloc_next_ag(
xfs_mount_t *mp)
{
diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h
index 2c0e49893ff7..bf8e9aff272e 100644
--- a/fs/xfs/xfs_ialloc_btree.h
+++ b/fs/xfs/xfs_ialloc_btree.h
@@ -89,7 +89,6 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
/*
* Real block structures have a size equal to the disk block size.
*/
-#define XFS_INOBT_BLOCK_SIZE(lev,cur) (1 << (cur)->bc_blocklog)
#define XFS_INOBT_BLOCK_MAXRECS(lev,cur) ((cur)->bc_mp->m_inobt_mxr[lev != 0])
#define XFS_INOBT_BLOCK_MINRECS(lev,cur) ((cur)->bc_mp->m_inobt_mnr[lev != 0])
#define XFS_INOBT_IS_LAST_REC(cur) \
@@ -110,14 +109,13 @@ typedef struct xfs_btree_sblock xfs_inobt_block_t;
* Record, key, and pointer address macros for btree blocks.
*/
#define XFS_INOBT_REC_ADDR(bb,i,cur) \
- (XFS_BTREE_REC_ADDR(XFS_INOBT_BLOCK_SIZE(0,cur), xfs_inobt, bb, \
- i, XFS_INOBT_BLOCK_MAXRECS(0, cur)))
+ (XFS_BTREE_REC_ADDR(xfs_inobt, bb, i))
+
#define XFS_INOBT_KEY_ADDR(bb,i,cur) \
- (XFS_BTREE_KEY_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, \
- i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
+ (XFS_BTREE_KEY_ADDR(xfs_inobt, bb, i))
#define XFS_INOBT_PTR_ADDR(bb,i,cur) \
- (XFS_BTREE_PTR_ADDR(XFS_INOBT_BLOCK_SIZE(1,cur), xfs_inobt, bb, \
+ (XFS_BTREE_PTR_ADDR(xfs_inobt, bb, \
i, XFS_INOBT_BLOCK_MAXRECS(1, cur)))
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 44dfac521285..3da9829c19d5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -47,7 +47,6 @@
#include "xfs_utils.h"
#include "xfs_dir2_trace.h"
#include "xfs_quota.h"
-#include "xfs_mac.h"
#include "xfs_acl.h"
@@ -1699,8 +1698,7 @@ xfs_itruncate_finish(
* Duplicate the transaction that has the permanent
* reservation and commit the old transaction.
*/
- error = xfs_bmap_finish(tp, &free_list, first_block,
- &committed);
+ error = xfs_bmap_finish(tp, &free_list, &committed);
ntp = *tp;
if (error) {
/*
@@ -1810,7 +1808,7 @@ xfs_igrow_start(
* and any blocks between the old and new file sizes.
*/
error = xfs_zero_eof(XFS_ITOV(ip), &ip->i_iocore, new_size,
- ip->i_d.di_size, new_size);
+ ip->i_d.di_size);
return error;
}
@@ -2125,7 +2123,7 @@ xfs_iunlink_remove(
return 0;
}
-static __inline__ int xfs_inode_clean(xfs_inode_t *ip)
+STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip)
{
return (((ip->i_itemp == NULL) ||
!(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) &&
@@ -2707,10 +2705,24 @@ xfs_idestroy(
ktrace_free(ip->i_dir_trace);
#endif
if (ip->i_itemp) {
- /* XXXdpd should be able to assert this but shutdown
- * is leaving the AIL behind. */
- ASSERT(((ip->i_itemp->ili_item.li_flags & XFS_LI_IN_AIL) == 0) ||
- XFS_FORCED_SHUTDOWN(ip->i_mount));
+ /*
+ * Only if we are shutting down the fs will we see an
+ * inode still in the AIL. If it is there, we should remove
+ * it to prevent a use-after-free from occurring.
+ */
+ xfs_mount_t *mp = ip->i_mount;
+ xfs_log_item_t *lip = &ip->i_itemp->ili_item;
+ int s;
+
+ ASSERT(((lip->li_flags & XFS_LI_IN_AIL) == 0) ||
+ XFS_FORCED_SHUTDOWN(ip->i_mount));
+ if (lip->li_flags & XFS_LI_IN_AIL) {
+ AIL_LOCK(mp, s);
+ if (lip->li_flags & XFS_LI_IN_AIL)
+ xfs_trans_delete_ail(mp, lip, s);
+ else
+ AIL_UNLOCK(mp, s);
+ }
xfs_inode_item_destroy(ip);
}
kmem_zone_free(xfs_inode_zone, ip);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index a7a92251eb56..565d470a6b4a 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -887,7 +887,7 @@ xfs_inode_item_committing(
/*
* This is the ops vector shared by all buf log items.
*/
-STATIC struct xfs_item_ops xfs_inode_item_ops = {
+static struct xfs_item_ops xfs_inode_item_ops = {
.iop_size = (uint(*)(xfs_log_item_t*))xfs_inode_item_size,
.iop_format = (void(*)(xfs_log_item_t*, xfs_log_iovec_t*))
xfs_inode_item_format,
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 19655124da78..cc6a7b5a9912 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -43,8 +43,6 @@
#include "xfs_itable.h"
#include "xfs_rw.h"
#include "xfs_acl.h"
-#include "xfs_cap.h"
-#include "xfs_mac.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -542,7 +540,7 @@ xfs_iomap_write_direct(
/*
* Complete the transaction
*/
- error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error)
goto error0;
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
@@ -838,8 +836,7 @@ xfs_iomap_write_allocate(
if (error)
goto trans_cancel;
- error = xfs_bmap_finish(&tp, &free_list,
- first_block, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error)
goto trans_cancel;
@@ -947,8 +944,7 @@ xfs_iomap_write_unwritten(
if (error)
goto error_on_bmapi_transaction;
- error = xfs_bmap_finish(&(tp), &(free_list),
- firstfsb, &committed);
+ error = xfs_bmap_finish(&(tp), &(free_list), &committed);
if (error)
goto error_on_bmapi_transaction;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 3cb678e3a132..ca74d3f5910e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1514,7 +1514,6 @@ xlog_recover_reorder_trans(
{
xlog_recover_item_t *first_item, *itemq, *itemq_next;
xfs_buf_log_format_t *buf_f;
- xfs_buf_log_format_v1_t *obuf_f;
ushort flags = 0;
first_item = itemq = trans->r_itemq;
@@ -1522,29 +1521,16 @@ xlog_recover_reorder_trans(
do {
itemq_next = itemq->ri_next;
buf_f = (xfs_buf_log_format_t *)itemq->ri_buf[0].i_addr;
- switch (ITEM_TYPE(itemq)) {
- case XFS_LI_BUF:
- flags = buf_f->blf_flags;
- break;
- case XFS_LI_6_1_BUF:
- case XFS_LI_5_3_BUF:
- obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
- flags = obuf_f->blf_flags;
- break;
- }
switch (ITEM_TYPE(itemq)) {
case XFS_LI_BUF:
- case XFS_LI_6_1_BUF:
- case XFS_LI_5_3_BUF:
+ flags = buf_f->blf_flags;
if (!(flags & XFS_BLI_CANCEL)) {
xlog_recover_insert_item_frontq(&trans->r_itemq,
itemq);
break;
}
case XFS_LI_INODE:
- case XFS_LI_6_1_INODE:
- case XFS_LI_5_3_INODE:
case XFS_LI_DQUOT:
case XFS_LI_QUOTAOFF:
case XFS_LI_EFD:
@@ -1583,7 +1569,6 @@ xlog_recover_do_buffer_pass1(
xfs_buf_cancel_t *nextp;
xfs_buf_cancel_t *prevp;
xfs_buf_cancel_t **bucket;
- xfs_buf_log_format_v1_t *obuf_f;
xfs_daddr_t blkno = 0;
uint len = 0;
ushort flags = 0;
@@ -1594,13 +1579,6 @@ xlog_recover_do_buffer_pass1(
len = buf_f->blf_len;
flags = buf_f->blf_flags;
break;
- case XFS_LI_6_1_BUF:
- case XFS_LI_5_3_BUF:
- obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
- blkno = (xfs_daddr_t) obuf_f->blf_blkno;
- len = obuf_f->blf_len;
- flags = obuf_f->blf_flags;
- break;
}
/*
@@ -1746,7 +1724,6 @@ xlog_recover_do_buffer_pass2(
xlog_t *log,
xfs_buf_log_format_t *buf_f)
{
- xfs_buf_log_format_v1_t *obuf_f;
xfs_daddr_t blkno = 0;
ushort flags = 0;
uint len = 0;
@@ -1757,13 +1734,6 @@ xlog_recover_do_buffer_pass2(
flags = buf_f->blf_flags;
len = buf_f->blf_len;
break;
- case XFS_LI_6_1_BUF:
- case XFS_LI_5_3_BUF:
- obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
- blkno = (xfs_daddr_t) obuf_f->blf_blkno;
- flags = obuf_f->blf_flags;
- len = (xfs_daddr_t) obuf_f->blf_len;
- break;
}
return xlog_check_buffer_cancelled(log, blkno, len, flags);
@@ -1799,7 +1769,6 @@ xlog_recover_do_inode_buffer(
int inodes_per_buf;
xfs_agino_t *logged_nextp;
xfs_agino_t *buffer_nextp;
- xfs_buf_log_format_v1_t *obuf_f;
unsigned int *data_map = NULL;
unsigned int map_size = 0;
@@ -1808,12 +1777,6 @@ xlog_recover_do_inode_buffer(
data_map = buf_f->blf_data_map;
map_size = buf_f->blf_map_size;
break;
- case XFS_LI_6_1_BUF:
- case XFS_LI_5_3_BUF:
- obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
- data_map = obuf_f->blf_data_map;
- map_size = obuf_f->blf_map_size;
- break;
}
/*
* Set the variables corresponding to the current region to
@@ -1912,7 +1875,6 @@ xlog_recover_do_reg_buffer(
int i;
int bit;
int nbits;
- xfs_buf_log_format_v1_t *obuf_f;
unsigned int *data_map = NULL;
unsigned int map_size = 0;
int error;
@@ -1922,12 +1884,6 @@ xlog_recover_do_reg_buffer(
data_map = buf_f->blf_data_map;
map_size = buf_f->blf_map_size;
break;
- case XFS_LI_6_1_BUF:
- case XFS_LI_5_3_BUF:
- obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
- data_map = obuf_f->blf_data_map;
- map_size = obuf_f->blf_map_size;
- break;
}
bit = 0;
i = 1; /* 0 is the buf format structure */
@@ -2160,7 +2116,6 @@ xlog_recover_do_buffer_trans(
int pass)
{
xfs_buf_log_format_t *buf_f;
- xfs_buf_log_format_v1_t *obuf_f;
xfs_mount_t *mp;
xfs_buf_t *bp;
int error;
@@ -2197,13 +2152,6 @@ xlog_recover_do_buffer_trans(
len = buf_f->blf_len;
flags = buf_f->blf_flags;
break;
- case XFS_LI_6_1_BUF:
- case XFS_LI_5_3_BUF:
- obuf_f = (xfs_buf_log_format_v1_t*)buf_f;
- blkno = obuf_f->blf_blkno;
- len = obuf_f->blf_len;
- flags = obuf_f->blf_flags;
- break;
default:
xfs_fs_cmn_err(CE_ALERT, log->l_mp,
"xfs_log_recover: unknown buffer type 0x%x, logdev %s",
@@ -2830,9 +2778,7 @@ xlog_recover_do_trans(
* where xfs_daddr_t is 32-bits but mount will warn us
* off a > 1 TB filesystem before we get here.
*/
- if ((ITEM_TYPE(item) == XFS_LI_BUF) ||
- (ITEM_TYPE(item) == XFS_LI_6_1_BUF) ||
- (ITEM_TYPE(item) == XFS_LI_5_3_BUF)) {
+ if ((ITEM_TYPE(item) == XFS_LI_BUF)) {
if ((error = xlog_recover_do_buffer_trans(log, item,
pass)))
break;
@@ -3902,6 +3848,9 @@ xlog_do_recover(
ASSERT(XFS_SB_GOOD_VERSION(sbp));
xfs_buf_relse(bp);
+ /* We've re-read the superblock so re-initialize per-cpu counters */
+ xfs_icsb_reinit_counters(log->l_mp);
+
xlog_recover_check_summary(log);
/* Normal transactions can now occur */
diff --git a/fs/xfs/xfs_mac.h b/fs/xfs/xfs_mac.h
deleted file mode 100644
index 18e0e98e03d0..000000000000
--- a/fs/xfs/xfs_mac.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_MAC_H__
-#define __XFS_MAC_H__
-
-/*
- * Mandatory Access Control
- *
- * Layout of a composite MAC label:
- * ml_list contains the list of categories (MSEN) followed by the list of
- * divisions (MINT). This is actually a header for the data structure which
- * will have an ml_list with more than one element.
- *
- * -------------------------------
- * | ml_msen_type | ml_mint_type |
- * -------------------------------
- * | ml_level | ml_grade |
- * -------------------------------
- * | ml_catcount |
- * -------------------------------
- * | ml_divcount |
- * -------------------------------
- * | category 1 |
- * | . . . |
- * | category N | (where N = ml_catcount)
- * -------------------------------
- * | division 1 |
- * | . . . |
- * | division M | (where M = ml_divcount)
- * -------------------------------
- */
-#define XFS_MAC_MAX_SETS 250
-typedef struct xfs_mac_label {
- __uint8_t ml_msen_type; /* MSEN label type */
- __uint8_t ml_mint_type; /* MINT label type */
- __uint8_t ml_level; /* Hierarchical level */
- __uint8_t ml_grade; /* Hierarchical grade */
- __uint16_t ml_catcount; /* Category count */
- __uint16_t ml_divcount; /* Division count */
- /* Category set, then Division set */
- __uint16_t ml_list[XFS_MAC_MAX_SETS];
-} xfs_mac_label_t;
-
-/* MSEN label type names. Choose an upper case ASCII character. */
-#define XFS_MSEN_ADMIN_LABEL 'A' /* Admin: low<admin != tcsec<high */
-#define XFS_MSEN_EQUAL_LABEL 'E' /* Wildcard - always equal */
-#define XFS_MSEN_HIGH_LABEL 'H' /* System High - always dominates */
-#define XFS_MSEN_MLD_HIGH_LABEL 'I' /* System High, multi-level dir */
-#define XFS_MSEN_LOW_LABEL 'L' /* System Low - always dominated */
-#define XFS_MSEN_MLD_LABEL 'M' /* TCSEC label on a multi-level dir */
-#define XFS_MSEN_MLD_LOW_LABEL 'N' /* System Low, multi-level dir */
-#define XFS_MSEN_TCSEC_LABEL 'T' /* TCSEC label */
-#define XFS_MSEN_UNKNOWN_LABEL 'U' /* unknown label */
-
-/* MINT label type names. Choose a lower case ASCII character. */
-#define XFS_MINT_BIBA_LABEL 'b' /* Dual of a TCSEC label */
-#define XFS_MINT_EQUAL_LABEL 'e' /* Wildcard - always equal */
-#define XFS_MINT_HIGH_LABEL 'h' /* High Grade - always dominates */
-#define XFS_MINT_LOW_LABEL 'l' /* Low Grade - always dominated */
-
-/* On-disk XFS extended attribute names */
-#define SGI_MAC_FILE "SGI_MAC_FILE"
-#define SGI_MAC_FILE_SIZE (sizeof(SGI_MAC_FILE)-1)
-
-
-#ifdef __KERNEL__
-
-#ifdef CONFIG_FS_POSIX_MAC
-
-/* NOT YET IMPLEMENTED */
-
-#define MACEXEC 00100
-#define MACWRITE 00200
-#define MACREAD 00400
-
-struct xfs_inode;
-extern int xfs_mac_iaccess(struct xfs_inode *, mode_t, cred_t *);
-
-#define _MAC_XFS_IACCESS(i,m,c) (xfs_mac_iaccess(i,m,c))
-#define _MAC_VACCESS(v,c,m) (xfs_mac_vaccess(v,c,m))
-#define _MAC_EXISTS xfs_mac_vhaslabel
-
-#else
-#define _MAC_XFS_IACCESS(i,m,c) (0)
-#define _MAC_VACCESS(v,c,m) (0)
-#define _MAC_EXISTS (NULL)
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif /* __XFS_MAC_H__ */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9dfae18d995f..3bed0cf0d8af 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -52,21 +52,19 @@ STATIC void xfs_unmountfs_wait(xfs_mount_t *);
#ifdef HAVE_PERCPU_SB
STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
-STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int);
+STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
+ int, int);
STATIC void xfs_icsb_sync_counters(xfs_mount_t *);
STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
- int, int);
-STATIC int xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
- int, int);
+ int64_t, int);
STATIC int xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
#else
#define xfs_icsb_destroy_counters(mp) do { } while (0)
-#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
+#define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0)
#define xfs_icsb_sync_counters(mp) do { } while (0)
#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
-#define xfs_icsb_modify_counters_locked(mp, a, b, c) do { } while (0)
#endif
@@ -545,9 +543,8 @@ xfs_readsb(xfs_mount_t *mp, int flags)
ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
}
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
+ /* Initialize per-cpu counters */
+ xfs_icsb_reinit_counters(mp);
mp->m_sb_bp = bp;
xfs_buf_relse(bp);
@@ -1254,8 +1251,11 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
* The SB_LOCK must be held when this routine is called.
*/
int
-xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
- int delta, int rsvd)
+xfs_mod_incore_sb_unlocked(
+ xfs_mount_t *mp,
+ xfs_sb_field_t field,
+ int64_t delta,
+ int rsvd)
{
int scounter; /* short counter for 32 bit fields */
long long lcounter; /* long counter for 64 bit fields */
@@ -1287,7 +1287,6 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
mp->m_sb.sb_ifree = lcounter;
return 0;
case XFS_SBS_FDBLOCKS:
-
lcounter = (long long)
mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
@@ -1418,7 +1417,11 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
* routine to do the work.
*/
int
-xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
+xfs_mod_incore_sb(
+ xfs_mount_t *mp,
+ xfs_sb_field_t field,
+ int64_t delta,
+ int rsvd)
{
unsigned long s;
int status;
@@ -1485,9 +1488,11 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
case XFS_SBS_IFREE:
case XFS_SBS_FDBLOCKS:
if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
- status = xfs_icsb_modify_counters_locked(mp,
+ XFS_SB_UNLOCK(mp, s);
+ status = xfs_icsb_modify_counters(mp,
msbp->msb_field,
msbp->msb_delta, rsvd);
+ s = XFS_SB_LOCK(mp);
break;
}
/* FALLTHROUGH */
@@ -1521,11 +1526,12 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
case XFS_SBS_IFREE:
case XFS_SBS_FDBLOCKS:
if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
- status =
- xfs_icsb_modify_counters_locked(mp,
+ XFS_SB_UNLOCK(mp, s);
+ status = xfs_icsb_modify_counters(mp,
msbp->msb_field,
-(msbp->msb_delta),
rsvd);
+ s = XFS_SB_LOCK(mp);
break;
}
/* FALLTHROUGH */
@@ -1733,14 +1739,17 @@ xfs_icsb_cpu_notify(
memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
break;
case CPU_ONLINE:
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
+ xfs_icsb_lock(mp);
+ xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
+ xfs_icsb_unlock(mp);
break;
case CPU_DEAD:
/* Disable all the counters, then fold the dead cpu's
* count into the total on the global superblock and
* re-enable the counters. */
+ xfs_icsb_lock(mp);
s = XFS_SB_LOCK(mp);
xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
@@ -1752,10 +1761,14 @@ xfs_icsb_cpu_notify(
memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, XFS_ICSB_SB_LOCKED);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, XFS_ICSB_SB_LOCKED);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, XFS_ICSB_SB_LOCKED);
+ xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT,
+ XFS_ICSB_SB_LOCKED, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_IFREE,
+ XFS_ICSB_SB_LOCKED, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
+ XFS_ICSB_SB_LOCKED, 0);
XFS_SB_UNLOCK(mp, s);
+ xfs_icsb_unlock(mp);
break;
}
@@ -1784,6 +1797,9 @@ xfs_icsb_init_counters(
cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
}
+
+ mutex_init(&mp->m_icsb_mutex);
+
/*
* start with all counters disabled so that the
* initial balance kicks us off correctly
@@ -1792,6 +1808,22 @@ xfs_icsb_init_counters(
return 0;
}
+void
+xfs_icsb_reinit_counters(
+ xfs_mount_t *mp)
+{
+ xfs_icsb_lock(mp);
+ /*
+ * start with all counters disabled so that the
+ * initial balance kicks us off correctly
+ */
+ mp->m_icsb_counters = -1;
+ xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
+ xfs_icsb_unlock(mp);
+}
+
STATIC void
xfs_icsb_destroy_counters(
xfs_mount_t *mp)
@@ -1800,9 +1832,10 @@ xfs_icsb_destroy_counters(
unregister_hotcpu_notifier(&mp->m_icsb_notifier);
free_percpu(mp->m_sb_cnts);
}
+ mutex_destroy(&mp->m_icsb_mutex);
}
-STATIC inline void
+STATIC_INLINE void
xfs_icsb_lock_cntr(
xfs_icsb_cnts_t *icsbp)
{
@@ -1811,7 +1844,7 @@ xfs_icsb_lock_cntr(
}
}
-STATIC inline void
+STATIC_INLINE void
xfs_icsb_unlock_cntr(
xfs_icsb_cnts_t *icsbp)
{
@@ -1819,7 +1852,7 @@ xfs_icsb_unlock_cntr(
}
-STATIC inline void
+STATIC_INLINE void
xfs_icsb_lock_all_counters(
xfs_mount_t *mp)
{
@@ -1832,7 +1865,7 @@ xfs_icsb_lock_all_counters(
}
}
-STATIC inline void
+STATIC_INLINE void
xfs_icsb_unlock_all_counters(
xfs_mount_t *mp)
{
@@ -1888,6 +1921,17 @@ xfs_icsb_disable_counter(
ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+ /*
+ * If we are already disabled, then there is nothing to do
+ * here. We check before locking all the counters to avoid
+ * the expensive lock operation when being called in the
+ * slow path and the counter is already disabled. This is
+ * safe because the only time we set or clear this state is under
+ * the m_icsb_mutex.
+ */
+ if (xfs_icsb_counter_disabled(mp, field))
+ return 0;
+
xfs_icsb_lock_all_counters(mp);
if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
/* drain back to superblock */
@@ -1948,8 +1992,8 @@ xfs_icsb_enable_counter(
xfs_icsb_unlock_all_counters(mp);
}
-STATIC void
-xfs_icsb_sync_counters_int(
+void
+xfs_icsb_sync_counters_flags(
xfs_mount_t *mp,
int flags)
{
@@ -1981,40 +2025,39 @@ STATIC void
xfs_icsb_sync_counters(
xfs_mount_t *mp)
{
- xfs_icsb_sync_counters_int(mp, 0);
-}
-
-/*
- * lazy addition used for things like df, background sb syncs, etc
- */
-void
-xfs_icsb_sync_counters_lazy(
- xfs_mount_t *mp)
-{
- xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
+ xfs_icsb_sync_counters_flags(mp, 0);
}
/*
* Balance and enable/disable counters as necessary.
*
- * Thresholds for re-enabling counters are somewhat magic.
- * inode counts are chosen to be the same number as single
- * on disk allocation chunk per CPU, and free blocks is
- * something far enough zero that we aren't going thrash
- * when we get near ENOSPC.
+ * Thresholds for re-enabling counters are somewhat magic. Inode counts are
+ * chosen to be the same number as a single on-disk allocation chunk per CPU,
+ * and free blocks is something far enough from zero that we aren't going to
+ * thrash when we get near ENOSPC. We also need to supply a minimum we require
+ * per cpu to prevent looping endlessly when xfs_alloc_space asks for more than
+ * will be distributed to a single CPU but each CPU has enough blocks to be
+ * reenabled.
+ *
+ * Note that we can be called when counters are already disabled.
+ * xfs_icsb_disable_counter() optimises the counter locking in this case to
+ * prevent locking every per-cpu counter needlessly.
*/
-#define XFS_ICSB_INO_CNTR_REENABLE 64
+
+#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64
#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
- (512 + XFS_ALLOC_SET_ASIDE(mp))
+ (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
STATIC void
xfs_icsb_balance_counter(
xfs_mount_t *mp,
xfs_sb_field_t field,
- int flags)
+ int flags,
+ int min_per_cpu)
{
uint64_t count, resid;
int weight = num_online_cpus();
int s;
+ uint64_t min = (uint64_t)min_per_cpu;
if (!(flags & XFS_ICSB_SB_LOCKED))
s = XFS_SB_LOCK(mp);
@@ -2027,19 +2070,19 @@ xfs_icsb_balance_counter(
case XFS_SBS_ICOUNT:
count = mp->m_sb.sb_icount;
resid = do_div(count, weight);
- if (count < XFS_ICSB_INO_CNTR_REENABLE)
+ if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
goto out;
break;
case XFS_SBS_IFREE:
count = mp->m_sb.sb_ifree;
resid = do_div(count, weight);
- if (count < XFS_ICSB_INO_CNTR_REENABLE)
+ if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
goto out;
break;
case XFS_SBS_FDBLOCKS:
count = mp->m_sb.sb_fdblocks;
resid = do_div(count, weight);
- if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp))
+ if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
goto out;
break;
default:
@@ -2054,32 +2097,39 @@ out:
XFS_SB_UNLOCK(mp, s);
}
-STATIC int
-xfs_icsb_modify_counters_int(
+int
+xfs_icsb_modify_counters(
xfs_mount_t *mp,
xfs_sb_field_t field,
- int delta,
- int rsvd,
- int flags)
+ int64_t delta,
+ int rsvd)
{
xfs_icsb_cnts_t *icsbp;
long long lcounter; /* long counter for 64 bit fields */
- int cpu, s, locked = 0;
- int ret = 0, balance_done = 0;
+ int cpu, ret = 0, s;
+ might_sleep();
again:
cpu = get_cpu();
- icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu),
- xfs_icsb_lock_cntr(icsbp);
+ icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu);
+
+ /*
+ * if the counter is disabled, go to slow path
+ */
if (unlikely(xfs_icsb_counter_disabled(mp, field)))
goto slow_path;
+ xfs_icsb_lock_cntr(icsbp);
+ if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
+ xfs_icsb_unlock_cntr(icsbp);
+ goto slow_path;
+ }
switch (field) {
case XFS_SBS_ICOUNT:
lcounter = icsbp->icsb_icount;
lcounter += delta;
if (unlikely(lcounter < 0))
- goto slow_path;
+ goto balance_counter;
icsbp->icsb_icount = lcounter;
break;
@@ -2087,7 +2137,7 @@ again:
lcounter = icsbp->icsb_ifree;
lcounter += delta;
if (unlikely(lcounter < 0))
- goto slow_path;
+ goto balance_counter;
icsbp->icsb_ifree = lcounter;
break;
@@ -2097,7 +2147,7 @@ again:
lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
lcounter += delta;
if (unlikely(lcounter < 0))
- goto slow_path;
+ goto balance_counter;
icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
break;
default:
@@ -2106,72 +2156,78 @@ again:
}
xfs_icsb_unlock_cntr(icsbp);
put_cpu();
- if (locked)
- XFS_SB_UNLOCK(mp, s);
return 0;
- /*
- * The slow path needs to be run with the SBLOCK
- * held so that we prevent other threads from
- * attempting to run this path at the same time.
- * this provides exclusion for the balancing code,
- * and exclusive fallback if the balance does not
- * provide enough resources to continue in an unlocked
- * manner.
- */
slow_path:
- xfs_icsb_unlock_cntr(icsbp);
put_cpu();
- /* need to hold superblock incase we need
- * to disable a counter */
- if (!(flags & XFS_ICSB_SB_LOCKED)) {
- s = XFS_SB_LOCK(mp);
- locked = 1;
- flags |= XFS_ICSB_SB_LOCKED;
- }
- if (!balance_done) {
- xfs_icsb_balance_counter(mp, field, flags);
- balance_done = 1;
+ /*
+ * serialise with a mutex so we don't burn lots of cpu on
+ * the superblock lock. We still need to hold the superblock
+ * lock, however, when we modify the global structures.
+ */
+ xfs_icsb_lock(mp);
+
+ /*
+ * Now running atomically.
+ *
+ * If the counter is enabled, someone has beaten us to rebalancing.
+ * Drop the lock and try again in the fast path....
+ */
+ if (!(xfs_icsb_counter_disabled(mp, field))) {
+ xfs_icsb_unlock(mp);
goto again;
- } else {
- /*
- * we might not have enough on this local
- * cpu to allocate for a bulk request.
- * We need to drain this field from all CPUs
- * and disable the counter fastpath
- */
- xfs_icsb_disable_counter(mp, field);
}
+ /*
+ * The counter is currently disabled. Because we are
+ * running atomically here, we know a rebalance cannot
+ * be in progress. Hence we can go straight to operating
+ * on the global superblock. We do not call xfs_mod_incore_sb()
+ * here even though we need to get the SB_LOCK. Doing so
+ * will cause us to re-enter this function and deadlock.
+ * Hence we get the SB_LOCK ourselves and then call
+ * xfs_mod_incore_sb_unlocked() as the unlocked path operates
+ * directly on the global counters.
+ */
+ s = XFS_SB_LOCK(mp);
ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+ XFS_SB_UNLOCK(mp, s);
- if (locked)
- XFS_SB_UNLOCK(mp, s);
+ /*
+ * Now that we've modified the global superblock, we
+ * may be able to re-enable the distributed counters
+ * (e.g. lots of space just got freed). After that
+ * we are done.
+ */
+ if (ret != ENOSPC)
+ xfs_icsb_balance_counter(mp, field, 0, 0);
+ xfs_icsb_unlock(mp);
return ret;
-}
-STATIC int
-xfs_icsb_modify_counters(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- int delta,
- int rsvd)
-{
- return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
-}
+balance_counter:
+ xfs_icsb_unlock_cntr(icsbp);
+ put_cpu();
-/*
- * Called when superblock is already locked
- */
-STATIC int
-xfs_icsb_modify_counters_locked(
- xfs_mount_t *mp,
- xfs_sb_field_t field,
- int delta,
- int rsvd)
-{
- return xfs_icsb_modify_counters_int(mp, field, delta,
- rsvd, XFS_ICSB_SB_LOCKED);
+ /*
+ * We may have multiple threads here if multiple per-cpu
+ * counters run dry at the same time. This will mean we can
+ * do more balances than strictly necessary but it is not
+ * the common slowpath case.
+ */
+ xfs_icsb_lock(mp);
+
+ /*
+ * running atomically.
+ *
+ * This will leave the counter in the correct state for future
+ * accesses. After the rebalance, we simply try again and our retry
+ * will either succeed through the fast path or slow path without
+ * another balance operation being required.
+ */
+ xfs_icsb_balance_counter(mp, field, 0, delta);
+ xfs_icsb_unlock(mp);
+ goto again;
}
+
#endif
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index e5f396ff9a3d..82304b94646d 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,6 +18,7 @@
#ifndef __XFS_MOUNT_H__
#define __XFS_MOUNT_H__
+
typedef struct xfs_trans_reservations {
uint tr_write; /* extent alloc trans */
uint tr_itruncate; /* truncate trans */
@@ -306,11 +307,13 @@ typedef struct xfs_icsb_cnts {
#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
extern int xfs_icsb_init_counters(struct xfs_mount *);
-extern void xfs_icsb_sync_counters_lazy(struct xfs_mount *);
+extern void xfs_icsb_reinit_counters(struct xfs_mount *);
+extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int);
#else
#define xfs_icsb_init_counters(mp) (0)
-#define xfs_icsb_sync_counters_lazy(mp) do { } while (0)
+#define xfs_icsb_reinit_counters(mp) do { } while (0)
+#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0)
#endif
typedef struct xfs_mount {
@@ -419,6 +422,7 @@ typedef struct xfs_mount {
xfs_icsb_cnts_t *m_sb_cnts; /* per-cpu superblock counters */
unsigned long m_icsb_counters; /* disabled per-cpu counters */
struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
+ struct mutex m_icsb_mutex; /* balancer sync lock */
#endif
} xfs_mount_t;
@@ -563,11 +567,32 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
}
/*
+ * Per-cpu superblock locking functions
+ */
+#ifdef HAVE_PERCPU_SB
+STATIC_INLINE void
+xfs_icsb_lock(xfs_mount_t *mp)
+{
+ mutex_lock(&mp->m_icsb_mutex);
+}
+
+STATIC_INLINE void
+xfs_icsb_unlock(xfs_mount_t *mp)
+{
+ mutex_unlock(&mp->m_icsb_mutex);
+}
+#else
+#define xfs_icsb_lock(mp)
+#define xfs_icsb_unlock(mp)
+#endif
+
+/*
* This structure is for use by the xfs_mod_incore_sb_batch() routine.
+ * xfs_growfs can specify deltas for a few fields that exceed the range of an int
*/
typedef struct xfs_mod_sb {
xfs_sb_field_t msb_field; /* Field to modify, see below */
- int msb_delta; /* Change to make to specified field */
+ int64_t msb_delta; /* Change to make to specified field */
} xfs_mod_sb_t;
#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
@@ -585,17 +610,17 @@ extern int xfs_unmountfs(xfs_mount_t *, struct cred *);
extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
extern int xfs_unmountfs_writesb(xfs_mount_t *);
extern int xfs_unmount_flush(xfs_mount_t *, int);
-extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int);
+extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
extern int xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
- int, int);
+ int64_t, int);
extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
uint, int);
extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
extern int xfs_readsb(xfs_mount_t *, int);
extern void xfs_freesb(xfs_mount_t *);
extern void xfs_do_force_shutdown(bhv_desc_t *, int, char *, int);
-extern int xfs_syncsub(xfs_mount_t *, int, int, int *);
-extern int xfs_sync_inodes(xfs_mount_t *, int, int, int *);
+extern int xfs_syncsub(xfs_mount_t *, int, int *);
+extern int xfs_sync_inodes(xfs_mount_t *, int, int *);
extern xfs_agnumber_t xfs_initialize_perag(struct bhv_vfs *, xfs_mount_t *,
xfs_agnumber_t);
extern void xfs_xlatesb(void *, struct xfs_sb *, int, __int64_t);
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d98171deaa1c..4c6573d784cd 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -565,7 +565,7 @@ xfs_rename(
IHOLD(target_ip);
IHOLD(src_ip);
- error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 880c73271c05..6fff19dc3cf9 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -147,7 +147,7 @@ xfs_growfs_rt_alloc(
/*
* Free any blocks freed up in the transaction, then commit.
*/
- error = xfs_bmap_finish(&tp, &flist, firstblock, &committed);
+ error = xfs_bmap_finish(&tp, &flist, &committed);
if (error)
goto error_exit;
xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
@@ -913,57 +913,6 @@ xfs_rtcheck_alloc_range(
}
#endif
-#ifdef DEBUG
-/*
- * Check whether the given block in the bitmap has the given value.
- */
-STATIC int /* 1 for matches, 0 for not */
-xfs_rtcheck_bit(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* bit (block) to check */
- int val) /* 1 for free, 0 for allocated */
-{
- int bit; /* bit number in the word */
- xfs_rtblock_t block; /* bitmap block number */
- xfs_buf_t *bp; /* buf for the block */
- xfs_rtword_t *bufp; /* pointer into the buffer */
- /* REFERENCED */
- int error; /* error value */
- xfs_rtword_t wdiff; /* difference between bit & expected */
- int word; /* word number in the buffer */
- xfs_rtword_t wval; /* word value from buffer */
-
- block = XFS_BITTOBLOCK(mp, start);
- error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
- bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
- word = XFS_BITTOWORD(mp, start);
- bit = (int)(start & (XFS_NBWORD - 1));
- wval = bufp[word];
- xfs_trans_brelse(tp, bp);
- wdiff = (wval ^ -val) & ((xfs_rtword_t)1 << bit);
- return !wdiff;
-}
-#endif /* DEBUG */
-
-#if 0
-/*
- * Check that the given extent (block range) is free already.
- */
-STATIC int /* error */
-xfs_rtcheck_free_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number of extent */
- xfs_extlen_t len, /* length of extent */
- int *stat) /* out: 1 for free, 0 for not */
-{
- xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */
-
- return xfs_rtcheck_range(mp, tp, bno, len, 1, &new, stat);
-}
-#endif
-
/*
* Check that the given range is either all allocated (val = 0) or
* all free (val = 1).
@@ -2382,60 +2331,3 @@ xfs_rtpick_extent(
*pick = b;
return 0;
}
-
-#ifdef DEBUG
-/*
- * Debug code: print out the value of a range in the bitmap.
- */
-void
-xfs_rtprint_range(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to print */
- xfs_extlen_t len) /* length to print */
-{
- xfs_extlen_t i; /* block number in the extent */
-
- cmn_err(CE_DEBUG, "%Ld: ", (long long)start);
- for (i = 0; i < len; i++)
- cmn_err(CE_DEBUG, "%d", xfs_rtcheck_bit(mp, tp, start + i, 1));
- cmn_err(CE_DEBUG, "\n");
-}
-
-/*
- * Debug code: print the summary file.
- */
-void
-xfs_rtprint_summary(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp) /* transaction pointer */
-{
- xfs_suminfo_t c; /* summary data */
- xfs_rtblock_t i; /* bitmap block number */
- int l; /* summary information level */
- int p; /* flag for printed anything */
- xfs_fsblock_t sb; /* summary block number */
- xfs_buf_t *sumbp; /* summary block buffer */
-
- sumbp = NULL;
- for (l = 0; l < mp->m_rsumlevels; l++) {
- for (p = 0, i = 0; i < mp->m_sb.sb_rbmblocks; i++) {
- (void)xfs_rtget_summary(mp, tp, l, i, &sumbp, &sb, &c);
- if (c) {
- if (!p) {
- cmn_err(CE_DEBUG, "%Ld-%Ld:", 1LL << l,
- XFS_RTMIN((1LL << l) +
- ((1LL << l) - 1LL),
- mp->m_sb.sb_rextents));
- p = 1;
- }
- cmn_err(CE_DEBUG, " %Ld:%d", (long long)i, c);
- }
- }
- if (p)
- cmn_err(CE_DEBUG, "\n");
- }
- if (sumbp)
- xfs_trans_brelse(tp, sumbp);
-}
-#endif /* DEBUG */
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 0e0b4d2ec202..799c1f871263 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -134,24 +134,6 @@ xfs_rtpick_extent(
xfs_rtblock_t *pick); /* result rt extent */
/*
- * Debug code: print out the value of a range in the bitmap.
- */
-void
-xfs_rtprint_range(
- struct xfs_mount *mp, /* file system mount structure */
- struct xfs_trans *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to print */
- xfs_extlen_t len); /* length to print */
-
-/*
- * Debug code: print the summary file.
- */
-void
-xfs_rtprint_summary(
- struct xfs_mount *mp, /* file system mount structure */
- struct xfs_trans *tp); /* transaction pointer */
-
-/*
* Grow the realtime area of the filesystem.
*/
int
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index defb2febaaf5..1ea7c0ca6ae0 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -42,7 +42,6 @@
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_acl.h"
-#include "xfs_mac.h"
#include "xfs_error.h"
#include "xfs_buf_item.h"
#include "xfs_rw.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index ee2721e0de4d..301ff9445b6f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -339,7 +339,7 @@ xfs_trans_reserve(
*/
if (blocks > 0) {
error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
- -blocks, rsvd);
+ -((int64_t)blocks), rsvd);
if (error != 0) {
current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
return (XFS_ERROR(ENOSPC));
@@ -380,7 +380,7 @@ xfs_trans_reserve(
*/
if (rtextents > 0) {
error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS,
- -rtextents, rsvd);
+ -((int64_t)rtextents), rsvd);
if (error) {
error = XFS_ERROR(ENOSPC);
goto undo_log;
@@ -410,7 +410,7 @@ undo_log:
undo_blocks:
if (blocks > 0) {
(void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS,
- blocks, rsvd);
+ (int64_t)blocks, rsvd);
tp->t_blk_res = 0;
}
@@ -432,7 +432,7 @@ void
xfs_trans_mod_sb(
xfs_trans_t *tp,
uint field,
- long delta)
+ int64_t delta)
{
switch (field) {
@@ -663,62 +663,62 @@ xfs_trans_unreserve_and_mod_sb(
if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
if (tp->t_icount_delta != 0) {
msbp->msb_field = XFS_SBS_ICOUNT;
- msbp->msb_delta = (int)tp->t_icount_delta;
+ msbp->msb_delta = tp->t_icount_delta;
msbp++;
}
if (tp->t_ifree_delta != 0) {
msbp->msb_field = XFS_SBS_IFREE;
- msbp->msb_delta = (int)tp->t_ifree_delta;
+ msbp->msb_delta = tp->t_ifree_delta;
msbp++;
}
if (tp->t_fdblocks_delta != 0) {
msbp->msb_field = XFS_SBS_FDBLOCKS;
- msbp->msb_delta = (int)tp->t_fdblocks_delta;
+ msbp->msb_delta = tp->t_fdblocks_delta;
msbp++;
}
if (tp->t_frextents_delta != 0) {
msbp->msb_field = XFS_SBS_FREXTENTS;
- msbp->msb_delta = (int)tp->t_frextents_delta;
+ msbp->msb_delta = tp->t_frextents_delta;
msbp++;
}
if (tp->t_dblocks_delta != 0) {
msbp->msb_field = XFS_SBS_DBLOCKS;
- msbp->msb_delta = (int)tp->t_dblocks_delta;
+ msbp->msb_delta = tp->t_dblocks_delta;
msbp++;
}
if (tp->t_agcount_delta != 0) {
msbp->msb_field = XFS_SBS_AGCOUNT;
- msbp->msb_delta = (int)tp->t_agcount_delta;
+ msbp->msb_delta = tp->t_agcount_delta;
msbp++;
}
if (tp->t_imaxpct_delta != 0) {
msbp->msb_field = XFS_SBS_IMAX_PCT;
- msbp->msb_delta = (int)tp->t_imaxpct_delta;
+ msbp->msb_delta = tp->t_imaxpct_delta;
msbp++;
}
if (tp->t_rextsize_delta != 0) {
msbp->msb_field = XFS_SBS_REXTSIZE;
- msbp->msb_delta = (int)tp->t_rextsize_delta;
+ msbp->msb_delta = tp->t_rextsize_delta;
msbp++;
}
if (tp->t_rbmblocks_delta != 0) {
msbp->msb_field = XFS_SBS_RBMBLOCKS;
- msbp->msb_delta = (int)tp->t_rbmblocks_delta;
+ msbp->msb_delta = tp->t_rbmblocks_delta;
msbp++;
}
if (tp->t_rblocks_delta != 0) {
msbp->msb_field = XFS_SBS_RBLOCKS;
- msbp->msb_delta = (int)tp->t_rblocks_delta;
+ msbp->msb_delta = tp->t_rblocks_delta;
msbp++;
}
if (tp->t_rextents_delta != 0) {
msbp->msb_field = XFS_SBS_REXTENTS;
- msbp->msb_delta = (int)tp->t_rextents_delta;
+ msbp->msb_delta = tp->t_rextents_delta;
msbp++;
}
if (tp->t_rextslog_delta != 0) {
msbp->msb_field = XFS_SBS_REXTSLOG;
- msbp->msb_delta = (int)tp->t_rextslog_delta;
+ msbp->msb_delta = tp->t_rextslog_delta;
msbp++;
}
}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c68e00105d23..f1d7ab236726 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -39,13 +39,9 @@ typedef struct xfs_trans_header {
/*
* Log item types.
*/
-#define XFS_LI_5_3_BUF 0x1234 /* v1 bufs, 1-block inode buffers */
-#define XFS_LI_5_3_INODE 0x1235 /* 1-block inode buffers */
#define XFS_LI_EFI 0x1236
#define XFS_LI_EFD 0x1237
#define XFS_LI_IUNLINK 0x1238
-#define XFS_LI_6_1_INODE 0x1239 /* 4K non-aligned inode bufs */
-#define XFS_LI_6_1_BUF 0x123a /* v1, 4K inode buffers */
#define XFS_LI_INODE 0x123b /* aligned ino chunks, var-size ibufs */
#define XFS_LI_BUF 0x123c /* v2 bufs, variable sized inode bufs */
#define XFS_LI_DQUOT 0x123d
@@ -354,25 +350,25 @@ typedef struct xfs_trans {
xfs_trans_callback_t t_callback; /* transaction callback */
void *t_callarg; /* callback arg */
unsigned int t_flags; /* misc flags */
- long t_icount_delta; /* superblock icount change */
- long t_ifree_delta; /* superblock ifree change */
- long t_fdblocks_delta; /* superblock fdblocks chg */
- long t_res_fdblocks_delta; /* on-disk only chg */
- long t_frextents_delta;/* superblock freextents chg*/
- long t_res_frextents_delta; /* on-disk only chg */
+ int64_t t_icount_delta; /* superblock icount change */
+ int64_t t_ifree_delta; /* superblock ifree change */
+ int64_t t_fdblocks_delta; /* superblock fdblocks chg */
+ int64_t t_res_fdblocks_delta; /* on-disk only chg */
+ int64_t t_frextents_delta;/* superblock freextents chg*/
+ int64_t t_res_frextents_delta; /* on-disk only chg */
#ifdef DEBUG
- long t_ag_freeblks_delta; /* debugging counter */
- long t_ag_flist_delta; /* debugging counter */
- long t_ag_btree_delta; /* debugging counter */
+ int64_t t_ag_freeblks_delta; /* debugging counter */
+ int64_t t_ag_flist_delta; /* debugging counter */
+ int64_t t_ag_btree_delta; /* debugging counter */
#endif
- long t_dblocks_delta;/* superblock dblocks change */
- long t_agcount_delta;/* superblock agcount change */
- long t_imaxpct_delta;/* superblock imaxpct change */
- long t_rextsize_delta;/* superblock rextsize chg */
- long t_rbmblocks_delta;/* superblock rbmblocks chg */
- long t_rblocks_delta;/* superblock rblocks change */
- long t_rextents_delta;/* superblocks rextents chg */
- long t_rextslog_delta;/* superblocks rextslog chg */
+ int64_t t_dblocks_delta;/* superblock dblocks change */
+ int64_t t_agcount_delta;/* superblock agcount change */
+ int64_t t_imaxpct_delta;/* superblock imaxpct change */
+ int64_t t_rextsize_delta;/* superblock rextsize chg */
+ int64_t t_rbmblocks_delta;/* superblock rbmblocks chg */
+ int64_t t_rblocks_delta;/* superblock rblocks change */
+ int64_t t_rextents_delta;/* superblocks rextents chg */
+ int64_t t_rextslog_delta;/* superblocks rextslog chg */
unsigned int t_items_free; /* log item descs free */
xfs_log_item_chunk_t t_items; /* first log item desc chunk */
xfs_trans_header_t t_header; /* header for in-log trans */
@@ -936,9 +932,9 @@ typedef struct xfs_trans {
#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC)
#ifdef DEBUG
-#define xfs_trans_agblocks_delta(tp, d) ((tp)->t_ag_freeblks_delta += (long)d)
-#define xfs_trans_agflist_delta(tp, d) ((tp)->t_ag_flist_delta += (long)d)
-#define xfs_trans_agbtree_delta(tp, d) ((tp)->t_ag_btree_delta += (long)d)
+#define xfs_trans_agblocks_delta(tp, d) ((tp)->t_ag_freeblks_delta += (int64_t)d)
+#define xfs_trans_agflist_delta(tp, d) ((tp)->t_ag_flist_delta += (int64_t)d)
+#define xfs_trans_agbtree_delta(tp, d) ((tp)->t_ag_btree_delta += (int64_t)d)
#else
#define xfs_trans_agblocks_delta(tp, d)
#define xfs_trans_agflist_delta(tp, d)
@@ -954,7 +950,7 @@ xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint);
xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
uint, uint);
-void xfs_trans_mod_sb(xfs_trans_t *, uint, long);
+void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t,
int, uint);
int xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *,
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index fc39b166d403..ceb4f6e99960 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -90,7 +90,7 @@ xfs_trans_push_ail(
int flush_log;
SPLDECL(s);
-#define XFS_TRANS_PUSH_AIL_RESTARTS 10
+#define XFS_TRANS_PUSH_AIL_RESTARTS 1000
AIL_LOCK(mp,s);
lip = xfs_trans_first_ail(mp, &gen);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 62336a4cc5a4..29f72f613782 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -640,7 +640,7 @@ xfs_quiesce_fs(
* we can write the unmount record.
*/
do {
- xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, 0, NULL);
+ xfs_syncsub(mp, SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT, NULL);
pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
if (!pincount) {
delay(50);
@@ -806,7 +806,7 @@ xfs_statvfs(
statp->f_type = XFS_SB_MAGIC;
- xfs_icsb_sync_counters_lazy(mp);
+ xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
s = XFS_SB_LOCK(mp);
statp->f_bsize = sbp->sb_blocksize;
lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
@@ -872,6 +872,10 @@ xfs_statvfs(
* this by simply making sure the log gets flushed
* if SYNC_BDFLUSH is set, and by actually writing it
* out otherwise.
+ * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete
+ * before we return (including direct I/O). Forms the drain
+ * side of the write barrier needed to safely quiesce the
+ * filesystem.
*
*/
/*ARGSUSED*/
@@ -883,27 +887,20 @@ xfs_sync(
{
xfs_mount_t *mp = XFS_BHVTOM(bdp);
- if (unlikely(flags == SYNC_QUIESCE))
- return xfs_quiesce_fs(mp);
- else
- return xfs_syncsub(mp, flags, 0, NULL);
+ return xfs_syncsub(mp, flags, NULL);
}
/*
* xfs sync routine for internal use
*
* This routine supports all of the flags defined for the generic vfs_sync
- * interface as explained above under xfs_sync. In the interests of not
- * changing interfaces within the 6.5 family, additional internally-
- * required functions are specified within a separate xflags parameter,
- * only available by calling this routine.
+ * interface as explained above under xfs_sync.
*
*/
int
xfs_sync_inodes(
xfs_mount_t *mp,
int flags,
- int xflags,
int *bypassed)
{
xfs_inode_t *ip = NULL;
@@ -1176,6 +1173,13 @@ xfs_sync_inodes(
}
}
+ /*
+ * When freezing, we need to wait to ensure all I/O (including direct
+ * I/O) is complete to ensure no further data modification can take
+ * place after this point
+ */
+ if (flags & SYNC_IOWAIT)
+ vn_iowait(vp);
if (flags & SYNC_BDFLUSH) {
if ((flags & SYNC_ATTR) &&
@@ -1412,17 +1416,13 @@ xfs_sync_inodes(
* xfs sync routine for internal use
*
* This routine supports all of the flags defined for the generic vfs_sync
- * interface as explained above under xfs_sync. In the interests of not
- * changing interfaces within the 6.5 family, additional internally-
- * required functions are specified within a separate xflags parameter,
- * only available by calling this routine.
+ * interface as explained above under xfs_sync.
*
*/
int
xfs_syncsub(
xfs_mount_t *mp,
int flags,
- int xflags,
int *bypassed)
{
int error = 0;
@@ -1444,7 +1444,7 @@ xfs_syncsub(
if (flags & SYNC_BDFLUSH)
xfs_finish_reclaim_all(mp, 1);
else
- error = xfs_sync_inodes(mp, flags, xflags, bypassed);
+ error = xfs_sync_inodes(mp, flags, bypassed);
}
/*
@@ -1958,15 +1958,26 @@ xfs_showargs(
return 0;
}
+/*
+ * Second stage of a freeze. The data is already frozen, now we have to take
+ * care of the metadata. New transactions are already blocked, so we need to
+ * wait for any remaining transactions to drain out before proceeding.
+ */
STATIC void
xfs_freeze(
bhv_desc_t *bdp)
{
xfs_mount_t *mp = XFS_BHVTOM(bdp);
+ /* wait for all modifications to complete */
while (atomic_read(&mp->m_active_trans) > 0)
delay(100);
+ /* flush inodes and push all remaining buffers out to disk */
+ xfs_quiesce_fs(mp);
+
+ ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0);
+
/* Push the superblock and write an unmount record */
xfs_log_unmount_write(mp);
xfs_unmountfs_writesb(mp);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index bda774a04b8f..52c41714ec54 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -51,7 +51,6 @@
#include "xfs_refcache.h"
#include "xfs_trans_space.h"
#include "xfs_log_priv.h"
-#include "xfs_mac.h"
STATIC int
xfs_open(
@@ -1381,7 +1380,7 @@ xfs_inactive_symlink_rmt(
/*
* Commit the first transaction. This logs the EFI and the inode.
*/
- if ((error = xfs_bmap_finish(&tp, &free_list, first_block, &committed)))
+ if ((error = xfs_bmap_finish(&tp, &free_list, &committed)))
goto error1;
/*
* The transaction must have been committed, since there were
@@ -1790,8 +1789,7 @@ xfs_inactive(
* Just ignore errors at this point. There is
* nothing we can do except to try to keep going.
*/
- (void) xfs_bmap_finish(&tp, &free_list, first_block,
- &committed);
+ (void) xfs_bmap_finish(&tp, &free_list, &committed);
(void) xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES, NULL);
}
/*
@@ -2022,7 +2020,7 @@ xfs_create(
IHOLD(ip);
vp = XFS_ITOV(ip);
- error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
xfs_bmap_cancel(&free_list);
goto abort_rele;
@@ -2507,7 +2505,7 @@ xfs_remove(
xfs_trans_set_sync(tp);
}
- error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
REMOVE_DEBUG_TRACE(__LINE__);
goto error_rele;
@@ -2715,7 +2713,7 @@ xfs_link(
xfs_trans_set_sync(tp);
}
- error = xfs_bmap_finish (&tp, &free_list, first_block, &committed);
+ error = xfs_bmap_finish (&tp, &free_list, &committed);
if (error) {
xfs_bmap_cancel(&free_list);
goto abort_return;
@@ -2932,7 +2930,7 @@ xfs_mkdir(
xfs_trans_set_sync(tp);
}
- error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
IRELE(cdp);
goto error2;
@@ -3183,7 +3181,7 @@ xfs_rmdir(
xfs_trans_set_sync(tp);
}
- error = xfs_bmap_finish (&tp, &free_list, first_block, &committed);
+ error = xfs_bmap_finish (&tp, &free_list, &committed);
if (error) {
xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
@@ -3533,7 +3531,7 @@ xfs_symlink(
*/
IHOLD(ip);
- error = xfs_bmap_finish(&tp, &free_list, first_block, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
goto error2;
}
@@ -4145,7 +4143,7 @@ retry:
/*
* Complete the transaction
*/
- error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
goto error0;
}
@@ -4452,7 +4450,7 @@ xfs_free_file_space(
/*
* complete the transaction
*/
- error = xfs_bmap_finish(&tp, &free_list, firstfsb, &committed);
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
goto error0;
}