summaryrefslogtreecommitdiffstats
path: root/fs/nfsd
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 15:20:36 -0700
commit 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/nfsd
download linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.gz
linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.bz2
linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.zip
Linux-2.6.12-rc2 v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'fs/nfsd')
-rw-r--r-- fs/nfsd/Makefile 12
-rw-r--r-- fs/nfsd/auth.c 63
-rw-r--r-- fs/nfsd/export.c 1200
-rw-r--r-- fs/nfsd/lockd.c 79
-rw-r--r-- fs/nfsd/nfs3proc.c 702
-rw-r--r-- fs/nfsd/nfs3xdr.c 1092
-rw-r--r-- fs/nfsd/nfs4acl.c 954
-rw-r--r-- fs/nfsd/nfs4callback.c 547
-rw-r--r-- fs/nfsd/nfs4idmap.c 588
-rw-r--r-- fs/nfsd/nfs4proc.c 984
-rw-r--r-- fs/nfsd/nfs4state.c 3320
-rw-r--r-- fs/nfsd/nfs4xdr.c 2536
-rw-r--r-- fs/nfsd/nfscache.c 328
-rw-r--r-- fs/nfsd/nfsctl.c 438
-rw-r--r-- fs/nfsd/nfsfh.c 532
-rw-r--r-- fs/nfsd/nfsproc.c 605
-rw-r--r-- fs/nfsd/nfssvc.c 385
-rw-r--r-- fs/nfsd/nfsxdr.c 511
-rw-r--r-- fs/nfsd/stats.c 101
-rw-r--r-- fs/nfsd/vfs.c 1859
20 files changed, 16836 insertions, 0 deletions
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
new file mode 100644
index 000000000000..b8680a247f8b
--- /dev/null
+++ b/fs/nfsd/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the Linux nfs server
+#
+
+obj-$(CONFIG_NFSD) += nfsd.o
+
+nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
+ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
+nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
+ nfs4acl.o nfs4callback.o
+nfsd-objs := $(nfsd-y)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
new file mode 100644
index 000000000000..cfe9ce881613
--- /dev/null
+++ b/fs/nfsd/auth.c
@@ -0,0 +1,63 @@
+/*
+ * linux/fs/nfsd/auth.c
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcauth.h>
+#include <linux/nfsd/nfsd.h>
+
+#define CAP_NFSD_MASK (CAP_FS_MASK|CAP_TO_MASK(CAP_SYS_RESOURCE))
+
+/*
+ * Install the credentials of an incoming request on the current task.
+ *
+ * The request's svc_cred is first rewritten according to the export's
+ * squash flags, then copied into current->fsuid / current->fsgid, the
+ * supplementary group list and the effective capability set.
+ *
+ * Returns -ENOMEM when no group_info is available, otherwise the result
+ * of set_current_groups().
+ */
+int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
+{
+	struct svc_cred *cred = &rqstp->rq_cred;
+	int idx;
+	int ret;
+
+	if (exp->ex_flags & NFSEXP_ALLSQUASH) {
+		/* every caller gets the anonymous ids and an empty group list */
+		cred->cr_uid = exp->ex_anon_uid;
+		cred->cr_gid = exp->ex_anon_gid;
+		put_group_info(cred->cr_group_info);
+		cred->cr_group_info = groups_alloc(0);
+	} else if (exp->ex_flags & NFSEXP_ROOTSQUASH) {
+		struct group_info *squashed;
+
+		/* only id 0 is remapped: in uid, gid and each group slot */
+		if (cred->cr_uid == 0)
+			cred->cr_uid = exp->ex_anon_uid;
+		if (cred->cr_gid == 0)
+			cred->cr_gid = exp->ex_anon_gid;
+
+		squashed = groups_alloc(cred->cr_group_info->ngroups);
+		if (squashed != NULL) {
+			for (idx = 0; idx < cred->cr_group_info->ngroups; idx++) {
+				if (GROUP_AT(cred->cr_group_info, idx))
+					GROUP_AT(squashed, idx) =
+						GROUP_AT(cred->cr_group_info, idx);
+				else
+					GROUP_AT(squashed, idx) = exp->ex_anon_gid;
+			}
+		}
+		/* a failed allocation leaves cr_group_info NULL; caught below */
+		put_group_info(cred->cr_group_info);
+		cred->cr_group_info = squashed;
+	}
+
+	/* an id of -1 falls back to the export's anonymous id */
+	if (cred->cr_uid == (uid_t) -1)
+		current->fsuid = exp->ex_anon_uid;
+	else
+		current->fsuid = cred->cr_uid;
+
+	if (cred->cr_gid == (gid_t) -1)
+		current->fsgid = exp->ex_anon_gid;
+	else
+		current->fsgid = cred->cr_gid;
+
+	if (cred->cr_group_info == NULL)
+		return -ENOMEM;
+
+	ret = set_current_groups(cred->cr_group_info);
+
+	/* non-zero uid loses CAP_NFSD_MASK; uid 0 regains what is permitted */
+	if (cred->cr_uid != 0) {
+		cap_t(current->cap_effective) &= ~CAP_NFSD_MASK;
+	} else {
+		cap_t(current->cap_effective) |= (CAP_NFSD_MASK &
+						  current->cap_permitted);
+	}
+	return ret;
+}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
new file mode 100644
index 000000000000..9a11aa39e2e4
--- /dev/null
+++ b/fs/nfsd/export.c
@@ -0,0 +1,1200 @@
+#define MSNFS /* HACK HACK */
+/*
+ * linux/fs/nfsd/export.c
+ *
+ * NFS exporting and validation.
+ *
+ * We maintain a list of clients, each of which has a list of
+ * exports. To export an fs to a given client, you first have
+ * to create the client entry with NFSCTL_ADDCLIENT, which
+ * creates a client control block and adds it to the hash
+ * table. Then, you call NFSCTL_EXPORT for each fs.
+ *
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
+ */
+
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/in.h>
+#include <linux/seq_file.h>
+#include <linux/syscalls.h>
+#include <linux/rwsem.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/mount.h>
+#include <linux/hash.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/nfsfh.h>
+#include <linux/nfsd/syscall.h>
+#include <linux/lockd/bind.h>
+
+/* NOTE(review): presumably selects the dprintk() facility for this file -- confirm */
+#define NFSDDBG_FACILITY NFSDDBG_EXPORT
+#define NFSD_PARANOIA 1
+
+/* Local shorthands: in this file a "client" is an auth_domain. */
+typedef struct auth_domain svc_client;
+typedef struct svc_export svc_export;
+
+/* Forward declarations of helpers defined further down in this file. */
+static void exp_do_unexport(svc_export *unexp);
+static int exp_verify_string(char *cp, int max);
+
+/*
+ * We have two caches.
+ * One maps client+vfsmnt+dentry to export options - the export map
+ * The other maps client+filehandle-fragment to export options. - the expkey map
+ *
+ * The export options are actually stored in the first map, and the
+ * second map contains a reference to the entry in the first map.
+ */
+
+/*
+ * Hash table for the expkey map: 2^EXPKEY_HASHBITS buckets, with
+ * EXPKEY_HASHMASK used to reduce a hash value to a bucket index.
+ */
+#define EXPKEY_HASHBITS 8
+#define EXPKEY_HASHMAX (1 << EXPKEY_HASHBITS)
+#define EXPKEY_HASHMASK (EXPKEY_HASHMAX -1)
+static struct cache_head *expkey_table[EXPKEY_HASHMAX];
+
+/*
+ * Compute the expkey_table bucket for an entry by mixing its fsid type,
+ * the fsid bytes (key_len() of them) and the client pointer.
+ */
+static inline int svc_expkey_hash(struct svc_expkey *item)
+{
+	int nbytes = key_len(item->ek_fsidtype);
+	int bucket;
+
+	bucket = item->ek_fsidtype;
+	bucket ^= hash_mem((char*)item->ek_fsid, nbytes, EXPKEY_HASHBITS);
+	bucket ^= hash_ptr(item->ek_client, EXPKEY_HASHBITS);
+
+	return bucket & EXPKEY_HASHMASK;
+}
+
+/*
+ * Drop one reference on an expkey cache entry.  When cache_put() returns
+ * non-zero the entry is torn down: the export reference is dropped (only
+ * for entries that are valid and not negative), the client reference is
+ * dropped, and the entry itself is freed.
+ */
+void expkey_put(struct cache_head *item, struct cache_detail *cd)
+{
+	struct svc_expkey *key;
+
+	if (!cache_put(item, cd))
+		return;
+
+	key = container_of(item, struct svc_expkey, h);
+	if (test_bit(CACHE_VALID, &item->flags) &&
+	    !test_bit(CACHE_NEGATIVE, &item->flags))
+		exp_put(key->ek_export);
+	auth_domain_put(key->ek_client);
+	kfree(key);
+}
+
+/*
+ * Format the upcall request for an expkey entry into *bpp:
+ * the client name, the fsid type in decimal, and the fsid bytes in hex.
+ * The byte just before the final write position is then overwritten
+ * with a newline.
+ */
+static void expkey_request(struct cache_detail *cd,
+			   struct cache_head *h,
+			   char **bpp, int *blen)
+{
+	struct svc_expkey *ek = container_of(h, struct svc_expkey, h);
+	char fsidtype_str[5];
+
+	snprintf(fsidtype_str, 5, "%d", ek->ek_fsidtype);
+
+	qword_add(bpp, blen, ek->ek_client->name);
+	qword_add(bpp, blen, fsidtype_str);
+	qword_addhex(bpp, blen, (char*)ek->ek_fsid, key_len(ek->ek_fsidtype));
+	(*bpp)[-1] = '\n';
+}
+
+static struct svc_expkey *svc_expkey_lookup(struct svc_expkey *, int);
+/*
+ * Parse one line written by user space into the "nfsd.fh" cache.
+ *
+ * Fields are consumed in this order:
+ *	client fsidtype fsid expiry [path]
+ * An empty or missing path stores a negative entry.  Otherwise the path
+ * is looked up and must already have an export entry for the client.
+ *
+ * Returns 0 on success, -EINVAL for a malformed line, -ENOENT for an
+ * unknown client or a path without an export, -ENOMEM when a buffer or
+ * cache entry cannot be allocated, or the error from path_lookup().
+ */
+static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
+{
+	char *buf;
+	int len;
+	struct auth_domain *dom = NULL;
+	int err;
+	int fsidtype;
+	char *ep;
+	struct svc_expkey key;
+	struct svc_expkey *ek;
+
+	/* reject an empty message before indexing mesg[mlen-1] */
+	if (mlen < 1 || mesg[mlen-1] != '\n')
+		return -EINVAL;
+	mesg[mlen-1] = 0;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!buf)
+		goto out;
+
+	/* client */
+	err = -EINVAL;
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+		goto out;
+
+	err = -ENOENT;
+	dom = auth_domain_find(buf);
+	if (!dom)
+		goto out;
+	dprintk("found domain %s\n", buf);
+
+	/* fsidtype */
+	err = -EINVAL;
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+		goto out;
+	fsidtype = simple_strtoul(buf, &ep, 10);
+	if (*ep)
+		goto out;
+	dprintk("found fsidtype %d\n", fsidtype);
+	if (fsidtype > 2)
+		goto out;
+
+	/* fsid: its decoded length must match the fsid type */
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+		goto out;
+	dprintk("found fsid length %d\n", len);
+	if (len != key_len(fsidtype))
+		goto out;
+
+	/* OK, we seem to have a valid key */
+	key.h.flags = 0;
+	key.h.expiry_time = get_expiry(&mesg);
+	if (key.h.expiry_time == 0)
+		goto out;
+
+	key.ek_client = dom;
+	key.ek_fsidtype = fsidtype;
+	memcpy(key.ek_fsid, buf, len);
+
+	/* now we want a pathname, or empty meaning NEGATIVE */
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0)
+		goto out;
+	dprintk("Path seems to be <%s>\n", buf);
+	err = 0;
+	if (len == 0) {
+		set_bit(CACHE_NEGATIVE, &key.h.flags);
+		ek = svc_expkey_lookup(&key, 1);
+		if (ek)
+			expkey_put(&ek->h, &svc_expkey_cache);
+		else
+			err = -ENOMEM;	/* same mapping as exp_set_key() */
+	} else {
+		struct nameidata nd;
+		struct svc_export *exp;
+
+		err = path_lookup(buf, 0, &nd);
+		if (err)
+			goto out;
+
+		dprintk("Found the path %s\n", buf);
+		exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL);
+
+		err = -ENOENT;
+		if (!exp)
+			goto out_nd;
+		key.ek_export = exp;
+		dprintk("And found export\n");
+
+		ek = svc_expkey_lookup(&key, 1);
+		if (ek) {
+			expkey_put(&ek->h, &svc_expkey_cache);
+			err = 0;
+		} else
+			err = -ENOMEM;	/* same mapping as exp_set_key() */
+		exp_put(exp);
+	out_nd:
+		path_release(&nd);
+	}
+	cache_flush();
+ out:
+	if (dom)
+		auth_domain_put(dom);
+	kfree(buf);
+	return err;
+}
+
+/*
+ * seq_file output for one "nfsd.fh" cache entry.
+ *
+ * With h == NULL only the column header is printed.  Otherwise the line
+ * holds the client name, the fsid type, one to three fsid words in hex
+ * and, for a valid non-negative entry, the exported path.
+ */
+static int expkey_show(struct seq_file *m,
+		       struct cache_detail *cd,
+		       struct cache_head *h)
+{
+	struct svc_expkey *ek;
+	int has_export;
+
+	if (!h) {
+		seq_puts(m, "#domain fsidtype fsid [path]\n");
+		return 0;
+	}
+
+	ek = container_of(h, struct svc_expkey, h);
+	seq_printf(m, "%s %d 0x%08x", ek->ek_client->name,
+		   ek->ek_fsidtype, ek->ek_fsid[0]);
+
+	/* type 1 prints a single word; type 2 prints all three */
+	if (ek->ek_fsidtype != 1) {
+		seq_printf(m, "%08x", ek->ek_fsid[1]);
+		if (ek->ek_fsidtype == 2)
+			seq_printf(m, "%08x", ek->ek_fsid[2]);
+	}
+
+	has_export = test_bit(CACHE_VALID, &h->flags) &&
+		     !test_bit(CACHE_NEGATIVE, &h->flags);
+	if (has_export) {
+		seq_printf(m, " ");
+		seq_path(m, ek->ek_export->ex_mnt, ek->ek_export->ex_dentry, "\\ \t\n");
+	}
+	seq_printf(m, "\n");
+	return 0;
+}
+
+/*
+ * Cache descriptor for the expkey map, named "nfsd.fh".  It ties
+ * expkey_table to the put/request/parse/show callbacks defined above.
+ */
+struct cache_detail svc_expkey_cache = {
+ .hash_size = EXPKEY_HASHMAX,
+ .hash_table = expkey_table,
+ .name = "nfsd.fh",
+ .cache_put = expkey_put,
+ .cache_request = expkey_request,
+ .cache_parse = expkey_parse,
+ .cache_show = expkey_show,
+};
+
+/*
+ * Two expkeys match when fsid type, client and the first
+ * key_len(type) bytes of the fsid are all equal.  Returns 1 or 0.
+ */
+static inline int svc_expkey_match (struct svc_expkey *a, struct svc_expkey *b)
+{
+	return a->ek_fsidtype == b->ek_fsidtype &&
+	       a->ek_client == b->ek_client &&
+	       memcmp(a->ek_fsid, b->ek_fsid, key_len(a->ek_fsidtype)) == 0;
+}
+
+/*
+ * Copy the key part of an expkey (client, fsid type, three fsid words)
+ * from item into new, taking a reference on the client.
+ */
+static inline void svc_expkey_init(struct svc_expkey *new, struct svc_expkey *item)
+{
+	new->ek_client = item->ek_client;
+	cache_get(&new->ek_client->h);
+
+	new->ek_fsidtype = item->ek_fsidtype;
+	new->ek_fsid[0] = item->ek_fsid[0];
+	new->ek_fsid[1] = item->ek_fsid[1];
+	new->ek_fsid[2] = item->ek_fsid[2];
+}
+
+/*
+ * Copy the content part of an expkey (the export pointer) from item
+ * into new, taking a reference on the export.
+ */
+static inline void svc_expkey_update(struct svc_expkey *new, struct svc_expkey *item)
+{
+	new->ek_export = item->ek_export;
+	cache_get(&new->ek_export->h);
+}
+
+/*
+ * Expands to the definition of svc_expkey_lookup() (declared earlier in
+ * this file).  NOTE(review): the macro body is not visible here; it
+ * presumably uses the svc_expkey_hash/match/init/update helpers above.
+ */
+static DefineSimpleCacheLookup(svc_expkey,0) /* no inplace updates */
+
+/* Hash table for the export map: 2^EXPORT_HASHBITS buckets. */
+#define EXPORT_HASHBITS 8
+#define EXPORT_HASHMAX (1<< EXPORT_HASHBITS)
+#define EXPORT_HASHMASK (EXPORT_HASHMAX -1)
+
+static struct cache_head *export_table[EXPORT_HASHMAX];
+
+/*
+ * Hash an export on its client, dentry and vfsmount pointers by
+ * XOR-ing three EXPORT_HASHBITS-wide pointer hashes.
+ */
+static inline int svc_export_hash(struct svc_export *item)
+{
+	int bucket = hash_ptr(item->ex_client, EXPORT_HASHBITS);
+
+	bucket ^= hash_ptr(item->ex_dentry, EXPORT_HASHBITS);
+	bucket ^= hash_ptr(item->ex_mnt, EXPORT_HASHBITS);
+
+	return bucket;
+}
+
+/*
+ * Drop one reference on an export cache entry.  When cache_put() returns
+ * non-zero, release the dentry, vfsmount and client it points at and
+ * free the entry.
+ */
+void svc_export_put(struct cache_head *item, struct cache_detail *cd)
+{
+	struct svc_export *exp;
+
+	if (!cache_put(item, cd))
+		return;
+
+	exp = container_of(item, struct svc_export, h);
+	dput(exp->ex_dentry);
+	mntput(exp->ex_mnt);
+	auth_domain_put(exp->ex_client);
+	kfree(exp);
+}
+
+/*
+ * Format the upcall request for an export entry into *bpp:
+ * the client name followed by the path of the exported dentry.
+ * If d_path() fails, only a newline is stored at the current position.
+ */
+static void svc_export_request(struct cache_detail *cd,
+			       struct cache_head *h,
+			       char **bpp, int *blen)
+{
+	struct svc_export *exp = container_of(h, struct svc_export, h);
+	char *pth;
+
+	qword_add(bpp, blen, exp->ex_client->name);
+
+	/* the unused tail of the output buffer doubles as d_path scratch */
+	pth = d_path(exp->ex_dentry, exp->ex_mnt, *bpp, *blen);
+	if (!IS_ERR(pth)) {
+		qword_add(bpp, blen, pth);
+		(*bpp)[-1] = '\n';
+		return;
+	}
+
+	/* is this correct? */
+	(*bpp)[0] = '\n';
+}
+
+static struct svc_export *svc_export_lookup(struct svc_export *, int);
+
+/*
+ * Decide whether an inode may be exported with the given export flags.
+ *
+ * Returns 0 if it may, -ENOTDIR if the inode is neither a directory nor
+ * a regular file, and -EINVAL if the filesystem cannot be identified by
+ * number or has no export operations.
+ *
+ * Side effect: when the filesystem's export operations have no
+ * find_exported_dentry method, the generic find_exported_dentry is
+ * installed in its place.
+ */
+static int check_export(struct inode *inode, int flags)
+{
+	/* We currently export only dirs and regular files.
+	 * This is what umountd does.
+	 */
+	if (!S_ISDIR(inode->i_mode) &&
+	    !S_ISREG(inode->i_mode))
+		return -ENOTDIR;
+
+	/* There are two requirements on a filesystem to be exportable.
+	 * 1: We must be able to identify the filesystem from a number.
+	 *    either a device number (so FS_REQUIRES_DEV needed)
+	 *    or an FSID number (so NFSEXP_FSID needed).
+	 * 2: We must be able to find an inode from a filehandle.
+	 *    This means that s_export_op must be set.
+	 */
+	if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
+	    !(flags & NFSEXP_FSID)) {
+		dprintk("exp_export: export of non-dev fs without fsid\n");
+		return -EINVAL;
+	}
+	if (!inode->i_sb->s_export_op) {
+		dprintk("exp_export: export of invalid fs type.\n");
+		return -EINVAL;
+	}
+
+	/* Ok, we can export it */
+	if (!inode->i_sb->s_export_op->find_exported_dentry)
+		inode->i_sb->s_export_op->find_exported_dentry =
+			find_exported_dentry;
+	return 0;
+}
+
+/*
+ * Parse one line written by user space into the "nfsd.export" cache.
+ *
+ * Fields are consumed in this order:
+ *	client path expiry [flags anonuid anongid fsid]
+ * When the flags field is absent a negative entry is stored.
+ *
+ * Returns 0 on success, -EINVAL for a malformed line, -ENOENT for an
+ * unknown client, -ENOMEM when a buffer or cache entry cannot be
+ * allocated, or the error from path_lookup(), get_int() or
+ * check_export().
+ */
+static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
+{
+	char *buf;
+	int len;
+	int err;
+	struct auth_domain *dom = NULL;
+	struct nameidata nd;
+	struct svc_export exp, *expp;
+	int an_int;
+
+	/* nd.dentry != NULL tells the exit path that path_lookup succeeded */
+	nd.dentry = NULL;
+
+	/* reject an empty message before indexing mesg[mlen-1] */
+	if (mlen < 1 || mesg[mlen-1] != '\n')
+		return -EINVAL;
+	mesg[mlen-1] = 0;
+
+	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!buf)
+		goto out;
+
+	/* client */
+	len = qword_get(&mesg, buf, PAGE_SIZE);
+	err = -EINVAL;
+	if (len <= 0)
+		goto out;
+
+	err = -ENOENT;
+	dom = auth_domain_find(buf);
+	if (!dom)
+		goto out;
+
+	/* path */
+	err = -EINVAL;
+	if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
+		goto out;
+	err = path_lookup(buf, 0, &nd);
+	if (err)
+		goto out;
+
+	exp.h.flags = 0;
+	exp.ex_client = dom;
+	exp.ex_mnt = nd.mnt;
+	exp.ex_dentry = nd.dentry;
+
+	/* expiry */
+	err = -EINVAL;
+	exp.h.expiry_time = get_expiry(&mesg);
+	if (exp.h.expiry_time == 0)
+		goto out;
+
+	/* flags */
+	err = get_int(&mesg, &an_int);
+	if (err == -ENOENT)
+		set_bit(CACHE_NEGATIVE, &exp.h.flags);
+	else {
+		if (err || an_int < 0)
+			goto out;
+		exp.ex_flags= an_int;
+
+		/* anon uid */
+		err = get_int(&mesg, &an_int);
+		if (err)
+			goto out;
+		exp.ex_anon_uid= an_int;
+
+		/* anon gid */
+		err = get_int(&mesg, &an_int);
+		if (err)
+			goto out;
+		exp.ex_anon_gid= an_int;
+
+		/* fsid */
+		err = get_int(&mesg, &an_int);
+		if (err)
+			goto out;
+		exp.ex_fsid = an_int;
+
+		err = check_export(nd.dentry->d_inode, exp.ex_flags);
+		if (err)
+			goto out;
+	}
+
+	expp = svc_export_lookup(&exp, 1);
+	if (expp) {
+		exp_put(expp);
+		err = 0;
+	} else
+		err = -ENOMEM;	/* same mapping as exp_export() */
+	cache_flush();
+ out:
+	if (nd.dentry)
+		path_release(&nd);
+	if (dom)
+		auth_domain_put(dom);
+	kfree(buf);
+	return err;
+}
+
+static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong);
+
+/*
+ * seq_file output for one "nfsd.export" cache entry.
+ *
+ * With h == NULL only the column header is printed.  Otherwise the line
+ * is "<path>\t<client>(<flags>)"; the flag list is filled in only for a
+ * valid, non-negative entry.
+ */
+static int svc_export_show(struct seq_file *m,
+			   struct cache_detail *cd,
+			   struct cache_head *h)
+{
+	struct svc_export *exp;
+	int usable;
+
+	if (!h) {
+		seq_puts(m, "#path domain(flags)\n");
+		return 0;
+	}
+
+	exp = container_of(h, struct svc_export, h);
+
+	seq_path(m, exp->ex_mnt, exp->ex_dentry, " \t\n\\");
+	seq_putc(m, '\t');
+	seq_escape(m, exp->ex_client->name, " \t\n\\");
+	seq_putc(m, '(');
+
+	usable = test_bit(CACHE_VALID, &h->flags) &&
+		 !test_bit(CACHE_NEGATIVE, &h->flags);
+	if (usable)
+		exp_flags(m, exp->ex_flags, exp->ex_fsid,
+			  exp->ex_anon_uid, exp->ex_anon_gid);
+
+	seq_puts(m, ")\n");
+	return 0;
+}
+/*
+ * Cache descriptor for the export map, named "nfsd.export".  It ties
+ * export_table to the put/request/parse/show callbacks defined above.
+ */
+struct cache_detail svc_export_cache = {
+ .hash_size = EXPORT_HASHMAX,
+ .hash_table = export_table,
+ .name = "nfsd.export",
+ .cache_put = svc_export_put,
+ .cache_request = svc_export_request,
+ .cache_parse = svc_export_parse,
+ .cache_show = svc_export_show,
+};
+
+/*
+ * Two exports match when they name the same client, dentry and
+ * vfsmount.  Returns 1 on a match, 0 otherwise.
+ */
+static inline int svc_export_match(struct svc_export *a, struct svc_export *b)
+{
+	if (a->ex_client != b->ex_client)
+		return 0;
+	if (a->ex_dentry != b->ex_dentry)
+		return 0;
+	return a->ex_mnt == b->ex_mnt;
+}
+/*
+ * Copy the key part of an export (client, dentry, vfsmount) from item
+ * into new, taking a reference on each of the three objects.
+ */
+static inline void svc_export_init(struct svc_export *new, struct svc_export *item)
+{
+	new->ex_client = item->ex_client;
+	cache_get(&new->ex_client->h);
+
+	new->ex_dentry = dget(item->ex_dentry);
+	new->ex_mnt = mntget(item->ex_mnt);
+}
+
+/*
+ * Copy the content part of an export (flags, anonymous uid/gid, fsid)
+ * from item into new.  No references are involved.
+ */
+static inline void svc_export_update(struct svc_export *new, struct svc_export *item)
+{
+	new->ex_fsid = item->ex_fsid;
+	new->ex_anon_gid = item->ex_anon_gid;
+	new->ex_anon_uid = item->ex_anon_uid;
+	new->ex_flags = item->ex_flags;
+}
+
+/*
+ * Expands to the definition of svc_export_lookup() (declared earlier in
+ * this file).  NOTE(review): the macro body is not visible here; it
+ * presumably uses the svc_export_hash/match/init/update helpers above.
+ */
+static DefineSimpleCacheLookup(svc_export,1) /* allow inplace updates */
+
+
+/*
+ * Look up the expkey entry for (client, fsid type, fsid) and run it
+ * through cache_check().
+ *
+ * Returns NULL when clp is NULL or the lookup yields nothing, an
+ * ERR_PTR() carrying cache_check()'s error when the check fails, and
+ * the entry otherwise.
+ */
+struct svc_expkey *
+exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp)
+{
+	struct svc_expkey key, *ek;
+	int err;
+
+	if (clp == NULL)
+		return NULL;
+
+	key.ek_client = clp;
+	key.ek_fsidtype = fsid_type;
+	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
+
+	ek = svc_expkey_lookup(&key, 0);
+	if (ek == NULL)
+		return NULL;
+
+	err = cache_check(&svc_expkey_cache, &ek->h, reqp);
+	if (err)
+		return ERR_PTR(err);
+	return ek;
+}
+
+/*
+ * Install a never-expiring expkey entry mapping (client, fsid type,
+ * fsid) to exp.  Returns 0 on success and -ENOMEM when
+ * svc_expkey_lookup() returns NULL.
+ */
+static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv,
+		       struct svc_export *exp)
+{
+	struct svc_expkey key, *ek;
+
+	key.h.flags = 0;
+	key.h.expiry_time = NEVER;
+	key.ek_export = exp;
+	key.ek_client = clp;
+	key.ek_fsidtype = fsid_type;
+	memcpy(key.ek_fsid, fsidv, key_len(fsid_type));
+
+	ek = svc_expkey_lookup(&key, 1);
+	if (ek == NULL)
+		return -ENOMEM;
+
+	/* only the table entry is wanted; drop the reference just returned */
+	expkey_put(&ek->h, &svc_expkey_cache);
+	return 0;
+}
+
+/*
+ * Find the client's expkey entry for a device/inode pair.  A device
+ * number accepted by old_valid_dev() is encoded as fsid type 0, any
+ * other as fsid type 3.
+ */
+static inline struct svc_expkey *
+exp_get_key(svc_client *clp, dev_t dev, ino_t ino)
+{
+	u32 fsidv[3];
+	int fsid_type;
+
+	if (old_valid_dev(dev)) {
+		mk_fsid_v0(fsidv, dev, ino);
+		fsid_type = 0;
+	} else {
+		mk_fsid_v3(fsidv, dev, ino);
+		fsid_type = 3;
+	}
+	return exp_find_key(clp, fsid_type, fsidv, NULL);
+}
+
+/*
+ * Find the client's expkey entry for a numeric fsid (fsid type 1).
+ */
+static inline struct svc_expkey *
+exp_get_fsid_key(svc_client *clp, int fsid)
+{
+	u32 encoded[2];
+
+	mk_fsid_v1(encoded, fsid);
+	return exp_find_key(clp, 1, encoded, NULL);
+}
+
+/*
+ * Look up the export entry for (client, vfsmount, dentry) and run it
+ * through cache_check().
+ *
+ * Returns the entry when the check succeeds, ERR_PTR(-EAGAIN) when the
+ * check returns -EAGAIN, and NULL when clp is NULL, nothing is found,
+ * or the check fails with any other error.
+ */
+svc_export *
+exp_get_by_name(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
+		struct cache_req *reqp)
+{
+	struct svc_export *exp, key;
+	int status;
+
+	if (clp == NULL)
+		return NULL;
+
+	key.ex_client = clp;
+	key.ex_mnt = mnt;
+	key.ex_dentry = dentry;
+
+	exp = svc_export_lookup(&key, 0);
+	if (exp == NULL)
+		return NULL;
+
+	status = cache_check(&svc_export_cache, &exp->h, reqp);
+	if (status == 0)
+		return exp;
+	if (status == -EAGAIN)
+		return ERR_PTR(-EAGAIN);
+	return NULL;
+}
+
+/*
+ * Find the export entry covering a dentry: try the dentry itself, then
+ * each ancestor in turn, stopping at the first non-NULL result from
+ * exp_get_by_name() (which may be an ERR_PTR) or at the root.
+ */
+struct svc_export *
+exp_parent(svc_client *clp, struct vfsmount *mnt, struct dentry *dentry,
+	   struct cache_req *reqp)
+{
+	svc_export *exp;
+
+	/* hold a reference on whichever dentry is currently being tried */
+	dget(dentry);
+	for (;;) {
+		struct dentry *parent;
+
+		exp = exp_get_by_name(clp, mnt, dentry, reqp);
+		if (exp != NULL || IS_ROOT(dentry))
+			break;
+
+		parent = dget_parent(dentry);
+		dput(dentry);
+		dentry = parent;
+	}
+	dput(dentry);
+	return exp;
+}
+
+/*
+ * Hashtable locking. Write locks are placed only by user processes
+ * wanting to modify export information.
+ * Write locking only done in this file. Read locking
+ * needed externally.
+ *
+ * All four helpers below are thin wrappers around the hash_sem
+ * read/write semaphore; the write-side helpers are static, the
+ * read-side helpers are not.
+ */
+
+static DECLARE_RWSEM(hash_sem);
+
+/* Take hash_sem for reading. */
+void
+exp_readlock(void)
+{
+ down_read(&hash_sem);
+}
+
+/* Take hash_sem for writing. */
+static inline void
+exp_writelock(void)
+{
+ down_write(&hash_sem);
+}
+
+/* Release a read hold on hash_sem. */
+void
+exp_readunlock(void)
+{
+ up_read(&hash_sem);
+}
+
+/* Release a write hold on hash_sem. */
+static inline void
+exp_writeunlock(void)
+{
+ up_write(&hash_sem);
+}
+
+/*
+ * Expire the fsid-based expkey entry of an export, if the export has
+ * NFSEXP_FSID set.  The entry's expiry time is moved into the past and
+ * the expkey cache's nextcheck is set to the current time.
+ */
+static void exp_fsid_unhash(struct svc_export *exp)
+{
+	struct svc_expkey *ek;
+
+	if (!(exp->ex_flags & NFSEXP_FSID))
+		return;
+
+	ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid);
+	if (ek != NULL && !IS_ERR(ek)) {
+		ek->h.expiry_time = get_seconds()-1;
+		expkey_put(&ek->h, &svc_expkey_cache);
+	}
+	svc_expkey_cache.nextcheck = get_seconds();
+}
+
+/*
+ * Install the fsid-based (type 1) expkey entry for an export.
+ * Exports without NFSEXP_FSID need none, so 0 is returned for them;
+ * otherwise the result of exp_set_key() is returned.
+ */
+static int exp_fsid_hash(svc_client *clp, struct svc_export *exp)
+{
+	u32 encoded[2];
+
+	if (!(exp->ex_flags & NFSEXP_FSID))
+		return 0;
+
+	mk_fsid_v1(encoded, exp->ex_fsid);
+	return exp_set_key(clp, 1, encoded, exp);
+}
+
+/*
+ * Install the device/inode-based expkey entry for an export: fsid type
+ * 0 when old_valid_dev() accepts the device number, type 3 otherwise.
+ * Returns the result of exp_set_key().
+ */
+static int exp_hash(struct auth_domain *clp, struct svc_export *exp)
+{
+	struct inode *inode = exp->ex_dentry->d_inode;
+	dev_t dev = inode->i_sb->s_dev;
+	u32 fsid[2];
+	int fsid_type;
+
+	if (old_valid_dev(dev)) {
+		mk_fsid_v0(fsid, dev, inode->i_ino);
+		fsid_type = 0;
+	} else {
+		mk_fsid_v3(fsid, dev, inode->i_ino);
+		fsid_type = 3;
+	}
+	return exp_set_key(clp, fsid_type, fsid, exp);
+}
+
+/*
+ * Expire the device/inode-based expkey entry of an export: its expiry
+ * time is moved into the past and the expkey cache's nextcheck is set
+ * to the current time.
+ */
+static void exp_unhash(struct svc_export *exp)
+{
+	struct inode *inode = exp->ex_dentry->d_inode;
+	struct svc_expkey *ek;
+
+	ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino);
+	if (ek != NULL && !IS_ERR(ek)) {
+		ek->h.expiry_time = get_seconds()-1;
+		expkey_put(&ek->h, &svc_expkey_cache);
+	}
+	svc_expkey_cache.nextcheck = get_seconds();
+}
+
+/*
+ * Export a file system.
+ *
+ * Validates the request strings, then, under the export write lock,
+ * either updates the flags/ids/fsid of an existing export for
+ * (client, path) or creates a new export entry plus its expkey index
+ * entries.
+ *
+ * Returns 0 on success; -EINVAL for bad strings, an unknown client or
+ * an fsid already used by a different export; the error from
+ * path_lookup() or check_export(); or -ENOMEM when an entry cannot be
+ * created.
+ */
+int
+exp_export(struct nfsctl_export *nxp)
+{
+ svc_client *clp;
+ struct svc_export *exp = NULL;
+ struct svc_export new;
+ struct svc_expkey *fsid_key = NULL;
+ struct nameidata nd;
+ int err;
+
+ /* Consistency check */
+ err = -EINVAL;
+ if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) ||
+ !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX))
+ goto out;
+
+ dprintk("exp_export called for %s:%s (%x/%ld fl %x).\n",
+ nxp->ex_client, nxp->ex_path,
+ (unsigned)nxp->ex_dev, (long)nxp->ex_ino,
+ nxp->ex_flags);
+
+ /* Try to lock the export table for update */
+ exp_writelock();
+
+ /* Look up client info */
+ if (!(clp = auth_domain_find(nxp->ex_client)))
+ goto out_unlock;
+
+
+ /* Look up the dentry */
+ err = path_lookup(nxp->ex_path, 0, &nd);
+ if (err)
+ goto out_unlock;
+ err = -EINVAL;
+
+ exp = exp_get_by_name(clp, nd.mnt, nd.dentry, NULL);
+
+ /* must make sure there won't be an ex_fsid clash */
+ /* (nxp->ex_dev carries the requested fsid; see ex_fsid assignments below) */
+ if ((nxp->ex_flags & NFSEXP_FSID) &&
+ (fsid_key = exp_get_fsid_key(clp, nxp->ex_dev)) &&
+ !IS_ERR(fsid_key) &&
+ fsid_key->ek_export &&
+ fsid_key->ek_export != exp)
+ goto finish;
+
+ if (exp) {
+ /* just a flags/id/fsid update */
+
+ /* drop the old fsid index before ex_fsid changes, then re-add */
+ exp_fsid_unhash(exp);
+ exp->ex_flags = nxp->ex_flags;
+ exp->ex_anon_uid = nxp->ex_anon_uid;
+ exp->ex_anon_gid = nxp->ex_anon_gid;
+ exp->ex_fsid = nxp->ex_dev;
+
+ err = exp_fsid_hash(clp, exp);
+ goto finish;
+ }
+
+ err = check_export(nd.dentry->d_inode, nxp->ex_flags);
+ if (err) goto finish;
+
+ err = -ENOMEM;
+
+ /* exp is NULL at this point, so %p prints a null pointer for it */
+ dprintk("nfsd: creating export entry %p for client %p\n", exp, clp);
+
+ /* template entry on the stack; svc_export_lookup() is asked to store it */
+ new.h.expiry_time = NEVER;
+ new.h.flags = 0;
+ new.ex_client = clp;
+ new.ex_mnt = nd.mnt;
+ new.ex_dentry = nd.dentry;
+ new.ex_flags = nxp->ex_flags;
+ new.ex_anon_uid = nxp->ex_anon_uid;
+ new.ex_anon_gid = nxp->ex_anon_gid;
+ new.ex_fsid = nxp->ex_dev;
+
+ exp = svc_export_lookup(&new, 1);
+
+ if (exp == NULL)
+ goto finish;
+
+ err = 0;
+
+ if (exp_hash(clp, exp) ||
+ exp_fsid_hash(clp, exp)) {
+ /* failed to create at least one index */
+ exp_do_unexport(exp);
+ cache_flush();
+ err = -ENOMEM;
+ }
+
+finish:
+ /* release every reference taken above, then the path and the lock */
+ if (exp)
+ exp_put(exp);
+ if (fsid_key && !IS_ERR(fsid_key))
+ expkey_put(&fsid_key->h, &svc_expkey_cache);
+ if (clp)
+ auth_domain_put(clp);
+ path_release(&nd);
+out_unlock:
+ exp_writeunlock();
+out:
+ return err;
+}
+
+/*
+ * Unexport a file system. The export entry has already
+ * been removed from the client's list of exported fs's.
+ *
+ * The entry's expiry time is moved into the past, the export cache's
+ * nextcheck is set to the current time, and both expkey index entries
+ * (device/inode and fsid) are expired as well.
+ */
+static void
+exp_do_unexport(svc_export *unexp)
+{
+ unexp->h.expiry_time = get_seconds()-1;
+ svc_export_cache.nextcheck = get_seconds();
+ exp_unhash(unexp);
+ exp_fsid_unhash(unexp);
+}
+
+
+/*
+ * unexport syscall.
+ *
+ * Looks up the client and path named in nxp and expires the matching
+ * export entry.  Returns 0 on success, -EINVAL for bad strings, an
+ * unknown client or a path that is not exported to that client, or the
+ * error from path_lookup().
+ */
+int
+exp_unexport(struct nfsctl_export *nxp)
+{
+ struct auth_domain *dom;
+ svc_export *exp;
+ struct nameidata nd;
+ int err;
+
+ /* Consistency check */
+ if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) ||
+ !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX))
+ return -EINVAL;
+
+ exp_writelock();
+
+ err = -EINVAL;
+ dom = auth_domain_find(nxp->ex_client);
+ if (!dom) {
+ dprintk("nfsd: unexport couldn't find %s\n", nxp->ex_client);
+ goto out_unlock;
+ }
+
+ err = path_lookup(nxp->ex_path, 0, &nd);
+ if (err)
+ goto out_domain;
+
+ err = -EINVAL;
+ exp = exp_get_by_name(dom, nd.mnt, nd.dentry, NULL);
+ /* the path is only needed for the lookup above */
+ path_release(&nd);
+ if (!exp)
+ goto out_domain;
+
+ exp_do_unexport(exp);
+ exp_put(exp);
+ err = 0;
+
+out_domain:
+ /* reached on success and on failure: cache_flush() runs either way */
+ auth_domain_put(dom);
+ cache_flush();
+out_unlock:
+ exp_writeunlock();
+ return err;
+}
+
+/*
+ * Obtain the root fh on behalf of a client.
+ * This could be done in user space, but I feel that it adds some safety
+ * since its harder to fool a kernel module than a user space program.
+ *
+ * Looks up path, finds the export covering it for clp and composes a
+ * file handle for it into *f.
+ *
+ * Returns 0 on success, -EPERM when the path cannot be looked up or no
+ * export covers it, -EINVAL when fh_compose() fails, or the error
+ * carried by an ERR_PTR result from exp_parent().
+ */
+int
+exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
+{
+	struct svc_export *exp;
+	struct nameidata nd;
+	struct inode *inode;
+	struct svc_fh fh;
+	int err;
+
+	err = -EPERM;
+	/* NB: we probably ought to check that it's NUL-terminated */
+	if (path_lookup(path, 0, &nd)) {
+		printk("nfsd: exp_rootfh path not found %s\n", path);
+		return err;
+	}
+	inode = nd.dentry->d_inode;
+
+	dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
+		path, nd.dentry, clp->name,
+		inode->i_sb->s_id, inode->i_ino);
+	exp = exp_parent(clp, nd.mnt, nd.dentry, NULL);
+	/*
+	 * exp_parent() hands back whatever exp_get_by_name() returned,
+	 * which can be ERR_PTR(-EAGAIN); never dereference that.
+	 */
+	if (IS_ERR(exp)) {
+		err = PTR_ERR(exp);
+		goto out;
+	}
+	if (!exp) {
+		dprintk("nfsd: exp_rootfh export not found.\n");
+		goto out;
+	}
+
+	/*
+	 * fh must be initialized before calling fh_compose
+	 */
+	fh_init(&fh, maxsize);
+	if (fh_compose(&fh, exp, nd.dentry, NULL))
+		err = -EINVAL;
+	else
+		err = 0;
+	/* the handle is copied out whether or not fh_compose() succeeded */
+	memcpy(f, &fh.fh_handle, sizeof(struct knfsd_fh));
+	fh_put(&fh);
+	exp_put(exp);
+out:
+	path_release(&nd);
+	return err;
+}
+
+/*
+ * Called when we need the filehandle for the root of the pseudofs,
+ * for a given NFSv4 client.  The root is defined to be the
+ * export point with fsid==0
+ *
+ * Returns nfserr_dropit when the key lookup reports -EAGAIN,
+ * nfserr_perm when no usable key is found, and otherwise the result of
+ * fh_compose() on the export's own dentry.
+ */
+int
+exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp,
+	       struct cache_req *creq)
+{
+	struct svc_expkey *root_key;
+	u32 fsidv[2];
+	int rv;
+
+	mk_fsid_v1(fsidv, 0);
+	root_key = exp_find_key(clp, 1, fsidv, creq);
+
+	if (IS_ERR(root_key)) {
+		if (PTR_ERR(root_key) == -EAGAIN)
+			return nfserr_dropit;
+		return nfserr_perm;
+	}
+	if (root_key == NULL)
+		return nfserr_perm;
+
+	rv = fh_compose(fhp, root_key->ek_export,
+			root_key->ek_export->ex_dentry, NULL);
+	expkey_put(&root_key->h, &svc_expkey_cache);
+	return rv;
+}
+
+/* Iterator */
+
+/*
+ * seq_file ->start: take the export read lock and the export cache's
+ * hash_lock (both are released in e_stop), then turn *pos into an entry
+ * of export_table.
+ *
+ * *pos == 0 yields the token (void *)1, for which e_show prints the
+ * header.  Otherwise n = *pos - 1 is split into a bucket number (upper
+ * 32 bits) and an index into that bucket's chain (lower 32 bits).  If
+ * the chain is shorter than the index, the search moves on to the head
+ * of the next non-empty bucket and *pos is rewritten to match.
+ * Returns NULL once the buckets are exhausted.
+ */
+static void *e_start(struct seq_file *m, loff_t *pos)
+{
+ loff_t n = *pos;
+ unsigned hash, export;
+ struct cache_head *ch;
+
+ exp_readlock();
+ read_lock(&svc_export_cache.hash_lock);
+ /* tests the old value of n, then leaves n == *pos - 1 */
+ if (!n--)
+ return (void *)1;
+ hash = n >> 32;
+ export = n & ((1LL<<32) - 1);
+
+
+ /* walk 'export' links down the chain of bucket 'hash' */
+ for (ch=export_table[hash]; ch; ch=ch->next)
+ if (!export--)
+ return ch;
+ /* chain too short: clear the index part and scan later buckets */
+ n &= ~((1LL<<32) - 1);
+ do {
+ hash++;
+ n += 1LL<<32;
+ } while(hash < EXPORT_HASHMAX && export_table[hash]==NULL);
+ if (hash >= EXPORT_HASHMAX)
+ return NULL;
+ *pos = n+1;
+ return export_table[hash];
+}
+
+/*
+ * seq_file ->next: advance from entry p to the following export entry,
+ * keeping *pos in the encoding used by e_start (bucket number in the
+ * upper 32 bits, position within the chain in the lower bits).
+ * Returns NULL after the last bucket.
+ */
+static void *e_next(struct seq_file *m, void *p, loff_t *pos)
+{
+ struct cache_head *ch = p;
+ int hash = (*pos >> 32);
+
+ if (p == (void *)1)
+ /* coming from the header token: start scanning at bucket 0 */
+ hash = 0;
+ else if (ch->next == NULL) {
+ /* end of this chain: move to the next bucket */
+ hash++;
+ *pos += 1LL<<32;
+ } else {
+ /* more entries in the same chain */
+ ++*pos;
+ return ch->next;
+ }
+ /* reset the in-chain index, then skip empty buckets */
+ *pos &= ~((1LL<<32) - 1);
+ while (hash < EXPORT_HASHMAX && export_table[hash] == NULL) {
+ hash++;
+ *pos += 1LL<<32;
+ }
+ if (hash >= EXPORT_HASHMAX)
+ return NULL;
+ ++*pos;
+ return export_table[hash];
+}
+
+/*
+ * seq_file ->stop: release the two locks taken in e_start, in reverse
+ * order of acquisition.
+ */
+static void e_stop(struct seq_file *m, void *p)
+{
+ read_unlock(&svc_export_cache.hash_lock);
+ exp_readunlock();
+}
+
+/*
+ * Table used by exp_flags() to print export flags.  For each flag bit,
+ * name[0] is printed when the bit is set and name[1] when it is clear;
+ * an empty string prints nothing.  The table ends with a zero flag.
+ */
+static struct flags {
+ int flag;
+ char *name[2];
+} expflags[] = {
+ { NFSEXP_READONLY, {"ro", "rw"}},
+ { NFSEXP_INSECURE_PORT, {"insecure", ""}},
+ { NFSEXP_ROOTSQUASH, {"root_squash", "no_root_squash"}},
+ { NFSEXP_ALLSQUASH, {"all_squash", ""}},
+ { NFSEXP_ASYNC, {"async", "sync"}},
+ { NFSEXP_GATHERED_WRITES, {"wdelay", "no_wdelay"}},
+ { NFSEXP_NOHIDE, {"nohide", ""}},
+ { NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
+ { NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
+ { NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
+#ifdef MSNFS
+ { NFSEXP_MSNFS, {"msnfs", ""}},
+#endif
+ { 0, {"", ""}}
+};
+
+/*
+ * Print an export's options as a comma-separated list: one name per
+ * expflags[] row (set or clear form, skipping empty names), then
+ * "fsid=" when NFSEXP_FSID is set, then "anonuid=" / "anongid=" unless
+ * the id equals (uid_t)-2 / (gid_t)-2 or 0x10000-2.
+ */
+static void exp_flags(struct seq_file *m, int flag, int fsid, uid_t anonu, uid_t anong)
+{
+	int printed = 0;
+	struct flags *entry = expflags;
+
+	while (entry->flag) {
+		char *label;
+
+		if (entry->flag & flag)
+			label = entry->name[0];
+		else
+			label = entry->name[1];
+
+		if (*label)
+			seq_printf(m, "%s%s", printed++?",":"", label);
+		entry++;
+	}
+
+	if (flag & NFSEXP_FSID)
+		seq_printf(m, "%sfsid=%d", printed++?",":"", fsid);
+	if (anonu != (uid_t)-2 && anonu != (0x10000-2))
+		seq_printf(m, "%sanonuid=%d", printed++?",":"", anonu);
+	if (anong != (gid_t)-2 && anong != (0x10000-2))
+		seq_printf(m, "%sanongid=%d", printed++?",":"", anong);
+}
+
+/*
+ * seq_file ->show for the exports listing.
+ *
+ * The token (void *)1 produced by e_start prints the two header lines.
+ * Any other p is a cache_head inside export_table: the entry is run
+ * through cache_check() and, if that succeeds, printed with
+ * svc_export_show().  Entries that fail the check are skipped.
+ * Always returns 0 unless svc_export_show() returns otherwise.
+ */
+static int e_show(struct seq_file *m, void *p)
+{
+	struct cache_head *cp = p;
+	struct svc_export *exp = container_of(cp, struct svc_export, h);
+
+	if (p == (void *)1) {
+		seq_puts(m, "# Version 1.1\n");
+		seq_puts(m, "# Path Client(Flags) # IPs\n");
+		return 0;
+	}
+
+	/* temporary reference for the duration of cache_check() */
+	cache_get(&exp->h);
+	/* NOTE(review): presumably cache_check() drops the reference on failure -- confirm */
+	if (cache_check(&svc_export_cache, &exp->h, NULL))
+		return 0;
+	/* the table's own reference must still exist after dropping ours */
+	if (cache_put(&exp->h, &svc_export_cache)) BUG();
+	return svc_export_show(m, &svc_export_cache, cp);
+}
+
+/* seq_file operations for listing exports, built from the e_* iterator above. */
+struct seq_operations nfs_exports_op = {
+ .start = e_start,
+ .next = e_next,
+ .stop = e_stop,
+ .show = e_show,
+};
+
+/*
+ * Add or modify a client.
+ * Change requests may involve the list of host addresses. The list of
+ * exports and possibly existing uid maps are left untouched.
+ *
+ * Returns 0 on success, -EINVAL for a bad identifier or too many
+ * addresses, and -ENOMEM when unix_domain_find() returns NULL.
+ */
+int
+exp_addclient(struct nfsctl_client *ncp)
+{
+	struct auth_domain *dom;
+	int idx;
+	int err;
+
+	/* First, consistency check. */
+	if (!exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX))
+		return -EINVAL;
+	if (ncp->cl_naddr > NFSCLNT_ADDRMAX)
+		return -EINVAL;
+
+	/* Lock the hashtable */
+	exp_writelock();
+
+	dom = unix_domain_find(ncp->cl_ident);
+	if (dom != NULL) {
+		/* Insert client into hashtable. */
+		for (idx = 0; idx < ncp->cl_naddr; idx++)
+			auth_unix_add_addr(ncp->cl_addrlist[idx], dom);
+
+		auth_unix_forget_old(dom);
+		auth_domain_put(dom);
+		err = 0;
+	} else {
+		err = -ENOMEM;
+	}
+
+	exp_writeunlock();
+	return err;
+}
+
+/*
+ * Delete a client given an identifier.
+ *
+ * Returns -EINVAL for a bad identifier or an unknown client, otherwise
+ * the result of auth_unix_forget_old().
+ */
+int
+exp_delclient(struct nfsctl_client *ncp)
+{
+	struct auth_domain *dom;
+	int err = -EINVAL;
+
+	if (!exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX))
+		return err;
+
+	/* Lock the hashtable */
+	exp_writelock();
+
+	dom = auth_domain_find(ncp->cl_ident);
+	if (dom != NULL) {
+		/* just make sure that no addresses work
+		 * and that it will expire soon
+		 */
+		err = auth_unix_forget_old(dom);
+		dom->h.expiry_time = get_seconds();
+		auth_domain_put(dom);
+	}
+
+	exp_writeunlock();
+	return err;
+}
+
+/*
+ * Verify that string is non-empty and does not exceed max length.
+ *
+ * Returns the string length when a NUL is found within the first max
+ * bytes (so 0 for an empty string).  Otherwise cp[max] is overwritten
+ * with a NUL, a notice is logged and 0 is returned.
+ */
+static int
+exp_verify_string(char *cp, int max)
+{
+	int pos = 0;
+
+	while (pos < max) {
+		if (cp[pos] == '\0')
+			return pos;
+		pos++;
+	}
+
+	/* no terminator found: force one so the string can be printed */
+	cp[pos] = 0;
+	printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp);
+	return 0;
+}
+
+/*
+ * Initialize the exports module by registering the export cache and the
+ * expkey cache.
+ */
+void
+nfsd_export_init(void)
+{
+ dprintk("nfsd: initializing export module.\n");
+
+ cache_register(&svc_export_cache);
+ cache_register(&svc_expkey_cache);
+
+}
+
+/*
+ * Flush exports table - called when last nfsd thread is killed
+ *
+ * Both caches are purged under the export write lock, the expkey cache
+ * first.
+ */
+void
+nfsd_export_flush(void)
+{
+ exp_writelock();
+ cache_purge(&svc_expkey_cache);
+ cache_purge(&svc_export_cache);
+ exp_writeunlock();
+}
+
+/*
+ * Shutdown the exports module.
+ *
+ * Under the export write lock, unregister both caches (logging an error
+ * for each one that fails to unregister) and call
+ * svcauth_unix_purge().
+ */
+void
+nfsd_export_shutdown(void)
+{
+
+ dprintk("nfsd: shutting down export module.\n");
+
+ exp_writelock();
+
+ if (cache_unregister(&svc_expkey_cache))
+ printk(KERN_ERR "nfsd: failed to unregister expkey cache\n");
+ if (cache_unregister(&svc_export_cache))
+ printk(KERN_ERR "nfsd: failed to unregister export cache\n");
+ svcauth_unix_purge();
+
+ exp_writeunlock();
+ dprintk("nfsd: export shutdown complete.\n");
+}
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
new file mode 100644
index 000000000000..7b889ff15ae6
--- /dev/null
+++ b/fs/nfsd/lockd.c
@@ -0,0 +1,79 @@
+/*
+ * linux/fs/nfsd/lockd.c
+ *
+ * This file contains all the stubs needed when communicating with lockd.
+ * This level of indirection is necessary so we can run nfsd+lockd without
+ * requiring the nfs client to be compiled in/loaded, and vice versa.
+ *
+ * Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/lockd/bind.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_LOCKD
+
+/*
+ * Open the file identified by lockd's file handle `f' for locking and
+ * hand the open file back through `filp'.
+ * Note: we hold the dentry use count while the file is open.
+ *
+ * nlm and nfsd don't share error codes, so the nfsd status is mapped to
+ * an invented set of return values:
+ *	0 = no error
+ *	1 = stale file handle
+ *	2 = other error
+ */
+static u32
+nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
+{
+	struct svc_fh	svcfh;
+	u32		status;
+
+	/* must initialize before using! but maxsize doesn't matter */
+	fh_init(&svcfh, 0);
+	svcfh.fh_handle.fh_size = f->size;
+	memcpy((char *)&svcfh.fh_handle.fh_base, f->data, f->size);
+	svcfh.fh_export = NULL;
+
+	/* The open, the handle release and the client reset all happen
+	 * under the export read lock. */
+	exp_readlock();
+	status = nfsd_open(rqstp, &svcfh, S_IFREG, MAY_LOCK, filp);
+	fh_put(&svcfh);
+	rqstp->rq_client = NULL;
+	exp_readunlock();
+
+	if (status == nfs_ok)
+		return 0;
+	if (status == nfserr_stale)
+		return 1;
+	return 2;
+}
+
+/*
+ * Close a file handed out by nlm_fopen(): drops the file reference
+ * with fput().
+ */
+static void
+nlm_fclose(struct file *filp)
+{
+ fput(filp);
+}
+
+/* The open/close callbacks that nfsd_lockd_init() publishes via nlmsvc_ops. */
+static struct nlmsvc_binding nfsd_nlm_ops = {
+ .fopen = nlm_fopen, /* open file for locking */
+ .fclose = nlm_fclose, /* close file */
+};
+
+/*
+ * Point nlmsvc_ops at nfsd's file open/close callbacks.
+ */
+void
+nfsd_lockd_init(void)
+{
+ dprintk("nfsd: initializing lockd\n");
+ nlmsvc_ops = &nfsd_nlm_ops;
+}
+
+/*
+ * Undo nfsd_lockd_init(): clear nlmsvc_ops.
+ */
+void
+nfsd_lockd_shutdown(void)
+{
+ nlmsvc_ops = NULL;
+}
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
new file mode 100644
index 000000000000..041380fe667b
--- /dev/null
+++ b/fs/nfsd/nfs3proc.c
@@ -0,0 +1,702 @@
+/*
+ * linux/fs/nfsd/nfs3proc.c
+ *
+ * Process version 3 NFS requests.
+ *
+ * Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/linkage.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/major.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr3.h>
+#include <linux/nfs3.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+#define RETURN_STATUS(st) { resp->status = (st); return (st); }
+
+/* S_IF* file type bits for each NFSv3 file type, indexed by NF3* value. */
+static int	nfs3_ftypes[] = {
+	[NF3NON]	= 0,
+	[NF3REG]	= S_IFREG,
+	[NF3DIR]	= S_IFDIR,
+	[NF3BLK]	= S_IFBLK,
+	[NF3CHR]	= S_IFCHR,
+	[NF3LNK]	= S_IFLNK,
+	[NF3SOCK]	= S_IFSOCK,
+	[NF3FIFO]	= S_IFIFO,
+};
+
+/*
+ * NULL call.
+ * Does nothing with its arguments and always returns nfs_ok.
+ */
+static int
+nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ return nfs_ok;
+}
+
+/*
+ * Get a file's attributes
+ *
+ * The handle is copied into the reply and verified there; the
+ * verification result is the status of the call.
+ */
+static int
+nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
+					   struct nfsd3_attrstat *resp)
+{
+	svc_fh	*fhp;
+	int	status;
+
+	dprintk("nfsd: GETATTR(3) %s\n", SVCFH_fmt(&argp->fh));
+
+	fhp = fh_copy(&resp->fh, &argp->fh);
+	status = fh_verify(rqstp, fhp, 0, MAY_NOP);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Set a file's attributes
+ *
+ * The guard flag and guard time decoded from the request are handed to
+ * nfsd_setattr() together with the new attributes.
+ */
+static int
+nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp,
+					   struct nfsd3_attrstat  *resp)
+{
+	svc_fh	*fhp;
+	int	status;
+
+	dprintk("nfsd: SETATTR(3) %s\n", SVCFH_fmt(&argp->fh));
+
+	fhp = fh_copy(&resp->fh, &argp->fh);
+	status = nfsd_setattr(rqstp, fhp, &argp->attrs,
+			      argp->check_guard, argp->guardtime);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Look up a path name component
+ *
+ * resp->dirfh receives a copy of the directory handle and resp->fh is
+ * initialised to receive the handle of the entry that was looked up.
+ */
+static int
+nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
+					  struct nfsd3_diropres  *resp)
+{
+	svc_fh	*dirfhp, *newfhp;
+	int	status;
+
+	dprintk("nfsd: LOOKUP(3) %s %.*s\n",
+		SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	dirfhp = fh_copy(&resp->dirfh, &argp->fh);
+	newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
+
+	status = nfsd_lookup(rqstp, dirfhp, argp->name, argp->len, newfhp);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Check file access
+ *
+ * resp->access starts out as the requested access mask and is passed
+ * by reference to nfsd_access().
+ */
+static int
+nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp,
+					  struct nfsd3_accessres *resp)
+{
+	svc_fh	*fhp;
+	int	status;
+
+	dprintk("nfsd: ACCESS(3) %s 0x%x\n",
+		SVCFH_fmt(&argp->fh), argp->access);
+
+	fhp = fh_copy(&resp->fh, &argp->fh);
+	resp->access = argp->access;
+	status = nfsd_access(rqstp, fhp, &resp->access, NULL);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Read a symlink.
+ *
+ * The link text goes into argp->buffer; resp->len is preset to
+ * NFS3_MAXPATHLEN and passed by reference to nfsd_readlink().
+ */
+static int
+nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp,
+					   struct nfsd3_readlinkres *resp)
+{
+	svc_fh	*fhp;
+	int	status;
+
+	dprintk("nfsd: READLINK(3) %s\n", SVCFH_fmt(&argp->fh));
+
+	fhp = fh_copy(&resp->fh, &argp->fh);
+	resp->len = NFS3_MAXPATHLEN;
+	status = nfsd_readlink(rqstp, fhp, argp->buffer, &resp->len);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Read a portion of a file.
+ *
+ * The requested count is clamped to NFSSVC_MAXBLKSIZE before reply
+ * space is reserved and the read is issued into the kvec that the
+ * argument decoder prepared (argp->vec / argp->vlen).  On success
+ * resp->eof is set when the read reached or passed the inode size.
+ */
+static int
+nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
+				        struct nfsd3_readres  *resp)
+{
+	int	nfserr;
+
+	/* The offset is 64 bits wide: print it as unsigned long long so
+	 * it is not truncated on 32-bit builds. */
+	dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
+				SVCFH_fmt(&argp->fh),
+				(unsigned long) argp->count,
+				(unsigned long long) argp->offset);
+
+	/* Obtain buffer pointer for payload.
+	 * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
+	 * + 1 (xdr opaque byte count) = 26
+	 */
+
+	resp->count = argp->count;
+	if (NFSSVC_MAXBLKSIZE < resp->count)
+		resp->count = NFSSVC_MAXBLKSIZE;
+
+	svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
+
+	fh_copy(&resp->fh, &argp->fh);
+	nfserr = nfsd_read(rqstp, &resp->fh, NULL,
+				  argp->offset,
+				  argp->vec, argp->vlen,
+				  &resp->count);
+	if (nfserr == 0) {
+		struct inode	*inode = resp->fh.fh_dentry->d_inode;
+
+		/* resp->count is the count after nfsd_read() updated it */
+		resp->eof = (argp->offset + resp->count) >= inode->i_size;
+	}
+
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * Write data to a file
+ *
+ * resp->committed starts out as the requested stability level and is
+ * passed by reference to nfsd_write().  resp->count is always set to
+ * the count from the request.
+ */
+static int
+nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp,
+					 struct nfsd3_writeres  *resp)
+{
+	int	nfserr;
+
+	/* The offset is 64 bits wide and unsigned: print it with %Lu so
+	 * the format matches the argument and nothing is truncated. */
+	dprintk("nfsd: WRITE(3) %s %d bytes at %Lu%s\n",
+				SVCFH_fmt(&argp->fh),
+				argp->len,
+				(unsigned long long) argp->offset,
+				argp->stable? " stable" : "");
+
+	fh_copy(&resp->fh, &argp->fh);
+	resp->committed = argp->stable;
+	nfserr = nfsd_write(rqstp, &resp->fh, NULL,
+				   argp->offset,
+				   argp->vec, argp->vlen,
+				   argp->len,
+				   &resp->committed);
+	resp->count = argp->count;
+	RETURN_STATUS(nfserr);
+}
+
+/*
+ * With NFSv3, CREATE processing is a lot easier than with NFSv2.
+ * At least in theory; we'll see how it fares in practice when the
+ * first reports about SunOS compatibility problems start to pour in...
+ *
+ * The directory handle is verified first.  The requested mode is then
+ * forced to describe a regular file (file type bits replaced by
+ * S_IFREG, ATTR_MODE always set) before nfsd_create_v3() is called.
+ */
+static int
+nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
+					  struct nfsd3_diropres   *resp)
+{
+	struct iattr	*attr = &argp->attrs;
+	svc_fh		*dirfhp, *newfhp;
+	u32		status;
+
+	dprintk("nfsd: CREATE(3) %s %.*s\n",
+		SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	dirfhp = fh_copy(&resp->dirfh, &argp->fh);
+	newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
+
+	/* Get the directory inode */
+	status = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE);
+	if (status)
+		RETURN_STATUS(status);
+
+	/* Unfudge the mode bits */
+	if (attr->ia_valid & ATTR_MODE) {
+		attr->ia_mode = (attr->ia_mode & ~S_IFMT) | S_IFREG;
+	} else {
+		attr->ia_valid |= ATTR_MODE;
+		attr->ia_mode = S_IFREG;
+	}
+
+	/* Now create the file and set attributes */
+	status = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len,
+				attr, newfhp,
+				argp->createmode, argp->verf, NULL);
+
+	RETURN_STATUS(status);
+}
+
+/*
+ * Make directory. This operation is not idempotent.
+ *
+ * Any size attribute in the request is dropped (ATTR_SIZE cleared)
+ * before the directory is created.
+ */
+static int
+nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
+					 struct nfsd3_diropres   *resp)
+{
+	svc_fh	*dirfhp, *newfhp;
+	int	status;
+
+	dprintk("nfsd: MKDIR(3) %s %.*s\n",
+		SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	argp->attrs.ia_valid &= ~ATTR_SIZE;
+
+	dirfhp = fh_copy(&resp->dirfh, &argp->fh);
+	newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
+	status = nfsd_create(rqstp, dirfhp, argp->name, argp->len,
+			     &argp->attrs, S_IFDIR, 0, newfhp);
+
+	RETURN_STATUS(status);
+}
+
+/*
+ * Create a symbolic link named fname in directory ffh, pointing at
+ * tname.
+ */
+static int
+nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp,
+					   struct nfsd3_diropres    *resp)
+{
+	svc_fh	*dirfhp, *newfhp;
+	int	status;
+
+	dprintk("nfsd: SYMLINK(3) %s %.*s -> %.*s\n",
+		SVCFH_fmt(&argp->ffh),
+		argp->flen, argp->fname,
+		argp->tlen, argp->tname);
+
+	dirfhp = fh_copy(&resp->dirfh, &argp->ffh);
+	newfhp = fh_init(&resp->fh, NFS3_FHSIZE);
+	status = nfsd_symlink(rqstp, dirfhp, argp->fname, argp->flen,
+			      argp->tname, argp->tlen,
+			      newfhp, &argp->attrs);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Make socket/fifo/device.
+ *
+ * Only character devices, block devices, sockets and fifos may be
+ * created this way; every other file type yields nfserr_inval.  For
+ * devices the major/minor pair must survive a round trip through
+ * MKDEV()/MAJOR()/MINOR(), otherwise nfserr_inval is returned as well.
+ */
+static int
+nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp,
+					 struct nfsd3_diropres  *resp)
+{
+	int	status, mode_type;
+	dev_t	rdev = 0;
+
+	dprintk("nfsd: MKNOD(3) %s %.*s\n",
+		SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	fh_copy(&resp->dirfh, &argp->fh);
+	fh_init(&resp->fh, NFS3_FHSIZE);
+
+	switch (argp->ftype) {
+	case NF3CHR:
+	case NF3BLK:
+		rdev = MKDEV(argp->major, argp->minor);
+		if (MAJOR(rdev) != argp->major ||
+		    MINOR(rdev) != argp->minor)
+			RETURN_STATUS(nfserr_inval);
+		break;
+	case NF3SOCK:
+	case NF3FIFO:
+		break;
+	default:
+		/* zero, out-of-range and non-special file types */
+		RETURN_STATUS(nfserr_inval);
+	}
+
+	mode_type = nfs3_ftypes[argp->ftype];
+	status = nfsd_create(rqstp, &resp->dirfh, argp->name, argp->len,
+			     &argp->attrs, mode_type, rdev, &resp->fh);
+
+	RETURN_STATUS(status);
+}
+
+/*
+ * Remove file/fifo/socket etc.
+ */
+static int
+nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
+					  struct nfsd3_attrstat  *resp)
+{
+	svc_fh	*dirfhp;
+	int	status;
+
+	dprintk("nfsd: REMOVE(3) %s %.*s\n",
+		SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	dirfhp = fh_copy(&resp->fh, &argp->fh);
+
+	/* Unlink. -S_IFDIR means file must not be a directory */
+	status = nfsd_unlink(rqstp, dirfhp, -S_IFDIR, argp->name, argp->len);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Remove a directory
+ *
+ * Same as REMOVE, except that S_IFDIR is passed as the type argument
+ * of nfsd_unlink().
+ */
+static int
+nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp,
+					 struct nfsd3_attrstat  *resp)
+{
+	svc_fh	*dirfhp;
+	int	status;
+
+	dprintk("nfsd: RMDIR(3) %s %.*s\n",
+		SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	dirfhp = fh_copy(&resp->fh, &argp->fh);
+	status = nfsd_unlink(rqstp, dirfhp, S_IFDIR, argp->name, argp->len);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Rename fname in directory ffh to tname in directory tfh.
+ */
+static int
+nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp,
+					  struct nfsd3_renameres  *resp)
+{
+	svc_fh	*fromfhp, *tofhp;
+	int	status;
+
+	dprintk("nfsd: RENAME(3) %s %.*s ->\n",
+		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname);
+	dprintk("nfsd: -> %s %.*s\n",
+		SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname);
+
+	fromfhp = fh_copy(&resp->ffh, &argp->ffh);
+	tofhp = fh_copy(&resp->tfh, &argp->tfh);
+	status = nfsd_rename(rqstp, fromfhp, argp->fname, argp->flen,
+			     tofhp, argp->tname, argp->tlen);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Create a hard link named tname in directory tfh to the file ffh.
+ */
+static int
+nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp,
+					struct nfsd3_linkres  *resp)
+{
+	svc_fh	*filefhp, *dirfhp;
+	int	status;
+
+	dprintk("nfsd: LINK(3) %s ->\n", SVCFH_fmt(&argp->ffh));
+	dprintk("nfsd: -> %s %.*s\n",
+		SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname);
+
+	filefhp = fh_copy(&resp->fh, &argp->ffh);
+	dirfhp = fh_copy(&resp->tfh, &argp->tfh);
+	status = nfsd_link(rqstp, dirfhp, argp->tname, argp->tlen, filefhp);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Read a portion of a directory.
+ *
+ * Entries are encoded on the fly by nfs3svc_encode_entry into the
+ * buffer supplied by the argument decoder.  Afterwards the request
+ * verifier is copied into the reply, resp->count is set to the distance
+ * the buffer pointer advanced, and the cookie is written through
+ * resp->offset when that pointer is set.
+ */
+static int
+nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
+					   struct nfsd3_readdirres  *resp)
+{
+	int	status, words;
+
+	dprintk("nfsd: READDIR(3) %s %d bytes at %d\n",
+		SVCFH_fmt(&argp->fh),
+		argp->count, (u32) argp->cookie);
+
+	/* Make sure we've room for the NULL ptr & eof flag, and shrink to
+	 * client read size */
+	words = (argp->count >> 2) - 2;
+
+	/* Set up the encoder state before reading the directory */
+	resp->rqstp = rqstp;
+	resp->buffer = argp->buffer;
+	resp->buflen = words;
+	resp->common.err = nfs_ok;
+
+	/* Read directory and encode entries on the fly */
+	fh_copy(&resp->fh, &argp->fh);
+	status = nfsd_readdir(rqstp, &resp->fh, (loff_t*) &argp->cookie,
+			      &resp->common, nfs3svc_encode_entry);
+
+	memcpy(resp->verf, argp->verf, 8);
+	resp->count = resp->buffer - argp->buffer;
+	if (resp->offset)
+		xdr_encode_hyper(resp->offset, argp->cookie);
+
+	RETURN_STATUS(status);
+}
+
+/*
+ * Read a portion of a directory, including file handles and attrs.
+ * For now, we choose to ignore the dircount parameter.
+ *
+ * Entries are encoded by nfs3svc_encode_entry_plus.  Afterwards the
+ * response pages are walked to work out how much was encoded, and the
+ * final directory offset is written through resp->offset (and
+ * resp->offset1 when that is set).
+ */
+static int
+nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp,
+ struct nfsd3_readdirres *resp)
+{
+ int nfserr, count = 0;
+ loff_t offset;
+ int i;
+ caddr_t page_addr = NULL;
+
+ dprintk("nfsd: READDIR+(3) %s %d bytes at %d\n",
+ SVCFH_fmt(&argp->fh),
+ argp->count, (u32) argp->cookie);
+
+ /* Convert byte count to number of words (i.e. >> 2),
+ * and reserve room for the NULL ptr & eof flag (-2 words) */
+ resp->count = (argp->count >> 2) - 2;
+
+ /* Read directory and encode entries on the fly */
+ fh_copy(&resp->fh, &argp->fh);
+
+ resp->common.err = nfs_ok;
+ resp->buffer = argp->buffer;
+ resp->buflen = resp->count;
+ resp->rqstp = rqstp;
+ offset = argp->cookie;
+ nfserr = nfsd_readdir(rqstp, &resp->fh,
+ &offset,
+ &resp->common,
+ nfs3svc_encode_entry_plus);
+ memcpy(resp->verf, argp->verf, 8);
+ /* Starting at response page 1, add PAGE_SIZE for every page that
+ * precedes the one containing resp->buffer, then add the offset of
+ * resp->buffer within that page. */
+ for (i=1; i<rqstp->rq_resused ; i++) {
+ page_addr = page_address(rqstp->rq_respages[i]);
+
+ if (((caddr_t)resp->buffer >= page_addr) &&
+ ((caddr_t)resp->buffer < page_addr + PAGE_SIZE)) {
+ count += (caddr_t)resp->buffer - page_addr;
+ break;
+ }
+ count += PAGE_SIZE;
+ }
+ /* byte total back to 32-bit words */
+ resp->count = count >> 2;
+ if (resp->offset) {
+ if (unlikely(resp->offset1)) {
+ /* we ended up with offset on a page boundary */
+ /* the two 32-bit halves are stored through separate pointers */
+ *resp->offset = htonl(offset >> 32);
+ *resp->offset1 = htonl(offset & 0xffffffff);
+ resp->offset1 = NULL;
+ } else {
+ xdr_encode_hyper(resp->offset, offset);
+ }
+ }
+
+ RETURN_STATUS(nfserr);
+}
+
+/*
+ * Get file system stats
+ *
+ * Operates on the handle in the argument structure directly and
+ * releases it with fh_put() before returning.
+ */
+static int
+nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
+					   struct nfsd3_fsstatres *resp)
+{
+	svc_fh	*fhp = &argp->fh;
+	int	status;
+
+	dprintk("nfsd: FSSTAT(3) %s\n", SVCFH_fmt(fhp));
+
+	status = nfsd_statfs(rqstp, fhp, &resp->stats);
+	fh_put(fhp);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Get file system info
+ *
+ * The reply is filled with defaults first.  When the handle verifies,
+ * the maximum file size is taken from the superblock and the
+ * properties are replaced for MSDOS file systems.
+ */
+static int
+nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle    *argp,
+					   struct nfsd3_fsinfores *resp)
+{
+	int	status;
+
+	dprintk("nfsd: FSINFO(3) %s\n", SVCFH_fmt(&argp->fh));
+
+	/* read sizes */
+	resp->f_rtmax  = NFSSVC_MAXBLKSIZE;
+	resp->f_rtpref = NFSSVC_MAXBLKSIZE;
+	resp->f_rtmult = PAGE_SIZE;
+	/* write sizes */
+	resp->f_wtmax  = NFSSVC_MAXBLKSIZE;
+	resp->f_wtpref = NFSSVC_MAXBLKSIZE;
+	resp->f_wtmult = PAGE_SIZE;
+	/* everything else */
+	resp->f_dtpref = PAGE_SIZE;
+	resp->f_maxfilesize = ~(u32) 0;
+	resp->f_properties = NFS3_FSF_DEFAULT;
+
+	status = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+
+	/* Check special features of the file system. May request
+	 * different read/write sizes for file systems known to have
+	 * problems with large blocks */
+	if (!status) {
+		struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+
+		/* Note that we don't care for remote fs's here */
+		if (sb->s_magic == 0x4d44 /* MSDOS_SUPER_MAGIC */)
+			resp->f_properties = NFS3_FSF_BILLYBOY;
+		resp->f_maxfilesize = sb->s_maxbytes;
+	}
+
+	fh_put(&argp->fh);
+	RETURN_STATUS(status);
+}
+
+/*
+ * Get pathconf info for the specified file
+ *
+ * Defaults are filled in first; when the handle verifies they are
+ * adjusted for ext2 (link and name limits) and MSDOS (case handling).
+ */
+static int
+nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle      *argp,
+					     struct nfsd3_pathconfres *resp)
+{
+	int	status;
+
+	dprintk("nfsd: PATHCONF(3) %s\n", SVCFH_fmt(&argp->fh));
+
+	/* Set default pathconf */
+	resp->p_link_max = 255;		/* at least */
+	resp->p_name_max = 255;		/* at least */
+	resp->p_no_trunc = 0;
+	resp->p_chown_restricted = 1;
+	resp->p_case_insensitive = 0;
+	resp->p_case_preserving = 1;
+
+	status = fh_verify(rqstp, &argp->fh, 0, MAY_NOP);
+
+	if (!status) {
+		struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
+
+		/* Note that we don't care for remote fs's here */
+		if (sb->s_magic == EXT2_SUPER_MAGIC) {
+			resp->p_link_max = EXT2_LINK_MAX;
+			resp->p_name_max = EXT2_NAME_LEN;
+		} else if (sb->s_magic == 0x4d44) { /* MSDOS_SUPER_MAGIC */
+			resp->p_case_insensitive = 1;
+			resp->p_case_preserving = 0;
+		}
+	}
+
+	fh_put(&argp->fh);
+	RETURN_STATUS(status);
+}
+
+
+/*
+ * Commit a file (range) to stable storage.
+ *
+ * Offsets beyond NFS_OFFSET_MAX are rejected with nfserr_inval before
+ * the file handle is touched.
+ */
+static int
+nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp,
+					   struct nfsd3_commitres  *resp)
+{
+	svc_fh	*fhp;
+	int	status;
+
+	dprintk("nfsd: COMMIT(3) %s %u@%Lu\n",
+		SVCFH_fmt(&argp->fh),
+		argp->count,
+		(unsigned long long) argp->offset);
+
+	if (argp->offset > NFS_OFFSET_MAX)
+		RETURN_STATUS(nfserr_inval);
+
+	fhp = fh_copy(&resp->fh, &argp->fh);
+	status = nfsd_commit(rqstp, fhp, argp->offset, argp->count);
+
+	RETURN_STATUS(status);
+}
+
+
+/*
+ * NFSv3 Server procedures.
+ * Only the results of non-idempotent operations are cached.
+ *
+ * PROC() builds function and struct names by token pasting.  The
+ * aliases below supply the pasted names for procedures that share a
+ * decoder, encoder, release function or argument/result struct with
+ * another procedure, and map the "void" variants to NULL.
+ */
+#define nfs3svc_decode_voidargs NULL
+#define nfs3svc_release_void NULL
+#define nfs3svc_decode_fhandleargs nfs3svc_decode_fhandle
+#define nfs3svc_encode_attrstatres nfs3svc_encode_attrstat
+#define nfs3svc_encode_wccstatres nfs3svc_encode_wccstat
+#define nfsd3_mkdirargs nfsd3_createargs
+#define nfsd3_readdirplusargs nfsd3_readdirargs
+#define nfsd3_fhandleargs nfsd_fhandle
+#define nfsd3_fhandleres nfsd3_attrstat
+#define nfsd3_attrstatres nfsd3_attrstat
+#define nfsd3_wccstatres nfsd3_attrstat
+#define nfsd3_createres nfsd3_diropres
+#define nfsd3_voidres nfsd3_voidargs
+struct nfsd3_voidargs { int dummy; };
+
+/*
+ * Positional initializer for one procedure table entry, in order:
+ * handler nfsd3_proc_<name>, argument decoder, result encoder, release
+ * function, sizeof the argument struct, sizeof the result struct, a
+ * literal 0, the reply cache type and the respsize value.
+ */
+#define PROC(name, argt, rest, relt, cache, respsize) \
+ { (svc_procfunc) nfsd3_proc_##name, \
+ (kxdrproc_t) nfs3svc_decode_##argt##args, \
+ (kxdrproc_t) nfs3svc_encode_##rest##res, \
+ (kxdrproc_t) nfs3svc_release_##relt, \
+ sizeof(struct nfsd3_##argt##args), \
+ sizeof(struct nfsd3_##rest##res), \
+ 0, \
+ cache, \
+ respsize, \
+ }
+
+/* Building blocks for the respsize column of the procedure table
+ * (presumably counts of 32-bit XDR words - confirm). */
+#define ST 1 /* status*/
+#define FH 17 /* filehandle with length */
+#define AT 21 /* attributes */
+#define pAT (1+AT) /* post attributes - conditional */
+#define WC (7+pAT) /* WCC attributes */
+
+/*
+ * NFSv3 procedure table; the entries appear in protocol procedure
+ * number order, from NULL to COMMIT.
+ *
+ * The last column is the respsize value built from the ST/FH/AT/pAT/WC
+ * counts.  Byte-sized maxima are divided by 4 so that they are in the
+ * same unit as those counts (READLINK's path length and READ's block
+ * size alike).
+ */
+static struct svc_procedure		nfsd_procedures3[22] = {
+	PROC(null,	 void,		void,		void,	  RC_NOCACHE, ST),
+	PROC(getattr,	 fhandle,	attrstat,	fhandle,  RC_NOCACHE, ST+AT),
+	PROC(setattr,	 sattr,		wccstat,	fhandle,  RC_REPLBUFF, ST+WC),
+	PROC(lookup,	 dirop,		dirop,		fhandle2, RC_NOCACHE, ST+FH+pAT+pAT),
+	PROC(access,	 access,	access,		fhandle,  RC_NOCACHE, ST+pAT+1),
+	PROC(readlink,	 readlink,	readlink,	fhandle,  RC_NOCACHE, ST+pAT+1+NFS3_MAXPATHLEN/4),
+	PROC(read,	 read,		read,		fhandle,  RC_NOCACHE, ST+pAT+4+NFSSVC_MAXBLKSIZE/4),
+	PROC(write,	 write,		write,		fhandle,  RC_REPLBUFF, ST+WC+4),
+	PROC(create,	 create,	create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
+	PROC(mkdir,	 mkdir,		create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
+	PROC(symlink,	 symlink,	create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
+	PROC(mknod,	 mknod,		create,		fhandle2, RC_REPLBUFF, ST+(1+FH+pAT)+WC),
+	PROC(remove,	 dirop,		wccstat,	fhandle,  RC_REPLBUFF, ST+WC),
+	PROC(rmdir,	 dirop,		wccstat,	fhandle,  RC_REPLBUFF, ST+WC),
+	PROC(rename,	 rename,	rename,		fhandle2, RC_REPLBUFF, ST+WC+WC),
+	PROC(link,	 link,		link,		fhandle2, RC_REPLBUFF, ST+pAT+WC),
+	PROC(readdir,	 readdir,	readdir,	fhandle,  RC_NOCACHE, 0),
+	PROC(readdirplus,readdirplus,	readdir,	fhandle,  RC_NOCACHE, 0),
+	PROC(fsstat,	 fhandle,	fsstat,		void,     RC_NOCACHE, ST+pAT+2*6+1),
+	PROC(fsinfo,	 fhandle,	fsinfo,		void,     RC_NOCACHE, ST+pAT+12),
+	PROC(pathconf,	 fhandle,	pathconf,	void,     RC_NOCACHE, ST+pAT+6),
+	PROC(commit,	 commit,	commit,		fhandle,  RC_NOCACHE, ST+WC+2),
+};
+
+/*
+ * Version 3 service description: ties the procedure table above to
+ * nfsd_dispatch.  vs_nproc must match the size of nfsd_procedures3.
+ */
+struct svc_version nfsd_version3 = {
+ .vs_vers = 3,
+ .vs_nproc = 22,
+ .vs_proc = nfsd_procedures3,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS3_SVC_XDRSIZE,
+};
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
new file mode 100644
index 000000000000..11f806835c5a
--- /dev/null
+++ b/fs/nfsd/nfs3xdr.c
@@ -0,0 +1,1092 @@
+/*
+ * linux/fs/nfsd/nfs3xdr.c
+ *
+ * XDR support for nfsd/protocol version 3.
+ *
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ *
+ * 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()!
+ */
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/nfs3.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/mm.h>
+#include <linux/vfs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/xdr3.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_XDR
+
+#ifdef NFSD_OPTIMIZE_SPACE
+# define inline
+#endif
+
+
+/*
+ * Mapping of S_IF* types to NFS file types
+ *
+ * Indexed by (mode & S_IFMT) >> 12, which is how the attribute encoders
+ * in this file use it; slots with no matching file type hold NF3BAD.
+ * NOTE(review): slot 14 holds NF3LNK just like slot 10 - confirm that
+ * this is intentional.
+ */
+static u32 nfs3_ftypes[] = {
+ NF3NON, NF3FIFO, NF3CHR, NF3BAD,
+ NF3DIR, NF3BAD, NF3BLK, NF3BAD,
+ NF3REG, NF3BAD, NF3LNK, NF3BAD,
+ NF3SOCK, NF3BAD, NF3LNK, NF3BAD,
+};
+
+/*
+ * XDR functions for basic NFS types
+ */
+/*
+ * Encode a timespec as two 32-bit words (seconds, then nanoseconds)
+ * and return the position just past them.
+ */
+static inline u32 *
+encode_time3(u32 *p, struct timespec *time)
+{
+	p[0] = htonl((u32) time->tv_sec);
+	p[1] = htonl(time->tv_nsec);
+	return p + 2;
+}
+
+/*
+ * Decode two 32-bit words (seconds, then nanoseconds) into a timespec
+ * and return the position just past them.
+ */
+static inline u32 *
+decode_time3(u32 *p, struct timespec *time)
+{
+	time->tv_sec  = ntohl(p[0]);
+	time->tv_nsec = ntohl(p[1]);
+	return p + 2;
+}
+
+/*
+ * Decode a variable-length file handle into fhp.
+ *
+ * Returns the position past the (word-padded) handle data, or NULL when
+ * the encoded length exceeds NFS3_FHSIZE.  fhp has been initialised by
+ * fh_init() in either case.
+ */
+static inline u32 *
+decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	unsigned int	len;
+	u32		*data = p + 1;
+
+	fh_init(fhp, NFS3_FHSIZE);
+
+	len = ntohl(p[0]);
+	if (len > NFS3_FHSIZE)
+		return NULL;
+
+	memcpy(&fhp->fh_handle.fh_base, data, len);
+	fhp->fh_handle.fh_size = len;
+	return data + XDR_QUADLEN(len);
+}
+
+/*
+ * Encode the file handle in fhp as a length word followed by the
+ * handle bytes, padded to a whole number of words.  The last word is
+ * cleared before the copy so that the padding bytes are zero.
+ */
+static inline u32 *
+encode_fh(u32 *p, struct svc_fh *fhp)
+{
+	unsigned int	len = fhp->fh_handle.fh_size;
+	u32		*data = p + 1;
+
+	p[0] = htonl(len);
+	if (len != 0)
+		data[XDR_QUADLEN(len) - 1] = 0;
+	memcpy(data, &fhp->fh_handle.fh_base, len);
+	return data + XDR_QUADLEN(len);
+}
+
+/*
+ * Decode a file name and make sure that the path contains
+ * no slashes or null bytes.
+ *
+ * Returns the position past the name, or NULL when the string cannot
+ * be decoded or contains a forbidden character.
+ */
+static inline u32 *
+decode_filename(u32 *p, char **namp, int *lenp)
+{
+	char	*cp;
+	int	pos;
+
+	p = xdr_decode_string_inplace(p, namp, lenp, NFS3_MAXNAMLEN);
+	if (p == NULL)
+		return NULL;
+
+	cp = *namp;
+	for (pos = 0; pos < *lenp; pos++) {
+		if (cp[pos] == '\0' || cp[pos] == '/')
+			return NULL;
+	}
+
+	return p;
+}
+
+/*
+ * Decode a set of settable attributes into iap.
+ *
+ * Mode, uid, gid and size are each preceded by a "value follows" word;
+ * a size above NFS_OFFSET_MAX is clamped to NFS_OFFSET_MAX.  atime and
+ * mtime are each preceded by a selector: 1 means "set to server time",
+ * 2 means "set to the client time that follows", any other value leaves
+ * the time alone.  iap->ia_valid records which attributes were present.
+ * Returns the position past the decoded attributes.
+ */
+static inline u32 *
+decode_sattr3(u32 *p, struct iattr *iap)
+{
+	iap->ia_valid = 0;
+
+	if (*p++) {
+		iap->ia_valid |= ATTR_MODE;
+		iap->ia_mode = ntohl(*p++);
+	}
+	if (*p++) {
+		iap->ia_valid |= ATTR_UID;
+		iap->ia_uid = ntohl(*p++);
+	}
+	if (*p++) {
+		iap->ia_valid |= ATTR_GID;
+		iap->ia_gid = ntohl(*p++);
+	}
+	if (*p++) {
+		u64	size;
+
+		iap->ia_valid |= ATTR_SIZE;
+		p = xdr_decode_hyper(p, &size);
+		if (size > NFS_OFFSET_MAX)
+			iap->ia_size = NFS_OFFSET_MAX;
+		else
+			iap->ia_size = size;
+	}
+
+	/* atime */
+	switch (ntohl(*p++)) {
+	case 1:		/* set to server time */
+		iap->ia_valid |= ATTR_ATIME;
+		break;
+	case 2:		/* set to client time */
+		iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+		p = decode_time3(p, &iap->ia_atime);
+		break;
+	default:
+		break;
+	}
+
+	/* mtime */
+	switch (ntohl(*p++)) {
+	case 1:		/* set to server time */
+		iap->ia_valid |= ATTR_MTIME;
+		break;
+	case 2:		/* set to client time */
+		iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
+		p = decode_time3(p, &iap->ia_mtime);
+		break;
+	default:
+		break;
+	}
+
+	return p;
+}
+
+/*
+ * Encode the current attributes of the file behind fhp, obtained with
+ * vfs_getattr(), and return the position past them.
+ *
+ * Fields are written in this order: type, mode, nlink, uid, gid, size,
+ * space used, rdev major and minor, fsid, inode number, atime, mtime,
+ * ctime.
+ * NOTE(review): the return value of vfs_getattr() is not checked.
+ */
+static inline u32 *
+encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+ struct vfsmount *mnt = fhp->fh_export->ex_mnt;
+ struct dentry *dentry = fhp->fh_dentry;
+ struct kstat stat;
+ struct timespec time;
+
+ vfs_getattr(mnt, dentry, &stat);
+
+ *p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]);
+ *p++ = htonl((u32) stat.mode);
+ *p++ = htonl((u32) stat.nlink);
+ *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid));
+ *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid));
+ /* a symlink never reports a size above NFS3_MAXPATHLEN */
+ if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) {
+ p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
+ } else {
+ p = xdr_encode_hyper(p, (u64) stat.size);
+ }
+ /* stat.blocks scaled by 512 (<< 9) */
+ p = xdr_encode_hyper(p, ((u64)stat.blocks) << 9);
+ *p++ = htonl((u32) MAJOR(stat.rdev));
+ *p++ = htonl((u32) MINOR(stat.rdev));
+ /* fsid: the export's ex_fsid when is_fsid() says so, else the device */
+ if (is_fsid(fhp, rqstp->rq_reffh))
+ p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
+ else
+ p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat.dev));
+ p = xdr_encode_hyper(p, (u64) stat.ino);
+ p = encode_time3(p, &stat.atime);
+ /* mtime comes from lease_get_mtime() rather than from stat */
+ lease_get_mtime(dentry->d_inode, &time);
+ p = encode_time3(p, &time);
+ p = encode_time3(p, &stat.ctime);
+
+ return p;
+}
+
+/*
+ * Encode post-operation attributes from the fh_post_* values saved in
+ * fhp, preceded by an "attributes follow" word.  The field order matches
+ * encode_fattr3(); the device and inode numbers are read from the inode
+ * itself.  Returns the position past the encoded data.
+ */
+static inline u32 *
+encode_saved_post_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+ struct inode *inode = fhp->fh_dentry->d_inode;
+
+ /* Attributes to follow */
+ *p++ = xdr_one;
+
+ *p++ = htonl(nfs3_ftypes[(fhp->fh_post_mode & S_IFMT) >> 12]);
+ *p++ = htonl((u32) fhp->fh_post_mode);
+ *p++ = htonl((u32) fhp->fh_post_nlink);
+ *p++ = htonl((u32) nfsd_ruid(rqstp, fhp->fh_post_uid));
+ *p++ = htonl((u32) nfsd_rgid(rqstp, fhp->fh_post_gid));
+ /* a symlink never reports a size above NFS3_MAXPATHLEN */
+ if (S_ISLNK(fhp->fh_post_mode) && fhp->fh_post_size > NFS3_MAXPATHLEN) {
+ p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
+ } else {
+ p = xdr_encode_hyper(p, (u64) fhp->fh_post_size);
+ }
+ p = xdr_encode_hyper(p, ((u64)fhp->fh_post_blocks) << 9);
+ /* the saved rdev words are stored without htonl() here
+ * (presumably already in network byte order - confirm) */
+ *p++ = fhp->fh_post_rdev[0];
+ *p++ = fhp->fh_post_rdev[1];
+ if (is_fsid(fhp, rqstp->rq_reffh))
+ p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
+ else
+ p = xdr_encode_hyper(p, (u64)huge_encode_dev(inode->i_sb->s_dev));
+ p = xdr_encode_hyper(p, (u64) inode->i_ino);
+ p = encode_time3(p, &fhp->fh_post_atime);
+ p = encode_time3(p, &fhp->fh_post_mtime);
+ p = encode_time3(p, &fhp->fh_post_ctime);
+
+ return p;
+}
+
+/*
+ * Encode post-operation attributes.
+ * The inode may be NULL if the call failed because of a stale file
+ * handle. In this case, no attributes are returned.
+ *
+ * Writes a single "no attributes" word when there is no dentry or no
+ * inode; otherwise an "attributes follow" word and the current
+ * attributes.
+ */
+static u32 *
+encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	struct dentry	*dentry = fhp->fh_dentry;
+
+	if (dentry == NULL || dentry->d_inode == NULL) {
+		*p++ = xdr_zero;
+		return p;
+	}
+
+	*p++ = xdr_one;		/* attributes follow */
+	return encode_fattr3(rqstp, p, fhp);
+}
+
+/*
+ * Encode weak cache consistency data
+ *
+ * When post-operation attributes were saved for a valid inode, emit the
+ * saved pre-operation attributes (size, mtime, ctime) if there are any,
+ * followed by the saved post-operation attributes.  Otherwise emit "no
+ * pre-op attributes" and fall back to encode_post_op_attr().
+ */
+static u32 *
+encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+	struct dentry	*dentry = fhp->fh_dentry;
+
+	if (!(dentry && dentry->d_inode && fhp->fh_post_saved)) {
+		/* no pre- or post-attrs */
+		*p++ = xdr_zero;
+		return encode_post_op_attr(rqstp, p, fhp);
+	}
+
+	if (!fhp->fh_pre_saved) {
+		*p++ = xdr_zero;
+	} else {
+		*p++ = xdr_one;
+		p = xdr_encode_hyper(p, (u64) fhp->fh_pre_size);
+		p = encode_time3(p, &fhp->fh_pre_mtime);
+		p = encode_time3(p, &fhp->fh_pre_ctime);
+	}
+	return encode_saved_post_attr(rqstp, p, fhp);
+}
+
+
+/*
+ * XDR decode functions
+ */
+/*
+ * Decode arguments that consist of a single file handle.
+ * Returns 0 when the handle cannot be decoded, otherwise the result of
+ * xdr_argsize_check().
+ */
+int
+nfs3svc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode SETATTR arguments: file handle, new attributes and the
+ * optional guard.  When the guard flag is set, only the seconds part of
+ * the guard time is kept.
+ */
+int
+nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_sattrargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+	p = decode_sattr3(p, &args->attrs);
+	if (p == NULL)
+		return 0;
+
+	args->check_guard = ntohl(*p++);
+	if (args->check_guard != 0) {
+		struct timespec	guard;
+
+		p = decode_time3(p, &guard);
+		args->guardtime = guard.tv_sec;
+	}
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode directory operation arguments: directory handle plus a file
+ * name.
+ */
+int
+nfs3svc_decode_diropargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_diropargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+	p = decode_filename(p, &args->name, &args->len);
+	if (p == NULL)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode ACCESS arguments: file handle plus the requested access mask.
+ */
+int
+nfs3svc_decode_accessargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_accessargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	args->access = ntohl(*p++);
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode READ arguments: file handle, offset and count.
+ *
+ * Also prepares the kvec the data will be read into: one response page
+ * is taken per PAGE_SIZE of the count (clamped to NFSSVC_MAXBLKSIZE for
+ * this purpose; args->count keeps the value from the request).
+ */
+int
+nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readargs *args)
+{
+	unsigned int	remaining;
+	int		v, pn;
+
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+	p = xdr_decode_hyper(p, &args->offset);
+	if (p == NULL)
+		return 0;
+
+	remaining = args->count = ntohl(*p++);
+	if (remaining > NFSSVC_MAXBLKSIZE)
+		remaining = NFSSVC_MAXBLKSIZE;
+
+	/* set up the kvec */
+	for (v = 0; remaining > 0; v++) {
+		pn = rqstp->rq_resused;
+		svc_take_page(rqstp);
+		args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+		args->vec[v].iov_len = remaining < PAGE_SIZE? remaining : PAGE_SIZE;
+		remaining -= args->vec[v].iov_len;
+	}
+	args->vlen = v;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode WRITE arguments: file handle, offset, count, stability level
+ * and the opaque data.
+ *
+ * The data is not copied; args->vec is pointed at it, starting with the
+ * remainder of the head buffer and continuing through the argument
+ * pages.  Returns 0 when the request is shorter than the data length it
+ * claims, when count and data length disagree, or when no data bytes
+ * are left in the head buffer.
+ */
+int
+nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_writeargs *args)
+{
+	unsigned int len, v, hdr;
+
+	if (!(p = decode_fh(p, &args->fh))
+	 || !(p = xdr_decode_hyper(p, &args->offset)))
+		return 0;
+
+	args->count = ntohl(*p++);
+	args->stable = ntohl(*p++);
+	len = args->len = ntohl(*p++);
+
+	/*
+	 * The request must hold at least hdr + len bytes.  len comes
+	 * straight from the client, so compare without adding: a huge len
+	 * would make "len + hdr" wrap around and pass the check.
+	 */
+	hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
+	if (hdr > rqstp->rq_arg.len || len > rqstp->rq_arg.len - hdr)
+		return 0;
+
+	args->vec[0].iov_base = (void*)p;
+	args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+
+	if (len > NFSSVC_MAXBLKSIZE)
+		len = NFSSVC_MAXBLKSIZE;
+	/* Spread the remaining length over whole pages; the last vector
+	 * is then trimmed to what is left. */
+	v=  0;
+	while (len > args->vec[v].iov_len) {
+		len -= args->vec[v].iov_len;
+		v++;
+		args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
+		args->vec[v].iov_len = PAGE_SIZE;
+	}
+	args->vec[v].iov_len = len;
+	args->vlen = v+1;
+
+	return args->count == args->len && args->vec[0].iov_len > 0;
+}
+
+/*
+ * Decode CREATE arguments: directory handle, file name and create
+ * mode.  UNCHECKED and GUARDED creates carry attributes; an EXCLUSIVE
+ * create carries a two-word verifier that is referenced in place.  Any
+ * other mode is rejected.
+ */
+int
+nfs3svc_decode_createargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_createargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+	p = decode_filename(p, &args->name, &args->len);
+	if (p == NULL)
+		return 0;
+
+	args->createmode = ntohl(*p++);
+	switch (args->createmode) {
+	case NFS3_CREATE_UNCHECKED:
+	case NFS3_CREATE_GUARDED:
+		p = decode_sattr3(p, &args->attrs);
+		if (p == NULL)
+			return 0;
+		break;
+	case NFS3_CREATE_EXCLUSIVE:
+		args->verf = p;
+		p += 2;
+		break;
+	default:
+		return 0;
+	}
+
+	return xdr_argsize_check(rqstp, p);
+}
+/*
+ * Decode MKDIR arguments: directory handle, new name and attributes.
+ */
+int
+nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_createargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+	p = decode_filename(p, &args->name, &args->len);
+	if (p == NULL)
+		return 0;
+	p = decode_sattr3(p, &args->attrs);
+	if (p == NULL)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode SYMLINK arguments: directory handle, link name, attributes and
+ * the link target path.
+ *
+ * The target is copied into a freshly taken response page and
+ * NUL-terminated there.  Returns 1 on success and 0 when decoding fails,
+ * when the target length is zero or too large, or when the target
+ * contains a NUL byte or is shorter than its declared length.
+ */
+int
+nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
+ struct nfsd3_symlinkargs *args)
+{
+ unsigned int len;
+ int avail;
+ char *old, *new;
+ struct kvec *vec;
+
+ if (!(p = decode_fh(p, &args->ffh))
+ || !(p = decode_filename(p, &args->fname, &args->flen))
+ || !(p = decode_sattr3(p, &args->attrs))
+ )
+ return 0;
+ /* now decode the pathname, which might be larger than the first page.
+ * As we have to check for nul's anyway, we copy it into a new page
+ * This page appears in the rq_res.pages list, but as pages_len is always
+ * 0, it won't get in the way
+ */
+ svc_take_page(rqstp);
+ len = ntohl(*p++);
+ /* len < PAGE_SIZE leaves room for the terminating NUL in the new page */
+ if (len == 0 || len > NFS3_MAXPATHLEN || len >= PAGE_SIZE)
+ return 0;
+ args->tname = new = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+ args->tlen = len;
+ /* first copy and check from the first page */
+ old = (char*)p;
+ vec = &rqstp->rq_arg.head[0];
+ /* bytes left in the head buffer from the current position */
+ avail = vec->iov_len - (old - (char*)vec->iov_base);
+ while (len && avail && *old) {
+ *new++ = *old++;
+ len--;
+ avail--;
+ }
+ /* now copy next page if there is one */
+ if (len && !avail && rqstp->rq_arg.page_len) {
+ avail = rqstp->rq_arg.page_len;
+ if (avail > PAGE_SIZE) avail = PAGE_SIZE;
+ old = page_address(rqstp->rq_arg.pages[0]);
+ }
+ while (len && avail && *old) {
+ *new++ = *old++;
+ len--;
+ avail--;
+ }
+ *new = '\0';
+ /* leftover len: a NUL byte or the end of the data came too early */
+ if (len)
+ return 0;
+
+ return 1;
+}
+
+/*
+ * Decode MKNOD arguments: directory handle, name and file type.
+ * Block and character devices carry attributes followed by a
+ * major/minor pair; sockets and fifos carry attributes only; nothing
+ * further is decoded for any other type.
+ */
+int
+nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_mknodargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+	p = decode_filename(p, &args->name, &args->len);
+	if (p == NULL)
+		return 0;
+
+	args->ftype = ntohl(*p++);
+
+	switch (args->ftype) {
+	case NF3BLK:
+	case NF3CHR:
+		p = decode_sattr3(p, &args->attrs);
+		if (p == NULL)
+			return 0;
+		args->major = ntohl(*p++);
+		args->minor = ntohl(*p++);
+		break;
+	case NF3SOCK:
+	case NF3FIFO:
+		p = decode_sattr3(p, &args->attrs);
+		if (p == NULL)
+			return 0;
+		break;
+	default:
+		break;
+	}
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode RENAME arguments: a file handle and name stored in
+ * args->ffh/fname/flen, followed by a second file handle and name
+ * stored in args->tfh/tname/tlen.
+ *
+ * Returns 0 as soon as one component fails to decode, otherwise the
+ * result of xdr_argsize_check().
+ */
+int
+nfs3svc_decode_renameargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_renameargs *args)
+{
+	p = decode_fh(p, &args->ffh);
+	if (!p)
+		return 0;
+	p = decode_filename(p, &args->fname, &args->flen);
+	if (!p)
+		return 0;
+	p = decode_fh(p, &args->tfh);
+	if (!p)
+		return 0;
+	p = decode_filename(p, &args->tname, &args->tlen);
+	if (!p)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode READLINK arguments: a single file handle.  A page is taken
+ * with svc_take_page() and its address stored in args->buffer.
+ *
+ * Returns 0 if the file handle fails to decode, otherwise the result
+ * of xdr_argsize_check().
+ */
+int
+nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readlinkargs *args)
+{
+	int last;
+
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	svc_take_page(rqstp);
+	/* the page just taken is the last one in use */
+	last = rqstp->rq_resused - 1;
+	args->buffer = page_address(rqstp->rq_respages[last]);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode LINK arguments: a file handle stored in args->ffh, then a
+ * second file handle and a name stored in args->tfh/tname/tlen.
+ *
+ * Returns 0 as soon as one component fails to decode, otherwise the
+ * result of xdr_argsize_check().
+ */
+int
+nfs3svc_decode_linkargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_linkargs *args)
+{
+	p = decode_fh(p, &args->ffh);
+	if (!p)
+		return 0;
+	p = decode_fh(p, &args->tfh);
+	if (!p)
+		return 0;
+	p = decode_filename(p, &args->tname, &args->tlen);
+	if (!p)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode READDIR arguments: file handle, 64-bit cookie, a two-word
+ * verifier (referenced in place through args->verf) and the reply size
+ * requested by the client, which is clamped to PAGE_SIZE.  dircount is
+ * not part of this request and is set to ~0.  One page is taken for
+ * the reply and its address stored in args->buffer.
+ *
+ * Returns 0 if the file handle fails to decode, otherwise the result
+ * of xdr_argsize_check().
+ */
+int
+nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readdirargs *args)
+{
+	int last;
+
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	p = xdr_decode_hyper(p, &args->cookie);
+
+	/* the verifier occupies two words and is not copied */
+	args->verf = p;
+	p += 2;
+
+	args->dircount = ~0;
+	args->count = ntohl(*p++);
+	if (args->count > PAGE_SIZE)
+		args->count = PAGE_SIZE;
+
+	svc_take_page(rqstp);
+	last = rqstp->rq_resused - 1;
+	args->buffer = page_address(rqstp->rq_respages[last]);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode READDIRPLUS arguments: file handle, 64-bit cookie, a two-word
+ * verifier (referenced in place), dircount and count.  count is
+ * clamped to NFSSVC_MAXBLKSIZE and enough pages are taken to hold that
+ * many bytes; args->buffer is set to the first page taken unless it is
+ * already non-NULL.
+ *
+ * Returns 0 if the file handle fails to decode, otherwise the result
+ * of xdr_argsize_check().
+ */
+int
+nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readdirargs *args)
+{
+	int remaining, slot;
+
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	p = xdr_decode_hyper(p, &args->cookie);
+	args->verf = p;
+	p += 2;
+	args->dircount = ntohl(*p++);
+	args->count = ntohl(*p++);
+
+	if (args->count > NFSSVC_MAXBLKSIZE)
+		remaining = NFSSVC_MAXBLKSIZE;
+	else
+		remaining = args->count;
+	args->count = remaining;
+
+	/* take one page per PAGE_SIZE bytes (or part thereof) of reply */
+	for (; remaining > 0; remaining -= PAGE_SIZE) {
+		/* index of the page about to be taken */
+		slot = rqstp->rq_resused;
+		svc_take_page(rqstp);
+		if (!args->buffer)
+			args->buffer = page_address(rqstp->rq_respages[slot]);
+	}
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode COMMIT arguments: file handle, 64-bit offset and 32-bit count.
+ *
+ * Returns 0 if the file handle fails to decode, otherwise the result
+ * of xdr_argsize_check().
+ */
+int
+nfs3svc_decode_commitargs(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_commitargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	p = xdr_decode_hyper(p, &args->offset);
+	args->count = ntohl(*p++);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * XDR encode functions
+ */
+/*
+ * Encoder for procedures whose result is void.  It emits nothing and
+ * only runs the reply size check; svc_process needs an encoding
+ * function to be present even for void results.
+ */
+int
+nfs3svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	int ok = xdr_ressize_check(rqstp, p);
+
+	return ok;
+}
+
+/*
+ * GETATTR: encode the attributes of resp->fh when the call succeeded,
+ * nothing otherwise.  Returns the result of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_attrstat *resp)
+{
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	p = encode_fattr3(rqstp, p, &resp->fh);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * SETATTR, REMOVE, RMDIR: the reply body is the wcc_data of resp->fh,
+ * encoded regardless of the call status.  Returns the result of
+ * xdr_ressize_check().
+ */
+int
+nfs3svc_encode_wccstat(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_attrstat *resp)
+{
+	u32 *end = encode_wcc_data(rqstp, p, &resp->fh);
+
+	return xdr_ressize_check(rqstp, end);
+}
+
+/*
+ * LOOKUP: on success encode the file handle in resp->fh and its
+ * post-op attributes; in every case finish with the post-op attributes
+ * of resp->dirfh.  Returns the result of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_diropres *resp)
+{
+	int success = (resp->status == 0);
+
+	if (success) {
+		p = encode_fh(p, &resp->fh);
+		p = encode_post_op_attr(rqstp, p, &resp->fh);
+	}
+
+	p = encode_post_op_attr(rqstp, p, &resp->dirfh);
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * ACCESS: encode the post-op attributes of resp->fh, followed on
+ * success by the access word in resp->access.  Returns the result of
+ * xdr_ressize_check().
+ */
+int
+nfs3svc_encode_accessres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_accessres *resp)
+{
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	*p++ = htonl(resp->access);
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * READLINK: encode the post-op attributes of resp->fh and, on success,
+ * the length word of the link text.  The text itself is accounted for
+ * by setting rq_res.page_len to resp->len; when that length is not a
+ * multiple of four a zeroed word right after the length is used as the
+ * tail to supply the padding bytes.
+ *
+ * Returns 1 on success, otherwise the result of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readlinkres *resp)
+{
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	*p++ = htonl(resp->len);
+	xdr_ressize_check(rqstp, p);
+	rqstp->rq_res.page_len = resp->len;
+
+	if ((resp->len & 3) != 0) {
+		/* pad the data up to the next four byte boundary */
+		*p = 0;
+		rqstp->rq_restailpage = 0;
+		rqstp->rq_res.tail[0].iov_base = p;
+		rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
+	}
+
+	return 1;
+}
+
+/*
+ * READ: encode the post-op attributes of resp->fh and, on success, the
+ * byte count, the eof flag and the opaque length of the data.  The
+ * data itself is accounted for by setting rq_res.page_len to
+ * resp->count; when the count is not a multiple of four a zeroed word
+ * after the header is used as the tail to supply the padding bytes.
+ *
+ * Returns 1 on success, otherwise the result of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readres *resp)
+{
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	*p++ = htonl(resp->count);
+	*p++ = htonl(resp->eof);
+	*p++ = htonl(resp->count);	/* xdr opaque count */
+	xdr_ressize_check(rqstp, p);
+
+	/* make rq_res cover the data as well */
+	rqstp->rq_res.page_len = resp->count;
+
+	if ((resp->count & 3) != 0) {
+		/* pad the data up to the next four byte boundary */
+		*p = 0;
+		rqstp->rq_restailpage = 0;
+		rqstp->rq_res.tail[0].iov_base = p;
+		rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
+	}
+
+	return 1;
+}
+
+/*
+ * WRITE: encode the wcc_data of resp->fh and, on success, the byte
+ * count, the committed level and a two-word verifier taken from
+ * nfssvc_boot.  Returns the result of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_writeres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_writeres *resp)
+{
+	p = encode_wcc_data(rqstp, p, &resp->fh);
+
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	*p++ = htonl(resp->count);
+	*p++ = htonl(resp->committed);
+	/* verifier */
+	*p++ = htonl(nfssvc_boot.tv_sec);
+	*p++ = htonl(nfssvc_boot.tv_usec);
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * CREATE, MKDIR, SYMLINK, MKNOD: on success encode a "present" marker,
+ * the new file handle in resp->fh and its post-op attributes; in every
+ * case finish with the wcc_data of resp->dirfh.  Returns the result of
+ * xdr_ressize_check().
+ */
+int
+nfs3svc_encode_createres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_diropres *resp)
+{
+	int success = (resp->status == 0);
+
+	if (success) {
+		*p++ = xdr_one;
+		p = encode_fh(p, &resp->fh);
+		p = encode_post_op_attr(rqstp, p, &resp->fh);
+	}
+
+	p = encode_wcc_data(rqstp, p, &resp->dirfh);
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * RENAME: the reply body is the wcc_data of resp->ffh followed by the
+ * wcc_data of resp->tfh, whatever the call status.  Returns the result
+ * of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_renameres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_renameres *resp)
+{
+	u32 *cur;
+
+	cur = encode_wcc_data(rqstp, p, &resp->ffh);
+	cur = encode_wcc_data(rqstp, cur, &resp->tfh);
+
+	return xdr_ressize_check(rqstp, cur);
+}
+
+/*
+ * LINK: the reply body is the post-op attributes of resp->fh followed
+ * by the wcc_data of resp->tfh, whatever the call status.  Returns the
+ * result of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_linkres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_linkres *resp)
+{
+	u32 *cur;
+
+	cur = encode_post_op_attr(rqstp, p, &resp->fh);
+	cur = encode_wcc_data(rqstp, cur, &resp->tfh);
+
+	return xdr_ressize_check(rqstp, cur);
+}
+
+/*
+ * READDIR: encode the post-op attributes of resp->fh and, on success,
+ * the 8-byte verifier.  The entries were already written into the
+ * reply pages; they are accounted for through rq_res.page_len
+ * (resp->count words).  A two-word trailer -- "no more entries" plus
+ * the eof flag -- is placed right after the verifier and used as the
+ * tail, provided the head still has room for it.
+ *
+ * Returns 1 on success, otherwise the result of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_readdirres *resp)
+{
+	p = encode_post_op_attr(rqstp, p, &resp->fh);
+
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	/* stupid readdir cookie */
+	memcpy(p, resp->verf, 8);
+	p += 2;
+	xdr_ressize_check(rqstp, p);
+
+	/* no room for the trailer */
+	if (rqstp->rq_res.head[0].iov_len + (2<<2) > PAGE_SIZE)
+		return 1;
+
+	rqstp->rq_res.page_len = (resp->count) << 2;
+
+	/* the tail goes at the end of the head page - page 0 */
+	rqstp->rq_restailpage = 0;
+	rqstp->rq_res.tail[0].iov_base = p;
+	rqstp->rq_res.tail[0].iov_len = 2<<2;
+	p[0] = 0;					/* no more entries */
+	p[1] = htonl(resp->common.err == nfserr_eof);
+
+	return 1;
+}
+
+/*
+ * Encode the part of a directory entry shared by READDIR and
+ * READDIRPLUS: the "entry present" marker, the file id, the name and a
+ * placeholder cookie.  cd->offset is left pointing at the placeholder
+ * so it can be overwritten later.
+ *
+ * Returns the position following the encoded data.
+ */
+static inline u32 *
+encode_entry_baggage(struct nfsd3_readdirres *cd, u32 *p, const char *name,
+	     int namlen, ino_t ino)
+{
+	u32 *cookie;
+
+	*p++ = xdr_one;					/* entry present */
+	p = xdr_encode_hyper(p, ino);			/* file id */
+	cookie = xdr_encode_array(p, name, namlen);	/* length and name */
+
+	/* remember where the cookie lives */
+	cd->offset = cookie;
+
+	return xdr_encode_hyper(cookie, NFS_OFFSET_MAX);
+}
+
+/*
+ * Encode the READDIRPLUS-only part of an entry: post-op attributes, a
+ * "handle follows" marker and the file handle itself.  The handle is
+ * released with fh_put() once encoded.
+ *
+ * Returns the position following the encoded data.
+ */
+static inline u32 *
+encode_entryplus_baggage(struct nfsd3_readdirres *cd, u32 *p,
+		struct svc_fh *fhp)
+{
+	u32 *out;
+
+	out = encode_post_op_attr(cd->rqstp, p, fhp);
+	*out++ = xdr_one;		/* yes, a file handle follows */
+	out = encode_fh(out, fhp);
+
+	fh_put(fhp);
+
+	return out;
+}
+
+/*
+ * Build in @fhp the file handle of the directory entry @name found in
+ * the directory being read (cd->fh).
+ *
+ * "." maps to the directory itself and ".." to its parent; any other
+ * name is looked up with lookup_one_len().  No handle is produced for
+ * ".." of a dentry that is its own parent, for mountpoints, for
+ * negative dentries or when the lookup fails.
+ *
+ * Returns 0 when @fhp has been composed (the caller then owns it and
+ * must fh_put() it), 1 when no handle is available.
+ */
+static int
+compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
+		const char *name, int namlen)
+{
+	struct svc_export	*exp;
+	struct dentry		*dparent, *dchild;
+	int rv = 1;
+
+	dparent = cd->fh.fh_dentry;
+	exp  = cd->fh.fh_export;
+
+	fh_init(fhp, NFS3_FHSIZE);
+	if (isdotent(name, namlen)) {
+		if (namlen == 2) {
+			dchild = dget_parent(dparent);
+			if (dchild == dparent) {
+				/* filesystem root - cannot return filehandle for ".." */
+				dput(dchild);
+				return 1;
+			}
+		} else
+			dchild = dget(dparent);
+	} else
+		dchild = lookup_one_len(name, dparent, namlen);
+	if (IS_ERR(dchild))
+		return 1;
+	if (d_mountpoint(dchild))
+		goto out;
+	/*
+	 * Test for a negative dentry before composing the handle: on
+	 * failure the caller never calls fh_put(), so a handle composed
+	 * and then rejected would not be released (presumably leaking
+	 * whatever fh_compose() pins -- confirm against fh_compose()).
+	 */
+	if (!dchild->d_inode)
+		goto out;
+	if (fh_compose(fhp, exp, dchild, &cd->fh) == 0)
+		rv = 0;
+out:
+	dput(dchild);
+	return rv;
+}
+
+/*
+ * Encode a directory entry.  This one works for both normal readdir
+ * and readdirplus.
+ * The normal readdir reply requires 2 (fileid) + 1 (stringlen)
+ * + string + 2 (cookie) + 1 (next) words, i.e. 6 + strlen.
+ *
+ * The readdirplus baggage is 1+21 words for post_op_attr, plus the
+ * file handle.
+ *
+ * @ccd:    readdir state embedded in a struct nfsd3_readdirres
+ * @name:   entry name, @namlen bytes (truncated to NFS3_MAXNAMLEN)
+ * @offset: cookie of this entry; it is written into the cookie slot of
+ *          the PREVIOUS entry (cd->offset / cd->offset1).  It is a
+ *          loff_t so that the 64-bit value handed in by the two
+ *          wrappers is not narrowed where off_t is 32 bits wide.
+ * @ino:    file id of the entry
+ * @d_type: unused
+ * @plus:   non-zero to append the readdirplus baggage
+ *
+ * Returns 0 on success.  Returns -EINVAL with cd->common.err set to
+ * nfserr_toosmall when the entry does not fit in the remaining space.
+ */
+
+#define NFS3_ENTRY_BAGGAGE	(2 + 1 + 2 + 1)
+#define NFS3_ENTRYPLUS_BAGGAGE	(1 + 21 + 1 + (NFS3_FHSIZE >> 2))
+static int
+encode_entry(struct readdir_cd *ccd, const char *name,
+	     int namlen, loff_t offset, ino_t ino, unsigned int d_type, int plus)
+{
+	struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres,
+		       					common);
+	u32		*p = cd->buffer;
+	caddr_t		curr_page_addr = NULL;
+	int		pn;		/* current page number */
+	int		slen;		/* string (name) length */
+	int		elen;		/* estimated entry length in words */
+	int		num_entry_words = 0;	/* actual number of words */
+
+	/* patch the cookie slot of the previously encoded entry */
+	if (cd->offset) {
+		u64 offset64 = offset;
+
+		if (unlikely(cd->offset1)) {
+			/* we ended up with offset on a page boundary */
+			*cd->offset = htonl(offset64 >> 32);
+			*cd->offset1 = htonl(offset64 & 0xffffffff);
+			cd->offset1 = NULL;
+		} else {
+			xdr_encode_hyper(cd->offset, offset64);
+		}
+	}
+
+	/*
+	dprintk("encode_entry(%.*s @%ld%s)\n",
+		namlen, name, (long) offset, plus? " plus" : "");
+	 */
+
+	/* truncate filename if too long */
+	if (namlen > NFS3_MAXNAMLEN)
+		namlen = NFS3_MAXNAMLEN;
+
+	slen = XDR_QUADLEN(namlen);
+	elen = slen + NFS3_ENTRY_BAGGAGE
+		+ (plus? NFS3_ENTRYPLUS_BAGGAGE : 0);
+
+	if (cd->buflen < elen) {
+		cd->common.err = nfserr_toosmall;
+		return -EINVAL;
+	}
+
+	/* determine which page in rq_respages[] we are currently filling */
+	for (pn=1; pn < cd->rqstp->rq_resused; pn++) {
+		curr_page_addr = page_address(cd->rqstp->rq_respages[pn]);
+
+		if (((caddr_t)cd->buffer >= curr_page_addr) &&
+		    ((caddr_t)cd->buffer <  curr_page_addr + PAGE_SIZE))
+			break;
+	}
+
+	if ((caddr_t)(cd->buffer + elen) < (curr_page_addr + PAGE_SIZE)) {
+		/* encode entry in current page */
+
+		p = encode_entry_baggage(cd, p, name, namlen, ino);
+
+		/* throw in readdirplus baggage */
+		if (plus) {
+			struct svc_fh	fh;
+
+			if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
+				/* no attributes, no file handle */
+				*p++ = 0;
+				*p++ = 0;
+			} else
+				p = encode_entryplus_baggage(cd, p, &fh);
+		}
+		num_entry_words = p - cd->buffer;
+	} else if (cd->rqstp->rq_respages[pn+1] != NULL) {
+		/* temporarily encode entry into next page, then move back to
+		 * current and next page in rq_respages[] */
+		u32 *p1, *tmp;
+		int len1, len2;
+
+		/* grab next page for temporary storage of entry */
+		p1 = tmp = page_address(cd->rqstp->rq_respages[pn+1]);
+
+		p1 = encode_entry_baggage(cd, p1, name, namlen, ino);
+
+		/* throw in readdirplus baggage */
+		if (plus) {
+			struct svc_fh	fh;
+
+			if (compose_entry_fh(cd, &fh, name, namlen) > 0) {
+				/* zero out the filehandle */
+				*p1++ = 0;
+				*p1++ = 0;
+			} else
+				p1 = encode_entryplus_baggage(cd, p1, &fh);
+		}
+
+		/* determine entry word length and lengths to go in pages */
+		num_entry_words = p1 - tmp;
+		len1 = curr_page_addr + PAGE_SIZE - (caddr_t)cd->buffer;
+		if ((num_entry_words << 2) < len1) {
+			/* the actual number of words in the entry is less
+			 * than elen and can still fit in the current page
+			 */
+			memmove(p, tmp, num_entry_words << 2);
+			p += num_entry_words;
+
+			/* update offset */
+			cd->offset = cd->buffer + (cd->offset - tmp);
+		} else {
+			unsigned int offset_r = (cd->offset - tmp) << 2;
+
+			/* update pointer to offset location.
+			 * This is a 64bit quantity, so we need to
+			 * deal with 3 cases:
+			 *  -	entirely in first page
+			 *  -	entirely in second page
+			 *  -	4 bytes in each page
+			 */
+			if (offset_r + 8 <= len1) {
+				cd->offset = p + (cd->offset - tmp);
+			} else if (offset_r >= len1) {
+				cd->offset -= len1 >> 2;
+			} else {
+				/* sitting on the fence */
+				BUG_ON(offset_r != len1 - 4);
+				cd->offset = p + (cd->offset - tmp);
+				cd->offset1 = tmp;
+			}
+
+			len2 = (num_entry_words << 2) - len1;
+
+			/* move from temp page to current and next pages */
+			memmove(p, tmp, len1);
+			memmove(tmp, (caddr_t)tmp+len1, len2);
+
+			p = tmp + (len2 >> 2);
+		}
+	}
+	else {
+		cd->common.err = nfserr_toosmall;
+		return -EINVAL;
+	}
+
+	cd->buflen -= num_entry_words;
+	cd->buffer = p;
+	cd->common.err = nfs_ok;
+	return 0;
+
+}
+
+/*
+ * Per-entry callback for READDIR: encode one entry without the
+ * readdirplus baggage.  Returns whatever encode_entry() returns.
+ */
+int
+nfs3svc_encode_entry(struct readdir_cd *cd, const char *name,
+		     int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+{
+	const int plus = 0;
+
+	return encode_entry(cd, name, namlen, offset, ino, d_type, plus);
+}
+
+/*
+ * Per-entry callback for READDIRPLUS: encode one entry including the
+ * readdirplus baggage.  Returns whatever encode_entry() returns.
+ */
+int
+nfs3svc_encode_entry_plus(struct readdir_cd *cd, const char *name,
+			  int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+{
+	const int plus = 1;
+
+	return encode_entry(cd, name, namlen, offset, ino, d_type, plus);
+}
+
+/*
+ * FSSTAT: no post-op attributes are sent.  On success the block counts
+ * of resp->stats are converted to bytes using f_bsize and encoded
+ * together with the inode counts and resp->invarsec.  The "user
+ * available inodes" field repeats f_ffree.  Returns the result of
+ * xdr_ressize_check().
+ */
+int
+nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_fsstatres *resp)
+{
+	struct kstatfs	*st = &resp->stats;
+	u64		blksize = st->f_bsize;
+
+	*p++ = xdr_zero;	/* no post_op_attr */
+
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	/* total, free and user available bytes */
+	p = xdr_encode_hyper(p, blksize * st->f_blocks);
+	p = xdr_encode_hyper(p, blksize * st->f_bfree);
+	p = xdr_encode_hyper(p, blksize * st->f_bavail);
+
+	/* total, free and user available inodes */
+	p = xdr_encode_hyper(p, st->f_files);
+	p = xdr_encode_hyper(p, st->f_ffree);
+	p = xdr_encode_hyper(p, st->f_ffree);
+
+	*p++ = htonl(resp->invarsec);	/* mean unchanged time */
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * FSINFO: no post-op attributes are sent.  On success encode the
+ * read/write/readdir size hints, the maximum file size, a fixed
+ * two-word value (1, 0) and the properties word, in that order.
+ * Returns the result of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_fsinfores *resp)
+{
+	*p++ = xdr_zero;	/* no post_op_attr */
+
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	/* read sizes */
+	*p++ = htonl(resp->f_rtmax);
+	*p++ = htonl(resp->f_rtpref);
+	*p++ = htonl(resp->f_rtmult);
+
+	/* write sizes */
+	*p++ = htonl(resp->f_wtmax);
+	*p++ = htonl(resp->f_wtpref);
+	*p++ = htonl(resp->f_wtmult);
+
+	*p++ = htonl(resp->f_dtpref);
+	p = xdr_encode_hyper(p, resp->f_maxfilesize);
+
+	/* constant pair (1, 0) -- presumably the time granularity */
+	*p++ = xdr_one;
+	*p++ = xdr_zero;
+
+	*p++ = htonl(resp->f_properties);
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * PATHCONF: no post-op attributes are sent.  On success encode the six
+ * pathconf words of @resp in protocol order.  Returns the result of
+ * xdr_ressize_check().
+ */
+int
+nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_pathconfres *resp)
+{
+	*p++ = xdr_zero;	/* no post_op_attr */
+
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	*p++ = htonl(resp->p_link_max);
+	*p++ = htonl(resp->p_name_max);
+	*p++ = htonl(resp->p_no_trunc);
+	*p++ = htonl(resp->p_chown_restricted);
+	*p++ = htonl(resp->p_case_insensitive);
+	*p++ = htonl(resp->p_case_preserving);
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * COMMIT: encode the wcc_data of resp->fh and, on success, the
+ * two-word write verifier taken from nfssvc_boot.  Returns the result
+ * of xdr_ressize_check().
+ */
+int
+nfs3svc_encode_commitres(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_commitres *resp)
+{
+	p = encode_wcc_data(rqstp, p, &resp->fh);
+
+	if (resp->status != 0)
+		return xdr_ressize_check(rqstp, p);
+
+	/* write verifier */
+	*p++ = htonl(nfssvc_boot.tv_sec);
+	*p++ = htonl(nfssvc_boot.tv_usec);
+
+	return xdr_ressize_check(rqstp, p);
+}
+
+/*
+ * XDR release functions
+ */
+/*
+ * Release function for replies that hold a single file handle: drops
+ * resp->fh with fh_put().  Always returns 1.
+ */
+int
+nfs3svc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_attrstat *resp)
+{
+	struct svc_fh *fhp = &resp->fh;
+
+	fh_put(fhp);
+
+	return 1;
+}
+
+/*
+ * Release function for replies that hold two file handles: drops
+ * resp->fh1 and then resp->fh2 with fh_put().  Always returns 1.
+ */
+int
+nfs3svc_release_fhandle2(struct svc_rqst *rqstp, u32 *p,
+					struct nfsd3_fhandle_pair *resp)
+{
+	struct svc_fh *first = &resp->fh1;
+	struct svc_fh *second = &resp->fh2;
+
+	fh_put(first);
+	fh_put(second);
+
+	return 1;
+}
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
new file mode 100644
index 000000000000..11ebf6c4aa54
--- /dev/null
+++ b/fs/nfsd/nfs4acl.c
@@ -0,0 +1,954 @@
+/*
+ * fs/nfs4acl/acl.c
+ *
+ * Common NFSv4 ACL handling code.
+ *
+ * Copyright (c) 2002, 2003 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Marius Aamodt Eriksen <marius@umich.edu>
+ * Jeff Sedlak <jsedlak@umich.edu>
+ * J. Bruce Fields <bfields@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/nfs_fs.h>
+#include <linux/posix_acl.h>
+#include <linux/nfs4.h>
+#include <linux/nfs4_acl.h>
+
+
+/* mode bit translations: */
+#define NFS4_READ_MODE (NFS4_ACE_READ_DATA)
+#define NFS4_WRITE_MODE (NFS4_ACE_WRITE_DATA | NFS4_ACE_APPEND_DATA)
+#define NFS4_EXECUTE_MODE NFS4_ACE_EXECUTE
+/* bits set in every mask produced by mask_from_posix() */
+#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL | NFS4_ACE_SYNCHRONIZE)
+/* bits added by mask_from_posix() when NFS4_ACL_OWNER is set in flags */
+#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)
+
+/* We don't support these bits; insist they be neither allowed nor denied */
+#define NFS4_MASK_UNSUPP (NFS4_ACE_DELETE | NFS4_ACE_WRITE_OWNER \
+ | NFS4_ACE_READ_NAMED_ATTRS | NFS4_ACE_WRITE_NAMED_ATTRS)
+
+/* flags used to simulate posix default ACLs */
+#define NFS4_INHERITANCE_FLAGS (NFS4_ACE_FILE_INHERIT_ACE \
+ | NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE)
+
+/* true if two access masks agree on every bit of NFS4_ACE_MASK_ALL;
+ * bits outside NFS4_ACE_MASK_ALL are not compared */
+#define MASK_EQUAL(mask1, mask2) \
+ ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
+
+/*
+ * Translate POSIX permission bits into an NFSv4 access mask.
+ *
+ * Every mask contains NFS4_ANYONE_MODE.  NFS4_ACL_OWNER in @flags adds
+ * NFS4_OWNER_MODE, and write permission additionally grants
+ * NFS4_ACE_DELETE_CHILD when NFS4_ACL_DIR is set in @flags.
+ */
+static u32
+mask_from_posix(unsigned short perm, unsigned int flags)
+{
+	int mask = NFS4_ANYONE_MODE;
+
+	if (flags & NFS4_ACL_OWNER)
+		mask |= NFS4_OWNER_MODE;
+
+	if (perm & ACL_READ)
+		mask |= NFS4_READ_MODE;
+
+	if (perm & ACL_WRITE) {
+		mask |= NFS4_WRITE_MODE;
+		if (flags & NFS4_ACL_DIR)
+			mask |= NFS4_ACE_DELETE_CHILD;
+	}
+
+	if (perm & ACL_EXECUTE)
+		mask |= NFS4_EXECUTE_MODE;
+
+	return mask;
+}
+
+/*
+ * Compute the mask of the deny ACE that complements @allow_mask: every
+ * bit that is neither allowed nor part of NFS4_MASK_UNSUPP.  When
+ * NFS4_ACL_DIR is not set in @flags, NFS4_ACE_DELETE_CHILD is left out
+ * as well.
+ */
+static u32
+deny_mask(u32 allow_mask, unsigned int flags)
+{
+	u32 denied = ~(allow_mask | NFS4_MASK_UNSUPP);
+
+	if (flags & NFS4_ACL_DIR)
+		return denied;
+
+	return denied & ~NFS4_ACE_DELETE_CHILD;
+}
+
+/* XXX: modify functions to return NFS errors; they're only ever
+ * used by nfs code, after all.... */
+
+/*
+ * Translate an NFSv4 access mask into POSIX permission bits.
+ *
+ * A permission is only set when all of its NFSv4 bits are present in
+ * @perm.  The result is then translated back with mask_from_posix()
+ * and must compare equal to @perm; when NFS4_ACL_DIR is not set in
+ * @flags, NFS4_ACE_DELETE_CHILD is disregarded in that comparison.
+ *
+ * Stores the permission bits in *@mode.  Returns 0 when the mask maps
+ * exactly onto a POSIX mode, -EINVAL otherwise (*@mode is written in
+ * both cases).
+ */
+static int
+mode_from_nfs4(u32 perm, unsigned short *mode, unsigned int flags)
+{
+	u32 ignore = (flags & NFS4_ACL_DIR) ? 0 : NFS4_ACE_DELETE_CHILD;
+	unsigned short bits = 0;
+
+	/* force the ignored bits on so they cannot cause a mismatch */
+	perm |= ignore;
+
+	if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE)
+		bits |= ACL_READ;
+	if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE)
+		bits |= ACL_WRITE;
+	if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE)
+		bits |= ACL_EXECUTE;
+	*mode = bits;
+
+	if (MASK_EQUAL(perm, ignore|mask_from_posix(bits, flags)))
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * List node that refers to an nfs4_ace without owning it.
+ * group_obj_and_groups_from_v4() queues the allow ACEs of the groups
+ * on such a list so that they can be matched against the deny ACEs
+ * that follow later in the ACL.
+ */
+struct ace_container {
+ struct nfs4_ace *ace; /* the ACE referred to */
+ struct list_head ace_l; /* linkage in the temporary list */
+};
+
+/*
+ * Forward declarations: static helpers that are used before they are
+ * defined, followed by the non-static nfs4_acl_add_ace() and
+ * nfs4_acl_split().
+ */
+static short ace2type(struct nfs4_ace *);
+static int _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *, unsigned int);
+static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl *, unsigned int);
+int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
+int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
+
+/*
+ * Build a new NFSv4 ACL from a POSIX access ACL (@pacl) and/or a POSIX
+ * default ACL (@dpacl).  Either may be NULL.  The ACEs derived from
+ * @dpacl are generated with NFS4_ACL_TYPE_DEFAULT set, those derived
+ * from @pacl with it cleared.
+ *
+ * Returns the new ACL, or an ERR_PTR():
+ *   -EINVAL  a supplied ACL fails posix_acl_valid() or is empty
+ *   -ENOMEM  the NFSv4 ACL could not be allocated
+ *   any negative error reported by the conversion itself
+ */
+struct nfs4_acl *
+nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
+			unsigned int flags)
+{
+	struct nfs4_acl *acl;
+	int error;
+
+	if (pacl != NULL &&
+	    (posix_acl_valid(pacl) < 0 || pacl->a_count == 0))
+		return ERR_PTR(-EINVAL);
+	if (dpacl != NULL &&
+	    (posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0))
+		return ERR_PTR(-EINVAL);
+
+	acl = nfs4_acl_new();
+	if (acl == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	if (pacl != NULL) {
+		error = _posix_to_nfsv4_one(pacl, acl,
+					flags & ~NFS4_ACL_TYPE_DEFAULT);
+		if (error < 0)
+			goto fail;
+	}
+
+	if (dpacl != NULL) {
+		error = _posix_to_nfsv4_one(dpacl, acl,
+					flags | NFS4_ACL_TYPE_DEFAULT);
+		if (error < 0)
+			goto fail;
+	}
+
+	return acl;
+
+fail:
+	nfs4_acl_free(acl);
+	return ERR_PTR(error);
+}
+
+/*
+ * Append an ALLOW ACE for @mask followed by the complementary DENY ACE
+ * (deny_mask(@mask, @flags)) for the same principal.
+ *
+ * Returns the negative error of the first nfs4_acl_add_ace() call that
+ * fails, otherwise the result of the second call.
+ */
+static int
+nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, int whotype,
+		uid_t owner, unsigned int flags)
+{
+	int status;
+
+	status = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
+				eflag, mask, whotype, owner);
+	if (status < 0)
+		return status;
+
+	return nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+				eflag, deny_mask(mask, flags), whotype, owner);
+}
+
+/*
+ * Append to @acl the NFSv4 ACEs that represent the POSIX ACL @pacl.
+ * We assume the acl has been verified with posix_acl_valid.
+ *
+ * ACEs are emitted in this order:
+ *  - owner: allow/deny pair
+ *  - each named user: a deny ACE carrying the mask, then an allow/deny pair
+ *  - owning group: a deny ACE carrying the mask (only when the ACL has
+ *    more than three entries), then an allow ACE
+ *  - each named group: a deny ACE carrying the mask, then an allow ACE
+ *  - owning group and each named group: the deny ACE completing the pair
+ *  - everyone: allow/deny pair
+ *
+ * When NFS4_ACL_TYPE_DEFAULT is set in @flags every ACE carries
+ * NFS4_INHERITANCE_FLAGS.
+ *
+ * Returns a negative error as soon as an ACE cannot be added, otherwise
+ * the result of adding the final pair.
+ */
+static int
+_posix_to_nfsv4_one(struct posix_acl *pacl, struct nfs4_acl *acl,
+ unsigned int flags)
+{
+ struct posix_acl_entry *pa, *pe, *group_owner_entry;
+ int error = -EINVAL;
+ u32 mask, mask_mask;
+ int eflag = ((flags & NFS4_ACL_TYPE_DEFAULT) ?
+ NFS4_INHERITANCE_FLAGS : 0);
+
+ BUG_ON(pacl->a_count < 3);
+ pe = pacl->a_entries + pacl->a_count;
+ pa = pe - 2; /* if mask entry exists, it's second from the last. */
+ /* mask_mask holds the bits the POSIX mask entry withholds; it is the
+ * access mask of every "mask" deny ACE emitted below */
+ if (pa->e_tag == ACL_MASK)
+ mask_mask = deny_mask(mask_from_posix(pa->e_perm, flags), flags);
+ else
+ mask_mask = 0;
+
+ /* owner */
+ pa = pacl->a_entries;
+ BUG_ON(pa->e_tag != ACL_USER_OBJ);
+ mask = mask_from_posix(pa->e_perm, flags | NFS4_ACL_OWNER);
+ error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_OWNER, 0, flags);
+ if (error < 0)
+ goto out;
+ pa++;
+
+ /* named users: mask deny ACE followed by an allow/deny pair */
+ while (pa->e_tag == ACL_USER) {
+ mask = mask_from_posix(pa->e_perm, flags);
+ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+ eflag, mask_mask, NFS4_ACL_WHO_NAMED, pa->e_id);
+ if (error < 0)
+ goto out;
+
+
+ error = nfs4_acl_add_pair(acl, eflag, mask,
+ NFS4_ACL_WHO_NAMED, pa->e_id, flags);
+ if (error < 0)
+ goto out;
+ pa++;
+ }
+
+ /* In the case of groups, we apply allow ACEs first, then deny ACEs,
+ * since a user can be in more than one group. */
+
+ /* allow ACEs */
+
+ if (pacl->a_count > 3) {
+ BUG_ON(pa->e_tag != ACL_GROUP_OBJ);
+ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask,
+ NFS4_ACL_WHO_GROUP, 0);
+ if (error < 0)
+ goto out;
+ }
+ /* remember where the group entries start for the deny pass below */
+ group_owner_entry = pa;
+ mask = mask_from_posix(pa->e_perm, flags);
+ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
+ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask,
+ NFS4_ACL_WHO_GROUP, 0);
+ if (error < 0)
+ goto out;
+ pa++;
+
+ while (pa->e_tag == ACL_GROUP) {
+ mask = mask_from_posix(pa->e_perm, flags);
+ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask,
+ NFS4_ACL_WHO_NAMED, pa->e_id);
+ if (error < 0)
+ goto out;
+
+ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
+ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask,
+ NFS4_ACL_WHO_NAMED, pa->e_id);
+ if (error < 0)
+ goto out;
+ pa++;
+ }
+
+ /* deny ACEs */
+
+ pa = group_owner_entry;
+ mask = mask_from_posix(pa->e_perm, flags);
+ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+ NFS4_ACE_IDENTIFIER_GROUP | eflag,
+ deny_mask(mask, flags), NFS4_ACL_WHO_GROUP, 0);
+ if (error < 0)
+ goto out;
+ pa++;
+ while (pa->e_tag == ACL_GROUP) {
+ mask = mask_from_posix(pa->e_perm, flags);
+ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
+ NFS4_ACE_IDENTIFIER_GROUP | eflag,
+ deny_mask(mask, flags), NFS4_ACL_WHO_NAMED, pa->e_id);
+ if (error < 0)
+ goto out;
+ pa++;
+ }
+
+ /* the mask entry itself produces no ACE of its own: skip it */
+ if (pa->e_tag == ACL_MASK)
+ pa++;
+ /* everyone */
+ BUG_ON(pa->e_tag != ACL_OTHER);
+ mask = mask_from_posix(pa->e_perm, flags);
+ error = nfs4_acl_add_pair(acl, eflag, mask, NFS4_ACL_WHO_EVERYONE, 0, flags);
+
+out:
+ return error;
+}
+
+/*
+ * Sort the entries pacl->a_entries[start..end] (both inclusive) in
+ * place by ascending e_id.
+ *
+ * A bubble sort is used: it is easy to do in place and ACLs are not
+ * expected to be long enough to justify anything more.
+ */
+static void
+sort_pacl_range(struct posix_acl *pacl, int start, int end) {
+	struct posix_acl_entry *ent = pacl->a_entries;
+	int swapped, i;
+
+	do {
+		swapped = 0;
+		for (i = start; i < end; i++) {
+			struct posix_acl_entry tmp;
+
+			if (ent[i].e_id <= ent[i+1].e_id)
+				continue;
+
+			/* neighbours out of order: exchange them */
+			tmp = ent[i];
+			ent[i] = ent[i+1];
+			ent[i+1] = tmp;
+			swapped = 1;
+		}
+	} while (swapped);
+}
+
+/*
+ * Put the named user entries and the named group entries of @pacl in
+ * ascending id order; posix_acl_valid requires that users and groups
+ * be in order by uid/gid.
+ *
+ * The expected layout is: ACL_USER_OBJ, ACL_USER..., ACL_GROUP_OBJ,
+ * ACL_GROUP..., followed by the remaining entries.  ACLs of four
+ * entries or fewer contain no named users or groups and are left
+ * untouched.
+ */
+static void
+sort_pacl(struct posix_acl *pacl)
+{
+	int i, j;
+
+	if (pacl->a_count <= 4)
+		return; /* no users or groups */
+
+	/* named users start right after ACL_USER_OBJ */
+	i = 1;
+	while (pacl->a_entries[i].e_tag == ACL_USER)
+		i++;
+	sort_pacl_range(pacl, 1, i-1);
+
+	BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ);
+	/*
+	 * Step over ACL_GROUP_OBJ first.  Starting the scan on the
+	 * ACL_GROUP_OBJ entry itself would end it immediately and leave
+	 * the named groups unsorted.
+	 */
+	j = ++i;
+	while (pacl->a_entries[j].e_tag == ACL_GROUP)
+		j++;
+	sort_pacl_range(pacl, i, j-1);
+	return;
+}
+
+/*
+ * Fill in the next free entry of @pacl from the NFSv4 ACE @ace.
+ *
+ * *@pace is the cursor into pacl->a_entries; it is advanced before the
+ * entry is filled in, so it moves on even when the conversion of the
+ * access mask fails.  ACL_USER_OBJ entries are converted with
+ * NFS4_ACL_OWNER added to @flags.  Only ACL_USER and ACL_GROUP entries
+ * receive an id (ace->who); all others get ACL_UNDEFINED_ID.
+ *
+ * Returns 0 on success, -EINVAL if the ACL is already full or the
+ * access mask does not map onto a POSIX mode.
+ */
+static int
+write_pace(struct nfs4_ace *ace, struct posix_acl *pacl,
+		struct posix_acl_entry **pace, short tag, unsigned int flags)
+{
+	struct posix_acl_entry *entry = *pace;
+	struct posix_acl_entry *end = pacl->a_entries + pacl->a_count;
+
+	if (entry == end)
+		return -EINVAL; /* fell off the end */
+	*pace = entry + 1;
+
+	entry->e_tag = tag;
+	if (tag == ACL_USER_OBJ)
+		flags |= NFS4_ACL_OWNER;
+	if (mode_from_nfs4(ace->access_mask, &entry->e_perm, flags))
+		return -EINVAL;
+
+	if (tag == ACL_USER || tag == ACL_GROUP)
+		entry->e_id = ace->who;
+	else
+		entry->e_id = ACL_UNDEFINED_ID;
+
+	return 0;
+}
+
+/*
+ * Advance the list cursor *@p by one element and return the ACE it now
+ * refers to, or NULL when the cursor has come back to @head, i.e. the
+ * list is exhausted.  The cursor is advanced in both cases.
+ */
+static struct nfs4_ace *
+get_next_v4_ace(struct list_head **p, struct list_head *head)
+{
+	struct list_head *next = (*p)->next;
+
+	*p = next;
+	if (next == head)
+		return NULL;
+
+	return list_entry(next, struct nfs4_ace, l_ace);
+}
+
+/*
+ * Convert the NFSv4 ACL @acl into a POSIX access ACL (*@pacl) and a
+ * POSIX default ACL (*@dpacl).
+ *
+ * @acl is first split with nfs4_acl_split() into @acl itself and a
+ * temporary ACL that is converted into the default ACL.  Either output
+ * pointer may be NULL, in which case that ACL is not produced.
+ * Outputs that are supplied are set to NULL on entry and stay NULL
+ * when the corresponding part is empty or on failure.
+ *
+ * Returns 0 on success, or a negative error:
+ *   -ENOMEM   the temporary ACL could not be allocated
+ *   -ENODATA  nothing could be converted into any requested output
+ *   any error reported by nfs4_acl_split() or by the conversion
+ */
+int
+nfs4_acl_nfsv4_to_posix(struct nfs4_acl *acl, struct posix_acl **pacl,
+		struct posix_acl **dpacl, unsigned int flags)
+{
+	struct nfs4_acl *dacl;
+	int error = -ENOMEM;
+
+	/* both outputs are optional: only touch the ones supplied */
+	if (pacl != NULL)
+		*pacl = NULL;
+	if (dpacl != NULL)
+		*dpacl = NULL;
+
+	dacl = nfs4_acl_new();
+	if (dacl == NULL)
+		goto out;
+
+	error = nfs4_acl_split(acl, dacl);
+	if (error < 0)
+		goto out_acl;
+
+	if (pacl != NULL) {
+		if (acl->naces == 0) {
+			/* may still be cleared if a default ACL is found */
+			error = -ENODATA;
+			goto try_dpacl;
+		}
+
+		*pacl = _nfsv4_to_posix_one(acl, flags);
+		if (IS_ERR(*pacl)) {
+			error = PTR_ERR(*pacl);
+			*pacl = NULL;
+			goto out_acl;
+		}
+	}
+
+try_dpacl:
+	if (dpacl != NULL) {
+		if (dacl->naces == 0) {
+			if (pacl == NULL || *pacl == NULL)
+				error = -ENODATA;
+			goto out_acl;
+		}
+
+		error = 0;
+		*dpacl = _nfsv4_to_posix_one(dacl, flags);
+		if (IS_ERR(*dpacl)) {
+			error = PTR_ERR(*dpacl);
+			*dpacl = NULL;
+			goto out_acl;
+		}
+	}
+
+out_acl:
+	/* on failure do not hand back a half-converted result */
+	if (error && pacl) {
+		posix_acl_release(*pacl);
+		*pacl = NULL;
+	}
+	nfs4_acl_free(dacl);
+out:
+	return error;
+}
+
+/*
+ * Return 1 if the two ACEs name the same principal, 0 otherwise.  The
+ * who field is only compared for NFS4_ACL_WHO_NAMED entries; for every
+ * other whotype an equal whotype is sufficient.
+ */
+static int
+same_who(struct nfs4_ace *a, struct nfs4_ace *b)
+{
+	if (a->whotype != b->whotype)
+		return 0;
+	if (a->whotype != NFS4_ACL_WHO_NAMED)
+		return 1;
+
+	return a->who == b->who;
+}
+
+/*
+ * Return 1 if @deny is the deny ACE that complements the allow ACE
+ * @allow, 0 otherwise.  That requires:
+ *  - deny's access mask to equal deny_mask() of allow's access mask
+ *    (NFS4_ACE_DELETE_CHILD is disregarded when NFS4_ACL_DIR is not
+ *    set in @flags),
+ *  - the ACE types to be ALLOWED and DENIED respectively,
+ *  - identical flags and the same principal.
+ */
+static int
+complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny,
+		unsigned int flags)
+{
+	int ignore = (flags & NFS4_ACL_DIR) ? 0 : NFS4_ACE_DELETE_CHILD;
+
+	if (!MASK_EQUAL(ignore|deny_mask(allow->access_mask, flags),
+			ignore|deny->access_mask))
+		return 0;
+	if (allow->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
+		return 0;
+	if (deny->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
+		return 0;
+	if (allow->flag != deny->flag)
+		return 0;
+
+	return same_who(allow, deny) != 0;
+}
+
+/*
+ * Consume the owner's allow/deny pair from the NFSv4 ACL and write the
+ * ACL_USER_OBJ entry of @pacl.
+ *
+ * *@p is the cursor into the ACE list; on success it is left on the
+ * deny ACE of the pair.
+ *
+ * Returns 0 on success.  Returns -EINVAL if the list ends early, the
+ * first ACE is not of type ACL_USER_OBJ or the two ACEs are not
+ * complementary; an error from write_pace() is passed through.
+ */
+static inline int
+user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	struct list_head *head = &n4acl->ace_head;
+	struct nfs4_ace *allow, *deny;
+	int status;
+
+	allow = get_next_v4_ace(p, head);
+	if (allow == NULL)
+		return -EINVAL;
+	if (ace2type(allow) != ACL_USER_OBJ)
+		return -EINVAL;
+
+	status = write_pace(allow, pacl, pace, ACL_USER_OBJ, flags);
+	if (status < 0)
+		return status;
+
+	deny = get_next_v4_ace(p, head);
+	if (deny == NULL)
+		return -EINVAL;
+	if (!complementary_ace_pair(allow, deny, flags))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Consume the ACEs of the named users from the NFSv4 ACL and write one
+ * ACL_USER entry of @pacl per user.
+ *
+ * Each user is described by three consecutive ACEs: a deny ACE that
+ * carries the mask, an allow ACE and the deny ACE complementing it.
+ * All mask ACEs seen so far must carry the same access mask; the most
+ * recent one is remembered in *@mask_ace.
+ *
+ * *@p is the cursor into the ACE list; on success it is left on the
+ * first ACE that is not of type ACL_USER.
+ *
+ * Returns 0 on success.  Returns -EINVAL if the list ends early or an
+ * ACE does not fit the pattern above; an error from write_pace() is
+ * passed through.
+ */
+static inline int
+users_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct nfs4_ace **mask_ace,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	struct list_head *head = &n4acl->ace_head;
+	struct nfs4_ace *cur, *allow, *deny;
+	int status;
+
+	cur = get_next_v4_ace(p, head);
+	if (cur == NULL)
+		return -EINVAL;
+
+	while (ace2type(cur) == ACL_USER) {
+		/* first the deny ACE carrying the mask */
+		if (cur->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
+			return -EINVAL;
+		if (*mask_ace &&
+		    !MASK_EQUAL(cur->access_mask, (*mask_ace)->access_mask))
+			return -EINVAL;
+		*mask_ace = cur;
+
+		/* then the allow ACE, which yields the POSIX entry */
+		allow = get_next_v4_ace(p, head);
+		if (allow == NULL)
+			return -EINVAL;
+		if (allow->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
+			return -EINVAL;
+		status = write_pace(allow, pacl, pace, ACL_USER, flags);
+		if (status < 0)
+			return status;
+
+		/* and finally the complementary deny ACE */
+		deny = get_next_v4_ace(p, head);
+		if (deny == NULL)
+			return -EINVAL;
+		if (!complementary_ace_pair(allow, deny, flags))
+			return -EINVAL;
+		if ((*mask_ace)->flag != deny->flag ||
+		    !same_who(*mask_ace, deny))
+			return -EINVAL;
+
+		cur = get_next_v4_ace(p, head);
+		if (cur == NULL)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Consume the ACEs of the owning group and of the named groups from
+ * the NFSv4 ACL and write the ACL_GROUP_OBJ and ACL_GROUP entries of
+ * @pacl.
+ *
+ * The expected sequence, starting at the ACE *@p refers to, is:
+ *  - owning group: a deny ACE carrying the mask (only when @pacl has
+ *    other than three entries), then an allow ACE
+ *  - each named group: a deny ACE carrying the mask, then an allow ACE
+ *  - the deny ACE complementing the owning group's allow ACE
+ *  - one complementary deny ACE per named group, in the same order
+ *  - an ACE of type ACL_OTHER, on which the cursor is left
+ *
+ * The allow ACEs are queued on a temporary list so that they can be
+ * paired with the deny ACEs that follow later.  The most recent mask
+ * ACE is remembered in *@mask_ace.
+ *
+ * Returns 0 on success, -ENOMEM if a list node cannot be allocated and
+ * -EINVAL if the list ends early or an ACE does not fit the pattern;
+ * an error from write_pace() is passed through.
+ */
+static inline int
+group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct nfs4_ace **mask_ace,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	int error = -EINVAL;
+	struct nfs4_ace *ace, *ace2;
+	struct ace_container *ac;
+	struct list_head group_l;
+
+	INIT_LIST_HEAD(&group_l);
+	ace = list_entry(*p, struct nfs4_ace, l_ace);
+
+	/* group owner (mask and allow aces) */
+
+	if (pacl->a_count != 3) {
+		/* then the group owner should be preceded by mask */
+		if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
+			goto out;
+		if (*mask_ace &&
+			!MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
+			goto out;
+		*mask_ace = ace;
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+
+		if ((*mask_ace)->flag != ace->flag || !same_who(*mask_ace, ace))
+			goto out;
+	}
+
+	if (ace2type(ace) != ACL_GROUP_OBJ)
+		goto out;
+
+	ac = kmalloc(sizeof(*ac), GFP_KERNEL);
+	error = -ENOMEM;
+	if (ac == NULL)
+		goto out;
+	ac->ace = ace;
+	list_add_tail(&ac->ace_l, &group_l);
+
+	error = -EINVAL;
+	if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
+		goto out;
+
+	error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, flags);
+	if (error < 0)
+		goto out;
+
+	error = -EINVAL;
+	ace = get_next_v4_ace(p, &n4acl->ace_head);
+	if (ace == NULL)
+		goto out;
+
+	/* groups (mask and allow aces) */
+
+	while (ace2type(ace) == ACL_GROUP) {
+		if (*mask_ace == NULL)
+			goto out;
+
+		if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE ||
+			!MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
+			goto out;
+		*mask_ace = ace;
+
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+		ac = kmalloc(sizeof(*ac), GFP_KERNEL);
+		error = -ENOMEM;
+		if (ac == NULL)
+			goto out;
+		error = -EINVAL;
+		if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE ||
+				!same_who(ace, *mask_ace)) {
+			/* not on group_l yet, so the cleanup at "out"
+			 * would not find it: free it here */
+			kfree(ac);
+			goto out;
+		}
+
+		ac->ace = ace;
+		list_add_tail(&ac->ace_l, &group_l);
+
+		error = write_pace(ace, pacl, pace, ACL_GROUP, flags);
+		if (error < 0)
+			goto out;
+		error = -EINVAL;
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+	}
+
+	/* group owner (deny ace) */
+
+	if (ace2type(ace) != ACL_GROUP_OBJ)
+		goto out;
+	ac = list_entry(group_l.next, struct ace_container, ace_l);
+	ace2 = ac->ace;
+	if (!complementary_ace_pair(ace2, ace, flags))
+		goto out;
+	list_del(group_l.next);
+	kfree(ac);
+
+	/* groups (deny aces) */
+
+	while (!list_empty(&group_l)) {
+		ace = get_next_v4_ace(p, &n4acl->ace_head);
+		if (ace == NULL)
+			goto out;
+		if (ace2type(ace) != ACL_GROUP)
+			goto out;
+		ac = list_entry(group_l.next, struct ace_container, ace_l);
+		ace2 = ac->ace;
+		if (!complementary_ace_pair(ace2, ace, flags))
+			goto out;
+		list_del(group_l.next);
+		kfree(ac);
+	}
+
+	ace = get_next_v4_ace(p, &n4acl->ace_head);
+	if (ace == NULL)
+		goto out;
+	if (ace2type(ace) != ACL_OTHER)
+		goto out;
+	error = 0;
+out:
+	/* release whatever is still queued */
+	while (!list_empty(&group_l)) {
+		ac = list_entry(group_l.next, struct ace_container, ace_l);
+		list_del(group_l.next);
+		kfree(ac);
+	}
+	return error;
+}
+
+/*
+ * Emit the POSIX ACL_MASK entry, if the ACL being built needs one.
+ *
+ * A POSIX ACL with exactly three entries (pacl->a_count == 3) has no
+ * mask entry, so nothing is written in that case.  Otherwise the mask
+ * ace recorded in *mask_ace by the earlier passes is required: its
+ * access mask is rewritten in place with deny_mask() and the ace is
+ * written out as ACL_MASK through write_pace().
+ *
+ * n4acl and p are not used here; they are kept so the signature stays
+ * parallel to the other *_from_v4() helpers.
+ *
+ * Returns 0 on success, -EINVAL if a mask entry is needed but no mask
+ * ace was recorded, or the negative error reported by write_pace().
+ */
+static inline int
+mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct nfs4_ace **mask_ace,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	int error;
+
+	/* owner, owning group and other only: no mask entry to write */
+	if (pacl->a_count == 3)
+		return 0;
+
+	if (*mask_ace == NULL)
+		return -EINVAL;
+
+	(*mask_ace)->access_mask = deny_mask((*mask_ace)->access_mask, flags);
+
+	/* Propagate a failed write instead of silently reporting success. */
+	error = write_pace(*mask_ace, pacl, pace, ACL_MASK, flags);
+	if (error < 0)
+		return error;
+
+	return 0;
+}
+
+/*
+ * Write the ace at *p as the ACL_OTHER entry of pacl.
+ *
+ * The ace must be an ALLOW ace, and the ace that follows it on the list
+ * (fetched with get_next_v4_ace(), which also moves *p) must form a
+ * complementary pair with it.
+ *
+ * Returns 0 on success, the negative error from write_pace(), or
+ * -EINVAL if either check fails or the list ends early.
+ */
+static inline int
+other_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
+		struct posix_acl *pacl, struct posix_acl_entry **pace,
+		unsigned int flags)
+{
+	struct nfs4_ace *allow, *partner;
+	int status;
+
+	allow = list_entry(*p, struct nfs4_ace, l_ace);
+	if (allow->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
+		return -EINVAL;
+
+	status = write_pace(allow, pacl, pace, ACL_OTHER, flags);
+	if (status < 0)
+		return status;
+
+	partner = get_next_v4_ace(p, &n4acl->ace_head);
+	if (partner == NULL)
+		return -EINVAL;
+	if (!complementary_ace_pair(allow, partner, flags))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Derive the number of POSIX ACL entries from the number of NFSv4 aces.
+ *
+ * Six aces describe owner, owning group and other only, giving three
+ * POSIX entries.  Any other ACL must contain a mask entry; on top of a
+ * base of seven aces, each named user or group contributes three aces
+ * (mask, allow, deny) and one POSIX entry.
+ *
+ * Returns the entry count, or -1 if n4acl->naces fits neither shape.
+ */
+static int
+calculate_posix_ace_count(struct nfs4_acl *n4acl)
+{
+	if (n4acl->naces == 6)
+		return 3;
+
+	if (n4acl->naces < 7 || (n4acl->naces - 7) % 3 != 0)
+		return -1;
+
+	return 4 + (n4acl->naces - 7)/3;
+}
+
+
+/*
+ * Translate one NFSv4 ACL into a newly allocated POSIX ACL.
+ *
+ * The number of POSIX entries is derived from n4acl->naces by
+ * calculate_posix_ace_count().  The aces are then handed, in this fixed
+ * order, to the *_from_v4() helpers (user_obj, users, group_obj and
+ * groups, mask, other), which share the list cursor p, the output cursor
+ * pace and the mask ace pointer.  The result is only accepted if the
+ * cursor ends on the last ace of the list and exactly pacl->a_count
+ * entries were written.
+ *
+ * Returns the POSIX ACL after sort_pacl(), or an ERR_PTR() holding
+ * -EINVAL (unexpected layout), -ENOMEM (posix_acl_alloc() failed) or the
+ * error reported by one of the helpers.
+ */
+static struct posix_acl *
+_nfsv4_to_posix_one(struct nfs4_acl *n4acl, unsigned int flags)
+{
+ struct posix_acl *pacl;
+ int error = -EINVAL, nace = 0;
+ struct list_head *p;
+ struct nfs4_ace *mask_ace = NULL;
+ struct posix_acl_entry *pace;
+
+ nace = calculate_posix_ace_count(n4acl);
+ if (nace < 0)
+ goto out_err;
+
+ pacl = posix_acl_alloc(nace, GFP_KERNEL);
+ error = -ENOMEM;
+ if (pacl == NULL)
+ goto out_err;
+
+ /* output cursor starts at the first entry, list cursor at the head */
+ pace = &pacl->a_entries[0];
+ p = &n4acl->ace_head;
+
+ error = user_obj_from_v4(n4acl, &p, pacl, &pace, flags);
+ if (error)
+ goto out_acl;
+
+ error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
+ if (error)
+ goto out_acl;
+
+ error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace,
+ flags);
+ if (error)
+ goto out_acl;
+
+ error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, flags);
+ if (error)
+ goto out_acl;
+ error = other_from_v4(n4acl, &p, pacl, &pace, flags);
+ if (error)
+ goto out_acl;
+
+ error = -EINVAL;
+ /* reject trailing aces that no helper consumed */
+ if (p->next != &n4acl->ace_head)
+ goto out_acl;
+ /* reject a result with fewer or more entries than were allocated */
+ if (pace != pacl->a_entries + pacl->a_count)
+ goto out_acl;
+
+ sort_pacl(pacl);
+
+ return pacl;
+out_acl:
+ posix_acl_release(pacl);
+out_err:
+ pacl = ERR_PTR(error);
+ return pacl;
+}
+
+/*
+ * Move every ace of acl that has all NFS4_INHERITANCE_FLAGS bits set
+ * into dacl.
+ *
+ * Each matching ace is first copied onto the tail of dacl with
+ * nfs4_acl_add_ace() and only then unlinked from acl and freed, so an
+ * ace is never lost if the copy fails.
+ *
+ * Returns 0 on success, or -ENOMEM if an ace could not be added to
+ * dacl.  On failure the aces moved so far stay in dacl, and the ace
+ * that failed plus all later ones stay in acl.
+ */
+int
+nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
+{
+	struct list_head *h, *n;
+	struct nfs4_ace *ace;
+
+	list_for_each_safe(h, n, &acl->ace_head) {
+		ace = list_entry(h, struct nfs4_ace, l_ace);
+
+		if ((ace->flag & NFS4_INHERITANCE_FLAGS)
+				!= NFS4_INHERITANCE_FLAGS)
+			continue;
+
+		/*
+		 * Test the return value itself: storing the result of an
+		 * "== -1" comparison yields 0 or 1, which can never be
+		 * negative, so the failure would go unnoticed.
+		 */
+		if (nfs4_acl_add_ace(dacl, ace->type, ace->flag,
+				ace->access_mask, ace->whotype, ace->who) < 0)
+			return -ENOMEM;
+
+		list_del(h);
+		kfree(ace);
+		acl->naces--;
+	}
+
+	return 0;
+}
+
+/*
+ * Map the "who" of an NFSv4 ace to the POSIX ACL tag it corresponds to.
+ * Named aces become ACL_GROUP or ACL_USER depending on the
+ * NFS4_ACE_IDENTIFIER_GROUP flag.  An unknown whotype is a BUG().
+ */
+static short
+ace2type(struct nfs4_ace *ace)
+{
+	if (ace->whotype == NFS4_ACL_WHO_NAMED) {
+		if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
+			return ACL_GROUP;
+		return ACL_USER;
+	}
+	if (ace->whotype == NFS4_ACL_WHO_OWNER)
+		return ACL_USER_OBJ;
+	if (ace->whotype == NFS4_ACL_WHO_GROUP)
+		return ACL_GROUP_OBJ;
+	if (ace->whotype == NFS4_ACL_WHO_EVERYONE)
+		return ACL_OTHER;
+
+	BUG();
+	return -1;
+}
+
+EXPORT_SYMBOL(nfs4_acl_posix_to_nfsv4);
+EXPORT_SYMBOL(nfs4_acl_nfsv4_to_posix);
+
+/*
+ * Allocate an empty NFSv4 ACL: no aces and an initialised ace list.
+ * Returns NULL if the allocation fails.
+ */
+struct nfs4_acl *
+nfs4_acl_new(void)
+{
+	struct nfs4_acl *acl = kmalloc(sizeof(*acl), GFP_KERNEL);
+
+	if (acl != NULL) {
+		acl->naces = 0;
+		INIT_LIST_HEAD(&acl->ace_head);
+	}
+
+	return acl;
+}
+
+/*
+ * Free an NFSv4 ACL together with every ace still linked on it.
+ * A NULL acl is accepted and ignored.
+ */
+void
+nfs4_acl_free(struct nfs4_acl *acl)
+{
+	struct nfs4_ace *ace;
+
+	if (acl == NULL)
+		return;
+
+	/* pop aces off the front of the list until none are left */
+	while (!list_empty(&acl->ace_head)) {
+		ace = list_entry(acl->ace_head.next, struct nfs4_ace, l_ace);
+		list_del(&ace->l_ace);
+		kfree(ace);
+	}
+
+	kfree(acl);
+}
+
+/*
+ * Allocate a new ace from the given fields, append it to the tail of
+ * acl's ace list and bump acl->naces.
+ *
+ * Returns 0 on success, -1 if the ace could not be allocated.
+ */
+int
+nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
+		int whotype, uid_t who)
+{
+	struct nfs4_ace *new_ace;
+
+	new_ace = kmalloc(sizeof(*new_ace), GFP_KERNEL);
+	if (new_ace == NULL)
+		return -1;
+
+	new_ace->who = who;
+	new_ace->whotype = whotype;
+	new_ace->access_mask = access_mask;
+	new_ace->flag = flag;
+	new_ace->type = type;
+
+	list_add_tail(&new_ace->l_ace, &acl->ace_head);
+	acl->naces++;
+
+	return 0;
+}
+
+/*
+ * Table pairing each special "who" string with its NFS4_ACL_WHO_* type.
+ * stringlen is the string length without the terminating NUL.  The
+ * table is searched by string in nfs4_acl_get_whotype() and by type in
+ * nfs4_acl_write_who().
+ */
+static struct {
+ char *string;
+ int stringlen;
+ int type;
+} s2t_map[] = {
+ {
+ .string = "OWNER@",
+ .stringlen = sizeof("OWNER@") - 1,
+ .type = NFS4_ACL_WHO_OWNER,
+ },
+ {
+ .string = "GROUP@",
+ .stringlen = sizeof("GROUP@") - 1,
+ .type = NFS4_ACL_WHO_GROUP,
+ },
+ {
+ .string = "EVERYONE@",
+ .stringlen = sizeof("EVERYONE@") - 1,
+ .type = NFS4_ACL_WHO_EVERYONE,
+ },
+};
+
+/*
+ * Classify a "who" string of len bytes (not necessarily NUL terminated).
+ * Returns the NFS4_ACL_WHO_* type of the matching s2t_map entry, or
+ * NFS4_ACL_WHO_NAMED if the string is none of the special names.
+ */
+int
+nfs4_acl_get_whotype(char *p, u32 len)
+{
+	const int nentries = sizeof(s2t_map) / sizeof(*s2t_map);
+	int idx;
+
+	for (idx = 0; idx < nentries; idx++) {
+		if (s2t_map[idx].stringlen != len)
+			continue;
+		if (memcmp(s2t_map[idx].string, p, len) == 0)
+			return s2t_map[idx].type;
+	}
+
+	return NFS4_ACL_WHO_NAMED;
+}
+
+/*
+ * Copy the special "who" string for type who into p, without a
+ * terminating NUL, and return the number of bytes written.
+ * A type that is not in s2t_map is a BUG().
+ */
+int
+nfs4_acl_write_who(int who, char *p)
+{
+	const int nentries = sizeof(s2t_map) / sizeof(*s2t_map);
+	int idx;
+
+	for (idx = 0; idx < nentries; idx++) {
+		if (s2t_map[idx].type != who)
+			continue;
+		memcpy(p, s2t_map[idx].string, s2t_map[idx].stringlen);
+		return s2t_map[idx].stringlen;
+	}
+
+	BUG();
+	return -1;
+}
+
+/*
+ * Decide whether an ace applies to the id who.  Named aces compare who
+ * with ace->who, OWNER@ and GROUP@ aces compare it with owner and group
+ * respectively, EVERYONE@ always matches, and an unknown whotype never
+ * matches.  Returns non-zero on a match.
+ */
+static inline int
+match_who(struct nfs4_ace *ace, uid_t owner, gid_t group, uid_t who)
+{
+	if (ace->whotype == NFS4_ACL_WHO_NAMED)
+		return who == ace->who;
+	if (ace->whotype == NFS4_ACL_WHO_OWNER)
+		return who == owner;
+	if (ace->whotype == NFS4_ACL_WHO_GROUP)
+		return who == group;
+	if (ace->whotype == NFS4_ACL_WHO_EVERYONE)
+		return 1;
+	return 0;
+}
+
+EXPORT_SYMBOL(nfs4_acl_new);
+EXPORT_SYMBOL(nfs4_acl_free);
+EXPORT_SYMBOL(nfs4_acl_add_ace);
+EXPORT_SYMBOL(nfs4_acl_get_whotype);
+EXPORT_SYMBOL(nfs4_acl_write_who);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
new file mode 100644
index 000000000000..c70de9c2af74
--- /dev/null
+++ b/fs/nfsd/nfs4callback.c
@@ -0,0 +1,547 @@
+/*
+ * linux/fs/nfsd/nfs4callback.c
+ *
+ * Copyright (c) 2001 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/inet.h>
+#include <linux/errno.h>
+#include <linux/delay.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/state.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/nfs4.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+/* procedure numbers used on the wire (stored in .p_proc by PROC()) */
+#define NFSPROC4_CB_NULL 0
+#define NFSPROC4_CB_COMPOUND 1
+
+/* declarations */
+static void nfs4_cb_null(struct rpc_task *task);
+extern spinlock_t recall_lock;
+
+/* Index of predefined Linux callback client operations */
+
+enum {
+ NFSPROC4_CLNT_CB_NULL = 0,
+ NFSPROC4_CLNT_CB_RECALL,
+};
+
+/* operation number written by encode_cb_recall() */
+enum nfs_cb_opnum4 {
+ OP_CB_RECALL = 4,
+};
+
+#define NFS4_MAXTAGLEN 20
+
+/*
+ * Buffer size estimates for the encode/decode routines.  PROC() shifts
+ * them left by two when filling in .p_bufsiz, i.e. they are counted in
+ * 4-byte XDR words.
+ */
+#define NFS4_enc_cb_null_sz 0
+#define NFS4_dec_cb_null_sz 0
+#define cb_compound_enc_hdr_sz 4
+#define cb_compound_dec_hdr_sz (3 + (NFS4_MAXTAGLEN >> 2))
+#define op_enc_sz 1
+#define op_dec_sz 2
+#define enc_nfs4_fh_sz (1 + (NFS4_FHSIZE >> 2))
+#define enc_stateid_sz 16
+#define NFS4_enc_cb_recall_sz (cb_compound_enc_hdr_sz + \
+ 1 + enc_stateid_sz + \
+ enc_nfs4_fh_sz)
+
+#define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \
+ op_dec_sz)
+
+/*
+* Generic encode routines from fs/nfs/nfs4xdr.c
+*/
+/*
+ * Copy nbytes from ptr to the XDR buffer at p and return the position
+ * just past the copied data, rounded up to whole 32-bit words.  The
+ * last word is cleared before the copy so any padding bytes are zero.
+ * Nothing is written when nbytes rounds to zero words.
+ */
+static inline u32 *
+xdr_writemem(u32 *p, const void *ptr, int nbytes)
+{
+	int nwords = XDR_QUADLEN(nbytes);
+
+	if (nwords) {
+		p[nwords - 1] = 0;
+		memcpy(p, ptr, nbytes);
+		p += nwords;
+	}
+	return p;
+}
+
+/*
+ * Encode helpers.  All of them use a local "u32 *p" as the write
+ * cursor; RESERVE_SPACE additionally expects a local
+ * "struct xdr_stream *xdr".
+ */
+/* store n in network byte order and advance p by one word */
+#define WRITE32(n) *p++ = htonl(n)
+/* copy nbytes of opaque data via xdr_writemem() and advance p */
+#define WRITEMEM(ptr,nbytes) do { \
+ p = xdr_writemem(p, ptr, nbytes); \
+} while (0)
+/* point p at nbytes of stream space; a failed reservation is a BUG */
+#define RESERVE_SPACE(nbytes) do { \
+ p = xdr_reserve_space(xdr, nbytes); \
+ if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
+ BUG_ON(!p); \
+} while (0)
+
+/*
+ * Generic decode routines from fs/nfs/nfs4xdr.c
+ */
+/*
+ * Common function tail for decoders: falls through to "out" with
+ * status 0, and provides an "xdr_error" label that sets status to -EIO.
+ * Expects a local "status".
+ */
+#define DECODE_TAIL \
+ status = 0; \
+out: \
+ return status; \
+xdr_error: \
+ dprintk("NFSD: xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+ status = -EIO; \
+ goto out
+
+/* The READ* macros consume words at the local read cursor "u32 *p". */
+/* read one 32-bit word in host byte order */
+#define READ32(x) (x) = ntohl(*p++)
+/* read two words, most significant word first, into a 64-bit value */
+#define READ64(x) do { \
+ (x) = (u64)ntohl(*p++) << 32; \
+ (x) |= ntohl(*p++); \
+} while (0)
+/* skip one word, then read tv_sec and tv_nsec from one word each */
+#define READTIME(x) do { \
+ p++; \
+ (x.tv_sec) = ntohl(*p++); \
+ (x.tv_nsec) = ntohl(*p++); \
+} while (0)
+/*
+ * Point p at the next nbytes of the local "xdr" stream; makes the
+ * enclosing function return -EIO if xdr_inline_decode() fails.
+ */
+#define READ_BUF(nbytes) do { \
+ p = xdr_inline_decode(xdr, nbytes); \
+ if (!p) { \
+ dprintk("NFSD: %s: reply buffer overflowed in line %d.", \
+ __FUNCTION__, __LINE__); \
+ return -EIO; \
+ } \
+} while (0)
+
+/*
+ * Header of a callback compound.  encode_cb_compound_hdr() sends ident
+ * and nops; decode_cb_compound_hdr() fills in status, taglen, tag and
+ * nops from the reply.
+ */
+struct nfs4_cb_compound_hdr {
+ int status;
+ u32 ident;
+ u32 nops;
+ u32 taglen;
+ char * tag; /* points into the receive buffer, not NUL terminated */
+};
+
+/*
+ * Translation table from NFSv4 status codes to (positive) errno values,
+ * searched linearly by nfs_cb_stat_to_errno().  The entry with
+ * stat == -1 terminates the table.
+ */
+static struct {
+int stat;
+int errno;
+} nfs_cb_errtbl[] = {
+ { NFS4_OK, 0 },
+ { NFS4ERR_PERM, EPERM },
+ { NFS4ERR_NOENT, ENOENT },
+ { NFS4ERR_IO, EIO },
+ { NFS4ERR_NXIO, ENXIO },
+ { NFS4ERR_ACCESS, EACCES },
+ { NFS4ERR_EXIST, EEXIST },
+ { NFS4ERR_XDEV, EXDEV },
+ { NFS4ERR_NOTDIR, ENOTDIR },
+ { NFS4ERR_ISDIR, EISDIR },
+ { NFS4ERR_INVAL, EINVAL },
+ { NFS4ERR_FBIG, EFBIG },
+ { NFS4ERR_NOSPC, ENOSPC },
+ { NFS4ERR_ROFS, EROFS },
+ { NFS4ERR_MLINK, EMLINK },
+ { NFS4ERR_NAMETOOLONG, ENAMETOOLONG },
+ { NFS4ERR_NOTEMPTY, ENOTEMPTY },
+ { NFS4ERR_DQUOT, EDQUOT },
+ { NFS4ERR_STALE, ESTALE },
+ { NFS4ERR_BADHANDLE, EBADHANDLE },
+ { NFS4ERR_BAD_COOKIE, EBADCOOKIE },
+ { NFS4ERR_NOTSUPP, ENOTSUPP },
+ { NFS4ERR_TOOSMALL, ETOOSMALL },
+ { NFS4ERR_SERVERFAULT, ESERVERFAULT },
+ { NFS4ERR_BADTYPE, EBADTYPE },
+ { NFS4ERR_LOCKED, EAGAIN },
+ { NFS4ERR_RESOURCE, EREMOTEIO },
+ { NFS4ERR_SYMLINK, ELOOP },
+ { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP },
+ { NFS4ERR_DEADLOCK, EDEADLK },
+ { -1, EIO } /* sentinel: the loop stops here, so EIO is never returned */
+};
+
+/*
+ * Translate an NFSv4 status into a positive errno value using
+ * nfs_cb_errtbl.  A status that is not in the table is returned
+ * unchanged.
+ */
+static int
+nfs_cb_stat_to_errno(int stat)
+{
+	int idx = 0;
+
+	while (nfs_cb_errtbl[idx].stat != -1) {
+		if (stat == nfs_cb_errtbl[idx].stat)
+			return nfs_cb_errtbl[idx].errno;
+		idx++;
+	}
+
+	/*
+	 * No translation known; leave it to the recovery routines.
+	 * The remaining NFSv4 error codes are all > 10000, so they should
+	 * not collide with native Linux error codes.
+	 */
+	return stat;
+}
+
+/*
+ * XDR encode
+ */
+
+/*
+ * Encode the callback compound header: an empty tag, the minor version,
+ * hdr->ident and hdr->nops, one word each (16 bytes in total).
+ * Always returns 0; a failed space reservation is a BUG (RESERVE_SPACE).
+ */
+static int
+encode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+{
+ u32 * p;
+
+ RESERVE_SPACE(16);
+ WRITE32(0); /* tag length is always 0 */
+ WRITE32(NFS4_MINOR_VERSION);
+ WRITE32(hdr->ident);
+ WRITE32(hdr->nops);
+ return 0;
+}
+
+/*
+ * Encode one CB_RECALL operation: the opcode, the stateid, the truncate
+ * flag, and the file handle as length-prefixed opaque data.
+ * Always returns 0; a failed space reservation is a BUG (RESERVE_SPACE).
+ */
+static int
+encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec)
+{
+ u32 *p;
+ int len = cb_rec->cbr_fhlen;
+
+ /* 12 = three 32-bit words: opcode, truncate flag, handle length */
+ RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len);
+ WRITE32(OP_CB_RECALL);
+ WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t));
+ WRITE32(cb_rec->cbr_trunc);
+ WRITE32(len);
+ WRITEMEM(cb_rec->cbr_fhval, len);
+ return 0;
+}
+
+/*
+ * Encode the arguments of CB_NULL, which has none: the stream is
+ * initialised on the send buffer and zero bytes are reserved.
+ * Always returns 0.
+ */
+static int
+nfs4_xdr_enc_cb_null(struct rpc_rqst *req, u32 *p)
+{
+ struct xdr_stream xdrs, *xdr = &xdrs;
+
+ xdr_init_encode(&xdrs, &req->rq_snd_buf, p);
+ RESERVE_SPACE(0);
+ return 0;
+}
+
+/*
+ * Encode a callback compound holding a single CB_RECALL operation.
+ * The compound header carries args->cbr_ident and an operation count
+ * of one.  Returns the result of encode_cb_recall().
+ */
+static int
+nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, u32 *p, struct nfs4_cb_recall *args)
+{
+ struct xdr_stream xdr;
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = args->cbr_ident,
+ .nops = 1,
+ };
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_cb_compound_hdr(&xdr, &hdr);
+ return (encode_cb_recall(&xdr, args));
+}
+
+
+/*
+ * Decode the header of a callback compound reply into hdr: the status,
+ * the tag (hdr->tag points into the receive buffer and is not NUL
+ * terminated) and the number of operations.
+ *
+ * Returns 0 on success or -EIO if the reply is too short or carries an
+ * impossible tag length.
+ */
+static int
+decode_cb_compound_hdr(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr)
+{
+	u32 *p;
+
+	READ_BUF(8);
+	READ32(hdr->status);
+	READ32(hdr->taglen);
+
+	/*
+	 * taglen comes straight off the wire and is a u32.  Reject values
+	 * for which "taglen + 4" below, or rounding that sum up to a whole
+	 * XDR word, would wrap around; otherwise a tiny buffer check would
+	 * pass and p would then be advanced by a huge word count.
+	 */
+	if (hdr->taglen > (u32)~0 - 7) {
+		dprintk("NFSD: %s: bad tag length %u\n",
+				__FUNCTION__, hdr->taglen);
+		return -EIO;
+	}
+
+	/* the tag itself plus the 4-byte operation count that follows it */
+	READ_BUF(hdr->taglen + 4);
+	hdr->tag = (char *)p;
+	p += XDR_QUADLEN(hdr->taglen);
+	READ32(hdr->nops);
+	return 0;
+}
+
+/*
+ * Decode the header of one operation result: the opcode, which must
+ * equal expected, followed by the NFSv4 status.
+ *
+ * Returns 0 if the operation succeeded, -EIO on a short reply or an
+ * unexpected opcode, otherwise the negated result of
+ * nfs_cb_stat_to_errno() for the reported status.
+ */
+static int
+decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+	u32 *p;
+	u32 opnum;
+	int32_t status;
+
+	READ_BUF(8);
+
+	READ32(opnum);
+	if (opnum != expected) {
+		dprintk("NFSD: decode_cb_op_hdr: Callback server returned "
+			" operation %d but we issued a request for %d\n",
+			opnum, expected);
+		return -EIO;
+	}
+
+	READ32(status);
+	if (status == NFS_OK)
+		return 0;
+	return -nfs_cb_stat_to_errno(status);
+}
+
+/* Decode the CB_NULL reply: there is nothing to read, always returns 0. */
+static int
+nfs4_xdr_dec_cb_null(struct rpc_rqst *req, u32 *p)
+{
+ return 0;
+}
+
+/*
+ * Decode the reply to a CB_RECALL compound: the compound header first,
+ * then the header of the CB_RECALL operation.  Returns 0 on success or
+ * the first negative error from either decoding step.
+ */
+static int
+nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, u32 *p)
+{
+	struct nfs4_cb_compound_hdr hdr;
+	struct xdr_stream xdr;
+	int status;
+
+	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+
+	status = decode_cb_compound_hdr(&xdr, &hdr);
+	if (status == 0)
+		status = decode_cb_op_hdr(&xdr, OP_CB_RECALL);
+
+	return status;
+}
+
+/*
+ * RPC procedure tables
+ */
+/* NOTE: evaluates its arguments more than once; only used on constants here */
+#ifndef MAX
+# define MAX(a, b) (((a) > (b))? (a) : (b))
+#endif
+
+/*
+ * Build one rpc_procinfo entry, indexed by NFSPROC4_CLNT_<proc>.
+ * argtype/restype select the nfs4_xdr_* encode/decode routines and the
+ * matching NFS4_*_sz estimates; the larger estimate, converted from
+ * words to bytes by the shift, becomes the buffer size.
+ */
+#define PROC(proc, call, argtype, restype) \
+[NFSPROC4_CLNT_##proc] = { \
+ .p_proc = NFSPROC4_CB_##call, \
+ .p_encode = (kxdrproc_t) nfs4_xdr_##argtype, \
+ .p_decode = (kxdrproc_t) nfs4_xdr_##restype, \
+ .p_bufsiz = MAX(NFS4_##argtype##_sz,NFS4_##restype##_sz) << 2, \
+}
+
+/* the two procedures of the callback program: CB_NULL and CB_COMPOUND */
+struct rpc_procinfo nfs4_cb_procedures[] = {
+ PROC(CB_NULL, NULL, enc_cb_null, dec_cb_null),
+ PROC(CB_RECALL, COMPOUND, enc_cb_recall, dec_cb_recall),
+};
+
+struct rpc_version nfs_cb_version4 = {
+ .number = 1,
+ .nrprocs = sizeof(nfs4_cb_procedures)/sizeof(nfs4_cb_procedures[0]),
+ .procs = nfs4_cb_procedures
+};
+
+/* version table of the callback program; only index 1 is populated */
+static struct rpc_version * nfs_cb_version[] = {
+ NULL,
+ &nfs_cb_version4,
+};
+
+/*
+ * Use the SETCLIENTID credential
+ *
+ * Looks up an RPC credential for the uid, gid and group list stored in
+ * clp->cl_cred in the credential cache of the callback client's auth
+ * handle.  A reference on the group info is held for the duration of
+ * the lookup only.  Returns whatever rpcauth_lookup_credcache() returns;
+ * the callers in this file test the result with IS_ERR().
+ */
+struct rpc_cred *
+nfsd4_lookupcred(struct nfs4_client *clp, int taskflags)
+{
+ struct auth_cred acred;
+ struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+ struct rpc_cred *ret;
+
+ get_group_info(clp->cl_cred.cr_group_info);
+ acred.uid = clp->cl_cred.cr_uid;
+ acred.gid = clp->cl_cred.cr_gid;
+ acred.group_info = clp->cl_cred.cr_group_info;
+
+ dprintk("NFSD: looking up %s cred\n",
+ clnt->cl_auth->au_ops->au_name);
+ ret = rpcauth_lookup_credcache(clnt->cl_auth, &acred, taskflags);
+ put_group_info(clp->cl_cred.cr_group_info);
+ return ret;
+}
+
+/*
+ * Set up the callback client and put a NFSPROC4_CB_NULL on the wire...
+ *
+ * Does nothing unless callback information has been parsed
+ * (cb->cb_parsed) and the callback channel is not yet marked as set
+ * (cb->cb_set).  Otherwise a TCP transport and an AUTH_UNIX RPC client
+ * are created for the address, port and program number stored in
+ * clp->cl_callback, and an asynchronous CB_NULL call is issued whose
+ * completion handler is nfs4_cb_null().
+ *
+ * On any failure what was set up so far is torn down in reverse order
+ * and cb->cb_client is left NULL.
+ */
+void
+nfsd4_probe_callback(struct nfs4_client *clp)
+{
+ struct sockaddr_in addr;
+ struct nfs4_callback *cb = &clp->cl_callback;
+ struct rpc_timeout timeparms;
+ struct rpc_xprt * xprt;
+ struct rpc_program * program = &cb->cb_program;
+ struct rpc_stat * stat = &cb->cb_stat;
+ struct rpc_clnt * clnt;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL],
+ .rpc_argp = clp,
+ };
+ char hostname[32];
+ int status;
+
+ dprintk("NFSD: probe_callback. cb_parsed %d cb_set %d\n",
+ cb->cb_parsed, atomic_read(&cb->cb_set));
+ if (!cb->cb_parsed || atomic_read(&cb->cb_set))
+ return;
+
+ /* Initialize address */
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(cb->cb_port);
+ addr.sin_addr.s_addr = htonl(cb->cb_addr);
+
+ /* Initialize timeout */
+ timeparms.to_initval = (NFSD_LEASE_TIME/4) * HZ;
+ timeparms.to_retries = 0;
+ timeparms.to_maxval = (NFSD_LEASE_TIME/2) * HZ;
+ timeparms.to_exponential = 1;
+
+ /* Create RPC transport */
+ if (!(xprt = xprt_create_proto(IPPROTO_TCP, &addr, &timeparms))) {
+ dprintk("NFSD: couldn't create callback transport!\n");
+ goto out_err;
+ }
+
+ /* Initialize rpc_program */
+ program->name = "nfs4_cb";
+ program->number = cb->cb_prog;
+ program->nrvers = sizeof(nfs_cb_version)/sizeof(nfs_cb_version[0]);
+ program->version = nfs_cb_version;
+ program->stats = stat;
+
+ /* Initialize rpc_stat */
+ memset(stat, 0, sizeof(struct rpc_stat));
+ stat->program = program;
+
+ /* Create RPC client
+ *
+ * XXX AUTH_UNIX only - need AUTH_GSS....
+ */
+ /* dotted quad is at most 15 characters, so hostname[32] is ample */
+ sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr.sin_addr.s_addr));
+ if (!(clnt = rpc_create_client(xprt, hostname, program, 1, RPC_AUTH_UNIX))) {
+ dprintk("NFSD: couldn't create callback client\n");
+ goto out_xprt;
+ }
+ clnt->cl_intr = 0;
+ clnt->cl_softrtry = 1;
+ clnt->cl_chatty = 1;
+
+ /* Kick rpciod, put the call on the wire. */
+
+ if (rpciod_up() != 0) {
+ dprintk("nfsd: couldn't start rpciod for callbacks!\n");
+ goto out_clnt;
+ }
+
+ /* the task holds a reference to the nfs4_client struct */
+ cb->cb_client = clnt;
+ atomic_inc(&clp->cl_count);
+
+ /* cb_client must be set before this: nfsd4_lookupcred() reads it */
+ msg.rpc_cred = nfsd4_lookupcred(clp,0);
+ if (IS_ERR(msg.rpc_cred))
+ goto out_rpciod;
+ status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL);
+ put_rpccred(msg.rpc_cred);
+
+ if (status != 0) {
+ dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n");
+ goto out_rpciod;
+ }
+ return;
+
+out_rpciod:
+ /* undo the client reference taken for the task, then rpciod_up() */
+ atomic_dec(&clp->cl_count);
+ rpciod_down();
+out_clnt:
+ /* NOTE(review): xprt is not destroyed separately on this path; presumably rpc_shutdown_client() releases it - confirm */
+ rpc_shutdown_client(clnt);
+ goto out_err;
+out_xprt:
+ xprt_destroy(xprt);
+out_err:
+ dprintk("NFSD: warning: no callback path to client %.*s\n",
+ (int)clp->cl_name.len, clp->cl_name.data);
+ cb->cb_client = NULL;
+}
+
+/*
+ * Completion handler of the CB_NULL probe issued by
+ * nfsd4_probe_callback().  If the call did not fail, the callback
+ * channel is marked as set.  In either case the client reference taken
+ * for the task is dropped.
+ */
+static void
+nfs4_cb_null(struct rpc_task *task)
+{
+	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
+	struct nfs4_callback *cb = &clp->cl_callback;
+	u32 addr = htonl(cb->cb_addr);
+
+	dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status);
+
+	if (task->tk_status >= 0) {
+		atomic_set(&cb->cb_set, 1);
+		dprintk("NFSD: callback set to client %u.%u.%u.%u\n", NIPQUAD(addr));
+	} else {
+		dprintk("NFSD: callback establishment to client %.*s failed\n",
+			(int)clp->cl_name.len, clp->cl_name.data);
+	}
+
+	put_nfs4_client(clp);
+}
+
+/*
+ * called with dp->dl_count inc'ed.
+ * nfs4_lock_state() may or may not have been called.
+ *
+ * Sends a synchronous CB_RECALL for delegation dp to its client.  If
+ * the call fails with -EIO, -EBADHANDLE or -NFS4ERR_BAD_STATEID it is
+ * retried once after sleeping for two seconds.  A final status of -EIO
+ * clears the client's cb_set flag.  The delegation reference is dropped
+ * with nfs4_put_delegation() before returning.
+ *
+ * NOTE(review): the early return taken when no callback channel is set
+ * up does not call nfs4_put_delegation(); confirm whether the reference
+ * taken by the caller is released elsewhere in that case.
+ */
+void
+nfsd4_cb_recall(struct nfs4_delegation *dp)
+{
+ struct nfs4_client *clp = dp->dl_client;
+ struct rpc_clnt *clnt = clp->cl_callback.cb_client;
+ struct nfs4_cb_recall *cbr = &dp->dl_recall;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL],
+ .rpc_argp = cbr,
+ };
+ int retries = 1;
+ int status = 0;
+
+ if ((!atomic_read(&clp->cl_callback.cb_set)) || !clnt)
+ return;
+
+ msg.rpc_cred = nfsd4_lookupcred(clp, 0);
+ if (IS_ERR(msg.rpc_cred))
+ goto out;
+
+ cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */
+ cbr->cbr_dp = dp;
+
+ status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
+ /* runs at most once: retries starts at 1 */
+ while (retries--) {
+ switch (status) {
+ case -EIO:
+ /* Network partition? */
+ case -EBADHANDLE:
+ case -NFS4ERR_BAD_STATEID:
+ /* Race: client probably got cb_recall
+ * before open reply granting delegation */
+ break;
+ default:
+ /* success or an error not worth retrying */
+ goto out_put_cred;
+ }
+ ssleep(2);
+ status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT);
+ }
+out_put_cred:
+ put_rpccred(msg.rpc_cred);
+out:
+ if (status == -EIO)
+ atomic_set(&clp->cl_callback.cb_set, 0);
+ /* Success or failure, now we're either waiting for lease expiration
+ * or deleg_return. */
+ dprintk("NFSD: nfs4_cb_recall: dp %p dl_flock %p dl_count %d\n",dp, dp->dl_flock, atomic_read(&dp->dl_count));
+ nfs4_put_delegation(dp);
+ return;
+}
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
new file mode 100644
index 000000000000..4ba540841cf6
--- /dev/null
+++ b/fs/nfsd/nfs4idmap.c
@@ -0,0 +1,588 @@
+/*
+ * fs/nfsd/nfs4idmap.c
+ *
+ * Mapping of UID/GIDs to name and vice versa.
+ *
+ * Copyright (c) 2002, 2003 The Regents of the University of
+ * Michigan. All rights reserved.
+ *
+ * Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/mm.h>
+#include <linux/utsname.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfs.h>
+#include <linux/nfs4.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
+#include <linux/smp_lock.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/nfsd_idmap.h>
+#include <linux/list.h>
+#include <linux/sched.h>
+#include <linux/time.h>
+#include <linux/seq_file.h>
+#include <linux/sunrpc/svcauth.h>
+
+/*
+ * Cache entry
+ */
+
+/*
+ * XXX we know that IDMAP_NAMESZ < PAGE_SIZE, but it's ugly to rely on
+ * that.
+ */
+
+#define IDMAP_TYPE_USER 0
+#define IDMAP_TYPE_GROUP 1
+
+/*
+ * One id <-> name mapping.  The same structure is used for entries of
+ * both the idtoname and the nametoid cache.
+ */
+struct ent {
+ struct cache_head h; /* embedded cache entry header */
+ int type; /* User / Group */
+ uid_t id; /* numeric id (also used for group ids) */
+ char name[IDMAP_NAMESZ]; /* mapped name, NUL terminated */
+ char authname[IDMAP_NAMESZ]; /* taken from rqstp->rq_client->name */
+};
+
+/*
+ * Instantiate FUNC##_lookup(struct STRUCT *item, int set) through
+ * DefineCacheLookup, wiring in FUNC##_cache, FUNC##_hash(),
+ * FUNC##_match(), STRUCT##_init() and STRUCT##_update().
+ */
+#define DefineSimpleCacheLookupMap(STRUCT, FUNC) \
+ DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \
+ (struct STRUCT *item, int set), /*no setup */, \
+ & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \
+ STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0)
+
+/* Common entry handling */
+
+/* both caches use hash tables of ENT_HASHMAX (2^ENT_HASHBITS) buckets */
+#define ENT_HASHBITS 8
+#define ENT_HASHMAX (1 << ENT_HASHBITS)
+#define ENT_HASHMASK (ENT_HASHMAX - 1)
+
+/*
+ * Copy the mapping data (id, type, name, authname) from itm into new.
+ * The cache header new->h is left untouched.  Each string copy is
+ * bounded by the size of its own destination buffer.
+ */
+static inline void
+ent_init(struct ent *new, struct ent *itm)
+{
+	new->id = itm->id;
+	new->type = itm->type;
+
+	strlcpy(new->name, itm->name, sizeof(new->name));
+	strlcpy(new->authname, itm->authname, sizeof(new->authname));
+}
+
+/*
+ * Refresh an existing entry from itm.  Identical to ent_init(): all
+ * mapping fields are overwritten.
+ */
+static inline void
+ent_update(struct ent *new, struct ent *itm)
+{
+ ent_init(new, itm);
+}
+
+/*
+ * Drop a reference on a cache entry (the .cache_put method of both
+ * caches).  The enclosing struct ent is freed when cache_put() returns
+ * non-zero.
+ */
+void
+ent_put(struct cache_head *ch, struct cache_detail *cd)
+{
+	struct ent *map;
+
+	if (!cache_put(ch, cd))
+		return;
+
+	map = container_of(ch, struct ent, h);
+	kfree(map);
+}
+
+/*
+ * ID -> Name cache
+ */
+
+static struct cache_head *idtoname_table[ENT_HASHMAX];
+
+/*
+ * Hash an idtoname key: the authname hash is mixed with the id, and
+ * the lowest bit is flipped for group entries so that a user and a
+ * group with the same id and authname get different hash values.
+ */
+static uint32_t
+idtoname_hash(struct ent *ent)
+{
+	uint32_t seed;
+	uint32_t bucket;
+
+	seed = hash_str(ent->authname, ENT_HASHBITS);
+	bucket = hash_long(seed ^ ent->id, ENT_HASHBITS);
+
+	/* Flip LSB for user/group */
+	return (ent->type == IDMAP_TYPE_GROUP) ? (bucket ^ 1) : bucket;
+}
+
+/*
+ * Format the upcall for an id -> name lookup (the .cache_request
+ * method of idtoname_cache): "<authname> <user|group> <id>\n".
+ *
+ * The id is a uid_t and is printed as unsigned: idstr[11] holds exactly
+ * the ten digits of the largest 32-bit value plus the NUL, whereas a
+ * signed conversion would print ids above INT_MAX as negative numbers
+ * and truncate them.
+ */
+static void
+idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
+		int *blen)
+{
+	struct ent *ent = container_of(ch, struct ent, h);
+	char idstr[11];
+
+	qword_add(bpp, blen, ent->authname);
+	snprintf(idstr, sizeof(idstr), "%u", ent->id);
+	qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user");
+	qword_add(bpp, blen, idstr);
+
+	/* replace the separator after the last word with the line end */
+	(*bpp)[-1] = '\n';
+}
+
+/*
+ * Two idtoname keys match when id, type and authname are all equal.
+ * Returns non-zero on a match.
+ */
+static inline int
+idtoname_match(struct ent *a, struct ent *b)
+{
+	if (a->id != b->id)
+		return 0;
+	if (a->type != b->type)
+		return 0;
+	return strcmp(a->authname, b->authname) == 0;
+}
+
+/*
+ * Print one idtoname entry to a seq_file (the .cache_show method of
+ * idtoname_cache).  A NULL h requests the header line.  The name is
+ * only printed for entries with CACHE_VALID set.  The id is a uid_t and
+ * is therefore printed as unsigned.  Always returns 0.
+ */
+static int
+idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
+{
+	struct ent *ent;
+
+	if (h == NULL) {
+		seq_puts(m, "#domain type id [name]\n");
+		return 0;
+	}
+	ent = container_of(h, struct ent, h);
+	seq_printf(m, "%s %s %u", ent->authname,
+			ent->type == IDMAP_TYPE_GROUP ? "group" : "user",
+			ent->id);
+	if (test_bit(CACHE_VALID, &h->flags))
+		seq_printf(m, " %s", ent->name);
+	seq_printf(m, "\n");
+	return 0;
+}
+
+/*
+ * Log that id mapping upcalls are not being answered (the
+ * .warn_no_listener method of both caches).  The wording depends on
+ * whether detail->last_close is set.
+ */
+static void
+warn_no_idmapd(struct cache_detail *detail)
+{
+	const char *state;
+
+	state = detail->last_close ? "died" : "not been started";
+	printk("nfsd: nfsv4 idmapping failing: has idmapd %s?\n", state);
+}
+
+
+static int idtoname_parse(struct cache_detail *, char *, int);
+static struct ent *idtoname_lookup(struct ent *, int);
+
+/* Cache description for id -> name mappings, named "nfs4.idtoname". */
+struct cache_detail idtoname_cache = {
+ .hash_size = ENT_HASHMAX,
+ .hash_table = idtoname_table,
+ .name = "nfs4.idtoname",
+ .cache_put = ent_put,
+ .cache_request = idtoname_request,
+ .cache_parse = idtoname_parse,
+ .cache_show = idtoname_show,
+ .warn_no_listener = warn_no_idmapd,
+};
+
+/*
+ * Parse one line written to the idtoname cache (the .cache_parse
+ * method of idtoname_cache) and install the resulting entry.
+ *
+ * Expected fields: authname, type ("user", anything else is taken as
+ * group), numeric id, expiry, and optionally the name.  The line must
+ * end in a newline, which is overwritten with a NUL.
+ *
+ * Returns 0 on success, -EINVAL for a malformed line (including an
+ * authname or name that does not fit in IDMAP_NAMESZ) and -ENOMEM if a
+ * buffer or the cache entry cannot be allocated.
+ */
+int
+idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
+{
+	struct ent ent, *res;
+	char *buf1, *bp;
+	int len;
+	int error = -EINVAL;
+
+	/* an empty write must not index buf[-1] */
+	if (buflen < 1 || buf[buflen - 1] != '\n')
+		return (-EINVAL);
+	buf[buflen - 1]= '\0';
+
+	buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (buf1 == NULL)
+		return (-ENOMEM);
+
+	memset(&ent, 0, sizeof(ent));
+
+	/*
+	 * Authentication name.  Reject anything that does not fit with its
+	 * terminator; copying only len bytes into the zeroed entry keeps
+	 * authname NUL terminated for the later strcmp()/hash_str() calls.
+	 */
+	len = qword_get(&buf, buf1, PAGE_SIZE);
+	if (len <= 0 || len >= IDMAP_NAMESZ)
+		goto out;
+	memcpy(ent.authname, buf1, len);
+
+	/* Type */
+	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+		goto out;
+	ent.type = strcmp(buf1, "user") == 0 ?
+		IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
+
+	/* ID */
+	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+		goto out;
+	ent.id = simple_strtoul(buf1, &bp, 10);
+	if (bp == buf1)
+		goto out;
+
+	/* expiry */
+	ent.h.expiry_time = get_expiry(&buf);
+	if (ent.h.expiry_time == 0)
+		goto out;
+
+	/* Name */
+	error = qword_get(&buf, buf1, PAGE_SIZE);
+	if (error == -EINVAL)
+		goto out;
+	if (error == -ENOENT)
+		set_bit(CACHE_NEGATIVE, &ent.h.flags);
+	else {
+		if (error >= IDMAP_NAMESZ) {
+			error = -EINVAL;
+			goto out;
+		}
+		memcpy(ent.name, buf1, sizeof(ent.name));
+	}
+	error = -ENOMEM;
+	if ((res = idtoname_lookup(&ent, 1)) == NULL)
+		goto out;
+
+	/* the lookup returned a reference we do not need */
+	ent_put(&res->h, &idtoname_cache);
+
+	error = 0;
+out:
+	kfree(buf1);
+
+	return error;
+}
+
+static DefineSimpleCacheLookupMap(ent, idtoname);
+
+/*
+ * Name -> ID cache
+ */
+
+static struct cache_head *nametoid_table[ENT_HASHMAX];
+
+/* Hash a nametoid key; only the name contributes to the hash value. */
+static inline int
+nametoid_hash(struct ent *ent)
+{
+ return hash_str(ent->name, ENT_HASHBITS);
+}
+
+/*
+ * Format the upcall for a name -> id lookup (the .cache_request method
+ * of nametoid_cache): "<authname> <user|group> <name>\n".
+ */
+void
+nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
+ int *blen)
+{
+ struct ent *ent = container_of(ch, struct ent, h);
+
+ qword_add(bpp, blen, ent->authname);
+ qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user");
+ qword_add(bpp, blen, ent->name);
+
+ /* replace the separator after the last word with the line end */
+ (*bpp)[-1] = '\n';
+}
+
+/*
+ * Two nametoid keys match when type, name and authname are all equal.
+ * Returns non-zero on a match.
+ */
+static inline int
+nametoid_match(struct ent *a, struct ent *b)
+{
+	if (a->type != b->type)
+		return 0;
+	if (strcmp(a->name, b->name) != 0)
+		return 0;
+	return strcmp(a->authname, b->authname) == 0;
+}
+
+/*
+ * Print one nametoid entry to a seq_file (the .cache_show method of
+ * nametoid_cache).  A NULL h requests the header line.  The id is only
+ * printed for entries with CACHE_VALID set; it is a uid_t and is
+ * therefore printed as unsigned.  Always returns 0.
+ */
+static int
+nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
+{
+	struct ent *ent;
+
+	if (h == NULL) {
+		seq_puts(m, "#domain type name [id]\n");
+		return 0;
+	}
+	ent = container_of(h, struct ent, h);
+	seq_printf(m, "%s %s %s", ent->authname,
+			ent->type == IDMAP_TYPE_GROUP ? "group" : "user",
+			ent->name);
+	if (test_bit(CACHE_VALID, &h->flags))
+		seq_printf(m, " %u", ent->id);
+	seq_printf(m, "\n");
+	return 0;
+}
+
+static struct ent *nametoid_lookup(struct ent *, int);
+int nametoid_parse(struct cache_detail *, char *, int);
+
+/* Cache description for name -> id mappings, named "nfs4.nametoid". */
+struct cache_detail nametoid_cache = {
+ .hash_size = ENT_HASHMAX,
+ .hash_table = nametoid_table,
+ .name = "nfs4.nametoid",
+ .cache_put = ent_put,
+ .cache_request = nametoid_request,
+ .cache_parse = nametoid_parse,
+ .cache_show = nametoid_show,
+ .warn_no_listener = warn_no_idmapd,
+};
+
+/*
+ * Parse one line written to the nametoid cache (the .cache_parse
+ * method of nametoid_cache) and install the resulting entry.
+ *
+ * Expected fields: authname, type ("user", anything else is taken as
+ * group), name, expiry, and optionally the numeric id.  The line must
+ * end in a newline, which is overwritten with a NUL.
+ *
+ * Returns 0 on success, -EINVAL for a malformed line (including an
+ * empty name, or an authname or name that does not fit in
+ * IDMAP_NAMESZ) and -ENOMEM if a buffer or the cache entry cannot be
+ * allocated.  Word lengths are kept in their own variable so that a
+ * length can never leak out as the return value.
+ */
+int
+nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
+{
+	struct ent ent, *res;
+	char *buf1;
+	int len;
+	int error = -EINVAL;
+
+	/* an empty write must not index buf[-1] */
+	if (buflen < 1 || buf[buflen - 1] != '\n')
+		return (-EINVAL);
+	buf[buflen - 1]= '\0';
+
+	buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (buf1 == NULL)
+		return (-ENOMEM);
+
+	memset(&ent, 0, sizeof(ent));
+
+	/*
+	 * Authentication name.  Copying only len bytes into the zeroed
+	 * entry keeps the string NUL terminated.
+	 */
+	len = qword_get(&buf, buf1, PAGE_SIZE);
+	if (len <= 0 || len >= IDMAP_NAMESZ)
+		goto out;
+	memcpy(ent.authname, buf1, len);
+
+	/* Type */
+	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
+		goto out;
+	ent.type = strcmp(buf1, "user") == 0 ?
+		IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
+
+	/* Name */
+	len = qword_get(&buf, buf1, PAGE_SIZE);
+	if (len <= 0 || len >= IDMAP_NAMESZ)
+		goto out;
+	memcpy(ent.name, buf1, len);
+
+	/* expiry */
+	ent.h.expiry_time = get_expiry(&buf);
+	if (ent.h.expiry_time == 0)
+		goto out;
+
+	/* ID */
+	error = get_int(&buf, &ent.id);
+	if (error == -EINVAL)
+		goto out;
+	if (error == -ENOENT)
+		set_bit(CACHE_NEGATIVE, &ent.h.flags);
+
+	error = -ENOMEM;
+	if ((res = nametoid_lookup(&ent, 1)) == NULL)
+		goto out;
+
+	/* the lookup returned a reference we do not need */
+	ent_put(&res->h, &nametoid_cache);
+	error = 0;
+out:
+	kfree(buf1);
+
+	return (error);
+}
+
+static DefineSimpleCacheLookupMap(ent, nametoid);
+
+/*
+ * Exported API
+ */
+
+/* Register the idtoname and nametoid caches. */
+void
+nfsd_idmap_init(void)
+{
+ cache_register(&idtoname_cache);
+ cache_register(&nametoid_cache);
+}
+
+/*
+ * Unregister the idtoname and nametoid caches.
+ * NOTE(review): the return values of cache_unregister() are not checked.
+ */
+void
+nfsd_idmap_shutdown(void)
+{
+ cache_unregister(&idtoname_cache);
+ cache_unregister(&nametoid_cache);
+}
+
+/*
+ * Deferred request handling
+ */
+
+/*
+ * State of one lookup that may have to wait for an upcall reply.
+ * Reference counted through get_mdr()/put_mdr().
+ */
+struct idmap_defer_req {
+ struct cache_req req; /* passed to cache_check(); .defer is idmap_defer */
+ struct cache_deferred_req deferred_req; /* handed out by idmap_defer() */
+ wait_queue_head_t waitq; /* idmap_lookup() sleeps here, idmap_revisit() wakes it */
+ atomic_t count; /* reference count; the structure is freed at zero */
+};
+
+/* Drop one reference on mdr and free it when the count reaches zero. */
+static inline void
+put_mdr(struct idmap_defer_req *mdr)
+{
+ if (atomic_dec_and_test(&mdr->count))
+ kfree(mdr);
+}
+
+/* Take an additional reference on mdr. */
+static inline void
+get_mdr(struct idmap_defer_req *mdr)
+{
+ atomic_inc(&mdr->count);
+}
+
+/*
+ * Revisit callback installed by idmap_defer(): wake up whoever sleeps
+ * on the request's wait queue and drop the reference that
+ * idmap_defer() took.  toomany is ignored.
+ */
+static void
+idmap_revisit(struct cache_deferred_req *dreq, int toomany)
+{
+	struct idmap_defer_req *mdr;
+
+	mdr = container_of(dreq, struct idmap_defer_req, deferred_req);
+	wake_up(&mdr->waitq);
+	put_mdr(mdr);
+}
+
+/*
+ * The .defer method of an idmap request: install idmap_revisit() as
+ * the revisit callback, take a reference for the deferred request and
+ * return the embedded cache_deferred_req.
+ */
+static struct cache_deferred_req *
+idmap_defer(struct cache_req *req)
+{
+	struct idmap_defer_req *mdr;
+
+	mdr = container_of(req, struct idmap_defer_req, req);
+	mdr->deferred_req.revisit = idmap_revisit;
+	get_mdr(mdr);
+
+	return &mdr->deferred_req;
+}
+
+/*
+ * Look key up with lookup_fn (without creating or updating an entry
+ * from it), store the result in *item and let cache_check() decide
+ * whether the entry is usable, passing mdr->req as the request.
+ *
+ * Returns -ENOMEM if the lookup yields no entry, otherwise the result
+ * of cache_check().
+ */
+static inline int
+do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *, int), struct ent *key,
+		struct cache_detail *detail, struct ent **item,
+		struct idmap_defer_req *mdr)
+{
+	struct ent *found = lookup_fn(key, 0);
+
+	*item = found;
+	if (found == NULL)
+		return -ENOMEM;
+
+	return cache_check(detail, &found->h, &mdr->req);
+}
+
+/*
+ * Look key up again without issuing or waiting for an upcall.
+ *
+ * Returns 0 with a referenced entry in *item if the entry is valid, not
+ * expired, refreshed since the last cache flush and not negative.
+ * Otherwise *item is set to NULL and the result is -ENOMEM (lookup
+ * failed), -ETIMEDOUT (entry not usable yet or any more) or -ENOENT
+ * (negative entry).
+ */
+static inline int
+do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *, int),
+ struct ent *key, struct cache_detail *detail,
+ struct ent **item)
+{
+ int ret = -ENOMEM;
+
+ *item = lookup_fn(key, 0);
+ if (!*item)
+ goto out_err;
+ ret = -ETIMEDOUT;
+ /* unusable if never filled in, expired, or older than the last flush */
+ if (!test_bit(CACHE_VALID, &(*item)->h.flags)
+ || (*item)->h.expiry_time < get_seconds()
+ || detail->flush_time > (*item)->h.last_refresh)
+ goto out_put;
+ ret = -ENOENT;
+ if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags))
+ goto out_put;
+ return 0;
+out_put:
+ ent_put(&(*item)->h, detail);
+out_err:
+ *item = NULL;
+ return ret;
+}
+
+/*
+ * Look key up in the given cache.  If the first attempt returns
+ * -EAGAIN, sleep (interruptibly) for up to one second waiting for the
+ * entry to become valid, then retry once with
+ * do_idmap_lookup_nowait().
+ *
+ * Returns 0 with a referenced entry in *item, or a negative error.
+ * rqstp is not used here.
+ *
+ * NOTE(review): after the -EAGAIN return the wait condition still
+ * dereferences *item; confirm that cache_check() leaves the entry
+ * alive in that case.
+ */
+static int
+idmap_lookup(struct svc_rqst *rqstp,
+ struct ent *(*lookup_fn)(struct ent *, int), struct ent *key,
+ struct cache_detail *detail, struct ent **item)
+{
+ struct idmap_defer_req *mdr;
+ int ret;
+
+ mdr = kmalloc(sizeof(*mdr), GFP_KERNEL);
+ if (!mdr)
+ return -ENOMEM;
+ memset(mdr, 0, sizeof(*mdr));
+ /* this initial reference is dropped by the put_mdr() at the end */
+ atomic_set(&mdr->count, 1);
+ init_waitqueue_head(&mdr->waitq);
+ mdr->req.defer = idmap_defer;
+ ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr);
+ if (ret == -EAGAIN) {
+ wait_event_interruptible_timeout(mdr->waitq,
+ test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ);
+ ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item);
+ }
+ put_mdr(mdr);
+ return ret;
+}
+
+/*
+ * Map a name of namelen bytes (not NUL terminated) to a numeric id of
+ * the given type through the nametoid cache.
+ *
+ * Returns 0 and stores the id in *id on success, -EINVAL if the name
+ * does not fit in a cache key together with its terminator, -ESRCH if
+ * the name has no mapping, or another negative error from the lookup.
+ */
+static int
+idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen,
+		uid_t *id)
+{
+	struct ent *item, key = {
+		.type = type,
+	};
+	int ret;
+
+	/*
+	 * Compare without adding to namelen: "namelen + 1" wraps to 0 for
+	 * namelen == 0xffffffff and would let the memcpy() below run wild.
+	 */
+	if (namelen >= sizeof(key.name))
+		return -EINVAL;
+	memcpy(key.name, name, namelen);
+	key.name[namelen] = '\0';
+	strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname));
+	ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item);
+	if (ret == -ENOENT)
+		ret = -ESRCH; /* nfserr_badname */
+	if (ret)
+		return ret;
+	*id = item->id;
+	ent_put(&item->h, &nametoid_cache);
+	return 0;
+}
+
+/*
+ * Map a numeric id of the given type to a name through the idtoname
+ * cache and copy it, without a terminating NUL, into name.
+ *
+ * If the id has no mapping, its decimal representation is written
+ * instead (NUL terminated by sprintf()).  Returns the number of name
+ * bytes written, or a negative error from the lookup.
+ */
+static int
+idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
+{
+	struct ent *item, key = {
+		.id = id,
+		.type = type,
+	};
+	int ret;
+
+	strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname));
+	ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item);
+	switch (ret) {
+	case 0:
+		break;
+	case -ENOENT:
+		/* no mapping: fall back to the number itself */
+		return sprintf(name, "%u", id);
+	default:
+		return ret;
+	}
+
+	ret = strlen(item->name);
+	BUG_ON(ret > IDMAP_NAMESZ);
+	memcpy(name, item->name, ret);
+	ent_put(&item->h, &idtoname_cache);
+	return ret;
+}
+
+/*
+ * Map a user name of namelen bytes to a uid stored in *id.
+ * Returns 0 or a negative error; see idmap_name_to_id().
+ */
+int
+nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
+ __u32 *id)
+{
+ return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id);
+}
+
+/*
+ * Map a group name of namelen bytes to a gid stored in *id.
+ * Returns 0 or a negative error; see idmap_name_to_id().
+ */
+int
+nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
+ __u32 *id)
+{
+ return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id);
+}
+
+/*
+ * Write the user name for uid id into name.  Returns the number of
+ * bytes written or a negative error; see idmap_id_to_name().
+ */
+int
+nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
+{
+ return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
+}
+
+/*
+ * Write the group name for gid id into name.  Returns the number of
+ * bytes written or a negative error; see idmap_id_to_name().
+ */
+int
+nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
+{
+ return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
+}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
new file mode 100644
index 000000000000..e8158741e8b5
--- /dev/null
+++ b/fs/nfsd/nfs4proc.c
@@ -0,0 +1,984 @@
+/*
+ * fs/nfsd/nfs4proc.c
+ *
+ * Server-side procedures for NFSv4.
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Note: some routines in this file are just trivial wrappers
+ * (e.g. nfsd4_lookup()) defined solely for the sake of consistent
+ * naming. Since all such routines have been declared "inline",
+ * there shouldn't be any associated overhead. At some point in
+ * the future, I might inline these "by hand" to clean up a
+ * little.
+ */
+
+#include <linux/param.h>
+#include <linux/major.h>
+#include <linux/slab.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <linux/nfs4_acl.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+/*
+ * Make @dst a copy of @src.  Whatever @dst referenced before is released,
+ * and an extra reference is taken on @src's dentry and (if set) export so
+ * that both handles can later be put independently.
+ */
+static inline void
+fh_dup2(struct svc_fh *dst, struct svc_fh *src)
+{
+	/* release the old contents before they are overwritten */
+	fh_put(dst);
+
+	/* the structure copy below shares these objects, so pin them */
+	if (src->fh_export != NULL)
+		cache_get(&src->fh_export->h);
+	dget(src->fh_dentry);
+
+	*dst = *src;
+}
+
+/*
+ * Check that the caller may open @current_fh with the share access and
+ * share deny modes requested in @open.
+ *
+ * Returns nfserr_inval if truncation is requested without write access,
+ * otherwise the result of fh_verify() for a regular file with the access
+ * mode derived below.
+ */
+static int
+do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+	int accmode;
+
+	if (open->op_truncate &&
+	    !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+		return nfserr_inval;
+
+	accmode = MAY_NOP;
+	if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
+		accmode = MAY_READ;
+	/*
+	 * Write permission must follow the requested *access* mode.  The
+	 * previous test looked only at op_share_deny, so an OPEN for write
+	 * that denied nothing was never checked for write permission.
+	 */
+	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+		accmode |= (MAY_WRITE | MAY_TRUNC);
+	/* denying writes to others still requires write permission */
+	if (open->op_share_deny & NFS4_SHARE_DENY_WRITE)
+		accmode |= MAY_WRITE;
+	accmode |= MAY_OWNER_OVERRIDE;
+
+	return fh_verify(rqstp, current_fh, S_IFREG, accmode);
+}
+
+/*
+ * OPEN by name: look up (or, if op_create is set, create) op_fname in the
+ * directory @current_fh.  On success the directory's change info is
+ * recorded in open->op_cinfo, @current_fh is replaced by the opened file,
+ * the file's handle is saved in the stateowner's replay cache, and the
+ * open permission check is run on the file.
+ *
+ * Returns the first non-zero status from the create/lookup or from
+ * do_open_permission(), else 0.
+ */
+static int
+do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+ struct svc_fh resfh;
+ int status;
+
+ fh_init(&resfh, NFS4_FHSIZE);
+ /* cleared here; only the create path below may set it again */
+ open->op_truncate = 0;
+
+ if (open->op_create) {
+ /*
+ * Note: create modes (UNCHECKED,GUARDED...) are the same
+ * in NFSv4 as in v3.
+ */
+ status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data,
+ open->op_fname.len, &open->op_iattr,
+ &resfh, open->op_createmode,
+ (u32 *)open->op_verf.data, &open->op_truncate);
+ }
+ else {
+ status = nfsd_lookup(rqstp, current_fh,
+ open->op_fname.data, open->op_fname.len, &resfh);
+ /* NOTE(review): presumably nfsd_lookup() leaves the directory locked - confirm */
+ fh_unlock(current_fh);
+ }
+
+ if (!status) {
+ /* must happen before current_fh stops referring to the directory */
+ set_change_info(&open->op_cinfo, current_fh);
+
+ /* set replay cache */
+ fh_dup2(current_fh, &resfh);
+ open->op_stateowner->so_replay.rp_openfh_len =
+ resfh.fh_handle.fh_size;
+ memcpy(open->op_stateowner->so_replay.rp_openfh,
+ &resfh.fh_handle.fh_base,
+ resfh.fh_handle.fh_size);
+
+ /* current_fh now names the opened file */
+ status = do_open_permission(rqstp, current_fh, open);
+ }
+
+ /* drop resfh's own references; current_fh holds its own after fh_dup2() */
+ fh_put(&resfh);
+ return status;
+}
+
+/*
+ * OPEN by filehandle (reclaim): @current_fh already names the file.
+ * Validates the reclaim, zeroes the change info, records the handle in the
+ * stateowner's replay cache, derives op_truncate from the supplied
+ * attributes and finally runs the open permission check.
+ */
+static int
+do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+	struct nfs4_replay *rp;
+	int err;
+
+	/* Only reclaims from previously confirmed clients are valid */
+	err = nfs4_check_open_reclaim(&open->op_clientid);
+	if (err)
+		return err;
+
+	/*
+	 * The target directory is unknown here, so there is no change
+	 * info to report; hand back zeroes.
+	 */
+	memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info));
+
+	/* set replay cache */
+	rp = &open->op_stateowner->so_replay;
+	rp->rp_openfh_len = current_fh->fh_handle.fh_size;
+	memcpy(rp->rp_openfh, &current_fh->fh_handle.fh_base,
+	       current_fh->fh_handle.fh_size);
+
+	/* truncate only when a size of exactly zero was supplied */
+	open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
+			    (open->op_iattr.ia_size == 0);
+
+	return do_open_permission(rqstp, current_fh, open);
+}
+
+
+/*
+ * OPEN operation.
+ *
+ * Rejects requests that conflict with the grace period or that combine
+ * create with a non-NULL claim, then, under the state lock, runs the seqid
+ * and replay check (nfsd4_process_open1), resolves the target file
+ * according to the claim type and performs the open proper
+ * (nfsd4_process_open2).
+ *
+ * Returns an nfs status; NFSERR_REPLAY_ME tells the caller to answer from
+ * the stateowner's replay cache.
+ */
+static inline int
+nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+ int status;
+ dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n",
+ (int)open->op_fname.len, open->op_fname.data,
+ open->op_stateowner);
+
+ /* during grace only reclaims are allowed; afterwards reclaims are not */
+ if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+ return nfserr_grace;
+
+ if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+ return nfserr_no_grace;
+
+ /* This check required by spec. */
+ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
+ return nfserr_inval;
+
+ nfs4_lock_state();
+
+ /* check seqid for replay. set nfs4_owner */
+ status = nfsd4_process_open1(open);
+ if (status == NFSERR_REPLAY_ME) {
+ /*
+ * Replayed request: restore the filehandle saved by the
+ * original OPEN so later ops in the compound see the same
+ * current filehandle.
+ */
+ struct nfs4_replay *rp = &open->op_stateowner->so_replay;
+ fh_put(current_fh);
+ current_fh->fh_handle.fh_size = rp->rp_openfh_len;
+ memcpy(&current_fh->fh_handle.fh_base, rp->rp_openfh,
+ rp->rp_openfh_len);
+ status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
+ if (status)
+ dprintk("nfsd4_open: replay failed"
+ " restoring previous filehandle\n");
+ else
+ status = NFSERR_REPLAY_ME;
+ }
+ if (status)
+ goto out;
+ switch (open->op_claim_type) {
+ case NFS4_OPEN_CLAIM_NULL:
+ /*
+ * (1) set CURRENT_FH to the file being opened,
+ * creating it if necessary, (2) set open->op_cinfo,
+ * (3) set open->op_truncate if the file is to be
+ * truncated after opening, (4) do permission checking.
+ */
+ status = do_open_lookup(rqstp, current_fh, open);
+ if (status)
+ goto out;
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ /*
+ * The CURRENT_FH is already set to the file being
+ * opened. (1) set open->op_cinfo, (2) set
+ * open->op_truncate if the file is to be truncated
+ * after opening, (3) do permission checking.
+ */
+ status = do_open_fhandle(rqstp, current_fh, open);
+ if (status)
+ goto out;
+ break;
+ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+ printk("NFSD: unsupported OPEN claim type %d\n",
+ open->op_claim_type);
+ status = nfserr_notsupp;
+ goto out;
+ default:
+ printk("NFSD: Invalid OPEN claim type %d\n",
+ open->op_claim_type);
+ status = nfserr_inval;
+ goto out;
+ }
+ /*
+ * nfsd4_process_open2() does the actual opening of the file. If
+ * successful, it (1) truncates the file if open->op_truncate was
+ * set, (2) sets open->op_stateid, (3) sets open->op_delegation.
+ */
+ status = nfsd4_process_open2(rqstp, current_fh, open);
+out:
+ /*
+ * Take a stateowner reference on every exit that held the state
+ * lock; nfsd4_proc_compound() drops it after encoding the reply.
+ */
+ if (open->op_stateowner)
+ nfs4_get_stateowner(open->op_stateowner);
+ nfs4_unlock_state();
+ return status;
+}
+
+/*
+ * filehandle-manipulating ops.
+ */
+/*
+ * GETFH: hand the current filehandle back through @getfh, or fail with
+ * nfserr_nofilehandle when no current filehandle has been set.
+ */
+static inline int
+nfsd4_getfh(struct svc_fh *current_fh, struct svc_fh **getfh)
+{
+	int status = nfserr_nofilehandle;
+
+	if (current_fh->fh_dentry != NULL) {
+		*getfh = current_fh;
+		status = nfs_ok;
+	}
+	return status;
+}
+
+/*
+ * PUTFH: replace the current filehandle with the one supplied by the
+ * client and verify it.  Returns the result of fh_verify().
+ */
+static inline int
+nfsd4_putfh(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_putfh *putfh)
+{
+ /* release whatever the current filehandle referred to before */
+ fh_put(current_fh);
+ current_fh->fh_handle.fh_size = putfh->pf_fhlen;
+ /* NOTE(review): pf_fhlen is not bounded here; presumably the XDR decoder limits it to NFS4_FHSIZE - confirm */
+ memcpy(&current_fh->fh_handle.fh_base, putfh->pf_fhval, putfh->pf_fhlen);
+ return fh_verify(rqstp, current_fh, 0, MAY_NOP);
+}
+
+/*
+ * PUTROOTFH: point the current filehandle at the client's pseudo-root
+ * export and switch to the credentials appropriate for that export.
+ */
+static inline int
+nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh)
+{
+	int err;
+
+	fh_put(current_fh);
+	err = exp_pseudoroot(rqstp->rq_client, current_fh,
+			     &rqstp->rq_chandle);
+	if (err)
+		return err;
+
+	/* nfsd_setuser() returns an errno; convert it to an nfs status */
+	return nfserrno(nfsd_setuser(rqstp, current_fh->fh_export));
+}
+
+/*
+ * RESTOREFH: copy the saved filehandle into the current filehandle.
+ * Fails with nfserr_restorefh when nothing has been saved.
+ */
+static inline int
+nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
+{
+	if (save_fh->fh_dentry != NULL) {
+		fh_dup2(current_fh, save_fh);
+		return nfs_ok;
+	}
+	return nfserr_restorefh;
+}
+
+/*
+ * SAVEFH: copy the current filehandle into the saved filehandle.
+ * Fails with nfserr_nofilehandle when there is no current filehandle.
+ */
+static inline int
+nfsd4_savefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
+{
+	if (current_fh->fh_dentry != NULL) {
+		fh_dup2(save_fh, current_fh);
+		return nfs_ok;
+	}
+	return nfserr_nofilehandle;
+}
+
+/*
+ * misc nfsv4 ops
+ */
+/*
+ * ACCESS: reject requests carrying bits outside NFS3_ACCESS_FULL, then
+ * let nfsd_access() narrow the requested mask to what is permitted and
+ * report which bits are supported.
+ */
+static inline int
+nfsd4_access(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_access *access)
+{
+	int status = nfserr_inval;
+
+	if (!(access->ac_req_access & ~NFS3_ACCESS_FULL)) {
+		/* start from the full request; nfsd_access() edits it in place */
+		access->ac_resp_access = access->ac_req_access;
+		status = nfsd_access(rqstp, current_fh,
+				     &access->ac_resp_access,
+				     &access->ac_supported);
+	}
+	return status;
+}
+
+/*
+ * COMMIT: fill the write verifier from the server boot time and flush the
+ * requested range.  nfserr_symlink from nfsd_commit() is reported as
+ * nfserr_inval.
+ */
+static inline int
+nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_commit *commit)
+{
+	u32 *verf = (u32 *)commit->co_verf.data;
+	int status;
+
+	/* verifier = boot time, written before the commit is attempted */
+	verf[0] = nfssvc_boot.tv_sec;
+	verf[1] = nfssvc_boot.tv_usec;
+
+	status = nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count);
+	return (status == nfserr_symlink) ? nfserr_inval : status;
+}
+
+/*
+ * CREATE: create a non-regular object (symlink, block/char device, socket,
+ * fifo or directory) named cr_name in the directory @current_fh.
+ *
+ * On success the directory's change info is stored in create->cr_cinfo and
+ * @current_fh is replaced by the new object.  Returns an nfs status;
+ * nfserr_badtype for any other cr_type, nfserr_inval when the device
+ * numbers do not survive the MKDEV()/MAJOR()/MINOR() round trip.
+ */
+static int
+nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_create *create)
+{
+ struct svc_fh resfh;
+ int status;
+ dev_t rdev;
+
+ fh_init(&resfh, NFS4_FHSIZE);
+
+ status = fh_verify(rqstp, current_fh, S_IFDIR, MAY_CREATE);
+ /* a symlink where a directory is required is reported as "not a directory" */
+ if (status == nfserr_symlink)
+ status = nfserr_notdir;
+ if (status)
+ return status;
+
+ switch (create->cr_type) {
+ case NF4LNK:
+ /* ugh! we have to null-terminate the linktext, or
+ * vfs_symlink() will choke. it is always safe to
+ * null-terminate by brute force, since at worst we
+ * will overwrite the first byte of the create namelen
+ * in the XDR buffer, which has already been extracted
+ * during XDR decode.
+ */
+ create->cr_linkname[create->cr_linklen] = 0;
+
+ status = nfsd_symlink(rqstp, current_fh, create->cr_name,
+ create->cr_namelen, create->cr_linkname,
+ create->cr_linklen, &resfh, &create->cr_iattr);
+ break;
+
+ case NF4BLK:
+ rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
+ /* reject major/minor values that do not fit in a dev_t */
+ if (MAJOR(rdev) != create->cr_specdata1 ||
+ MINOR(rdev) != create->cr_specdata2)
+ return nfserr_inval;
+ status = nfsd_create(rqstp, current_fh, create->cr_name,
+ create->cr_namelen, &create->cr_iattr,
+ S_IFBLK, rdev, &resfh);
+ break;
+
+ case NF4CHR:
+ rdev = MKDEV(create->cr_specdata1, create->cr_specdata2);
+ /* reject major/minor values that do not fit in a dev_t */
+ if (MAJOR(rdev) != create->cr_specdata1 ||
+ MINOR(rdev) != create->cr_specdata2)
+ return nfserr_inval;
+ status = nfsd_create(rqstp, current_fh, create->cr_name,
+ create->cr_namelen, &create->cr_iattr,
+ S_IFCHR, rdev, &resfh);
+ break;
+
+ case NF4SOCK:
+ status = nfsd_create(rqstp, current_fh, create->cr_name,
+ create->cr_namelen, &create->cr_iattr,
+ S_IFSOCK, 0, &resfh);
+ break;
+
+ case NF4FIFO:
+ status = nfsd_create(rqstp, current_fh, create->cr_name,
+ create->cr_namelen, &create->cr_iattr,
+ S_IFIFO, 0, &resfh);
+ break;
+
+ case NF4DIR:
+ /* a size attribute is ignored when creating a directory */
+ create->cr_iattr.ia_valid &= ~ATTR_SIZE;
+ status = nfsd_create(rqstp, current_fh, create->cr_name,
+ create->cr_namelen, &create->cr_iattr,
+ S_IFDIR, 0, &resfh);
+ break;
+
+ default:
+ status = nfserr_badtype;
+ }
+
+ if (!status) {
+ fh_unlock(current_fh);
+ /* record the directory's change info before current_fh is replaced */
+ set_change_info(&create->cr_cinfo, current_fh);
+ fh_dup2(current_fh, &resfh);
+ }
+
+ fh_put(&resfh);
+ return status;
+}
+
+/*
+ * GETATTR: validate the filehandle and the requested attribute bitmap,
+ * mask the bitmap down to the supported attributes and remember the
+ * filehandle for the encoder.  Asking for a write-only attribute yields
+ * nfserr_inval.
+ */
+static inline int
+nfsd4_getattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_getattr *getattr)
+{
+	int err;
+
+	err = fh_verify(rqstp, current_fh, 0, MAY_NOP);
+	if (!err && (getattr->ga_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1))
+		err = nfserr_inval;
+	if (err)
+		return err;
+
+	/* silently drop attributes this server does not support */
+	getattr->ga_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0;
+	getattr->ga_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1;
+
+	getattr->ga_fhp = current_fh;
+	return nfs_ok;
+}
+
+/*
+ * LINK: create a hard link named li_name in the directory @current_fh to
+ * the object named by @save_fh.  On success the directory's change info
+ * is stored in link->li_cinfo.
+ */
+static inline int
+nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh,
+	   struct svc_fh *save_fh, struct nfsd4_link *link)
+{
+	int status;
+
+	if (save_fh->fh_dentry == NULL)
+		return nfserr_nofilehandle;
+
+	status = nfsd_link(rqstp, current_fh, link->li_name, link->li_namelen, save_fh);
+	if (status == 0)
+		set_change_info(&link->li_cinfo, current_fh);
+	return status;
+}
+
+/*
+ * LOOKUPP: move the current filehandle to its parent directory.  When the
+ * current filehandle is already the client's pseudo-root there is no
+ * parent and nfserr_noent is returned.
+ */
+static int
+nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh)
+{
+	struct svc_fh rootfh;
+	int err;
+	int at_root;
+
+	fh_init(&rootfh, NFS4_FHSIZE);
+	err = exp_pseudoroot(rqstp->rq_client, &rootfh, &rqstp->rq_chandle);
+	if (err != 0)
+		return err;
+
+	/* compare while rootfh still holds its dentry, then release it */
+	at_root = (rootfh.fh_dentry == current_fh->fh_dentry);
+	fh_put(&rootfh);
+	if (at_root)
+		return nfserr_noent;
+
+	return nfsd_lookup(rqstp, current_fh, "..", 2, current_fh);
+}
+
+/*
+ * LOOKUP: look up lo_name in the directory @current_fh.  @current_fh is
+ * passed as both the directory and the result handle, so on success it
+ * refers to the object found.  Returns nfsd_lookup()'s status.
+ */
+static inline int
+nfsd4_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lookup *lookup)
+{
+ return nfsd_lookup(rqstp, current_fh, lookup->lo_name, lookup->lo_len, current_fh);
+}
+
+/*
+ * READ: validate the offset and the stateid, then stash the request,
+ * filehandle and (possibly NULL) open file in @read for later use.
+ * The context fields are filled in whether or not the stateid check
+ * succeeded.
+ */
+static inline int
+nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
+{
+	struct file *filp = NULL;
+	int status;
+
+	/* no need to check permission - this will be done in nfsd_read() */
+
+	if (read->rd_offset >= OFFSET_MAX)
+		return nfserr_inval;
+
+	/* check stateid */
+	nfs4_lock_state();
+	status = nfs4_preprocess_stateid_op(current_fh, &read->rd_stateid,
+					    CHECK_FH | RD_STATE, &filp);
+	if (status)
+		dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
+	else
+		status = nfs_ok;
+	nfs4_unlock_state();
+
+	read->rd_rqstp = rqstp;
+	read->rd_fhp = current_fh;
+	read->rd_filp = filp;
+	return status;
+}
+
+/*
+ * READDIR: validate the attribute bitmap and the cookie, mask the bitmap
+ * to the supported attributes and record the request context in @readdir.
+ *
+ * Cookies above 32 bits, the values 1 and 2, and a zero cookie paired with
+ * a non-zero verifier are rejected with nfserr_bad_cookie.
+ */
+static inline int
+nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir)
+{
+	static const nfs4_verifier zeroverf;
+	u64 cookie = readdir->rd_cookie;
+
+	/* no need to check permission - this will be done in nfsd_readdir() */
+
+	if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
+		return nfserr_inval;
+
+	readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0;
+	readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1;
+
+	if (cookie > ~(u32)0 || cookie == 1 || cookie == 2)
+		return nfserr_bad_cookie;
+	/* a fresh listing (cookie 0) must come with an all-zero verifier */
+	if (cookie == 0 &&
+	    memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))
+		return nfserr_bad_cookie;
+
+	readdir->rd_rqstp = rqstp;
+	readdir->rd_fhp = current_fh;
+	return nfs_ok;
+}
+
+/*
+ * READLINK: only records the request and filehandle in @readlink and
+ * returns nfs_ok; no link is read here.
+ * NOTE(review): presumably the encoder performs the actual readlink - confirm.
+ */
+static inline int
+nfsd4_readlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readlink *readlink)
+{
+ readlink->rl_rqstp = rqstp;
+ readlink->rl_fhp = current_fh;
+ return nfs_ok;
+}
+
+/*
+ * REMOVE: unlink rm_name from the directory @current_fh.  On success the
+ * directory is unlocked and its change info stored in remove->rm_cinfo.
+ * nfserr_symlink from nfsd_unlink() is reported as nfserr_notdir.
+ */
+static inline int
+nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_remove *remove)
+{
+	int status;
+
+	status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
+	if (status == nfserr_symlink)
+		return nfserr_notdir;
+	if (status)
+		return status;
+
+	fh_unlock(current_fh);
+	set_change_info(&remove->rm_cinfo, current_fh);
+	return status;
+}
+
+/*
+ * RENAME: move rn_sname in the directory @save_fh (source) to rn_tname in
+ * the directory @current_fh (target).
+ *
+ * On success the source directory's change info is stored in
+ * rename->rn_sinfo and the target directory's in rename->rn_tinfo.
+ * Returns nfserr_nofilehandle when no filehandle has been saved; several
+ * errors from nfsd_rename() are remapped to the values NFSv4 expects.
+ */
+static inline int
+nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
+	     struct svc_fh *save_fh, struct nfsd4_rename *rename)
+{
+	int status = nfserr_nofilehandle;
+
+	if (!save_fh->fh_dentry)
+		return status;
+	status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
+			     rename->rn_snamelen, current_fh,
+			     rename->rn_tname, rename->rn_tnamelen);
+
+	/* The underlying filesystem returns different errors than required
+	 * by NFSv4. Both save_fh and current_fh have been verified. */
+	if (status == nfserr_isdir)
+		status = nfserr_exist;
+	else if ((status == nfserr_notdir) &&
+		 (S_ISDIR(save_fh->fh_dentry->d_inode->i_mode) &&
+		  S_ISDIR(current_fh->fh_dentry->d_inode->i_mode)))
+		status = nfserr_exist;
+	else if (status == nfserr_symlink)
+		status = nfserr_notdir;
+
+	if (!status) {
+		/*
+		 * save_fh is the source directory and current_fh the
+		 * target (see the nfsd_rename() call above); these two
+		 * were previously filled in the wrong way round.
+		 */
+		set_change_info(&rename->rn_sinfo, save_fh);
+		set_change_info(&rename->rn_tinfo, current_fh);
+	}
+	return status;
+}
+
+/*
+ * SETATTR: apply the attributes (and optional ACL) in @setattr to the
+ * current filehandle.  A size change is first validated against the
+ * supplied stateid under the state lock.  Returns nfserr_nofilehandle
+ * when there is no current filehandle.
+ */
+static inline int
+nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr)
+{
+	int status;
+
+	if (!current_fh->fh_dentry)
+		return nfserr_nofilehandle;
+
+	if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
+		/* changing the size needs a stateid valid for writing */
+		nfs4_lock_state();
+		status = nfs4_preprocess_stateid_op(current_fh,
+						    &setattr->sa_stateid,
+						    CHECK_FH | WR_STATE, NULL);
+		if (status)
+			dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
+		nfs4_unlock_state();
+		if (status)
+			return status;
+	}
+
+	/* the ACL, when present, is applied before the other attributes */
+	status = nfs_ok;
+	if (setattr->sa_acl != NULL)
+		status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl);
+	if (status)
+		return status;
+
+	return nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr,
+			    0, (time_t)0);
+}
+
+/*
+ * WRITE: validate the offset and stateid, pre-fill the reply fields
+ * (byte count, stability level, boot-time verifier) and perform the write
+ * through nfsd_write().  nfserr_symlink is reported as nfserr_inval.
+ */
+static inline int
+nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write)
+{
+	struct file *filp = NULL;
+	u32 *verf;
+	int status;
+
+	/* no need to check permission - this will be done in nfsd_write() */
+
+	if (write->wr_offset >= OFFSET_MAX)
+		return nfserr_inval;
+
+	nfs4_lock_state();
+	status = nfs4_preprocess_stateid_op(current_fh, &write->wr_stateid,
+					    CHECK_FH | WR_STATE, &filp);
+	if (status)
+		dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
+	nfs4_unlock_state();
+	if (status)
+		return status;
+
+	write->wr_bytes_written = write->wr_buflen;
+	write->wr_how_written = write->wr_stable_how;
+
+	/* verifier = server boot time */
+	verf = (u32 *)write->wr_verifier.data;
+	verf[0] = nfssvc_boot.tv_sec;
+	verf[1] = nfssvc_boot.tv_usec;
+
+	status = nfsd_write(rqstp, current_fh, filp, write->wr_offset,
+			    write->wr_vec, write->wr_vlen, write->wr_buflen,
+			    &write->wr_how_written);
+
+	return (status == nfserr_symlink) ? nfserr_inval : status;
+}
+
+/* This routine never returns NFS_OK! If there are no other errors, it
+ * will return NFSERR_SAME or NFSERR_NOT_SAME depending on whether the
+ * attributes matched. VERIFY is implemented by mapping NFSERR_SAME
+ * to NFS_OK after the call; NVERIFY by mapping NFSERR_NOT_SAME to NFS_OK.
+ *
+ * The comparison is done by encoding the object's current attributes for
+ * the client-supplied bitmap into a scratch buffer and comparing the
+ * encoded bytes with the client-supplied attribute data.
+ */
+static int
+nfsd4_verify(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_verify *verify)
+{
+ u32 *buf, *p;
+ int count;
+ int status;
+
+ status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
+ if (status)
+ return status;
+
+ /* unlike GETATTR, unsupported attribute bits are an error here */
+ if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0)
+ || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1))
+ return nfserr_attrnotsupp;
+ if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)
+ || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1))
+ return nfserr_inval;
+ /* attribute data must be a whole number of 4-byte words */
+ if (verify->ve_attrlen & 3)
+ return nfserr_inval;
+
+ /* count in words:
+ * bitmap_len(1) + bitmap(2) + attr_len(1) = 4
+ */
+ count = 4 + (verify->ve_attrlen >> 2);
+ buf = kmalloc(count << 2, GFP_KERNEL);
+ if (!buf)
+ return nfserr_resource;
+
+ /* count is passed by pointer so the encoder can update it */
+ status = nfsd4_encode_fattr(current_fh, current_fh->fh_export,
+ current_fh->fh_dentry, buf,
+ &count, verify->ve_bmval,
+ rqstp);
+
+ /* this means that nfsd4_encode_fattr() ran out of space */
+ if (status == nfserr_resource && count == 0)
+ status = nfserr_not_same;
+ if (status)
+ goto out_kfree;
+
+ /* skip bitmap_len(1) + bitmap(2); p now points at the encoded attr_len */
+ p = buf + 3;
+ status = nfserr_not_same;
+ if (ntohl(*p++) != verify->ve_attrlen)
+ goto out_kfree;
+ if (!memcmp(p, verify->ve_attrval, verify->ve_attrlen))
+ status = nfserr_same;
+
+out_kfree:
+ kfree(buf);
+ return status;
+}
+
+/*
+ * NULL call.
+ *
+ * Does nothing and ignores all of its arguments; always returns nfs_ok.
+ */
+static int
+nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ return nfs_ok;
+}
+
+
+/*
+ * COMPOUND call.
+ *
+ * Allocates the current and saved filehandles, sets up the response
+ * encoder state in @resp, and then executes the operations in @args one
+ * after another.  Each operation's result is encoded as soon as it
+ * completes; processing stops at the first operation that returns a
+ * non-zero status.
+ *
+ * Returns the status of the last operation processed, nfserr_resource if
+ * a filehandle could not be allocated, or nfserr_minor_vers_mismatch for
+ * an unsupported minor version.
+ */
+static int
+nfsd4_proc_compound(struct svc_rqst *rqstp,
+ struct nfsd4_compoundargs *args,
+ struct nfsd4_compoundres *resp)
+{
+ struct nfsd4_op *op;
+ struct svc_fh *current_fh = NULL;
+ struct svc_fh *save_fh = NULL;
+ struct nfs4_stateowner *replay_owner = NULL;
+ int slack_space; /* computed below as a byte difference - see the FIXME there */
+ int status;
+
+ status = nfserr_resource;
+ current_fh = kmalloc(sizeof(*current_fh), GFP_KERNEL);
+ if (current_fh == NULL)
+ goto out;
+ fh_init(current_fh, NFS4_FHSIZE);
+ save_fh = kmalloc(sizeof(*save_fh), GFP_KERNEL);
+ if (save_fh == NULL)
+ goto out;
+ fh_init(save_fh, NFS4_FHSIZE);
+
+ /* the reply is encoded directly after what is already in the head buffer */
+ resp->xbuf = &rqstp->rq_res;
+ resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len;
+ resp->tagp = resp->p;
+ /* reserve space for: taglen, tag, and opcnt */
+ resp->p += 2 + XDR_QUADLEN(args->taglen);
+ resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE;
+ resp->taglen = args->taglen;
+ resp->tag = args->tag;
+ resp->opcnt = 0;
+ resp->rqstp = rqstp;
+
+ /*
+ * According to RFC3010, this takes precedence over all other errors.
+ */
+ status = nfserr_minor_vers_mismatch;
+ if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION)
+ goto out;
+
+ status = nfs_ok;
+ /* resp->opcnt doubles as the index of the next operation to run */
+ while (!status && resp->opcnt < args->opcnt) {
+ op = &args->ops[resp->opcnt++];
+
+ /*
+ * The XDR decode routines may have pre-set op->status;
+ * for example, if there is a miscellaneous XDR error
+ * it will be set to nfserr_bad_xdr.
+ */
+ if (op->status)
+ goto encode_op;
+
+ /* We must be able to encode a successful response to
+ * this operation, with enough room left over to encode a
+ * failed response to the next operation. If we don't
+ * have enough room, fail with ERR_RESOURCE.
+ */
+/* FIXME - is slack_space *really* words, or bytes??? - neilb */
+ slack_space = (char *)resp->end - (char *)resp->p;
+ if (slack_space < COMPOUND_SLACK_SPACE + COMPOUND_ERR_SLACK_SPACE) {
+ BUG_ON(slack_space < COMPOUND_ERR_SLACK_SPACE);
+ op->status = nfserr_resource;
+ goto encode_op;
+ }
+
+ /* All operations except RENEW, SETCLIENTID, RESTOREFH
+ * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
+ * require a valid current filehandle
+ *
+ * SETATTR NOFILEHANDLE error handled in nfsd4_setattr
+ * due to required returned bitmap argument
+ */
+ if ((!current_fh->fh_dentry) &&
+ !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) ||
+ (op->opnum == OP_SETCLIENTID) ||
+ (op->opnum == OP_SETCLIENTID_CONFIRM) ||
+ (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) ||
+ (op->opnum == OP_RELEASE_LOCKOWNER) ||
+ (op->opnum == OP_SETATTR))) {
+ op->status = nfserr_nofilehandle;
+ goto encode_op;
+ }
+ /*
+ * Dispatch.  Operations that work on a stateowner also record
+ * it in replay_owner so it can be used for replay encoding
+ * and released once the reply has been encoded.
+ */
+ switch (op->opnum) {
+ case OP_ACCESS:
+ op->status = nfsd4_access(rqstp, current_fh, &op->u.access);
+ break;
+ case OP_CLOSE:
+ op->status = nfsd4_close(rqstp, current_fh, &op->u.close);
+ replay_owner = op->u.close.cl_stateowner;
+ break;
+ case OP_COMMIT:
+ op->status = nfsd4_commit(rqstp, current_fh, &op->u.commit);
+ break;
+ case OP_CREATE:
+ op->status = nfsd4_create(rqstp, current_fh, &op->u.create);
+ break;
+ case OP_DELEGRETURN:
+ op->status = nfsd4_delegreturn(rqstp, current_fh, &op->u.delegreturn);
+ break;
+ case OP_GETATTR:
+ op->status = nfsd4_getattr(rqstp, current_fh, &op->u.getattr);
+ break;
+ case OP_GETFH:
+ op->status = nfsd4_getfh(current_fh, &op->u.getfh);
+ break;
+ case OP_LINK:
+ op->status = nfsd4_link(rqstp, current_fh, save_fh, &op->u.link);
+ break;
+ case OP_LOCK:
+ op->status = nfsd4_lock(rqstp, current_fh, &op->u.lock);
+ replay_owner = op->u.lock.lk_stateowner;
+ break;
+ case OP_LOCKT:
+ op->status = nfsd4_lockt(rqstp, current_fh, &op->u.lockt);
+ break;
+ case OP_LOCKU:
+ op->status = nfsd4_locku(rqstp, current_fh, &op->u.locku);
+ replay_owner = op->u.locku.lu_stateowner;
+ break;
+ case OP_LOOKUP:
+ op->status = nfsd4_lookup(rqstp, current_fh, &op->u.lookup);
+ break;
+ case OP_LOOKUPP:
+ op->status = nfsd4_lookupp(rqstp, current_fh);
+ break;
+ case OP_NVERIFY:
+ /* nfsd4_verify() never returns nfs_ok; "not same" is success for NVERIFY */
+ op->status = nfsd4_verify(rqstp, current_fh, &op->u.nverify);
+ if (op->status == nfserr_not_same)
+ op->status = nfs_ok;
+ break;
+ case OP_OPEN:
+ op->status = nfsd4_open(rqstp, current_fh, &op->u.open);
+ replay_owner = op->u.open.op_stateowner;
+ break;
+ case OP_OPEN_CONFIRM:
+ op->status = nfsd4_open_confirm(rqstp, current_fh, &op->u.open_confirm);
+ replay_owner = op->u.open_confirm.oc_stateowner;
+ break;
+ case OP_OPEN_DOWNGRADE:
+ op->status = nfsd4_open_downgrade(rqstp, current_fh, &op->u.open_downgrade);
+ replay_owner = op->u.open_downgrade.od_stateowner;
+ break;
+ case OP_PUTFH:
+ op->status = nfsd4_putfh(rqstp, current_fh, &op->u.putfh);
+ break;
+ case OP_PUTROOTFH:
+ op->status = nfsd4_putrootfh(rqstp, current_fh);
+ break;
+ case OP_READ:
+ op->status = nfsd4_read(rqstp, current_fh, &op->u.read);
+ break;
+ case OP_READDIR:
+ op->status = nfsd4_readdir(rqstp, current_fh, &op->u.readdir);
+ break;
+ case OP_READLINK:
+ op->status = nfsd4_readlink(rqstp, current_fh, &op->u.readlink);
+ break;
+ case OP_REMOVE:
+ op->status = nfsd4_remove(rqstp, current_fh, &op->u.remove);
+ break;
+ case OP_RENAME:
+ op->status = nfsd4_rename(rqstp, current_fh, save_fh, &op->u.rename);
+ break;
+ case OP_RENEW:
+ op->status = nfsd4_renew(&op->u.renew);
+ break;
+ case OP_RESTOREFH:
+ op->status = nfsd4_restorefh(current_fh, save_fh);
+ break;
+ case OP_SAVEFH:
+ op->status = nfsd4_savefh(current_fh, save_fh);
+ break;
+ case OP_SETATTR:
+ op->status = nfsd4_setattr(rqstp, current_fh, &op->u.setattr);
+ break;
+ case OP_SETCLIENTID:
+ op->status = nfsd4_setclientid(rqstp, &op->u.setclientid);
+ break;
+ case OP_SETCLIENTID_CONFIRM:
+ op->status = nfsd4_setclientid_confirm(rqstp, &op->u.setclientid_confirm);
+ break;
+ case OP_VERIFY:
+ /* nfsd4_verify() never returns nfs_ok; "same" is success for VERIFY */
+ op->status = nfsd4_verify(rqstp, current_fh, &op->u.verify);
+ if (op->status == nfserr_same)
+ op->status = nfs_ok;
+ break;
+ case OP_WRITE:
+ op->status = nfsd4_write(rqstp, current_fh, &op->u.write);
+ break;
+ case OP_RELEASE_LOCKOWNER:
+ op->status = nfsd4_release_lockowner(rqstp, &op->u.release_lockowner);
+ break;
+ default:
+ /* NOTE(review): presumably the decoder pre-sets an error status for unknown opcodes - confirm */
+ BUG_ON(op->status == nfs_ok);
+ break;
+ }
+
+encode_op:
+ if (op->status == NFSERR_REPLAY_ME) {
+ /* answer from the stateowner's cached reply and adopt its status */
+ op->replay = &replay_owner->so_replay;
+ nfsd4_encode_replay(resp, op);
+ status = op->status = op->replay->rp_status;
+ } else {
+ nfsd4_encode_operation(resp, op);
+ status = op->status;
+ }
+ /* NOTE(review): (void *)(-1) looks like a "no owner" sentinel set by a callee - confirm */
+ if (replay_owner && (replay_owner != (void *)(-1))) {
+ nfs4_put_stateowner(replay_owner);
+ replay_owner = NULL;
+ }
+ }
+
+out:
+ nfsd4_release_compoundargs(args);
+ /* either pointer may still be NULL if its allocation failed */
+ if (current_fh)
+ fh_put(current_fh);
+ kfree(current_fh);
+ if (save_fh)
+ fh_put(save_fh);
+ kfree(save_fh);
+ return status;
+}
+
+/*
+ * Stand-ins so that PROC(null, void, void, void, ...) expands to valid
+ * names: the NULL procedure has no argument decoder and no release
+ * function, and its argument and result types are the same dummy struct.
+ * The compound procedure has no release function either.
+ */
+#define nfs4svc_decode_voidargs NULL
+#define nfs4svc_release_void NULL
+#define nfsd4_voidres nfsd4_voidargs
+#define nfs4svc_release_compound NULL
+struct nfsd4_voidargs { int dummy; };
+
+/*
+ * Build one struct svc_procedure initializer by token-pasting:
+ *   name     -> handler nfsd4_proc_<name>
+ *   argt     -> decoder nfs4svc_decode_<argt>args, size of struct nfsd4_<argt>args
+ *   rest     -> encoder nfs4svc_encode_<rest>res, size of struct nfsd4_<rest>res
+ *   relt     -> release function nfs4svc_release_<relt>
+ *   cache, respsize are passed through unchanged; the literal 0 fills the
+ *   field between the two sizes and the cache value.
+ * NOTE(review): the initializer is positional, so it relies on the field
+ * order of struct svc_procedure - confirm against its definition.
+ */
+#define PROC(name, argt, rest, relt, cache, respsize) \
+ { (svc_procfunc) nfsd4_proc_##name, \
+ (kxdrproc_t) nfs4svc_decode_##argt##args, \
+ (kxdrproc_t) nfs4svc_encode_##rest##res, \
+ (kxdrproc_t) nfs4svc_release_##relt, \
+ sizeof(struct nfsd4_##argt##args), \
+ sizeof(struct nfsd4_##rest##res), \
+ 0, \
+ cache, \
+ respsize, \
+ }
+
+/*
+ * TODO: At the present time, the NFSv4 server does not do XID caching
+ * of requests. Implementing XID caching would not be a serious problem,
+ * although it would require a mild change in interfaces since one
+ * doesn't know whether an NFSv4 request is idempotent until after the
+ * XDR decode. However, XID caching totally confuses pynfs (Peter
+ * Astrand's regression testsuite for NFSv4 servers), which reuses
+ * XID's liberally, so I've left it unimplemented until pynfs generates
+ * better XID's.
+ */
+/* The two NFSv4 RPC procedures, NULL and COMPOUND; neither uses the reply cache (RC_NOCACHE). */
+static struct svc_procedure nfsd_procedures4[2] = {
+ PROC(null, void, void, void, RC_NOCACHE, 1),
+ PROC(compound, compound, compound, compound, RC_NOCACHE, NFSD_BUFSIZE)
+};
+
+/* Version 4 descriptor for the nfsd RPC program: two procedures, dispatched through nfsd_dispatch. */
+struct svc_version nfsd_version4 = {
+ .vs_vers = 4,
+ .vs_nproc = 2,
+ .vs_proc = nfsd_procedures4,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS4_SVC_XDRSIZE,
+};
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
new file mode 100644
index 000000000000..579f7fea7968
--- /dev/null
+++ b/fs/nfsd/nfs4state.c
@@ -0,0 +1,3320 @@
+/*
+* linux/fs/nfsd/nfs4state.c
+*
+* Copyright (c) 2001 The Regents of the University of Michigan.
+* All rights reserved.
+*
+* Kendrick Smith <kmsmith@umich.edu>
+* Andy Adamson <kandros@umich.edu>
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions
+* are met:
+*
+* 1. Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* 2. Redistributions in binary form must reproduce the above copyright
+* notice, this list of conditions and the following disclaimer in the
+* documentation and/or other materials provided with the distribution.
+* 3. Neither the name of the University nor the names of its
+* contributors may be used to endorse or promote products derived
+* from this software without specific prior written permission.
+*
+* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+*/
+
+#include <linux/param.h>
+#include <linux/major.h>
+#include <linux/slab.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/mount.h>
+#include <linux/workqueue.h>
+#include <linux/smp_lock.h>
+#include <linux/kthread.h>
+#include <linux/nfs4.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+/* Globals */
+static time_t lease_time = 90; /* default lease time */
+static time_t old_lease_time = 90; /* past incarnation lease time */
+static u32 nfs4_reclaim_init = 0;
+time_t boot_time; /* stamped into clientids and stateids; see STALE_CLIENTID() */
+static time_t grace_end = 0;
+static u32 current_clientid = 1; /* next cl_id handed out by gen_clid() */
+static u32 current_ownerid = 1; /* next so_id for a new open stateowner */
+static u32 current_fileid = 1; /* next fi_id for a new nfs4_file */
+static u32 current_delegid = 1; /* next si_stateownerid for a new delegation */
+static u32 nfs4_init;
+stateid_t zerostateid; /* bits all 0 */
+stateid_t onestateid; /* bits all 1 */
+
+/* debug counters */
+u32 list_add_perfile = 0;
+u32 list_del_perfile = 0;
+u32 add_perclient = 0;
+u32 del_perclient = 0;
+u32 alloc_file = 0;
+u32 free_file = 0;
+u32 vfsopen = 0;
+u32 vfsclose = 0;
+u32 alloc_delegation= 0;
+u32 free_delegation= 0;
+
+/* forward declarations */
+struct nfs4_stateid * find_stateid(stateid_t *stid, int flags);
+static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid);
+static void release_stateid_lockowners(struct nfs4_stateid *open_stp);
+
+/* Locking:
+ *
+ * client_sema:
+ * protects conf_id_hashtbl[], conf_str_hashtbl[],
+ * unconf_str_hashtbl[], unconf_id_hashtbl[].
+ */
+static DECLARE_MUTEX(client_sema);
+
+void
+nfs4_lock_state(void)
+{
+ down(&client_sema); /* acquire the global NFSv4 state semaphore */
+}
+
+void
+nfs4_unlock_state(void)
+{
+ up(&client_sema); /* release the semaphore taken by nfs4_lock_state() */
+}
+
+/*
+ * Hash an opaque byte string: for each byte, hash = hash * 37 + byte,
+ * with the arithmetic wrapping in 32 bits.
+ */
+static inline u32
+opaque_hashval(const void *ptr, int nbytes)
+{
+	unsigned char *byte = (unsigned char *) ptr;
+	u32 hash = 0;
+
+	for (; nbytes != 0; nbytes--) {
+		hash *= 37;
+		hash += *byte;
+		byte++;
+	}
+	return hash;
+}
+
+/* forward declarations */
+static void release_stateowner(struct nfs4_stateowner *sop);
+static void release_stateid(struct nfs4_stateid *stp, int flags);
+static void release_file(struct nfs4_file *fp);
+
+/*
+ * Delegation state
+ */
+
+/* recall_lock protects the del_recall_lru */
+spinlock_t recall_lock;
+static struct list_head del_recall_lru;
+
+/*
+ * Allocate a delegation of the given type for client clp on the file
+ * behind open stateid stp, and link it onto the file's and the client's
+ * delegation lists.  Takes an extra reference on stp->st_vfs_file and
+ * records a copy of the current filehandle.  Returns NULL if the
+ * allocation fails; otherwise the new delegation with dl_count == 1.
+ */
+static struct nfs4_delegation *
+alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_fh *current_fh, u32 type)
+{
+	struct nfs4_file *fp = stp->st_file;
+	struct nfs4_callback *cb = &stp->st_stateowner->so_client->cl_callback;
+	struct nfs4_delegation *dp;
+
+	dprintk("NFSD alloc_init_deleg\n");
+	dp = kmalloc(sizeof(struct nfs4_delegation), GFP_KERNEL);
+	if (dp == NULL)
+		return NULL;
+
+	INIT_LIST_HEAD(&dp->dl_del_perfile);
+	INIT_LIST_HEAD(&dp->dl_del_perclnt);
+	INIT_LIST_HEAD(&dp->dl_recall_lru);
+
+	dp->dl_client = clp;
+	dp->dl_file = fp;
+	dp->dl_flock = NULL;
+
+	/* the delegation holds its own reference on the open file */
+	get_file(stp->st_vfs_file);
+	dp->dl_vfs_file = stp->st_vfs_file;
+	dp->dl_type = type;
+
+	/* recall (callback) bookkeeping */
+	dp->dl_recall.cbr_dp = NULL;
+	dp->dl_recall.cbr_ident = cb->cb_ident;
+	dp->dl_recall.cbr_trunc = 0;
+
+	/* delegation stateid: boot time plus a fresh delegation id */
+	dp->dl_stateid.si_boot = boot_time;
+	dp->dl_stateid.si_stateownerid = current_delegid++;
+	dp->dl_stateid.si_fileid = 0;
+	dp->dl_stateid.si_generation = 0;
+
+	/* remember the filehandle the delegation was granted on */
+	dp->dl_fhlen = current_fh->fh_handle.fh_size;
+	memcpy(dp->dl_fhval, &current_fh->fh_handle.fh_base,
+		current_fh->fh_handle.fh_size);
+
+	dp->dl_time = 0;
+	atomic_set(&dp->dl_count, 1);
+	list_add(&dp->dl_del_perfile, &fp->fi_del_perfile);
+	list_add(&dp->dl_del_perclnt, &clp->cl_del_perclnt);
+	alloc_delegation++;
+	return dp;
+}
+
+/*
+ * Drop one reference on dp; the delegation is freed when the last
+ * reference goes away.
+ */
+void
+nfs4_put_delegation(struct nfs4_delegation *dp)
+{
+	if (!atomic_dec_and_test(&dp->dl_count))
+		return;
+
+	dprintk("NFSD: freeing dp %p\n",dp);
+	kfree(dp);
+	free_delegation++;
+}
+
+/*
+ * Tear down the lease behind a delegation, then close its file.
+ *
+ * The lease is removed first (F_UNLCK via setlease), which takes the
+ * FS_LEASE file_lock off the i_flock list and eventually reaches nfsd's
+ * lock_manager fl_release_callback.  dl_vfs_file is cleared before
+ * either call is made.
+ */
+static void
+nfs4_close_delegation(struct nfs4_delegation *dp)
+{
+	struct file *leased_file = dp->dl_vfs_file;
+
+	dprintk("NFSD: close_delegation dp %p\n",dp);
+	dp->dl_vfs_file = NULL;
+
+	/*
+	 * nfsd_close() below may not actually close the file, so the
+	 * lease is dropped explicitly rather than left to the close.
+	 */
+	setlease(leased_file, F_UNLCK, &dp->dl_flock);
+	nfsd_close(leased_file);
+	vfsclose++;
+}
+
+/*
+ * Called under the state lock.
+ *
+ * Unlink dp from its per-file and per-client lists and, under
+ * recall_lock, from the recall LRU.  Then release the lease and file
+ * via nfs4_close_delegation() and drop one dl_count reference.
+ */
+static void
+unhash_delegation(struct nfs4_delegation *dp)
+{
+ list_del_init(&dp->dl_del_perfile);
+ list_del_init(&dp->dl_del_perclnt);
+ spin_lock(&recall_lock);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&recall_lock);
+ nfs4_close_delegation(dp);
+ nfs4_put_delegation(dp);
+}
+
+/*
+ * SETCLIENTID state
+ */
+
+/* Hash tables for nfs4_clientid state */
+#define CLIENT_HASH_BITS 4
+#define CLIENT_HASH_SIZE (1 << CLIENT_HASH_BITS)
+#define CLIENT_HASH_MASK (CLIENT_HASH_SIZE - 1)
+
+#define clientid_hashval(id) \
+ ((id) & CLIENT_HASH_MASK)
+#define clientstr_hashval(name, namelen) \
+ (opaque_hashval((name), (namelen)) & CLIENT_HASH_MASK)
+/*
+ * reclaim_str_hashtbl[] holds known client info from previous reset/reboot
+ * used in reboot/reset lease grace period processing
+ *
+ * conf_id_hashtbl[], and conf_str_hashtbl[] hold confirmed
+ * setclientid_confirmed info.
+ *
+ * unconf_str_hashtbl[] and unconf_id_hashtbl[] hold unconfirmed
+ * setclientid info.
+ *
+ * client_lru holds client queue ordered by nfs4_client.cl_time
+ * for lease renewal.
+ *
+ * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
+ * for last close replay.
+ */
+static struct list_head reclaim_str_hashtbl[CLIENT_HASH_SIZE];
+static int reclaim_str_hashtbl_size = 0;
+static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE];
+static struct list_head client_lru;
+static struct list_head close_lru;
+
+static inline void
+renew_client(struct nfs4_client *clp)
+{
+ /*
+ * Move client to the end of the LRU list and stamp cl_time with
+ * the current time; client_lru is kept ordered by cl_time.
+ */
+ dprintk("renewing client (clientid %08x/%08x)\n",
+ clp->cl_clientid.cl_boot,
+ clp->cl_clientid.cl_id);
+ list_move_tail(&clp->cl_lru, &client_lru);
+ clp->cl_time = get_seconds();
+}
+
+/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
+/*
+ * Returns 1 (and logs) when the clientid was issued under a different
+ * boot_time than the current one, 0 when it is current.
+ */
+static int
+STALE_CLIENTID(clientid_t *clid)
+{
+	if (clid->cl_boot != boot_time) {
+		dprintk("NFSD stale clientid (%08x/%08x)\n",
+				clid->cl_boot, clid->cl_id);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * XXX Should we use a slab cache ?
+ * This type of memory management is somewhat inefficient, but we use it
+ * anyway since SETCLIENTID is not a common operation.
+ */
+/*
+ * Allocate a zeroed nfs4_client holding a private copy of name.
+ * Returns NULL if either allocation fails.
+ */
+static inline struct nfs4_client *
+alloc_client(struct xdr_netobj name)
+{
+	struct nfs4_client *clp;
+
+	clp = kmalloc(sizeof(struct nfs4_client), GFP_KERNEL);
+	if (clp == NULL)
+		return NULL;
+	memset(clp, 0, sizeof(*clp));
+
+	clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
+	if (clp->cl_name.data == NULL) {
+		kfree(clp);
+		return NULL;
+	}
+	memcpy(clp->cl_name.data, name.data, name.len);
+	clp->cl_name.len = name.len;
+	return clp;
+}
+
+static inline void
+free_client(struct nfs4_client *clp)
+{
+ if (clp->cl_cred.cr_group_info) /* drop the reference copy_cred() took */
+ put_group_info(clp->cl_cred.cr_group_info);
+ kfree(clp->cl_name.data); /* name copy made by alloc_client() */
+ kfree(clp);
+}
+
+void
+put_nfs4_client(struct nfs4_client *clp)
+{
+ if (atomic_dec_and_test(&clp->cl_count)) /* last reference gone */
+ free_client(clp);
+}
+
+/*
+ * Tear down all state held for a client and drop one reference on it:
+ *  - shut down its callback rpc client, if one was set up;
+ *  - collect its delegations (under recall_lock) and unhash each one;
+ *  - unlink the client from the id/string hashes and the LRU;
+ *  - release every stateowner still on cl_perclient.
+ */
+static void
+expire_client(struct nfs4_client *clp)
+{
+	struct nfs4_callback *cb = &clp->cl_callback;
+	struct rpc_clnt *rpc = cb->cb_client;
+	struct nfs4_delegation *deleg;
+	struct nfs4_stateowner *owner;
+	struct list_head reaplist;
+
+	dprintk("NFSD: expire_client cl_count %d\n",
+			atomic_read(&clp->cl_count));
+
+	/* shutdown rpc client, ending any outstanding recall rpcs */
+	if (atomic_read(&cb->cb_set) == 1 && rpc) {
+		rpc_shutdown_client(rpc);
+		cb->cb_client = NULL;
+	}
+
+	/* move this client's delegations onto a private reap list */
+	INIT_LIST_HEAD(&reaplist);
+	spin_lock(&recall_lock);
+	while (!list_empty(&clp->cl_del_perclnt)) {
+		deleg = list_entry(clp->cl_del_perclnt.next,
+				struct nfs4_delegation, dl_del_perclnt);
+		dprintk("NFSD: expire client. dp %p, fp %p\n", deleg,
+				deleg->dl_flock);
+		list_del_init(&deleg->dl_del_perclnt);
+		list_move(&deleg->dl_recall_lru, &reaplist);
+	}
+	spin_unlock(&recall_lock);
+
+	/* unhash them outside recall_lock; unhash_delegation takes it itself */
+	while (!list_empty(&reaplist)) {
+		deleg = list_entry(reaplist.next,
+				struct nfs4_delegation, dl_recall_lru);
+		list_del_init(&deleg->dl_recall_lru);
+		unhash_delegation(deleg);
+	}
+
+	list_del(&clp->cl_idhash);
+	list_del(&clp->cl_strhash);
+	list_del(&clp->cl_lru);
+
+	while (!list_empty(&clp->cl_perclient)) {
+		owner = list_entry(clp->cl_perclient.next,
+				struct nfs4_stateowner, so_perclient);
+		release_stateowner(owner);
+	}
+	put_nfs4_client(clp);
+}
+
+/*
+ * Allocate a client for name and initialise its reference count,
+ * callback flags and list heads.  Returns NULL on allocation failure.
+ */
+static struct nfs4_client *
+create_client(struct xdr_netobj name)
+{
+	struct nfs4_client *clp = alloc_client(name);
+
+	if (!clp)
+		return NULL;
+
+	atomic_set(&clp->cl_count, 1);
+	atomic_set(&clp->cl_callback.cb_set, 0);
+	clp->cl_callback.cb_parsed = 0;
+	INIT_LIST_HEAD(&clp->cl_idhash);
+	INIT_LIST_HEAD(&clp->cl_strhash);
+	INIT_LIST_HEAD(&clp->cl_perclient);
+	INIT_LIST_HEAD(&clp->cl_del_perclnt);
+	INIT_LIST_HEAD(&clp->cl_lru);
+	return clp;
+}
+
+/* Store the verifier *source as target's cl_verifier. */
+static void
+copy_verf(struct nfs4_client *target, nfs4_verifier *source)
+{
+	memcpy(target->cl_verifier.data, source->data,
+		sizeof(target->cl_verifier.data));
+}
+
+/* Give target the same clientid (boot, id) as source. */
+static void
+copy_clid(struct nfs4_client *target, struct nfs4_client *source)
+{
+	clientid_t *to = &target->cl_clientid;
+	clientid_t *from = &source->cl_clientid;
+
+	to->cl_boot = from->cl_boot;
+	to->cl_id = from->cl_id;
+}
+
+/*
+ * Copy uid, gid and the group_info pointer from source to target and
+ * take a reference on the shared group_info.
+ */
+static void
+copy_cred(struct svc_cred *target, struct svc_cred *source)
+{
+	target->cr_uid = source->cr_uid;
+	target->cr_gid = source->cr_gid;
+
+	/* the group list is shared, not duplicated */
+	target->cr_group_info = source->cr_group_info;
+	get_group_info(target->cr_group_info);
+}
+
+/*
+ * Returns 1 when both names are non-NULL, of equal length and
+ * byte-for-byte identical; 0 otherwise.
+ */
+static int
+cmp_name(struct xdr_netobj *n1, struct xdr_netobj *n2)
+{
+	if (!n1 || !n2)
+		return 0;
+	if (n1->len != n2->len)
+		return 0;
+	return !memcmp(n1->data, n2->data, n2->len);
+}
+
+/* Returns 1 when the two verifiers hold identical bytes, 0 otherwise. */
+static int
+cmp_verf(nfs4_verifier *v1, nfs4_verifier *v2)
+{
+	return !memcmp(v1->data, v2->data, sizeof(v1->data));
+}
+
+/* Returns 1 when both the boot and id halves of the clientids match. */
+static int
+cmp_clid(clientid_t * cl1, clientid_t * cl2)
+{
+	if (cl1->cl_boot != cl2->cl_boot)
+		return 0;
+	return cl1->cl_id == cl2->cl_id;
+}
+
+/* XXX what about NGROUP */
+/* Returns 1 when the two credentials carry the same uid; only the uid is compared. */
+static int
+cmp_creds(struct svc_cred *cr1, struct svc_cred *cr2)
+{
+	return cr1->cr_uid == cr2->cr_uid;
+}
+
+/* Assign a fresh clientid: the current boot_time plus the next client counter value. */
+static void
+gen_clid(struct nfs4_client *clp)
+{
+	clientid_t *clid = &clp->cl_clientid;
+
+	clid->cl_boot = boot_time;
+	clid->cl_id = current_clientid++;
+}
+
+/*
+ * Fill the first two 32-bit words of cl_confirm from the current time
+ * (seconds, then nanoseconds).
+ */
+static void
+gen_confirm(struct nfs4_client *clp)
+{
+	struct timespec now;
+	u32 *words = (u32 *)clp->cl_confirm.data;
+
+	now = CURRENT_TIME;
+	words[0] = now.tv_sec;
+	words[1] = now.tv_nsec;
+}
+
+/*
+ * Validate a client-supplied name.
+ *
+ * Returns 1 when the name is non-empty and no longer than
+ * NFS4_OPAQUE_LIMIT bytes, 0 otherwise.  Over-long names are logged.
+ */
+static int
+check_name(struct xdr_netobj name)
+{
+	if (name.len == 0)
+		return 0;
+	if (name.len > NFS4_OPAQUE_LIMIT) {
+		/* name.len is unsigned: print it with %u, not %d */
+		printk("NFSD: check_name: name too long(%u)!\n", name.len);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Hash a new client into the unconfirmed string and id tables, queue it
+ * at the tail of client_lru and stamp cl_time with the current time.
+ * strhashval is the caller-computed hash of the client name.
+ */
+void
+add_to_unconfirmed(struct nfs4_client *clp, unsigned int strhashval)
+{
+	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+
+	list_add(&clp->cl_strhash, &unconf_str_hashtbl[strhashval]);
+	list_add(&clp->cl_idhash, &unconf_id_hashtbl[idhashval]);
+	list_add_tail(&clp->cl_lru, &client_lru);
+	clp->cl_time = get_seconds();
+}
+
+/*
+ * Move a client out of whichever string/id hash chains it is on and
+ * into the confirmed tables, then renew it.
+ */
+void
+move_to_confirmed(struct nfs4_client *clp)
+{
+	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
+	unsigned int strhashval = clientstr_hashval(clp->cl_name.data,
+			clp->cl_name.len);
+
+	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
+
+	/* leave the current hash chains ... */
+	list_del_init(&clp->cl_strhash);
+	list_del_init(&clp->cl_idhash);
+
+	/* ... and join the confirmed ones */
+	list_add(&clp->cl_idhash, &conf_id_hashtbl[idhashval]);
+	list_add(&clp->cl_strhash, &conf_str_hashtbl[strhashval]);
+	renew_client(clp);
+}
+
+/* Look up a confirmed client by clientid; returns NULL when none matches. */
+static struct nfs4_client *
+find_confirmed_client(clientid_t *clid)
+{
+	struct list_head *bucket = &conf_id_hashtbl[clientid_hashval(clid->cl_id)];
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, bucket, cl_idhash) {
+		if (!cmp_clid(&clp->cl_clientid, clid))
+			continue;
+		return clp;
+	}
+	return NULL;
+}
+
+/* Look up an unconfirmed client by clientid; returns NULL when none matches. */
+static struct nfs4_client *
+find_unconfirmed_client(clientid_t *clid)
+{
+	struct list_head *bucket = &unconf_id_hashtbl[clientid_hashval(clid->cl_id)];
+	struct nfs4_client *clp;
+
+	list_for_each_entry(clp, bucket, cl_idhash) {
+		if (!cmp_clid(&clp->cl_clientid, clid))
+			continue;
+		return clp;
+	}
+	return NULL;
+}
+
+/*
+ * Helper for callback address parsing: consume one decimal field.
+ *
+ * Reads characters from *addrp (at most *lenp of them) up to and
+ * including the next '.', or until the input is exhausted.  On return
+ * *lenp and *addrp describe the unconsumed remainder.
+ *
+ * Returns the field's value (0..255), or -1 if the field is empty,
+ * contains a non-digit, or exceeds 255.
+ */
+static int
+parse_octet(unsigned int *lenp, char **addrp)
+{
+	unsigned int remaining = *lenp;
+	char *cursor = *addrp;
+	int value = -1;		/* -1 until the first digit is seen */
+
+	while (remaining) {
+		char ch = *cursor++;
+
+		remaining--;
+		if (ch == '.')
+			break;
+		if (ch < '0' || ch > '9') {
+			value = -1;
+			break;
+		}
+		if (value < 0)
+			value = 0;
+		value = value * 10 + (ch - '0');
+		if (value > 255) {
+			value = -1;
+			break;
+		}
+	}
+	*lenp = remaining;
+	*addrp = cursor;
+	return value;
+}
+
+/*
+ * Parse and set the setclientid ipv4 callback address.
+ *
+ * addr_val/addr_len describe six dot-separated decimal fields
+ * "h1.h2.h3.h4.p1.p2": the first four form the address (h1 in the most
+ * significant byte), the last two the port (p1 is the high byte).
+ *
+ * Returns 1 on success with *cbaddrp and *cbportp set, 0 if any field
+ * fails to parse.  As before, *cbaddrp is written as soon as the four
+ * address fields have parsed, even if the port then fails.
+ */
+int
+parse_ipv4(unsigned int addr_len, char *addr_val, unsigned int *cbaddrp, unsigned short *cbportp)
+{
+	u32 cbaddr = 0;
+	u16 cbport = 0;
+	u32 addrlen = addr_len;
+	char *addr = addr_val;
+	int octet, shift;
+
+	/* ipaddress */
+	for (shift = 24; shift >= 0; shift -= 8) {
+		octet = parse_octet(&addrlen, &addr);
+		if (octet < 0)
+			return 0;
+		/*
+		 * Shift as unsigned: octet can be 255, and 255 << 24 does
+		 * not fit in a signed int.
+		 */
+		cbaddr |= (u32)octet << shift;
+	}
+	*cbaddrp = cbaddr;
+
+	/* port */
+	for (shift = 8; shift >= 0; shift -= 8) {
+		octet = parse_octet(&addrlen, &addr);
+		if (octet < 0)
+			return 0;
+		cbport |= (u16)((u32)octet << shift);
+	}
+	*cbportp = cbport;
+	return 1;
+}
+
+/*
+ * Record the callback channel a client asked for in SETCLIENTID.
+ *
+ * Only netid "tcp" with a parseable ipv4 universal address is accepted.
+ * On success the program number and ident are stored and cb_parsed is
+ * set to 1; otherwise a notice is logged and cb_parsed is set to 0.
+ */
+void
+gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se)
+{
+	struct nfs4_callback *cb = &clp->cl_callback;
+	int usable;
+
+	/* Currently, we only support tcp for the callback channel */
+	usable = se->se_callback_netid_len == 3 &&
+		 !memcmp((char *)se->se_callback_netid_val, "tcp", 3) &&
+		 parse_ipv4(se->se_callback_addr_len, se->se_callback_addr_val,
+			    &cb->cb_addr, &cb->cb_port);
+	if (!usable) {
+		printk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
+			"will not receive delegations\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+		cb->cb_parsed = 0;
+		return;
+	}
+
+	cb->cb_prog = se->se_callback_prog;
+	cb->cb_ident = se->se_callback_ident;
+	cb->cb_parsed = 1;
+}
+
+/*
+ * RFC 3010 has a complex implementation description of processing a
+ * SETCLIENTID request consisting of 5 bullets, labeled as
+ * CASE0 - CASE4 below.
+ *
+ * NOTES:
+ * callback information will be processed in a future patch
+ *
+ * an unconfirmed record is added when:
+ * NORMAL (part of CASE 4): there is no confirmed nor unconfirmed record.
+ * CASE 1: confirmed record found with matching name, principal,
+ * verifier, and clientid.
+ * CASE 2: confirmed record found with matching name, principal,
+ * and there is no unconfirmed record with matching
+ * name and principal
+ *
+ * an unconfirmed record is replaced when:
+ * CASE 3: confirmed record found with matching name, principal,
+ * and an unconfirmed record is found with matching
+ * name, principal, and with clientid and
+ * confirm that does not match the confirmed record.
+ * CASE 4: there is no confirmed record with matching name and
+ * principal. there is an unconfirmed record with
+ * matching name, principal.
+ *
+ * an unconfirmed record is deleted when:
+ * CASE 1: an unconfirmed record that matches input name, verifier,
+ * and confirmed clientid.
+ * CASE 4: any unconfirmed records with matching name and principal
+ * that exist after an unconfirmed record has been replaced
+ * as described above.
+ *
+ */
+/*
+ * Process a SETCLIENTID request.
+ *
+ * On success (nfs_ok) a new unconfirmed nfs4_client has been hashed and
+ * its clientid and confirm verifier are returned in *setclid.  Errors:
+ * nfserr_inval for an unacceptable name (or when no case applies),
+ * nfserr_clid_inuse when a confirmed record with the same name belongs
+ * to a different principal or address, nfserr_resource when the new
+ * client cannot be allocated.
+ *
+ * The state lock is held from the hash lookups until return.  The name
+ * check happens before the lock is taken and therefore returns
+ * directly; it must not go through the "out" label, which unlocks.
+ */
+int
+nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid)
+{
+	u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
+	struct xdr_netobj clname = {
+		.len = setclid->se_namelen,
+		.data = setclid->se_name,
+	};
+	nfs4_verifier clverifier = setclid->se_verf;
+	unsigned int strhashval;
+	struct nfs4_client * conf, * unconf, * new, * clp;
+	int status;
+
+	/* state lock not held yet: do not "goto out" from here */
+	if (!check_name(clname))
+		return nfserr_inval;
+
+	/*
+	 * XXX The Duplicate Request Cache (DRC) has been checked (??)
+	 * We get here on a DRC miss.
+	 */
+
+	strhashval = clientstr_hashval(clname.data, clname.len);
+
+	conf = NULL;
+	nfs4_lock_state();
+	list_for_each_entry(clp, &conf_str_hashtbl[strhashval], cl_strhash) {
+		if (!cmp_name(&clp->cl_name, &clname))
+			continue;
+		/*
+		 * CASE 0:
+		 * clname match, confirmed, different principal
+		 * or different ip_address
+		 */
+		status = nfserr_clid_inuse;
+		if (!cmp_creds(&clp->cl_cred,&rqstp->rq_cred)) {
+			printk("NFSD: setclientid: string in use by client"
+			"(clientid %08x/%08x)\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+			goto out;
+		}
+		if (clp->cl_addr != ip_addr) {
+			printk("NFSD: setclientid: string in use by client"
+			"(clientid %08x/%08x)\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+			goto out;
+		}
+
+		/*
+		 * cl_name match from a previous SETCLIENTID operation
+		 * XXX check for additional matches?
+		 */
+		conf = clp;
+		break;
+	}
+	unconf = NULL;
+	list_for_each_entry(clp, &unconf_str_hashtbl[strhashval], cl_strhash) {
+		if (!cmp_name(&clp->cl_name, &clname))
+			continue;
+		/* cl_name match from a previous SETCLIENTID operation */
+		unconf = clp;
+		break;
+	}
+	status = nfserr_resource;
+	if (!conf) {
+		/*
+		 * CASE 4:
+		 * placed first, because it is the normal case.
+		 */
+		if (unconf)
+			expire_client(unconf);
+		if (!(new = create_client(clname)))
+			goto out;
+		copy_verf(new, &clverifier);
+		new->cl_addr = ip_addr;
+		copy_cred(&new->cl_cred,&rqstp->rq_cred);
+		gen_clid(new);
+		gen_confirm(new);
+		gen_callback(new, setclid);
+		add_to_unconfirmed(new, strhashval);
+	} else if (cmp_verf(&conf->cl_verifier, &clverifier)) {
+		/*
+		 * CASE 1:
+		 * cl_name match, confirmed, principal match
+		 * verifier match: probable callback update
+		 *
+		 * remove any unconfirmed nfs4_client with
+		 * matching cl_name, cl_verifier, and cl_clientid
+		 *
+		 * create and insert an unconfirmed nfs4_client with same
+		 * cl_name, cl_verifier, and cl_clientid as existing
+		 * nfs4_client, but with the new callback info and a
+		 * new cl_confirm
+		 */
+		if ((unconf) &&
+		    cmp_verf(&unconf->cl_verifier, &conf->cl_verifier) &&
+		    cmp_clid(&unconf->cl_clientid, &conf->cl_clientid)) {
+			expire_client(unconf);
+		}
+		if (!(new = create_client(clname)))
+			goto out;
+		copy_verf(new,&conf->cl_verifier);
+		new->cl_addr = ip_addr;
+		copy_cred(&new->cl_cred,&rqstp->rq_cred);
+		copy_clid(new, conf);
+		gen_confirm(new);
+		gen_callback(new, setclid);
+		add_to_unconfirmed(new,strhashval);
+	} else if (!unconf) {
+		/*
+		 * CASE 2:
+		 * clname match, confirmed, principal match
+		 * verifier does not match
+		 * no unconfirmed. create a new unconfirmed nfs4_client
+		 * using input clverifier, clname, and callback info
+		 * and generate a new cl_clientid and cl_confirm.
+		 */
+		if (!(new = create_client(clname)))
+			goto out;
+		copy_verf(new,&clverifier);
+		new->cl_addr = ip_addr;
+		copy_cred(&new->cl_cred,&rqstp->rq_cred);
+		gen_clid(new);
+		gen_confirm(new);
+		gen_callback(new, setclid);
+		add_to_unconfirmed(new, strhashval);
+	} else if (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm)) {
+		/*
+		 * CASE3:
+		 * confirmed found (name, principal match)
+		 * confirmed verifier does not match input clverifier
+		 *
+		 * unconfirmed found (name match)
+		 * confirmed->cl_confirm != unconfirmed->cl_confirm
+		 *
+		 * remove unconfirmed.
+		 *
+		 * create an unconfirmed nfs4_client
+		 * with same cl_name as existing confirmed nfs4_client,
+		 * but with new callback info, new cl_clientid,
+		 * new cl_verifier and a new cl_confirm
+		 */
+		expire_client(unconf);
+		if (!(new = create_client(clname)))
+			goto out;
+		copy_verf(new,&clverifier);
+		new->cl_addr = ip_addr;
+		copy_cred(&new->cl_cred,&rqstp->rq_cred);
+		gen_clid(new);
+		gen_confirm(new);
+		gen_callback(new, setclid);
+		add_to_unconfirmed(new, strhashval);
+	} else {
+		/* No cases hit !!! */
+		status = nfserr_inval;
+		goto out;
+
+	}
+	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
+	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
+	memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
+	status = nfs_ok;
+out:
+	nfs4_unlock_state();
+	return status;
+}
+
+
+/*
+ * RFC 3010 has a complex implementation description of processing a
+ * SETCLIENTID_CONFIRM request consisting of 4 bullets describing
+ * processing on a DRC miss, labeled as CASE1 - CASE4 below.
+ *
+ * A clientid from another boot instance is rejected with
+ * nfserr_stale_clientid before the state lock is taken.  Otherwise the
+ * confirmed and unconfirmed records for the clientid are looked up under
+ * the state lock; a record whose cl_addr differs from the caller's
+ * address fails the request with nfserr_inval.  On nfs_ok, clp names the
+ * confirmed client and is passed to nfsd4_probe_callback() before the
+ * lock is dropped.
+ *
+ * NOTE: callback information will be processed here in a future patch
+ */
+int
+nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm)
+{
+ u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr;
+ struct nfs4_client *clp, *conf = NULL, *unconf = NULL;
+ nfs4_verifier confirm = setclientid_confirm->sc_confirm;
+ clientid_t * clid = &setclientid_confirm->sc_clientid;
+ int status;
+
+ if (STALE_CLIENTID(clid))
+ return nfserr_stale_clientid;
+ /*
+ * XXX The Duplicate Request Cache (DRC) has been checked (??)
+ * We get here on a DRC miss.
+ */
+
+ nfs4_lock_state();
+ clp = find_confirmed_client(clid);
+ if (clp) {
+ status = nfserr_inval;
+ /*
+ * Found a record for this clientid. If the IP addresses
+ * don't match, return ERR_INVAL just as if the record had
+ * not been found.
+ */
+ if (clp->cl_addr != ip_addr) {
+ printk("NFSD: setclientid: string in use by client"
+ "(clientid %08x/%08x)\n",
+ clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+ goto out;
+ }
+ conf = clp;
+ }
+ clp = find_unconfirmed_client(clid);
+ if (clp) {
+ status = nfserr_inval;
+ if (clp->cl_addr != ip_addr) {
+ printk("NFSD: setclientid: string in use by client"
+ "(clientid %08x/%08x)\n",
+ clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+ goto out;
+ }
+ unconf = clp;
+ }
+ /* CASE 1:
+ * unconf record that matches input clientid and input confirm.
+ * conf record that matches input clientid.
+ * conf and unconf records match names, verifiers
+ *
+ * The old confirmed record is expired and the unconfirmed one
+ * takes its place.
+ */
+ if ((conf && unconf) &&
+ (cmp_verf(&unconf->cl_confirm, &confirm)) &&
+ (cmp_verf(&conf->cl_verifier, &unconf->cl_verifier)) &&
+ (cmp_name(&conf->cl_name,&unconf->cl_name)) &&
+ (!cmp_verf(&conf->cl_confirm, &unconf->cl_confirm))) {
+ if (!cmp_creds(&conf->cl_cred, &unconf->cl_cred))
+ status = nfserr_clid_inuse;
+ else {
+ expire_client(conf);
+ clp = unconf;
+ move_to_confirmed(unconf);
+ status = nfs_ok;
+ }
+ goto out;
+ }
+ /* CASE 2:
+ * conf record that matches input clientid.
+ * if unconf record that matches input clientid, then unconf->cl_name
+ * or unconf->cl_verifier don't match the conf record.
+ *
+ * Nothing is rehashed; the existing confirmed record is reported.
+ */
+ if ((conf && !unconf) ||
+ ((conf && unconf) &&
+ (!cmp_verf(&conf->cl_verifier, &unconf->cl_verifier) ||
+ !cmp_name(&conf->cl_name, &unconf->cl_name)))) {
+ if (!cmp_creds(&conf->cl_cred,&rqstp->rq_cred)) {
+ status = nfserr_clid_inuse;
+ } else {
+ clp = conf;
+ status = nfs_ok;
+ }
+ goto out;
+ }
+ /* CASE 3:
+ * conf record not found.
+ * unconf record found.
+ * unconf->cl_confirm matches input confirm
+ */
+ if (!conf && unconf && cmp_verf(&unconf->cl_confirm, &confirm)) {
+ if (!cmp_creds(&unconf->cl_cred, &rqstp->rq_cred)) {
+ status = nfserr_clid_inuse;
+ } else {
+ status = nfs_ok;
+ clp = unconf;
+ move_to_confirmed(unconf);
+ }
+ goto out;
+ }
+ /* CASE 4:
+ * conf record not found, or if conf, then conf->cl_confirm does not
+ * match input confirm.
+ * unconf record not found, or if unconf, then unconf->cl_confirm
+ * does not match input confirm.
+ */
+ if ((!conf || (conf && !cmp_verf(&conf->cl_confirm, &confirm))) &&
+ (!unconf || (unconf && !cmp_verf(&unconf->cl_confirm, &confirm)))) {
+ status = nfserr_stale_clientid;
+ goto out;
+ }
+ /* check that we have hit one of the cases...*/
+ status = nfserr_inval;
+ goto out;
+out:
+ if (!status) /* every nfs_ok path above has set clp */
+ nfsd4_probe_callback(clp);
+ nfs4_unlock_state();
+ return status;
+}
+
+/*
+ * Open owner state (share locks)
+ */
+
+/* hash tables for nfs4_stateowner */
+#define OWNER_HASH_BITS 8
+#define OWNER_HASH_SIZE (1 << OWNER_HASH_BITS)
+#define OWNER_HASH_MASK (OWNER_HASH_SIZE - 1)
+
+#define ownerid_hashval(id) \
+ ((id) & OWNER_HASH_MASK)
+#define ownerstr_hashval(clientid, ownername) \
+ (((clientid) + opaque_hashval((ownername.data), (ownername.len))) & OWNER_HASH_MASK)
+
+static struct list_head ownerid_hashtbl[OWNER_HASH_SIZE];
+static struct list_head ownerstr_hashtbl[OWNER_HASH_SIZE];
+
+/* hash table for nfs4_file */
+#define FILE_HASH_BITS 8
+#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
+#define FILE_HASH_MASK (FILE_HASH_SIZE - 1)
+/* hash table for (open)nfs4_stateid */
+#define STATEID_HASH_BITS 10
+#define STATEID_HASH_SIZE (1 << STATEID_HASH_BITS)
+#define STATEID_HASH_MASK (STATEID_HASH_SIZE - 1)
+
+#define file_hashval(x) \
+ hash_ptr(x, FILE_HASH_BITS)
+#define stateid_hashval(owner_id, file_id) \
+ (((owner_id) + (file_id)) & STATEID_HASH_MASK)
+
+static struct list_head file_hashtbl[FILE_HASH_SIZE];
+static struct list_head stateid_hashtbl[STATEID_HASH_SIZE];
+
+/* OPEN Share state helper functions */
+/*
+ * Allocate an nfs4_file for inode ino, hash it into file_hashtbl[] and
+ * give it the next file id.  Returns NULL if the allocation fails.
+ *
+ * NOTE(review): the result of igrab() is stored without being checked;
+ * confirm callers guarantee it cannot fail here.
+ */
+static inline struct nfs4_file *
+alloc_init_file(struct inode *ino)
+{
+	unsigned int hashval = file_hashval(ino);
+	struct nfs4_file *fp;
+
+	fp = kmalloc(sizeof(struct nfs4_file), GFP_KERNEL);
+	if (!fp)
+		return NULL;
+
+	INIT_LIST_HEAD(&fp->fi_hash);
+	INIT_LIST_HEAD(&fp->fi_perfile);
+	INIT_LIST_HEAD(&fp->fi_del_perfile);
+	list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+	fp->fi_inode = igrab(ino);
+	fp->fi_id = current_fileid++;
+	alloc_file++;
+	return fp;
+}
+
+/*
+ * Release every nfs4_file left in file_hashtbl[].  A file that still
+ * has open stateids or delegations attached is reported, then released
+ * anyway.
+ */
+static void
+release_all_files(void)
+{
+	struct nfs4_file *fp;
+	int bucket;
+
+	for (bucket = 0; bucket < FILE_HASH_SIZE; bucket++) {
+		struct list_head *head = &file_hashtbl[bucket];
+
+		while (!list_empty(head)) {
+			fp = list_entry(head->next, struct nfs4_file, fi_hash);
+			/* this should never be more than once... */
+			if (!list_empty(&fp->fi_perfile) || !list_empty(&fp->fi_del_perfile)) {
+				printk("ERROR: release_all_files: file %p is open, creating dangling state !!!\n",fp);
+			}
+			release_file(fp);
+		}
+	}
+}
+
+kmem_cache_t *stateowner_slab = NULL;
+
+/*
+ * Create the slab cache used for nfs4_stateowner objects.
+ * Returns 0 on success, -ENOMEM if the cache cannot be created.
+ */
+static int
+nfsd4_init_slabs(void)
+{
+	stateowner_slab = kmem_cache_create("nfsd4_stateowners",
+			sizeof(struct nfs4_stateowner), 0, 0, NULL, NULL);
+	if (stateowner_slab != NULL)
+		return 0;
+
+	dprintk("nfsd4: out of memory while initializing nfsv4\n");
+	return -ENOMEM;
+}
+
+/*
+ * Destroy the stateowner slab cache, if it exists, and forget the
+ * pointer.  A non-zero result from kmem_cache_destroy() is a BUG.
+ */
+static void
+nfsd4_free_slabs(void)
+{
+	int status;
+
+	if (!stateowner_slab)
+		return;
+
+	status = kmem_cache_destroy(stateowner_slab);
+	stateowner_slab = NULL;
+	BUG_ON(status);
+}
+
+/*
+ * kref release function for a stateowner: frees the owner-name copy
+ * and returns the object to the slab.
+ */
+void
+nfs4_free_stateowner(struct kref *kref)
+{
+	struct nfs4_stateowner *owner;
+
+	owner = container_of(kref, struct nfs4_stateowner, so_ref);
+	kfree(owner->so_owner.data);
+	kmem_cache_free(stateowner_slab, owner);
+}
+
+/*
+ * Allocate a stateowner from the slab with a private copy of the owner
+ * name and an initialised reference count.  Returns NULL if either
+ * allocation fails.
+ */
+static inline struct nfs4_stateowner *
+alloc_stateowner(struct xdr_netobj *owner)
+{
+	struct nfs4_stateowner *sop;
+
+	sop = kmem_cache_alloc(stateowner_slab, GFP_KERNEL);
+	if (!sop)
+		return NULL;
+
+	sop->so_owner.data = kmalloc(owner->len, GFP_KERNEL);
+	if (!sop->so_owner.data) {
+		kmem_cache_free(stateowner_slab, sop);
+		return NULL;
+	}
+	memcpy(sop->so_owner.data, owner->data, owner->len);
+	sop->so_owner.len = owner->len;
+	kref_init(&sop->so_ref);
+	return sop;
+}
+
+/*
+ * Create an (unconfirmed) open stateowner for client clp from an OPEN
+ * request: assign it the next owner id, hash it by id and by owner
+ * string (strhashval is supplied by the caller), attach it to the
+ * client and reset its replay buffer.  Returns NULL on allocation
+ * failure.
+ *
+ * NOTE(review): rp_status starts as NFSERR_SERVERFAULT (upper-case
+ * constant) while status values elsewhere in this file are nfserr_*;
+ * confirm this is the intended constant/byte order.
+ */
+static struct nfs4_stateowner *
+alloc_init_open_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_open *open)
+{
+	struct nfs4_stateowner *sop = alloc_stateowner(&open->op_owner);
+	struct nfs4_replay *replay;
+	unsigned int idhashval;
+
+	if (!sop)
+		return NULL;
+
+	sop->so_id = current_ownerid++;
+	idhashval = ownerid_hashval(sop->so_id);
+
+	INIT_LIST_HEAD(&sop->so_idhash);
+	INIT_LIST_HEAD(&sop->so_strhash);
+	INIT_LIST_HEAD(&sop->so_perclient);
+	INIT_LIST_HEAD(&sop->so_perfilestate);
+	INIT_LIST_HEAD(&sop->so_perlockowner);  /* not used */
+	INIT_LIST_HEAD(&sop->so_close_lru);
+	sop->so_time = 0;
+
+	list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
+	list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
+	list_add(&sop->so_perclient, &clp->cl_perclient);
+	add_perclient++;
+
+	sop->so_is_open_owner = 1;
+	sop->so_client = clp;
+	sop->so_seqid = open->op_seqid;
+	sop->so_confirmed = 0;
+
+	/* empty replay cache, pointing at the inline buffer */
+	replay = &sop->so_replay;
+	replay->rp_status = NFSERR_SERVERFAULT;
+	replay->rp_buflen = 0;
+	replay->rp_buf = replay->rp_ibuf;
+	return sop;
+}
+
+/*
+ * Release every lock stateowner hanging off an open stateid.  Each
+ * entry must be a lock owner (not an open owner); release_stateowner()
+ * unlinks it from st_perlockowner, which is what ends the loop.
+ */
+static void
+release_stateid_lockowners(struct nfs4_stateid *open_stp)
+{
+	struct list_head *head = &open_stp->st_perlockowner;
+	struct nfs4_stateowner *lock_sop;
+
+	while (!list_empty(head)) {
+		lock_sop = list_entry(head->next,
+				struct nfs4_stateowner, so_perlockowner);
+		BUG_ON(lock_sop->so_is_open_owner);
+		release_stateowner(lock_sop);
+	}
+}
+
+/*
+ * Unlink a stateowner from the id/string hashes, from its client (open
+ * owners only) and from any per-lockowner list, then release every
+ * stateid it still owns.
+ *
+ * NOTE(review): plain list_del() is used here, and both
+ * move_to_close_lru() and release_stateowner() call this function;
+ * confirm an owner can never be unhashed twice.
+ */
+static void
+unhash_stateowner(struct nfs4_stateowner *sop)
+{
+	struct nfs4_stateid *stp;
+	int state_kind;
+
+	list_del(&sop->so_idhash);
+	list_del(&sop->so_strhash);
+	if (sop->so_is_open_owner) {
+		list_del(&sop->so_perclient);
+		del_perclient++;
+	}
+	list_del(&sop->so_perlockowner);
+
+	/* release_stateid() removes stp from so_perfilestate */
+	while (!list_empty(&sop->so_perfilestate)) {
+		stp = list_entry(sop->so_perfilestate.next,
+				struct nfs4_stateid, st_perfilestate);
+		state_kind = sop->so_is_open_owner ? OPEN_STATE : LOCK_STATE;
+		release_stateid(stp, state_kind);
+	}
+}
+
+static void
+release_stateowner(struct nfs4_stateowner *sop)
+{
+ unhash_stateowner(sop); /* unlink from hashes and release its stateids */
+ list_del(&sop->so_close_lru);
+ nfs4_put_stateowner(sop); /* drop a reference on the owner */
+}
+
+/*
+ * Initialise a freshly allocated open stateid for file fp and the
+ * stateowner recorded in open->op_stateowner: hash it, link it to its
+ * owner and file, build the stateid value and record the requested
+ * share access/deny bits.
+ */
+static inline void
+init_stateid(struct nfs4_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open)
+{
+	struct nfs4_stateowner *owner = open->op_stateowner;
+	unsigned int bucket = stateid_hashval(owner->so_id, fp->fi_id);
+
+	INIT_LIST_HEAD(&stp->st_hash);
+	INIT_LIST_HEAD(&stp->st_perfilestate);
+	INIT_LIST_HEAD(&stp->st_perlockowner);
+	INIT_LIST_HEAD(&stp->st_perfile);
+
+	list_add(&stp->st_hash, &stateid_hashtbl[bucket]);
+	list_add(&stp->st_perfilestate, &owner->so_perfilestate);
+	list_add_perfile++;
+	list_add(&stp->st_perfile, &fp->fi_perfile);
+
+	stp->st_stateowner = owner;
+	stp->st_file = fp;
+
+	/* stateid = (boot time, owner id, file id), generation 0 */
+	stp->st_stateid.si_boot = boot_time;
+	stp->st_stateid.si_stateownerid = owner->so_id;
+	stp->st_stateid.si_fileid = fp->fi_id;
+	stp->st_stateid.si_generation = 0;
+
+	/* share modes are kept as bitmaps indexed by the mode value */
+	stp->st_access_bmap = 0;
+	stp->st_deny_bmap = 0;
+	__set_bit(open->op_share_access, &stp->st_access_bmap);
+	__set_bit(open->op_share_deny, &stp->st_deny_bmap);
+}
+
+/*
+ * Unlink and free a stateid.
+ *
+ * With OPEN_STATE in flags, the lock owners hanging off the open are
+ * released and the file is closed.  Otherwise, with LOCK_STATE, the
+ * posix locks held by the stateid's owner on the file are removed.
+ */
+static void
+release_stateid(struct nfs4_stateid *stp, int flags)
+{
+	struct file *vfs_file = stp->st_vfs_file;
+
+	list_del(&stp->st_hash);
+	list_del_perfile++;
+	list_del(&stp->st_perfile);
+	list_del(&stp->st_perfilestate);
+
+	if (flags & OPEN_STATE) {
+		release_stateid_lockowners(stp);
+		stp->st_vfs_file = NULL;
+		nfsd_close(vfs_file);
+		vfsclose++;
+	} else if (flags & LOCK_STATE) {
+		locks_remove_posix(vfs_file, (fl_owner_t) stp->st_stateowner);
+	}
+	kfree(stp);
+}
+
+/*
+ * Free an nfs4_file: bump the free_file counter, unlink the entry from
+ * its hash chain, drop the inode reference with iput() and free the struct.
+ */
+static void
+release_file(struct nfs4_file *fp)
+{
+ free_file++;
+ list_del(&fp->fi_hash);
+ iput(fp->fi_inode);
+ kfree(fp);
+}
+
+/*
+ * Unhash a stateowner and queue it at the tail of close_lru, stamping
+ * so_time with the current time so nfs4_laundromat() can age it out.
+ */
+void
+move_to_close_lru(struct nfs4_stateowner *sop)
+{
+ dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
+
+ unhash_stateowner(sop);
+ list_add_tail(&sop->so_close_lru, &close_lru);
+ sop->so_time = get_seconds();
+}
+
+/*
+ * Release one stateid and then tidy up what it referenced: a confirmed
+ * owner with no stateids left goes onto the close LRU, and a file with
+ * no stateids and no delegations left is freed.
+ */
+void
+release_state_owner(struct nfs4_stateid *stp, int flag)
+{
+ /* read both back-pointers before stp is freed by release_stateid() */
+ struct nfs4_stateowner *sop = stp->st_stateowner;
+ struct nfs4_file *fp = stp->st_file;
+
+ dprintk("NFSD: release_state_owner\n");
+ release_stateid(stp, flag);
+
+ /* place unused nfs4_stateowners on so_close_lru list to be
+ * released by the laundromat service after the lease period
+ * to enable us to handle CLOSE replay
+ */
+ if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
+ move_to_close_lru(sop);
+ /* unused nfs4_file's are released. XXX slab cache? */
+ if (list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile)) {
+ release_file(fp);
+ }
+}
+
+/*
+ * Return 1 when the stateowner has the given owner name (same length and
+ * bytes) and belongs to the client with the given cl_id, otherwise 0.
+ */
+static int
+cmp_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner, clientid_t *clid) {
+	if (sop->so_owner.len != owner->len)
+		return 0;
+	if (memcmp(sop->so_owner.data, owner->data, owner->len) != 0)
+		return 0;
+	return sop->so_client->cl_clientid.cl_id == clid->cl_id;
+}
+
+/*
+ * Walk one ownerstr_hashtbl[] chain for the open owner named in the
+ * OPEN request (owner string plus clientid).  Returns NULL when absent.
+ */
+static struct nfs4_stateowner *
+find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
+{
+	struct nfs4_stateowner *cand;
+	struct list_head *chain = &ownerstr_hashtbl[hashval];
+
+	list_for_each_entry(cand, chain, so_strhash) {
+		if (!cmp_owner_str(cand, &open->op_owner, &open->op_clientid))
+			continue;
+		return cand;
+	}
+	return NULL;
+}
+
+/*
+ * Look up the nfs4_file tracking the given inode in file_hashtbl[].
+ * Returns the entry, or NULL if the inode has no NFSv4 state.
+ */
+static struct nfs4_file *
+find_file(struct inode *ino)
+{
+	struct list_head *chain = &file_hashtbl[file_hashval(ino)];
+	struct nfs4_file *cand;
+
+	list_for_each_entry(cand, chain, fi_hash) {
+		if (cand->fi_inode != ino)
+			continue;
+		return cand;
+	}
+	return NULL;
+}
+
+/*
+ * Range checks for the share_access / share_deny words of OPEN and
+ * OPEN_DOWNGRADE.  The values are later used directly as bit numbers in
+ * st_access_bmap / st_deny_bmap, so anything out of range must be
+ * rejected here.  set_access() uses access values 1..3 and set_deny()
+ * uses deny values 0..3.
+ *
+ * The previous definitions joined the two bounds with ||, which is true
+ * for every value and therefore validated nothing.
+ */
+#define TEST_ACCESS(x) (((x) > 0 && (x) < 4) ? 1 : 0)
+#define TEST_DENY(x) (((x) >= 0 && (x) < 4) ? 1 : 0)
+
+/*
+ * Collapse an access bitmap (bit N set means an open with share_access
+ * value N was seen) into the OR of those values, stored in *access.
+ */
+void
+set_access(unsigned int *access, unsigned long bmap) {
+	unsigned int merged = 0;
+	int mode;
+
+	for (mode = 1; mode < 4; mode++)
+		if (test_bit(mode, &bmap))
+			merged |= mode;
+
+	*access = merged;
+}
+
+/*
+ * Collapse a deny bitmap (bit N set means an open with share_deny value N
+ * was seen) into the OR of those values, stored in *deny.  Bit 0
+ * contributes nothing to the result.
+ */
+void
+set_deny(unsigned int *deny, unsigned long bmap) {
+	unsigned int merged = 0;
+	int mode;
+
+	for (mode = 0; mode < 4; mode++)
+		if (test_bit(mode, &bmap))
+			merged |= mode;
+
+	*deny = merged;
+}
+
+/*
+ * Check an OPEN request against one existing stateid's share reservation.
+ * Returns 1 when the two are compatible, 0 on a conflict (the held access
+ * is denied by the request, or the held deny blocks the requested access).
+ */
+static int
+test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
+	unsigned int held_access, held_deny;
+
+	set_access(&held_access, stp->st_access_bmap);
+	set_deny(&held_deny, stp->st_deny_bmap);
+
+	if (held_access & open->op_share_deny)
+		return 0;
+	if (held_deny & open->op_share_access)
+		return 0;
+	return 1;
+}
+
+/*
+ * Called to check deny when READ with all zero stateid or
+ * WRITE with all zero or all one stateid.
+ *
+ * Returns nfserr_share_denied if any stateid on the file has the given
+ * deny_type bit or the DENY_BOTH bit set in its deny bitmap, else nfs_ok.
+ */
+int
+nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
+{
+	struct inode *ino = current_fh->fh_dentry->d_inode;
+	struct nfs4_file *fp;
+	struct nfs4_stateid *cur;
+
+	dprintk("NFSD: nfs4_share_conflict\n");
+
+	fp = find_file(ino);
+	if (fp == NULL)
+		return nfs_ok;	/* no NFSv4 state on this inode */
+
+	/* Search for conflicting share reservations */
+	list_for_each_entry(cur, &fp->fi_perfile, st_perfile) {
+		if (test_bit(deny_type, &cur->st_deny_bmap))
+			return nfserr_share_denied;
+		if (test_bit(NFS4_SHARE_DENY_BOTH, &cur->st_deny_bmap))
+			return nfserr_share_denied;
+	}
+	return nfs_ok;
+}
+
+/*
+ * Drop write access from an open file.  share_access is the set of access
+ * bits being given up; only the WRITE bit has any effect here.
+ */
+static inline void
+nfs4_file_downgrade(struct file *filp, unsigned int share_access)
+{
+ if (share_access & NFS4_SHARE_ACCESS_WRITE) {
+ put_write_access(filp->f_dentry->d_inode);
+ /* clear the write mode bit; the read mode bit is forced on */
+ filp->f_mode = (filp->f_mode | FMODE_READ) & ~FMODE_WRITE;
+ }
+}
+
+/*
+ * Recall a delegation
+ *
+ * Thread body started by nfsd_break_deleg_cb(); __dp is the
+ * nfs4_delegation to recall.  Always returns 0.
+ */
+static int
+do_recall(void *__dp)
+{
+ struct nfs4_delegation *dp = __dp;
+
+ daemonize("nfsv4-recall");
+
+ nfsd4_cb_recall(dp);
+ return 0;
+}
+
+/*
+ * Spawn a thread to perform a recall on the delegation represented
+ * by the lease (file_lock)
+ *
+ * Called from break_lease() with lock_kernel() held.
+ * Note: we assume break_lease will only call this *once* for any given
+ * lease.
+ *
+ * The delegation is taken from fl->fl_owner; a NULL owner is ignored.
+ * A reference is taken for the recall thread and dropped again here if
+ * the thread cannot be started.
+ */
+static
+void nfsd_break_deleg_cb(struct file_lock *fl)
+{
+	struct nfs4_delegation *dp= (struct nfs4_delegation *)fl->fl_owner;
+	struct task_struct *t;
+
+	dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
+	if (!dp)
+		return;
+
+	/* We're assuming the state code never drops its reference
+	 * without first removing the lease.  Since we're in this lease
+	 * callback (and since the lease code is serialized by the kernel
+	 * lock) we know the server hasn't removed the lease yet, we know
+	 * it's safe to take a reference: */
+	atomic_inc(&dp->dl_count);
+
+	/* queue for nfs4_laundromat(), which ages entries by dl_time */
+	spin_lock(&recall_lock);
+	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
+	spin_unlock(&recall_lock);
+
+	/* only place dl_time is set. protected by lock_kernel*/
+	dp->dl_time = get_seconds();
+
+	/* XXX need to merge NFSD_LEASE_TIME with fs/locks.c:lease_break_time */
+	fl->fl_break_time = jiffies + NFSD_LEASE_TIME * HZ;
+
+	t = kthread_run(do_recall, dp, "%s", "nfs4_cb_recall");
+	if (IS_ERR(t)) {
+		struct nfs4_client *clp = dp->dl_client;
+
+		/* message previously read "failed for for client" */
+		printk(KERN_INFO "NFSD: Callback thread failed "
+			"for client (clientid %08x/%08x)\n",
+			clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);
+		/* no thread will consume the reference taken above */
+		nfs4_put_delegation(dp);
+	}
+}
+
+/*
+ * The file_lock is being reaped.
+ *
+ * Called by locks_free_lock() with lock_kernel() held.
+ *
+ * Clears the delegation's back pointer to the lease.  Locks that are not
+ * leases, or that carry no owner, are ignored.
+ */
+static
+void nfsd_release_deleg_cb(struct file_lock *fl)
+{
+	struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
+
+	/*
+	 * Validate before touching dp: the debug message below reads
+	 * dp->dl_count, which used to be evaluated ahead of the NULL check.
+	 */
+	if (!(fl->fl_flags & FL_LEASE) || !dp)
+		return;
+
+	dprintk("NFSD nfsd_release_deleg_cb: fl %p dp %p dl_count %d\n", fl,dp, atomic_read(&dp->dl_count));
+
+	dp->dl_flock = NULL;
+}
+
+/*
+ * Set the delegation file_lock back pointer.
+ *
+ * Called from __setlease() with lock_kernel() held.
+ *
+ * The delegation is read from new->fl_owner and its dl_flock is pointed
+ * at 'new'; the 'fl' argument is not used.
+ */
+static
+void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
+{
+ struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
+
+ dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
+ if (!dp)
+ return;
+ dp->dl_flock = new;
+}
+
+/*
+ * Called from __setlease() with lock_kernel() held
+ *
+ * Returns non-zero when both leases use the same lock manager operations
+ * and their delegations belong to the same client.
+ */
+static
+int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
+{
+	struct nfs4_delegation *held, *wanted;
+
+	/* a lease with different lock manager ops is never a match */
+	if (onlist->fl_lmops != try->fl_lmops)
+		return 0;
+
+	held = (struct nfs4_delegation *)onlist->fl_owner;
+	wanted = (struct nfs4_delegation *)try->fl_owner;
+
+	return held->dl_client == wanted->dl_client;
+}
+
+
+/*
+ * fl_change callback: only an unlock request is passed on to
+ * lease_modify(); any other change is refused with -EAGAIN.
+ *
+ * NOTE(review): the test is a bit-AND with F_UNLCK rather than an equality
+ * check - confirm this is correct for every architecture's F_* values.
+ */
+static
+int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
+{
+ if (arg & F_UNLCK)
+ return lease_modify(onlist, arg);
+ else
+ return -EAGAIN;
+}
+
+/*
+ * Lock manager callbacks attached to delegation leases
+ * (installed on the file_lock in nfs4_open_delegation()).
+ */
+struct lock_manager_operations nfsd_lease_mng_ops = {
+ .fl_break = nfsd_break_deleg_cb,
+ .fl_release_private = nfsd_release_deleg_cb,
+ .fl_copy_lock = nfsd_copy_lock_deleg_cb,
+ .fl_mylease = nfsd_same_client_deleg_cb,
+ .fl_change = nfsd_change_deleg_cb,
+};
+
+
+/*
+ * nfsd4_process_open1()
+ * 	lookup stateowner.
+ * 		found:
+ * 			check confirmed
+ * 				confirmed:
+ * 					check seqid
+ * 				not confirmed:
+ * 					delete owner
+ * 					create new owner
+ * 		notfound:
+ * 			verify clientid
+ * 			create new owner
+ *
+ * called with nfs4_lock_state() held.
+ *
+ * On success open->op_stateowner points at the (possibly new) stateowner
+ * and the client's lease is renewed.  Returns an nfs status, or
+ * NFSERR_REPLAY_ME when the request is a replay with saved reply data.
+ * Failures of a CLAIM_PREVIOUS open are reported as nfserr_reclaim_bad,
+ * except for the stale-clientid check, which returns directly.
+ */
+int
+nfsd4_process_open1(struct nfsd4_open *open)
+{
+	int status;
+	clientid_t *clientid = &open->op_clientid;
+	struct nfs4_client *clp = NULL;
+	unsigned int strhashval;
+	struct nfs4_stateowner *sop = NULL;
+
+	status = nfserr_inval;
+	if (!check_name(open->op_owner))
+		goto out;
+
+	if (STALE_CLIENTID(&open->op_clientid))
+		return nfserr_stale_clientid;
+
+	strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
+	sop = find_openstateowner_str(strhashval, open);
+	if (sop) {
+		open->op_stateowner = sop;
+		/* check for replay */
+		if (open->op_seqid == sop->so_seqid){
+			if (sop->so_replay.rp_buflen)
+				return NFSERR_REPLAY_ME;
+			else {
+				/* The original OPEN failed so spectacularly
+				 * that we don't even have replay data saved!
+				 * Therefore, we have no choice but to continue
+				 * processing this OPEN; presumably, we'll
+				 * fail again for the same reason.
+				 */
+				dprintk("nfsd4_process_open1:"
+					" replay with no replay cache\n");
+				goto renew;
+			}
+		} else if (sop->so_confirmed) {
+			if (open->op_seqid == sop->so_seqid + 1)
+				goto renew;
+			status = nfserr_bad_seqid;
+			goto out;
+		} else {
+			/* If we get here, we received an OPEN for an
+			 * unconfirmed nfs4_stateowner. Since the seqid's are
+			 * different, purge the existing nfs4_stateowner, and
+			 * instantiate a new one.
+			 */
+			clp = sop->so_client;
+			release_stateowner(sop);
+			/*
+			 * Do not leave the request pointing at the owner we
+			 * just released: if the allocation below fails, the
+			 * stale pointer would be returned to the caller.
+			 * NOTE(review): assumes callers accept a NULL
+			 * op_stateowner on failure, as in the not-found
+			 * path - confirm against the caller.
+			 */
+			open->op_stateowner = NULL;
+		}
+	} else {
+		/* nfs4_stateowner not found.
+		 * Verify clientid and instantiate new nfs4_stateowner.
+		 * If verify fails this is presumably the result of the
+		 * client's lease expiring.
+		 */
+		status = nfserr_expired;
+		clp = find_confirmed_client(clientid);
+		if (clp == NULL)
+			goto out;
+	}
+	status = nfserr_resource;
+	sop = alloc_init_open_stateowner(strhashval, clp, open);
+	if (sop == NULL)
+		goto out;
+	open->op_stateowner = sop;
+renew:
+	status = nfs_ok;
+	renew_client(sop->so_client);
+out:
+	if (status && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+		status = nfserr_reclaim_bad;
+	return status;
+}
+
+/*
+ * Scan the open stateids on a file for share conflicts with this OPEN.
+ * If the requesting open owner already holds a stateid on the file, it is
+ * handed back through *stpp.  Returns 0 when compatible, or
+ * nfserr_share_denied at the first conflicting stateid.
+ */
+static int
+nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp)
+{
+	struct nfs4_stateowner *requester = open->op_stateowner;
+	struct nfs4_stateid *cur;
+
+	list_for_each_entry(cur, &fp->fi_perfile, st_perfile) {
+		/* ignore lock owners */
+		if (!cur->st_stateowner->so_is_open_owner)
+			continue;
+		/* remember if we have seen this open owner */
+		if (cur->st_stateowner == requester)
+			*stpp = cur;
+		/* check for conflicting share reservations */
+		if (!test_share(cur, open))
+			return nfserr_share_denied;
+	}
+	return 0;
+}
+
+/*
+ * Allocate a stateid and open the file behind cur_fh with the given
+ * MAY_* flags.  On success the new stateid (only st_vfs_file is set so
+ * far) is returned through *stpp.  nfserr_dropit from nfsd_open() is
+ * mapped to nfserr_jukebox; allocation failure yields nfserr_resource.
+ */
+static int
+nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp,
+		struct svc_fh *cur_fh, int flags)
+{
+	struct nfs4_stateid *fresh;
+	int err;
+
+	fresh = kmalloc(sizeof(*fresh), GFP_KERNEL);
+	if (!fresh)
+		return nfserr_resource;
+
+	err = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &fresh->st_vfs_file);
+	if (!err) {
+		vfsopen++;
+		*stpp = fresh;
+		return 0;
+	}
+
+	kfree(fresh);
+	return (err == nfserr_dropit) ? nfserr_jukebox : err;
+}
+
+/*
+ * Truncate the file to zero length if the OPEN asked for it.
+ *
+ * Returns 0 when no truncate was requested, nfserr_inval when a truncate
+ * is requested without write access in op_share_access, otherwise the
+ * result of nfsd_setattr().
+ *
+ * The invalid-request case used to return -EINVAL, a negative errno,
+ * although callers pass this value on as an nfs status.
+ */
+static inline int
+nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
+		struct nfsd4_open *open)
+{
+	struct iattr iattr = {
+		.ia_valid = ATTR_SIZE,
+		.ia_size = 0,
+	};
+	if (!open->op_truncate)
+		return 0;
+	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
+		return nfserr_inval;
+	return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
+}
+
+/*
+ * Upgrade an existing open stateid with the share access/deny of a new
+ * OPEN by the same owner on the same file.
+ *
+ * Write access on the inode is taken only when the request adds WRITE to
+ * what the stateid already holds, and is given back if the truncate
+ * fails.  On success the requested access bits are added to the file
+ * mode and the access/deny values are recorded in the bitmaps.
+ *
+ * Two defects of the earlier version are fixed here:
+ *  - gaining write access cleared FMODE_READ on the shared struct file;
+ *  - an upgrade that added no new writer returned early without
+ *    recording the new access/deny values in the bitmaps.
+ */
+static int
+nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_stateid *stp, struct nfsd4_open *open)
+{
+	struct file *filp = stp->st_vfs_file;
+	struct inode *inode = filp->f_dentry->d_inode;
+	unsigned int share_access, new_writer;
+	int status;
+
+	/* non-zero only if WRITE is requested and not already held */
+	set_access(&share_access, stp->st_access_bmap);
+	new_writer = ~share_access & open->op_share_access
+			& NFS4_SHARE_ACCESS_WRITE;
+
+	if (new_writer) {
+		status = get_write_access(inode);
+		if (status)
+			return nfserrno(status);
+	}
+	status = nfsd4_truncate(rqstp, cur_fh, open);
+	if (status) {
+		if (new_writer)
+			put_write_access(inode);
+		return status;
+	}
+	/* remember the open: only ever add mode bits on an upgrade */
+	if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
+		filp->f_mode |= FMODE_READ;
+	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+		filp->f_mode |= FMODE_WRITE;
+	set_bit(open->op_share_access, &stp->st_access_bmap);
+	set_bit(open->op_share_deny, &stp->st_deny_bmap);
+
+	return nfs_ok;
+}
+
+
+/*
+ * Post-process the status of a CLAIM_PREVIOUS (reclaim) open; other claim
+ * types are left untouched.  Any failure becomes nfserr_reclaim_bad.  On
+ * success the owner is marked confirmed and its seqid is decremented (it
+ * will be bumped in encode_open).
+ */
+static void
+nfs4_set_claim_prev(struct nfsd4_open *open, int *status)
+{
+	struct nfs4_stateowner *owner;
+
+	if (open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+		return;
+
+	if (*status) {
+		*status = nfserr_reclaim_bad;
+		return;
+	}
+
+	owner = open->op_stateowner;
+	owner->so_confirmed = 1;
+	owner->so_seqid--;
+}
+
+/*
+ * Attempt to hand out a delegation.
+ *
+ * No delegation is offered unless the open is CLAIM_NULL, the client's
+ * callback is set (cb_set) and the stateowner is confirmed.  The outcome
+ * is reported through open->op_delegate_type (NONE, READ or WRITE) and,
+ * when one is granted, open->op_delegate_stateid.  There is no return
+ * value.
+ */
+static void
+nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_stateid *stp)
+{
+ struct nfs4_delegation *dp;
+ struct nfs4_stateowner *sop = stp->st_stateowner;
+ struct nfs4_callback *cb = &sop->so_client->cl_callback;
+ struct file_lock fl, *flp = &fl;
+ int status, flag = 0;
+
+ flag = NFS4_OPEN_DELEGATE_NONE;
+ if (open->op_claim_type != NFS4_OPEN_CLAIM_NULL
+ || !atomic_read(&cb->cb_set) || !sop->so_confirmed)
+ goto out;
+
+ /* a write open gets a write delegation, anything else a read one */
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ flag = NFS4_OPEN_DELEGATE_WRITE;
+ else
+ flag = NFS4_OPEN_DELEGATE_READ;
+
+ dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
+ if (dp == NULL) {
+ flag = NFS4_OPEN_DELEGATE_NONE;
+ goto out;
+ }
+ /* build the lease request on the stack; the delegation is its owner */
+ locks_init_lock(&fl);
+ fl.fl_lmops = &nfsd_lease_mng_ops;
+ fl.fl_flags = FL_LEASE;
+ fl.fl_end = OFFSET_MAX;
+ fl.fl_owner = (fl_owner_t)dp;
+ fl.fl_file = stp->st_vfs_file;
+ fl.fl_pid = current->tgid;
+
+ /* setlease checks to see if delegation should be handed out.
+ * the lock_manager callbacks fl_mylease and fl_change are used
+ */
+ if ((status = setlease(stp->st_vfs_file,
+ flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK, &flp))) {
+ dprintk("NFSD: setlease failed [%d], no delegation\n", status);
+ /* undo the list linkage and drop the delegation again */
+ list_del(&dp->dl_del_perfile);
+ list_del(&dp->dl_del_perclnt);
+ nfs4_put_delegation(dp);
+ free_delegation++;
+ flag = NFS4_OPEN_DELEGATE_NONE;
+ goto out;
+ }
+
+ memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
+
+ dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
+ dp->dl_stateid.si_boot,
+ dp->dl_stateid.si_stateownerid,
+ dp->dl_stateid.si_fileid,
+ dp->dl_stateid.si_generation);
+out:
+ open->op_delegate_type = flag;
+}
+
+/*
+ * called with nfs4_lock_state() held.
+ *
+ * Second half of OPEN processing: validate the share values, find or
+ * create the nfs4_file for the inode, then either upgrade the owner's
+ * existing stateid or open the file and create a new one.  On success the
+ * stateid is copied into open->op_stateid and a delegation is attempted.
+ * Returns an nfs status; open->op_rflags is filled in on every path.
+ */
+int
+nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+{
+ struct nfs4_file *fp = NULL;
+ struct inode *ino = current_fh->fh_dentry->d_inode;
+ struct nfs4_stateid *stp = NULL;
+ int status;
+
+ status = nfserr_inval;
+ if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
+ goto out;
+ /*
+ * Lookup file; if found, lookup stateid and check open request,
+ * and check for delegations in the process of being recalled.
+ * If not found, create the nfs4_file struct
+ */
+ fp = find_file(ino);
+ if (fp) {
+ if ((status = nfs4_check_open(fp, open, &stp)))
+ goto out;
+ } else {
+ status = nfserr_resource;
+ fp = alloc_init_file(ino);
+ if (fp == NULL)
+ goto out;
+ }
+
+ /*
+ * OPEN the file, or upgrade an existing OPEN.
+ * If truncate fails, the OPEN fails.
+ */
+ if (stp) {
+ /* Stateid was found, this is an OPEN upgrade */
+ status = nfs4_upgrade_open(rqstp, current_fh, stp, open);
+ if (status)
+ goto out;
+ } else {
+ /* Stateid was not found, this is a new OPEN */
+ int flags = 0;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ flags = MAY_WRITE;
+ else
+ flags = MAY_READ;
+ if ((status = nfs4_new_open(rqstp, &stp, current_fh, flags)))
+ goto out;
+ init_stateid(stp, fp, open);
+ status = nfsd4_truncate(rqstp, current_fh, open);
+ if (status) {
+ /* undo the open; stp is freed by release_stateid() */
+ release_stateid(stp, OPEN_STATE);
+ goto out;
+ }
+ }
+ memcpy(&open->op_stateid, &stp->st_stateid, sizeof(stateid_t));
+
+ /*
+ * Attempt to hand out a delegation. No error return, because the
+ * OPEN succeeds even if we fail.
+ */
+ nfs4_open_delegation(current_fh, open, stp);
+
+ status = nfs_ok;
+
+ dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
+ stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
+ stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
+out:
+ /* take the opportunity to clean up unused state */
+ if (fp && list_empty(&fp->fi_perfile) && list_empty(&fp->fi_del_perfile))
+ release_file(fp);
+
+ /* CLAIM_PREVIOUS has different error returns */
+ nfs4_set_claim_prev(open, &status);
+ /*
+ * To finish the open response, we just need to set the rflags.
+ */
+ open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
+ /* op_stateowner is dereferenced unconditionally, so it must be set */
+ if (!open->op_stateowner->so_confirmed)
+ open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
+
+ return status;
+}
+
+/*
+ * Delayed work item that runs laundromat_main(), which re-queues it
+ * after each pass of nfs4_laundromat().
+ */
+static struct work_struct laundromat_work;
+static void laundromat_main(void *);
+static DECLARE_WORK(laundromat_work, laundromat_main, NULL);
+
+/*
+ * RENEW: extend the lease of a confirmed client.
+ *
+ * Takes and releases the state lock itself.  Returns
+ * nfserr_stale_clientid for a stale clientid, nfserr_expired when no
+ * confirmed client matches, nfserr_cb_path_down when the client holds
+ * delegations but has no callback set, otherwise nfs_ok.
+ */
+int
+nfsd4_renew(clientid_t *clid)
+{
+ struct nfs4_client *clp;
+ int status;
+
+ nfs4_lock_state();
+ dprintk("process_renew(%08x/%08x): starting\n",
+ clid->cl_boot, clid->cl_id);
+ status = nfserr_stale_clientid;
+ if (STALE_CLIENTID(clid))
+ goto out;
+ clp = find_confirmed_client(clid);
+ status = nfserr_expired;
+ if (clp == NULL) {
+ /* We assume the client took too long to RENEW. */
+ dprintk("nfsd4_renew: clientid not found!\n");
+ goto out;
+ }
+ /* the lease is renewed even if the callback check below fails */
+ renew_client(clp);
+ status = nfserr_cb_path_down;
+ if (!list_empty(&clp->cl_del_perclnt)
+ && !atomic_read(&clp->cl_callback.cb_set))
+ goto out;
+ status = nfs_ok;
+out:
+ nfs4_unlock_state();
+ return status;
+}
+
+/*
+ * One pass of the state reaper.  Under the state lock it expires
+ *  - clients on client_lru,
+ *  - delegations on del_recall_lru (recall in progress),
+ *  - stateowners on close_lru,
+ * whose timestamp is not later than (now - NFSD_LEASE_TIME).  Each scan
+ * stops at the first entry that is still fresh, so the lists are assumed
+ * to be ordered oldest first.
+ *
+ * Returns the number of seconds until the next pass should run: the time
+ * until the oldest remaining entry on any of the three lists expires,
+ * at most NFSD_LEASE_TIME and at least NFSD_LAUNDROMAT_MINTIMEOUT.
+ *
+ * The delay computed for the recall and close lists used to be discarded
+ * (only the client list influenced the result); it is now folded into
+ * the return value.
+ */
+time_t
+nfs4_laundromat(void)
+{
+	struct nfs4_client *clp;
+	struct nfs4_stateowner *sop;
+	struct nfs4_delegation *dp;
+	struct list_head *pos, *next, reaplist;
+	time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
+	time_t t, clientid_val = NFSD_LEASE_TIME;
+	time_t u, test_val = NFSD_LEASE_TIME;
+
+	nfs4_lock_state();
+
+	dprintk("NFSD: laundromat service - starting\n");
+	list_for_each_safe(pos, next, &client_lru) {
+		clp = list_entry(pos, struct nfs4_client, cl_lru);
+		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+			t = clp->cl_time - cutoff;
+			if (clientid_val > t)
+				clientid_val = t;
+			break;
+		}
+		dprintk("NFSD: purging unused client (clientid %08x)\n",
+			clp->cl_clientid.cl_id);
+		expire_client(clp);
+	}
+
+	/*
+	 * Collect expired delegations on a private list under recall_lock
+	 * and unhash them only after the spinlock has been dropped.
+	 */
+	INIT_LIST_HEAD(&reaplist);
+	spin_lock(&recall_lock);
+	list_for_each_safe(pos, next, &del_recall_lru) {
+		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
+			u = dp->dl_time - cutoff;
+			if (test_val > u)
+				test_val = u;
+			break;
+		}
+		dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
+				    dp, dp->dl_flock);
+		list_move(&dp->dl_recall_lru, &reaplist);
+	}
+	spin_unlock(&recall_lock);
+	list_for_each_safe(pos, next, &reaplist) {
+		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
+		list_del_init(&dp->dl_recall_lru);
+		unhash_delegation(dp);
+	}
+
+	/* test_val carries over: it ends up as the minimum of both lists */
+	list_for_each_safe(pos, next, &close_lru) {
+		sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
+		if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
+			u = sop->so_time - cutoff;
+			if (test_val > u)
+				test_val = u;
+			break;
+		}
+		dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
+			sop->so_id);
+		list_del(&sop->so_close_lru);
+		nfs4_put_stateowner(sop);
+	}
+
+	/* wake up for whichever list expires first, but not too often */
+	if (test_val < clientid_val)
+		clientid_val = test_val;
+	if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
+		clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
+	nfs4_unlock_state();
+	return clientid_val;
+}
+
+/*
+ * Work function for laundromat_work: run one nfs4_laundromat() pass and
+ * re-queue the work after the number of seconds it returned.  The
+ * argument is unused.
+ */
+void
+laundromat_main(void *not_used)
+{
+ time_t t;
+
+ t = nfs4_laundromat();
+ dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
+ schedule_delayed_work(&laundromat_work, t*HZ);
+}
+
+/*
+ * Search close_lru for the stateowner with the given id
+ * (stateid->si_stateownerid).  The search is only done when CLOSE_STATE
+ * is set in flags; no hash table is consulted here.  Returns NULL when
+ * nothing matches.
+ */
+struct nfs4_stateowner *
+find_openstateowner_id(u32 st_id, int flags) {
+	struct nfs4_stateowner *cand;
+
+	dprintk("NFSD: find_openstateowner_id %d\n", st_id);
+	if (!(flags & CLOSE_STATE))
+		return NULL;
+
+	list_for_each_entry(cand, &close_lru, so_close_lru) {
+		if (cand->so_id != st_id)
+			continue;
+		return cand;
+	}
+	return NULL;
+}
+
+/*
+ * Returns non-zero when the filehandle and the stateid's open file refer
+ * to DIFFERENT inodes (i.e. non-zero means mismatch).
+ */
+static inline int
+nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
+{
+ return fhp->fh_dentry->d_inode != stp->st_vfs_file->f_dentry->d_inode;
+}
+
+/*
+ * A stateid is stale when its boot time differs from this server
+ * instance's boot_time.  Returns 1 (and logs the stateid) if stale,
+ * 0 otherwise.
+ */
+static int
+STALE_STATEID(stateid_t *stateid)
+{
+	if (stateid->si_boot != boot_time) {
+		printk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
+			stateid->si_boot, stateid->si_stateownerid,
+			stateid->si_fileid, stateid->si_generation);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Non-zero if the access bitmap permits a READ.  Note that an open for
+ * WRITE alone also satisfies this check, in addition to READ and BOTH.
+ */
+static inline int
+access_permit_read(unsigned long access_bmap)
+{
+ return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
+ test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap) ||
+ test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap);
+}
+
+/*
+ * Non-zero if the access bitmap permits a WRITE, i.e. the stateid has
+ * been opened with WRITE or BOTH access.
+ */
+static inline int
+access_permit_write(unsigned long access_bmap)
+{
+ return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
+ test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
+}
+
+/*
+ * Check that the stateid's recorded open access covers the operation:
+ * WR_STATE needs write permission, RD_STATE needs read permission.
+ * Returns nfs_ok or nfserr_openmode.
+ */
+static
+int nfs4_check_openmode(struct nfs4_stateid *stp, int flags)
+{
+	if ((flags & WR_STATE) && !access_permit_write(stp->st_access_bmap))
+		return nfserr_openmode;
+	if ((flags & RD_STATE) && !access_permit_read(stp->st_access_bmap))
+		return nfserr_openmode;
+	return nfs_ok;
+}
+
+/*
+ * A write operation is not allowed under a read delegation; everything
+ * else passes.  Returns nfs_ok or nfserr_openmode.
+ */
+static inline int
+nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
+{
+	if (!(flags & WR_STATE))
+		return nfs_ok;
+	if (dp->dl_type != NFS4_OPEN_DELEGATE_READ)
+		return nfs_ok;
+	return nfserr_openmode;
+}
+
+/*
+ * Handle the special all-zero / all-one stateids.
+ *
+ * Operations that are neither reads nor writes are rejected with
+ * nfserr_bad_stateid.  A read with the all-one stateid is always allowed.
+ * Every other case returns nfserr_grace during the grace period and is
+ * otherwise decided by nfs4_share_conflict().
+ */
+static inline int
+check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
+{
+ /* Trying to call delegreturn with a special stateid? Yuch: */
+ if (!(flags & (RD_STATE | WR_STATE)))
+ return nfserr_bad_stateid;
+ else if (ONE_STATEID(stateid) && (flags & RD_STATE))
+ return nfs_ok;
+ else if (nfs4_in_grace()) {
+ /* Answer in remaining cases depends on existence of
+ * conflicting state; so we must wait out the grace period. */
+ return nfserr_grace;
+ } else if (flags & WR_STATE)
+ return nfs4_share_conflict(current_fh,
+ NFS4_SHARE_DENY_WRITE);
+ else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */
+ return nfs4_share_conflict(current_fh,
+ NFS4_SHARE_DENY_READ);
+}
+
+/*
+ * Allow READ/WRITE during grace period on recovered state only for files
+ * that are not able to provide mandatory locking.
+ *
+ * Returns non-zero when the server is in its grace period, the operation
+ * is a read or write, and MANDATORY_LOCK() is true for the inode.
+ */
+static inline int
+io_during_grace_disallowed(struct inode *inode, int flags)
+{
+ return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE))
+ && MANDATORY_LOCK(inode);
+}
+
+/*
+* Checks for stateid operations
+*
+* Validates the stateid presented with an operation on current_fh.
+* flags selects the checks (RD_STATE, WR_STATE, CHECK_FH, DELEG_RET, ...).
+* If filpp is non-NULL it is set to the open file behind the stateid on
+* success, and to NULL otherwise.  Returns nfs_ok or an nfserr_* status.
+* A stateid whose si_fileid is zero is treated as a delegation stateid.
+*/
+int
+nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp)
+{
+ struct nfs4_stateid *stp = NULL;
+ struct nfs4_delegation *dp = NULL;
+ stateid_t *stidp;
+ struct inode *ino = current_fh->fh_dentry->d_inode;
+ int status;
+
+ dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n",
+ stateid->si_boot, stateid->si_stateownerid,
+ stateid->si_fileid, stateid->si_generation);
+ if (filpp)
+ *filpp = NULL;
+
+ if (io_during_grace_disallowed(ino, flags))
+ return nfserr_grace;
+
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+ return check_special_stateids(current_fh, stateid, flags);
+
+ /* STALE STATEID */
+ status = nfserr_stale_stateid;
+ if (STALE_STATEID(stateid))
+ goto out;
+
+ /* BAD STATEID */
+ status = nfserr_bad_stateid;
+ if (!stateid->si_fileid) { /* delegation stateid */
+ if(!(dp = find_delegation_stateid(ino, stateid))) {
+ dprintk("NFSD: delegation stateid not found\n");
+ if (nfs4_in_grace())
+ status = nfserr_grace;
+ goto out;
+ }
+ stidp = &dp->dl_stateid;
+ } else { /* open or lock stateid */
+ if (!(stp = find_stateid(stateid, flags))) {
+ dprintk("NFSD: open or lock stateid not found\n");
+ if (nfs4_in_grace())
+ status = nfserr_grace;
+ goto out;
+ }
+ if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp))
+ goto out;
+ if (!stp->st_stateowner->so_confirmed)
+ goto out;
+ stidp = &stp->st_stateid;
+ }
+ /* a generation newer than the server's is still a bad stateid */
+ if (stateid->si_generation > stidp->si_generation)
+ goto out;
+
+ /* OLD STATEID */
+ status = nfserr_old_stateid;
+ if (stateid->si_generation < stidp->si_generation)
+ goto out;
+ if (stp) {
+ if ((status = nfs4_check_openmode(stp,flags)))
+ goto out;
+ renew_client(stp->st_stateowner->so_client);
+ if (filpp)
+ *filpp = stp->st_vfs_file;
+ } else if (dp) {
+ if ((status = nfs4_check_delegmode(dp, flags)))
+ goto out;
+ renew_client(dp->dl_client);
+ /* DELEGRETURN: the delegation is torn down right here */
+ if (flags & DELEG_RET)
+ unhash_delegation(dp);
+ if (filpp)
+ *filpp = dp->dl_vfs_file;
+ }
+ status = nfs_ok;
+out:
+ return status;
+}
+
+
+/*
+ * Checks for sequence id mutating operations.
+ *
+ * Validates the stateid and seqid of a seqid-mutating operation.  On
+ * success *stpp and *sopp are set to the stateid and its owner and nfs_ok
+ * is returned.  A retransmission is reported as NFSERR_REPLAY_ME with
+ * *sopp still set; a wrong seqid yields nfserr_bad_seqid with *sopp
+ * cleared.  lockclid, when non-NULL, is the clientid of a new lock owner
+ * and must match the client owning the open stateid.
+ */
+int
+nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid)
+{
+ int status;
+ struct nfs4_stateid *stp;
+ struct nfs4_stateowner *sop;
+
+ dprintk("NFSD: preprocess_seqid_op: seqid=%d "
+ "stateid = (%08x/%08x/%08x/%08x)\n", seqid,
+ stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
+ stateid->si_generation);
+
+ *stpp = NULL;
+ *sopp = NULL;
+
+ status = nfserr_bad_stateid;
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
+ printk("NFSD: preprocess_seqid_op: magic stateid!\n");
+ goto out;
+ }
+
+ status = nfserr_stale_stateid;
+ if (STALE_STATEID(stateid))
+ goto out;
+ /*
+ * We return BAD_STATEID if filehandle doesn't match stateid,
+ * the confirmed flag is incorrectly set, or the generation
+ * number is incorrect.
+ * If there is no entry in the openfile table for this id,
+ * we can't always return BAD_STATEID;
+ * this might be a retransmitted CLOSE which has arrived after
+ * the openfile has been released.
+ */
+ if (!(stp = find_stateid(stateid, flags)))
+ goto no_nfs4_stateid;
+
+ status = nfserr_bad_stateid;
+
+ /* for new lock stateowners:
+ * check that the lock->v.new.open_stateid
+ * refers to an open stateowner
+ *
+ * check that the lockclid (nfs4_lock->v.new.clientid) is the same
+ * as the open_stateid->st_stateowner->so_client->clientid
+ */
+ if (lockclid) {
+ /* this 'sop' shadows the function-scope variable of the same name */
+ struct nfs4_stateowner *sop = stp->st_stateowner;
+ struct nfs4_client *clp = sop->so_client;
+
+ if (!sop->so_is_open_owner)
+ goto out;
+ if (!cmp_clid(&clp->cl_clientid, lockclid))
+ goto out;
+ }
+
+ if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
+ printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
+ goto out;
+ }
+
+ *stpp = stp;
+ *sopp = sop = stp->st_stateowner;
+
+ /*
+ * We now validate the seqid and stateid generation numbers.
+ * For the moment, we ignore the possibility of
+ * generation number wraparound.
+ */
+ if (seqid != sop->so_seqid + 1)
+ goto check_replay;
+
+ /* CONFIRM must be set exactly when the owner is still unconfirmed */
+ if (sop->so_confirmed) {
+ if (flags & CONFIRM) {
+ printk("NFSD: preprocess_seqid_op: expected unconfirmed stateowner!\n");
+ goto out;
+ }
+ }
+ else {
+ if (!(flags & CONFIRM)) {
+ printk("NFSD: preprocess_seqid_op: stateowner not confirmed yet!\n");
+ goto out;
+ }
+ }
+ if (stateid->si_generation > stp->st_stateid.si_generation) {
+ printk("NFSD: preprocess_seqid_op: future stateid?!\n");
+ goto out;
+ }
+
+ status = nfserr_old_stateid;
+ if (stateid->si_generation < stp->st_stateid.si_generation) {
+ printk("NFSD: preprocess_seqid_op: old stateid!\n");
+ goto out;
+ }
+ /* XXX renew the client lease here */
+ status = nfs_ok;
+
+out:
+ return status;
+
+no_nfs4_stateid:
+
+ /*
+ * We determine whether this is a bad stateid or a replay,
+ * starting by trying to look up the stateowner.
+ * If stateowner is not found - stateid is bad.
+ */
+ if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) {
+ printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n");
+ status = nfserr_bad_stateid;
+ goto out;
+ }
+ *sopp = sop;
+
+check_replay:
+ /* reached with sop set, either from the lookup above or from stp */
+ if (seqid == sop->so_seqid) {
+ printk("NFSD: preprocess_seqid_op: retransmission?\n");
+ /* indicate replay to calling function */
+ status = NFSERR_REPLAY_ME;
+ } else {
+ printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid);
+
+ *sopp = NULL;
+ status = nfserr_bad_seqid;
+ }
+ goto out;
+}
+
+/*
+ * OPEN_CONFIRM: mark the stateowner of an unconfirmed open as confirmed
+ * and return the updated stateid in oc->oc_resp_stateid.
+ *
+ * Takes the state lock itself.  When oc->oc_stateowner is set on exit
+ * from the locked section, nfs4_get_stateowner() is called on it before
+ * the lock is dropped.  Returns an nfs status.
+ */
+int
+nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc)
+{
+	int status;
+	struct nfs4_stateowner *sop;
+	struct nfs4_stateid *stp;
+
+	dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
+			(int)current_fh->fh_dentry->d_name.len,
+			current_fh->fh_dentry->d_name.name);
+
+	/*
+	 * The state lock is not held yet, so a filehandle failure must not
+	 * go through the "out" label: that path calls nfs4_unlock_state()
+	 * and would release a lock this function never took.
+	 * NOTE(review): this also skips the nfs4_get_stateowner() call on
+	 * that path; assumes oc->oc_stateowner is NULL before the seqid
+	 * check has run - confirm against the request decoder.
+	 */
+	status = fh_verify(rqstp, current_fh, S_IFREG, 0);
+	if (status)
+		return status;
+
+	nfs4_lock_state();
+
+	if ((status = nfs4_preprocess_seqid_op(current_fh, oc->oc_seqid,
+					&oc->oc_req_stateid,
+					CHECK_FH | CONFIRM | OPEN_STATE,
+					&oc->oc_stateowner, &stp, NULL)))
+		goto out;
+
+	sop = oc->oc_stateowner;
+	sop->so_confirmed = 1;
+	update_stateid(&stp->st_stateid);
+	memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
+	dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d "
+		"stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid,
+		         stp->st_stateid.si_boot,
+		         stp->st_stateid.si_stateownerid,
+		         stp->st_stateid.si_fileid,
+		         stp->st_stateid.si_generation);
+out:
+	if (oc->oc_stateowner)
+		nfs4_get_stateowner(oc->oc_stateowner);
+	nfs4_unlock_state();
+	return status;
+}
+
+
+/*
+ * unset all bits in union bitmap (bmap) that
+ * do not exist in share (from successful OPEN_DOWNGRADE)
+ *
+ * Bit N (N = 1..3) is kept only when every bit of the value N is
+ * contained in 'access'.
+ */
+static void
+reset_union_bmap_access(unsigned long access, unsigned long *bmap)
+{
+	int mode;
+
+	for (mode = 1; mode < 4; mode++) {
+		if ((mode & access) == mode)
+			continue;
+		__clear_bit(mode, bmap);
+	}
+}
+
+/*
+ * Deny-bitmap counterpart of reset_union_bmap_access(): bit N (N = 0..3)
+ * is cleared unless every bit of the value N is contained in 'deny'.
+ * Bit 0 therefore always survives.
+ */
+static void
+reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
+{
+	int mode;
+
+	for (mode = 0; mode < 4; mode++) {
+		if ((mode & deny) == mode)
+			continue;
+		__clear_bit(mode, bmap);
+	}
+}
+
+/*
+ * OPEN_DOWNGRADE: reduce the share access/deny of an open stateid.
+ *
+ * The requested access and deny values must each have been used by an
+ * earlier OPEN on this stateid (their bit must be set in the bitmaps),
+ * otherwise nfserr_inval is returned.  On success the updated stateid is
+ * copied back into od->od_stateid.  Takes the state lock itself.
+ */
+int
+nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od)
+{
+ int status;
+ struct nfs4_stateid *stp;
+ unsigned int share_access;
+
+ dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n",
+ (int)current_fh->fh_dentry->d_name.len,
+ current_fh->fh_dentry->d_name.name);
+
+ if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny))
+ return nfserr_inval;
+
+ nfs4_lock_state();
+ if ((status = nfs4_preprocess_seqid_op(current_fh, od->od_seqid,
+ &od->od_stateid,
+ CHECK_FH | OPEN_STATE,
+ &od->od_stateowner, &stp, NULL)))
+ goto out;
+
+ status = nfserr_inval;
+ if (!test_bit(od->od_share_access, &stp->st_access_bmap)) {
+ dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n",
+ stp->st_access_bmap, od->od_share_access);
+ goto out;
+ }
+ if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) {
+ dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n",
+ stp->st_deny_bmap, od->od_share_deny);
+ goto out;
+ }
+ /* pass down the access bits held now but absent from the new share */
+ set_access(&share_access, stp->st_access_bmap);
+ nfs4_file_downgrade(stp->st_vfs_file,
+ share_access & ~od->od_share_access);
+
+ reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap);
+ reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);
+
+ update_stateid(&stp->st_stateid);
+ memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t));
+ status = nfs_ok;
+out:
+ if (od->od_stateowner)
+ nfs4_get_stateowner(od->od_stateowner);
+ nfs4_unlock_state();
+ return status;
+}
+
+/*
+ * CLOSE: release an open stateid.
+ *
+ * The state lock is taken and released inside this function.  The
+ * updated stateid is copied into close->cl_stateid before the stateid
+ * itself is released.  When close->cl_stateowner is set on exit,
+ * nfs4_get_stateowner() is called on it before the lock is dropped.
+ */
+int
+nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close)
+{
+ int status;
+ struct nfs4_stateid *stp;
+
+ dprintk("NFSD: nfsd4_close on file %.*s\n",
+ (int)current_fh->fh_dentry->d_name.len,
+ current_fh->fh_dentry->d_name.name);
+
+ nfs4_lock_state();
+ /* check close_lru for replay */
+ if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid,
+ &close->cl_stateid,
+ CHECK_FH | OPEN_STATE | CLOSE_STATE,
+ &close->cl_stateowner, &stp, NULL)))
+ goto out;
+ /*
+ * Return success, but first update the stateid.
+ */
+ status = nfs_ok;
+ update_stateid(&stp->st_stateid);
+ memcpy(&close->cl_stateid, &stp->st_stateid, sizeof(stateid_t));
+
+ /* release_state_owner() calls nfsd_close() if needed */
+ release_state_owner(stp, OPEN_STATE);
+out:
+ if (close->cl_stateowner)
+ nfs4_get_stateowner(close->cl_stateowner);
+ nfs4_unlock_state();
+ return status;
+}
+
+/*
+ * DELEGRETURN: verify the filehandle, then let the stateid checker
+ * (called with DELEG_RET) validate and tear down the delegation under
+ * the state lock.  Returns an nfs status.
+ */
+int
+nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr)
+{
+	int status = fh_verify(rqstp, current_fh, S_IFREG, 0);
+
+	if (status)
+		return status;
+
+	nfs4_lock_state();
+	status = nfs4_preprocess_stateid_op(current_fh, &dr->dr_stateid, DELEG_RET, NULL);
+	nfs4_unlock_state();
+
+	return status;
+}
+
+
+/*
+ * Lock owner state (byte-range locks)
+ */
+/* True when start + len does not fit in an unsigned 64-bit value. */
+#define LOFF_OVERFLOW(start, len) ((u64)(len) > ~(u64)(start))
+/* The lock owner hash tables have 2^LOCK_HASH_BITS buckets. */
+#define LOCK_HASH_BITS 8
+#define LOCK_HASH_SIZE (1 << LOCK_HASH_BITS)
+#define LOCK_HASH_MASK (LOCK_HASH_SIZE - 1)
+
+/* Bucket index for a lock owner id: its low LOCK_HASH_BITS bits. */
+#define lockownerid_hashval(id) \
+ ((id) & LOCK_HASH_MASK)
+
+/*
+ * Bucket index into lock_ownerstr_hashtbl for a lock owner: the sum of the
+ * file hash, the client id and the hash of the opaque owner name, reduced
+ * to LOCK_HASH_BITS bits.
+ */
+static inline unsigned int
+lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
+		struct xdr_netobj *ownername)
+{
+	unsigned int sum;
+
+	sum = file_hashval(inode) + cl_id
+		+ opaque_hashval(ownername->data, ownername->len);
+	return sum & LOCK_HASH_MASK;
+}
+
+/* lock owners chained through so_idhash, bucket = lockownerid_hashval(so_id) */
+static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE];
+/* lock owners chained through so_strhash, bucket = lock_ownerstr_hashval() */
+static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+/* lock stateids chained through st_hash, bucket = stateid_hashval() */
+static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE];
+
+/*
+ * Walk one stateid hash chain and return the entry whose stateowner id and
+ * file id both match, or NULL when the chain holds no such entry.
+ */
+static struct nfs4_stateid *
+find_stateid_in_chain(struct list_head *chain, u32 st_id, u32 f_id)
+{
+	struct nfs4_stateid *stp;
+
+	list_for_each_entry(stp, chain, st_hash) {
+		if ((stp->st_stateid.si_stateownerid == st_id) &&
+		    (stp->st_stateid.si_fileid == f_id))
+			return stp;
+	}
+	return NULL;
+}
+
+/*
+ * Look up the nfs4_stateid matching the stateowner id and file id in @stid.
+ *
+ * @flags selects the tables to search:
+ *   LOCK_STATE           - lockstateid_hashtbl only
+ *   OPEN_STATE           - stateid_hashtbl only
+ *   RD_STATE / WR_STATE  - lockstateid_hashtbl first, then stateid_hashtbl
+ *
+ * Returns the matching stateid, or NULL if none is found.  An error is
+ * logged only when @flags carries none of the four flags above; a plain
+ * lookup miss is not an error.
+ */
+struct nfs4_stateid *
+find_stateid(stateid_t *stid, int flags)
+{
+	struct nfs4_stateid *local;
+	u32 st_id = stid->si_stateownerid;
+	u32 f_id = stid->si_fileid;
+	int rdwr = (flags & RD_STATE) || (flags & WR_STATE);
+	unsigned int hashval;
+
+	dprintk("NFSD: find_stateid flags 0x%x\n",flags);
+	if (!rdwr && !(flags & LOCK_STATE) && !(flags & OPEN_STATE)) {
+		printk("NFSD: find_stateid: ERROR: no state flag\n");
+		return NULL;
+	}
+
+	hashval = stateid_hashval(st_id, f_id);
+	if ((flags & LOCK_STATE) || rdwr) {
+		local = find_stateid_in_chain(&lockstateid_hashtbl[hashval],
+					      st_id, f_id);
+		if (local)
+			return local;
+	}
+	if ((flags & OPEN_STATE) || rdwr)
+		return find_stateid_in_chain(&stateid_hashtbl[hashval],
+					     st_id, f_id);
+	return NULL;
+}
+
+/*
+ * Find the delegation on inode @ino whose stateid carries the same
+ * stateowner id as @stid.  Only si_stateownerid is compared.
+ *
+ * Returns the delegation, or NULL when the inode has no nfs4_file or no
+ * delegation on it matches.
+ */
+static struct nfs4_delegation *
+find_delegation_stateid(struct inode *ino, stateid_t *stid)
+{
+	struct nfs4_delegation *deleg;
+	struct nfs4_file *file;
+	u32 owner_id;
+
+	dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
+		stid->si_boot, stid->si_stateownerid,
+		stid->si_fileid, stid->si_generation);
+
+	owner_id = stid->si_stateownerid;
+	file = find_file(ino);
+	if (!file)
+		return NULL;
+
+	list_for_each_entry(deleg, &file->fi_del_perfile, dl_del_perfile) {
+		if (deleg->dl_stateid.si_stateownerid != owner_id)
+			continue;
+		dprintk("NFSD: find_delegation dp %p\n",deleg);
+		return deleg;
+	}
+	return NULL;
+}
+
+/*
+ * TODO: Linux file offsets are _signed_ 64-bit quantities, so lock requests
+ * reaching beyond byte (2^63 - 1) cannot be represented properly: they show
+ * up here as negative values because of sign extension.  NFSv4 calls for
+ * 64-bit locking, which keeps us from being completely protocol-compliant.
+ * The real fix is unsigned file offsets in the VFS, but that is a very deep
+ * change!
+ *
+ * Until then, clamp a negative start or end to OFFSET_MAX.
+ */
+static inline void
+nfs4_transform_lock_offset(struct file_lock *lock)
+{
+	const int start_negative = lock->fl_start < 0;
+	const int end_negative = lock->fl_end < 0;
+
+	if (start_negative)
+		lock->fl_start = OFFSET_MAX;
+	if (end_negative)
+		lock->fl_end = OFFSET_MAX;
+}
+
+/*
+ * Check whether @sop is currently linked into bucket @hashval of
+ * lock_ownerid_hashtbl.  Only pointer identity is compared.
+ *
+ * Returns 1 if found, 0 if not found or if @hashval is out of range.
+ */
+int
+nfs4_verify_lock_stateowner(struct nfs4_stateowner *sop, unsigned int hashval)
+{
+	struct nfs4_stateowner *cur;
+
+	if (hashval >= LOCK_HASH_SIZE)
+		return 0;
+
+	list_for_each_entry(cur, &lock_ownerid_hashtbl[hashval], so_idhash) {
+		if (cur == sop)
+			return 1;
+	}
+	return 0;
+}
+
+
+/*
+ * Fill in the LOCK/LOCKT "denied" reply from the conflicting lock @fl.
+ *
+ * If fl->fl_owner is a lock stateowner known to lock_ownerid_hashtbl, a
+ * reference is taken on it and it is recorded in ld_sop together with its
+ * clientid; otherwise ld_sop is NULL and ld_clientid is left untouched.
+ * Start, length and type are always filled in.
+ *
+ * NOTE(review): so_id is read through the owner pointer before that
+ * pointer has been verified against the hash table - confirm this is safe
+ * for locks that do not belong to an NFSv4 lock owner.
+ */
+static inline void
+nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
+{
+	struct nfs4_stateowner *owner = (struct nfs4_stateowner *) fl->fl_owner;
+	unsigned int bucket = lockownerid_hashval(owner->so_id);
+
+	if (nfs4_verify_lock_stateowner(owner, bucket)) {
+		kref_get(&owner->so_ref);
+		deny->ld_sop = owner;
+		deny->ld_clientid = owner->so_client->cl_clientid;
+	} else {
+		deny->ld_sop = NULL;
+	}
+
+	deny->ld_start = fl->fl_start;
+	/* an end of ~0 is reported as the all-ones length */
+	if (fl->fl_end != ~(u64)0)
+		deny->ld_length = fl->fl_end - fl->fl_start + 1;
+	else
+		deny->ld_length = ~(u64)0;
+
+	/* anything that is not a read lock is reported as a write lock */
+	if (fl->fl_type != F_RDLCK)
+		deny->ld_type = NFS4_WRITE_LT;
+	else
+		deny->ld_type = NFS4_READ_LT;
+}
+
+/*
+ * Find a lock owner by owner name and clientid.  No hash is computed:
+ * every bucket of lock_ownerid_hashtbl is scanned and the first owner
+ * accepted by cmp_owner_str() is returned.  Returns NULL if none matches.
+ */
+static struct nfs4_stateowner *
+find_lockstateowner(struct xdr_netobj *owner, clientid_t *clid)
+{
+	struct nfs4_stateowner *cur;
+	int bucket;
+
+	for (bucket = 0; bucket < LOCK_HASH_SIZE; bucket++) {
+		list_for_each_entry(cur, &lock_ownerid_hashtbl[bucket], so_idhash) {
+			if (cmp_owner_str(cur, owner, clid))
+				return cur;
+		}
+	}
+	return NULL;
+}
+
+/*
+ * Find a lock owner by (inode, clientid, owner name).  Only the
+ * lock_ownerstr_hashtbl bucket given by lock_ownerstr_hashval() is
+ * searched.  Returns NULL if no owner in it is accepted by cmp_owner_str().
+ */
+static struct nfs4_stateowner *
+find_lockstateowner_str(struct inode *inode, clientid_t *clid,
+		struct xdr_netobj *owner)
+{
+	struct list_head *chain;
+	struct nfs4_stateowner *cur;
+
+	chain = &lock_ownerstr_hashtbl[lock_ownerstr_hashval(inode,
+						clid->cl_id, owner)];
+	list_for_each_entry(cur, chain, so_strhash) {
+		if (!cmp_owner_str(cur, owner, clid))
+			continue;
+		return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Allocate and initialise a lock owner.
+ * Called from nfsd4_lock - therefore OPEN and OPEN_CONFIRM (if needed) have
+ * already occurred.
+ *
+ * strhashval = lock_ownerstr_hashval
+ * so_seqid = lock->lk_new_lock_seqid - 1: it gets bumped in encode
+ *
+ * The new owner takes the next value of current_ownerid as its id, is
+ * hashed into both lock owner tables and is linked onto the open stateid's
+ * st_perlockowner list.  Returns NULL if alloc_stateowner() fails.
+ */
+
+static struct nfs4_stateowner *
+alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock)
+{
+	struct nfs4_stateowner *owner;
+	struct nfs4_replay *replay;
+
+	owner = alloc_stateowner(&lock->lk_new_owner);
+	if (!owner)
+		return NULL;
+
+	/* identity and sequencing */
+	owner->so_id = current_ownerid++;
+	owner->so_client = clp;
+	owner->so_is_open_owner = 0;
+	owner->so_confirmed = 1;
+	owner->so_seqid = lock->lk_new_lock_seqid - 1;
+	owner->so_time = 0;
+
+	/* list heads */
+	INIT_LIST_HEAD(&owner->so_idhash);
+	INIT_LIST_HEAD(&owner->so_strhash);
+	INIT_LIST_HEAD(&owner->so_perclient);
+	INIT_LIST_HEAD(&owner->so_perfilestate);
+	INIT_LIST_HEAD(&owner->so_perlockowner);
+	INIT_LIST_HEAD(&owner->so_close_lru); /* not used */
+
+	/* hash by id and by name, and attach to the open stateid */
+	list_add(&owner->so_idhash,
+		 &lock_ownerid_hashtbl[lockownerid_hashval(owner->so_id)]);
+	list_add(&owner->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
+	list_add(&owner->so_perlockowner, &open_stp->st_perlockowner);
+
+	/* empty replay cache, pointing at its inline buffer */
+	replay = &owner->so_replay;
+	replay->rp_status = NFSERR_SERVERFAULT;
+	replay->rp_buflen = 0;
+	replay->rp_buf = replay->rp_ibuf;
+
+	return owner;
+}
+
+/*
+ * Allocate a lock stateid for lock owner @sop on file @fp.
+ *
+ * The stateid is hashed into lockstateid_hashtbl and linked onto the
+ * file's fi_perfile list and the owner's so_perfilestate list.  The
+ * struct file pointer and the access/deny bitmaps are copied from the open
+ * stateid @open_stp.  Returns NULL if the allocation fails.
+ */
+struct nfs4_stateid *
+alloc_init_lock_stateid(struct nfs4_stateowner *sop, struct nfs4_file *fp, struct nfs4_stateid *open_stp)
+{
+	struct nfs4_stateid *lock_stp;
+	unsigned int bucket;
+
+	lock_stp = kmalloc(sizeof(struct nfs4_stateid), GFP_KERNEL);
+	if (lock_stp == NULL)
+		return NULL;
+
+	bucket = stateid_hashval(sop->so_id, fp->fi_id);
+
+	INIT_LIST_HEAD(&lock_stp->st_hash);
+	INIT_LIST_HEAD(&lock_stp->st_perfile);
+	INIT_LIST_HEAD(&lock_stp->st_perfilestate);
+	INIT_LIST_HEAD(&lock_stp->st_perlockowner); /* not used */
+
+	list_add(&lock_stp->st_hash, &lockstateid_hashtbl[bucket]);
+	list_add(&lock_stp->st_perfile, &fp->fi_perfile);
+	list_add_perfile++;
+	list_add(&lock_stp->st_perfilestate, &sop->so_perfilestate);
+
+	lock_stp->st_stateowner = sop;
+	lock_stp->st_file = fp;
+
+	lock_stp->st_stateid.si_boot = boot_time;
+	lock_stp->st_stateid.si_stateownerid = sop->so_id;
+	lock_stp->st_stateid.si_fileid = fp->fi_id;
+	lock_stp->st_stateid.si_generation = 0;
+
+	/* inherited from the open stateid */
+	lock_stp->st_vfs_file = open_stp->st_vfs_file; /* FIXME refcount?? */
+	lock_stp->st_access_bmap = open_stp->st_access_bmap;
+	lock_stp->st_deny_bmap = open_stp->st_deny_bmap;
+
+	return lock_stp;
+}
+
+/*
+ * Validate a lock range.  Returns non-zero when the range is invalid:
+ * either the length is zero, or the length is not the all-ones value and
+ * offset + length overflows 64 bits.  Returns 0 for an acceptable range.
+ */
+int
+check_lock_length(u64 offset, u64 length)
+{
+	if (length == 0)
+		return 1;
+	if (length == ~(u64)0)
+		return 0;
+	return LOFF_OVERFLOW(offset, length);
+}
+
+/*
+ *  LOCK operation
+ *
+ * Outside the grace period reclaims are refused (nfserr_no_grace); inside
+ * it only reclaims are accepted (nfserr_grace otherwise).  An invalid
+ * range is rejected with nfserr_inval before the state lock is taken.
+ *
+ * With lk_is_new set, the open seqid/stateid are validated and a new lock
+ * owner and lock stateid are created from them; otherwise the existing
+ * lock seqid/stateid are validated.  The lock is then requested from the
+ * VFS with posix_lock_file():
+ *   success  - the lock stateid is bumped and copied into the reply
+ *   -EAGAIN  - nfserr_denied, with the conflicting lock described in
+ *              lock->lk_denied
+ *   -EDEADLK - nfserr_deadlock
+ *   other    - the errno is translated with nfserrno()
+ * When the VFS refuses a lock requested through a newly created stateid,
+ * that stateid is destroyed again, and the open seqid bump is undone
+ * unless the error is a seqid-mutating one.
+ *
+ * On exit nfs4_get_stateowner() is called on lock->lk_stateowner when it
+ * is set, and the state lock is released.
+ */
+int
+nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
+{
+	struct nfs4_stateowner *lock_sop = NULL, *open_sop = NULL;
+	struct nfs4_stateid *lock_stp;
+	struct file *filp;
+	struct file_lock file_lock;
+	struct file_lock *conflock;
+	int status = 0;
+	unsigned int strhashval;
+
+	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
+		(long long) lock->lk_offset,
+		(long long) lock->lk_length);
+
+	if (nfs4_in_grace() && !lock->lk_reclaim)
+		return nfserr_grace;
+	if (!nfs4_in_grace() && lock->lk_reclaim)
+		return nfserr_no_grace;
+
+	if (check_lock_length(lock->lk_offset, lock->lk_length))
+		return nfserr_inval;
+
+	nfs4_lock_state();
+
+	if (lock->lk_is_new) {
+		/*
+		 * Client indicates that this is a new lockowner.
+		 * Use open owner and open stateid to create lock owner and lock
+		 * stateid.
+		 */
+		struct nfs4_stateid *open_stp = NULL;
+		struct nfs4_file *fp;
+
+		status = nfserr_stale_clientid;
+		if (STALE_CLIENTID(&lock->lk_new_clientid)) {
+			printk("NFSD: nfsd4_lock: clientid is stale!\n");
+			goto out;
+		}
+
+		/* is the new lock seqid presented by the client zero? */
+		status = nfserr_bad_seqid;
+		if (lock->v.new.lock_seqid != 0)
+			goto out;
+
+		/* validate and update open stateid and open seqid */
+		status = nfs4_preprocess_seqid_op(current_fh,
+					lock->lk_new_open_seqid,
+					&lock->lk_new_open_stateid,
+					CHECK_FH | OPEN_STATE,
+					&open_sop, &open_stp,
+					&lock->v.new.clientid);
+		if (status) {
+			if (lock->lk_reclaim)
+				status = nfserr_reclaim_bad;
+			goto out;
+		}
+		/* create lockowner and lock stateid */
+		fp = open_stp->st_file;
+		strhashval = lock_ownerstr_hashval(fp->fi_inode,
+				open_sop->so_client->cl_clientid.cl_id,
+				&lock->v.new.owner);
+		/*
+		 * If we already have this lock owner, the client is in
+		 * error (or our bookkeeping is wrong!)
+		 * for asking for a 'new lock'.
+		 */
+		status = nfserr_bad_stateid;
+		lock_sop = find_lockstateowner(&lock->v.new.owner,
+						&lock->v.new.clientid);
+		if (lock_sop)
+			goto out;
+		status = nfserr_resource;
+		if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
+			goto out;
+		if ((lock_stp = alloc_init_lock_stateid(lock->lk_stateowner,
+						fp, open_stp)) == NULL) {
+			release_stateowner(lock->lk_stateowner);
+			lock->lk_stateowner = NULL;
+			goto out;
+		}
+		/* bump the open seqid used to create the lock */
+		open_sop->so_seqid++;
+	} else {
+		/* lock (lock owner + lock stateid) already exists */
+		status = nfs4_preprocess_seqid_op(current_fh,
+					lock->lk_old_lock_seqid,
+					&lock->lk_old_lock_stateid,
+					CHECK_FH | LOCK_STATE,
+					&lock->lk_stateowner, &lock_stp, NULL);
+		if (status)
+			goto out;
+	}
+	/* lock->lk_stateowner and lock_stp have been created or found */
+	filp = lock_stp->st_vfs_file;
+
+	if ((status = fh_verify(rqstp, current_fh, S_IFREG, MAY_LOCK))) {
+		printk("NFSD: nfsd4_lock: permission denied!\n");
+		goto out;
+	}
+
+	locks_init_lock(&file_lock);
+	switch (lock->lk_type) {
+		case NFS4_READ_LT:
+		case NFS4_READW_LT:
+			file_lock.fl_type = F_RDLCK;
+		break;
+		case NFS4_WRITE_LT:
+		case NFS4_WRITEW_LT:
+			file_lock.fl_type = F_WRLCK;
+		break;
+		default:
+			status = nfserr_inval;
+		goto out;
+	}
+	file_lock.fl_owner = (fl_owner_t) lock->lk_stateowner;
+	file_lock.fl_pid = current->tgid;
+	file_lock.fl_file = filp;
+	file_lock.fl_flags = FL_POSIX;
+
+	file_lock.fl_start = lock->lk_offset;
+	if ((lock->lk_length == ~(u64)0) ||
+			LOFF_OVERFLOW(lock->lk_offset, lock->lk_length))
+		file_lock.fl_end = ~(u64)0;
+	else
+		file_lock.fl_end = lock->lk_offset + lock->lk_length - 1;
+	nfs4_transform_lock_offset(&file_lock);
+
+	/*
+	 * Try to lock the file in the VFS.
+	 * Note: locks.c uses the BKL to protect the inode's lock list.
+	 */
+
+	status = posix_lock_file(filp, &file_lock);
+	if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private)
+		file_lock.fl_ops->fl_release_private(&file_lock);
+	dprintk("NFSD: nfsd4_lock: posix_lock_file status %d\n",status);
+	switch (-status) {
+	case 0: /* success! */
+		update_stateid(&lock_stp->st_stateid);
+		memcpy(&lock->lk_resp_stateid, &lock_stp->st_stateid,
+				sizeof(stateid_t));
+		goto out;
+	case (EAGAIN):
+		goto conflicting_lock;
+	case (EDEADLK):
+		dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status);
+		status = nfserr_deadlock;
+		goto out_destroy_new_stateid;
+	default:
+		dprintk("NFSD: nfsd4_lock: posix_lock_file() failed! status %d\n",status);
+		/*
+		 * status still holds the negative errno from the VFS here;
+		 * translate it so that callers (and seqid_mutating_err()
+		 * below) only ever see an NFS status, as nfsd4_locku does.
+		 */
+		status = nfserrno(status);
+		goto out_destroy_new_stateid;
+	}
+
+conflicting_lock:
+	dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
+	status = nfserr_denied;
+	/* XXX There is a race here. Future patch needed to provide
+	 * an atomic posix_lock_and_test_file
+	 */
+	if (!(conflock = posix_test_lock(filp, &file_lock))) {
+		status = nfserr_serverfault;
+		goto out;
+	}
+	nfs4_set_lock_denied(conflock, &lock->lk_denied);
+
+out_destroy_new_stateid:
+	if (lock->lk_is_new) {
+		dprintk("NFSD: nfsd4_lock: destroy new stateid!\n");
+		/*
+		 * An error encountered after instantiation of the new
+		 * stateid has forced us to destroy it.
+		 */
+		if (!seqid_mutating_err(status))
+			open_sop->so_seqid--;
+
+		release_state_owner(lock_stp, LOCK_STATE);
+	}
+out:
+	if (lock->lk_stateowner)
+		nfs4_get_stateowner(lock->lk_stateowner);
+	nfs4_unlock_state();
+	return status;
+}
+
+/*
+ * LOCKT operation
+ *
+ * Tests whether the described lock could be granted, without taking it.
+ * Refused with nfserr_grace during the grace period and with nfserr_inval
+ * for an invalid range or lock type.  If a conflicting lock exists the
+ * result is nfserr_denied and lockt->lt_denied describes the conflict;
+ * otherwise nfs_ok.
+ */
+int
+nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lockt *lockt)
+{
+	struct file dummy_file;
+	struct file_lock file_lock;
+	struct file_lock *conflock;
+	int status;
+
+	if (nfs4_in_grace())
+		return nfserr_grace;
+
+	if (check_lock_length(lockt->lt_offset, lockt->lt_length))
+		 return nfserr_inval;
+
+	lockt->lt_stateowner = NULL;
+	nfs4_lock_state();
+
+	if (STALE_CLIENTID(&lockt->lt_clientid)) {
+		printk("NFSD: nfsd4_lockt: clientid is stale!\n");
+		status = nfserr_stale_clientid;
+		goto out;
+	}
+
+	status = fh_verify(rqstp, current_fh, S_IFREG, 0);
+	if (status) {
+		printk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
+		/* a symlink is reported as an invalid argument */
+		if (status == nfserr_symlink)
+			status = nfserr_inval;
+		goto out;
+	}
+
+	locks_init_lock(&file_lock);
+	if (lockt->lt_type == NFS4_READ_LT ||
+	    lockt->lt_type == NFS4_READW_LT) {
+		file_lock.fl_type = F_RDLCK;
+	} else if (lockt->lt_type == NFS4_WRITE_LT ||
+		   lockt->lt_type == NFS4_WRITEW_LT) {
+		file_lock.fl_type = F_WRLCK;
+	} else {
+		printk("NFSD: nfs4_lockt: bad lock type!\n");
+		status = nfserr_inval;
+		goto out;
+	}
+
+	/* fl_owner is only set when this lock owner is already known */
+	lockt->lt_stateowner = find_lockstateowner_str(
+			current_fh->fh_dentry->d_inode,
+			&lockt->lt_clientid, &lockt->lt_owner);
+	if (lockt->lt_stateowner)
+		file_lock.fl_owner = (fl_owner_t)lockt->lt_stateowner;
+	file_lock.fl_pid = current->tgid;
+	file_lock.fl_flags = FL_POSIX;
+
+	file_lock.fl_start = lockt->lt_offset;
+	if ((lockt->lt_length != ~(u64)0) &&
+	    !LOFF_OVERFLOW(lockt->lt_offset, lockt->lt_length))
+		file_lock.fl_end = lockt->lt_offset + lockt->lt_length - 1;
+	else
+		file_lock.fl_end = ~(u64)0;
+
+	nfs4_transform_lock_offset(&file_lock);
+
+	/* posix_test_lock uses the struct file _only_ to resolve the inode.
+	 * since LOCKT doesn't require an OPEN, and therefore a struct
+	 * file may not exist, pass posix_test_lock a struct file with
+	 * only the dentry:inode set.
+	 */
+	memset(&dummy_file, 0, sizeof (struct file));
+	dummy_file.f_dentry = current_fh->fh_dentry;
+
+	conflock = posix_test_lock(&dummy_file, &file_lock);
+	if (conflock) {
+		status = nfserr_denied;
+		nfs4_set_lock_denied(conflock, &lockt->lt_denied);
+	} else {
+		status = nfs_ok;
+	}
+out:
+	nfs4_unlock_state();
+	return status;
+}
+
+/*
+ * LOCKU operation
+ *
+ * Validates the lock seqid/stateid, then asks the VFS to drop the given
+ * byte range with an F_UNLCK request.  On success the lock stateid is
+ * bumped and copied back into the request; a VFS failure is translated
+ * with nfserrno().  An invalid range yields nfserr_inval before any state
+ * is touched.
+ */
+int
+nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku)
+{
+	struct nfs4_stateid *lock_stp;
+	struct file_lock unlock;
+	struct file *filp;
+	int status;
+
+	dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
+		(long long) locku->lu_offset,
+		(long long) locku->lu_length);
+
+	if (check_lock_length(locku->lu_offset, locku->lu_length))
+		 return nfserr_inval;
+
+	nfs4_lock_state();
+
+	status = nfs4_preprocess_seqid_op(current_fh,
+					locku->lu_seqid,
+					&locku->lu_stateid,
+					CHECK_FH | LOCK_STATE,
+					&locku->lu_stateowner, &lock_stp, NULL);
+	if (status)
+		goto out;
+
+	filp = lock_stp->st_vfs_file;
+	BUG_ON(!filp);
+
+	locks_init_lock(&unlock);
+	unlock.fl_type = F_UNLCK;
+	unlock.fl_owner = (fl_owner_t) locku->lu_stateowner;
+	unlock.fl_pid = current->tgid;
+	unlock.fl_file = filp;
+	unlock.fl_flags = FL_POSIX;
+	unlock.fl_start = locku->lu_offset;
+
+	if ((locku->lu_length != ~(u64)0) &&
+	    !LOFF_OVERFLOW(locku->lu_offset, locku->lu_length))
+		unlock.fl_end = locku->lu_offset + locku->lu_length - 1;
+	else
+		unlock.fl_end = ~(u64)0;
+	nfs4_transform_lock_offset(&unlock);
+
+	/*
+	 * Try to unlock the file in the VFS.
+	 */
+	status = posix_lock_file(filp, &unlock);
+	if (unlock.fl_ops && unlock.fl_ops->fl_release_private)
+		unlock.fl_ops->fl_release_private(&unlock);
+	if (status) {
+		printk("NFSD: nfs4_locku: posix_lock_file failed!\n");
+		/* errno from the VFS -> NFS status */
+		status = nfserrno(status);
+		goto out;
+	}
+
+	/*
+	 * OK, unlock succeeded; the only thing left to do is update the stateid.
+	 */
+	update_stateid(&lock_stp->st_stateid);
+	memcpy(&locku->lu_stateid, &lock_stp->st_stateid, sizeof(stateid_t));
+
+out:
+	if (locku->lu_stateowner)
+		nfs4_get_stateowner(locku->lu_stateowner);
+	nfs4_unlock_state();
+	return status;
+}
+
+/*
+ * Check whether lock owner @lowner holds any lock on the inode behind
+ * @filp.  The inode's whole i_flock list is walked under the BKL and the
+ * walk stops at the first lock owned by @lowner.
+ *
+ * returns
+ *       1: locks held by lockowner
+ *       0: no locks held by lockowner
+ */
+static int
+check_for_locks(struct file *filp, struct nfs4_stateowner *lowner)
+{
+	struct file_lock **flpp;
+	struct inode *inode = filp->f_dentry->d_inode;
+	int status = 0;
+
+	lock_kernel();
+	for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
+		/*
+		 * The braces matter: without them the goto runs after the
+		 * first list entry whether or not it matched, so locks
+		 * further down the list were never looked at.
+		 */
+		if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
+			status = 1;
+			goto out;
+		}
+	}
+out:
+	unlock_kernel();
+	return status;
+}
+
+/*
+ * RELEASE_LOCKOWNER operation
+ *
+ * Returns nfserr_stale_clientid for a stale clientid.  If the lock owner
+ * is unknown this is a successful no-op.  If any of the owner's stateids
+ * refers to a file on which the owner still holds a lock, nothing is
+ * released and nfserr_locks_held is returned; otherwise the owner is
+ * released and nfs_ok is returned.
+ */
+int
+nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
+{
+	struct xdr_netobj *owner = &rlockowner->rl_owner;
+	clientid_t *clid = &rlockowner->rl_clientid;
+	struct nfs4_stateowner *lowner;
+	struct nfs4_stateid *stp;
+	int status;
+
+	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
+		clid->cl_boot, clid->cl_id);
+
+	/* XXX check for lease expiration */
+
+	if (STALE_CLIENTID(clid)) {
+		printk("NFSD: nfsd4_release_lockowner: clientid is stale!\n");
+		return nfserr_stale_clientid;
+	}
+
+	nfs4_lock_state();
+
+	status = nfs_ok;
+	lowner = find_lockstateowner(owner, clid);
+	if (!lowner)
+		goto out;
+
+	/* check for any locks held by any stateid
+	 * associated with the (lock) stateowner */
+	list_for_each_entry(stp, &lowner->so_perfilestate, st_perfilestate) {
+		if (check_for_locks(stp->st_vfs_file, lowner)) {
+			status = nfserr_locks_held;
+			goto out;
+		}
+	}
+
+	/* no locks held by (lock) stateowner */
+	release_stateowner(lowner);
+out:
+	nfs4_unlock_state();
+	return status;
+}
+
+/*
+ * Allocate a reclaim record together with a name buffer of @namelen bytes.
+ * Neither the name contents nor cr_name.len are filled in here.
+ * Returns NULL (with nothing left allocated) if either allocation fails.
+ */
+static inline struct nfs4_client_reclaim *
+alloc_reclaim(int namelen)
+{
+	struct nfs4_client_reclaim *rec;
+
+	rec = kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
+	if (rec == NULL)
+		return NULL;
+
+	rec->cr_name.data = kmalloc(namelen, GFP_KERNEL);
+	if (rec->cr_name.data != NULL)
+		return rec;
+
+	/* no name buffer: give the record back as well */
+	kfree(rec);
+	return NULL;
+}
+
+/*
+ * Record client @name (of @namlen bytes) in reclaim_str_hashtbl.
+ *
+ * Returns 1 on success and 0 on allocation failure.
+ * failure => all reset bets are off, nfserr_no_grace...
+ */
+static int
+nfs4_client_to_reclaim(char *name, int namlen)
+{
+	struct nfs4_client_reclaim *rec;
+	unsigned int bucket;
+
+	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", namlen, name);
+	rec = alloc_reclaim(namlen);
+	if (rec == NULL)
+		return 0;
+
+	/* private copy of the client name */
+	memcpy(rec->cr_name.data, name, namlen);
+	rec->cr_name.len = namlen;
+
+	bucket = clientstr_hashval(name, namlen);
+	INIT_LIST_HEAD(&rec->cr_strhash);
+	list_add(&rec->cr_strhash, &reclaim_str_hashtbl[bucket]);
+	reclaim_str_hashtbl_size++;
+	return 1;
+}
+
+/*
+ * Free every record in reclaim_str_hashtbl.  The table must have been
+ * initialised (nfs4_reclaim_init set), and the entry counter must be back
+ * at zero once all buckets are empty; either violation is a BUG.
+ */
+static void
+nfs4_release_reclaim(void)
+{
+	struct nfs4_client_reclaim *rec;
+	struct list_head *pos, *tmp;
+	int bucket;
+
+	BUG_ON(!nfs4_reclaim_init);
+	for (bucket = 0; bucket < CLIENT_HASH_SIZE; bucket++) {
+		list_for_each_safe(pos, tmp, &reclaim_str_hashtbl[bucket]) {
+			rec = list_entry(pos, struct nfs4_client_reclaim,
+					 cr_strhash);
+			list_del(&rec->cr_strhash);
+			kfree(rec->cr_name.data);
+			kfree(rec);
+			reclaim_str_hashtbl_size--;
+		}
+	}
+	BUG_ON(reclaim_str_hashtbl_size);
+}
+
+/*
+ * called from OPEN, CLAIM_PREVIOUS with a new clientid.
+ *
+ * Maps @clid to its confirmed client and looks that client's name up in
+ * reclaim_str_hashtbl.  Returns the reclaim record, or NULL when the
+ * clientid is not confirmed or its name is not in the table.
+ */
+struct nfs4_client_reclaim *
+nfs4_find_reclaim_client(clientid_t *clid)
+{
+	struct nfs4_client_reclaim *rec;
+	struct list_head *chain;
+	struct nfs4_client *clp;
+
+	/* find clientid in conf_id_hashtbl */
+	clp = find_confirmed_client(clid);
+	if (!clp)
+		return NULL;
+
+	dprintk("NFSD: nfs4_find_reclaim_client for %.*s\n",
+		clp->cl_name.len, clp->cl_name.data);
+
+	/* find clp->cl_name in reclaim_str_hashtbl */
+	chain = &reclaim_str_hashtbl[clientstr_hashval(clp->cl_name.data,
+						       clp->cl_name.len)];
+	list_for_each_entry(rec, chain, cr_strhash) {
+		if (!cmp_name(&rec->cr_name, &clp->cl_name))
+			continue;
+		return rec;
+	}
+	return NULL;
+}
+
+/*
+ * Called from OPEN. Look for clientid in reclaim list.
+ *
+ * Returns nfs_ok when nfs4_find_reclaim_client() finds a record for
+ * @clid, nfserr_reclaim_bad otherwise.
+ */
+int
+nfs4_check_open_reclaim(clientid_t *clid)
+{
+	if (nfs4_find_reclaim_client(clid) != NULL)
+		return nfs_ok;
+	return nfserr_reclaim_bad;
+}
+
+
+/*
+ * Start and stop routines
+ */
+
+/*
+ * (Re)initialise all NFSv4 state tables and start the laundromat.
+ *
+ * The reclaim table is initialised only on the first call, so records
+ * placed there before a re-initialisation are kept.  The grace period is
+ * the larger of the old and current lease times, or zero when the reclaim
+ * table is empty.
+ */
+static void
+__nfs4_state_init(void)
+{
+	time_t grace_time;
+	int n;
+
+	if (!nfs4_reclaim_init) {
+		for (n = 0; n < CLIENT_HASH_SIZE; n++)
+			INIT_LIST_HEAD(&reclaim_str_hashtbl[n]);
+		reclaim_str_hashtbl_size = 0;
+		nfs4_reclaim_init = 1;
+	}
+
+	/* client tables */
+	for (n = 0; n < CLIENT_HASH_SIZE; n++) {
+		INIT_LIST_HEAD(&conf_id_hashtbl[n]);
+		INIT_LIST_HEAD(&conf_str_hashtbl[n]);
+		INIT_LIST_HEAD(&unconf_str_hashtbl[n]);
+		INIT_LIST_HEAD(&unconf_id_hashtbl[n]);
+	}
+	/* file table */
+	for (n = 0; n < FILE_HASH_SIZE; n++)
+		INIT_LIST_HEAD(&file_hashtbl[n]);
+	/* open owner tables */
+	for (n = 0; n < OWNER_HASH_SIZE; n++) {
+		INIT_LIST_HEAD(&ownerstr_hashtbl[n]);
+		INIT_LIST_HEAD(&ownerid_hashtbl[n]);
+	}
+	/* open and lock stateid tables */
+	for (n = 0; n < STATEID_HASH_SIZE; n++) {
+		INIT_LIST_HEAD(&stateid_hashtbl[n]);
+		INIT_LIST_HEAD(&lockstateid_hashtbl[n]);
+	}
+	/* lock owner tables */
+	for (n = 0; n < LOCK_HASH_SIZE; n++) {
+		INIT_LIST_HEAD(&lock_ownerid_hashtbl[n]);
+		INIT_LIST_HEAD(&lock_ownerstr_hashtbl[n]);
+	}
+
+	/* the two special stateids: all zero bits and all one bits */
+	memset(&zerostateid, 0, sizeof(stateid_t));
+	memset(&onestateid, ~0, sizeof(stateid_t));
+
+	INIT_LIST_HEAD(&close_lru);
+	INIT_LIST_HEAD(&client_lru);
+	INIT_LIST_HEAD(&del_recall_lru);
+	spin_lock_init(&recall_lock);
+
+	boot_time = get_seconds();
+	/* nothing to reclaim => no grace period */
+	if (reclaim_str_hashtbl_size == 0)
+		grace_time = 0;
+	else
+		grace_time = max(old_lease_time, lease_time);
+	if (grace_time)
+		printk("NFSD: starting %ld-second grace period\n", grace_time);
+	grace_end = boot_time + grace_time;
+
+	INIT_WORK(&laundromat_work,laundromat_main, NULL);
+	schedule_delayed_work(&laundromat_work, NFSD_LEASE_TIME*HZ);
+}
+
+/*
+ * One-time NFSv4 state start-up.  Does nothing if state is already
+ * initialised.  Returns 0 on success, or the error from
+ * nfsd4_init_slabs(), in which case nothing else is initialised.
+ */
+int
+nfs4_state_init(void)
+{
+	int status = 0;
+
+	if (!nfs4_init) {
+		status = nfsd4_init_slabs();
+		if (!status) {
+			__nfs4_state_init();
+			nfs4_init = 1;
+		}
+	}
+	return status;
+}
+
+/*
+ * Returns 1 while the current time is still before grace_end, 0 otherwise.
+ */
+int
+nfs4_in_grace(void)
+{
+	if (get_seconds() < grace_end)
+		return 1;
+	return 0;
+}
+
+/*
+ * Log a reboot-recovery error and move grace_end to the current time, so
+ * that nfs4_in_grace() no longer reports a grace period.
+ */
+void
+set_no_grace(void)
+{
+ printk("NFSD: ERROR in reboot recovery. State reclaims will fail.\n");
+ grace_end = get_seconds();
+}
+
+/* Accessor for the current value of the global lease_time. */
+time_t
+nfs4_lease_time(void)
+{
+ return lease_time;
+}
+
+/*
+ * Tear down all NFSv4 state: drop the stateowners parked on close_lru,
+ * expire every client found in conf_id_hashtbl and unconf_str_hashtbl,
+ * unhash the delegations queued on del_recall_lru, release all files and
+ * stop the laundromat.  Clears nfs4_init and prints the debug counters.
+ */
+static void
+__nfs4_state_shutdown(void)
+{
+	struct list_head *cur, *tmp;
+	struct list_head reaplist;
+	struct list_head *chain;
+	struct nfs4_stateowner *sop;
+	struct nfs4_delegation *dp;
+	int bucket;
+
+	list_for_each_safe(cur, tmp, &close_lru) {
+		sop = list_entry(cur, struct nfs4_stateowner, so_close_lru);
+		list_del(&sop->so_close_lru);
+		nfs4_put_stateowner(sop);
+	}
+
+	/* keep expiring the first client until each chain is empty */
+	for (bucket = 0; bucket < CLIENT_HASH_SIZE; bucket++) {
+		chain = &conf_id_hashtbl[bucket];
+		while (!list_empty(chain))
+			expire_client(list_entry(chain->next,
+					struct nfs4_client, cl_idhash));
+
+		chain = &unconf_str_hashtbl[bucket];
+		while (!list_empty(chain))
+			expire_client(list_entry(chain->next,
+					struct nfs4_client, cl_strhash));
+	}
+
+	/* collect the delegations under recall_lock, unhash them after */
+	INIT_LIST_HEAD(&reaplist);
+	spin_lock(&recall_lock);
+	list_for_each_safe(cur, tmp, &del_recall_lru) {
+		dp = list_entry (cur, struct nfs4_delegation, dl_recall_lru);
+		list_move(&dp->dl_recall_lru, &reaplist);
+	}
+	spin_unlock(&recall_lock);
+
+	list_for_each_safe(cur, tmp, &reaplist) {
+		dp = list_entry (cur, struct nfs4_delegation, dl_recall_lru);
+		list_del_init(&dp->dl_recall_lru);
+		unhash_delegation(dp);
+	}
+
+	release_all_files();
+	cancel_delayed_work(&laundromat_work);
+	flush_scheduled_work();
+	nfs4_init = 0;
+
+	dprintk("NFSD: list_add_perfile %d list_del_perfile %d\n",
+		list_add_perfile, list_del_perfile);
+	dprintk("NFSD: add_perclient %d del_perclient %d\n",
+		add_perclient, del_perclient);
+	dprintk("NFSD: alloc_file %d free_file %d\n",
+		alloc_file, free_file);
+	dprintk("NFSD: vfsopen %d vfsclose %d\n",
+		vfsopen, vfsclose);
+	dprintk("NFSD: alloc_delegation %d free_delegation %d\n",
+		alloc_delegation, free_delegation);
+
+}
+
+/*
+ * Full NFSv4 state shutdown.  Under the state lock: free the reclaim
+ * list, tear down all state with __nfs4_state_shutdown(), then call
+ * nfsd4_free_slabs().
+ */
+void
+nfs4_state_shutdown(void)
+{
+ nfs4_lock_state();
+ nfs4_release_reclaim();
+ __nfs4_state_shutdown();
+ nfsd4_free_slabs();
+ nfs4_unlock_state();
+}
+
+/*
+ * Called when leasetime is changed.
+ *
+ * if nfsd is not started, simply set the global lease.
+ * NOTE(review): the code below returns before touching lease_time when
+ * nfs4_init is clear, so the new value is only logged in that case -
+ * confirm which of the two is intended.
+ *
+ * if nfsd(s) are running, lease change requires nfsv4 state to be reset.
+ * e.g: boot_time is reset, existing nfs4_client structs are
+ * used to fill reclaim_str_hashtbl, then all state (except for the
+ * reclaim_str_hashtbl) is re-initialized.
+ *
+ * if the old lease time is greater than the new lease time, the grace
+ * period needs to be set to the old lease time to allow clients to reclaim
+ * their state. XXX - we may want to set the grace period == lease time
+ * after an initial grace period == old lease time
+ *
+ * if an error occurs in this process, the new lease is set, but the server
+ * will not honor OPEN or LOCK reclaims, and will return nfserr_no_grace
+ * which means OPEN/LOCK/READ/WRITE will fail during grace period.
+ *
+ * clients will attempt to reset all state with SETCLIENTID/CONFIRM, and
+ * OPEN and LOCK reclaims.
+ */
+void
+nfs4_reset_lease(time_t leasetime)
+{
+	struct nfs4_client *clp;
+	int recorded = 1;
+	int bucket;
+
+	printk("NFSD: New leasetime %ld\n",leasetime);
+	if (!nfs4_init)
+		return;
+
+	nfs4_lock_state();
+	old_lease_time = lease_time;
+	lease_time = leasetime;
+
+	nfs4_release_reclaim();
+
+	/* populate reclaim_str_hashtbl with current confirmed nfs4_clientid */
+	for (bucket = 0; recorded && bucket < CLIENT_HASH_SIZE; bucket++) {
+		list_for_each_entry(clp, &conf_id_hashtbl[bucket], cl_idhash) {
+			if (nfs4_client_to_reclaim(clp->cl_name.data,
+						   clp->cl_name.len))
+				continue;
+			/* could not record this client: drop the whole list */
+			nfs4_release_reclaim();
+			recorded = 0;
+			break;
+		}
+	}
+
+	__nfs4_state_shutdown();
+	__nfs4_state_init();
+	nfs4_unlock_state();
+}
+
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
new file mode 100644
index 000000000000..36a058a112d5
--- /dev/null
+++ b/fs/nfsd/nfs4xdr.c
@@ -0,0 +1,2536 @@
+/*
+ * fs/nfsd/nfs4xdr.c
+ *
+ * Server-side XDR for NFSv4
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * TODO: Neil Brown made the following observation: We currently
+ * initially reserve NFSD_BUFSIZE space on the transmit queue and
+ * never release any of that until the request is complete.
+ * It would be good to calculate a new maximum response size while
+ * decoding the COMPOUND, and call svc_reserve with this number
+ * at the end of nfs4svc_decode_compoundargs.
+ */
+
+#include <linux/param.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/vfs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/state.h>
+#include <linux/nfsd/xdr4.h>
+#include <linux/nfsd_idmap.h>
+#include <linux/nfs4.h>
+#include <linux/nfs4_acl.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_XDR
+
+/*
+ * Validate a single name of @len bytes starting at @str.
+ *
+ * Returns nfserr_inval for an empty name, the caller-supplied @err if
+ * isdotent() accepts the name or the name contains a '/', and 0 otherwise.
+ */
+static int
+check_filename(char *str, int len, int err)
+{
+	int pos = 0;
+
+	if (!len)
+		return nfserr_inval;
+	if (isdotent(str, len))
+		return err;
+
+	/* Reject any embedded path separator. */
+	while (pos < len) {
+		if (str[pos++] == '/')
+			return err;
+	}
+	return 0;
+}
+
+/*
+ * START OF "GENERIC" DECODE ROUTINES.
+ * These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+/*
+ * DECODE_HEAD declares the locals every decoder relies on: the read
+ * cursor `p' and the result `status'.
+ */
+#define DECODE_HEAD \
+ u32 *p; \
+ int status
+/*
+ * DECODE_TAIL is the common epilogue: falling into it returns 0.  It also
+ * provides the `out' label (return the current status) and the `xdr_error'
+ * label (log a notice, set status to nfserr_bad_xdr, return).
+ */
+#define DECODE_TAIL \
+ status = 0; \
+out: \
+ return status; \
+xdr_error: \
+ printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+ status = nfserr_bad_xdr; \
+ goto out
+
+/* Read one 32-bit network-order word at the cursor into x and advance. */
+#define READ32(x) (x) = ntohl(*p++)
+/* Read two 32-bit words, high word first, into the 64-bit value x. */
+#define READ64(x) do { \
+ (x) = (u64)ntohl(*p++) << 32; \
+ (x) |= ntohl(*p++); \
+} while (0)
+/* Skip one word, read the next word into x, then skip one more word. */
+#define READTIME(x) do { \
+ p++; \
+ (x) = ntohl(*p++); \
+ p++; \
+} while (0)
+/*
+ * Point x at the bytes under the cursor (no copy is made) and advance the
+ * cursor by nbytes rounded up to whole 32-bit words.
+ */
+#define READMEM(x,nbytes) do { \
+ x = (char *)p; \
+ p += XDR_QUADLEN(nbytes); \
+} while (0)
+/*
+ * Like READMEM, but if the cursor is inside one of argp's scratch buffers
+ * (argp->tmp or argp->tmpp) the bytes are handed to savemem() first.  A
+ * NULL result logs a notice and jumps to xdr_error.  Requires `argp' and
+ * an `xdr_error' label in the calling function.
+ */
+#define SAVEMEM(x,nbytes) do { \
+ if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
+ savemem(argp, p, nbytes) : \
+ (char *)p)) { \
+ printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+ goto xdr_error; \
+ } \
+ p += XDR_QUADLEN(nbytes); \
+} while (0)
+/* Copy nbytes from the cursor into x and advance by the padded length. */
+#define COPYMEM(x,nbytes) do { \
+ memcpy((x), p, nbytes); \
+ p += XDR_QUADLEN(nbytes); \
+} while (0)
+
+/*
+ * READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE
+ *
+ * Make nbytes of request data available at `p'.  If they fit between
+ * argp->p and argp->end, p is set to argp->p and argp->p is advanced;
+ * otherwise read_buf() is called, and a NULL result logs a notice and
+ * jumps to xdr_error.  Requires `argp', `p' and an `xdr_error' label in
+ * the calling function.
+ */
+#define READ_BUF(nbytes) do { \
+ if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \
+ p = argp->p; \
+ argp->p += XDR_QUADLEN(nbytes); \
+ } else if (!(p = read_buf(argp, nbytes))) { \
+ printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \
+ goto xdr_error; \
+ } \
+} while (0)
+
+/*
+ * Slow path of READ_BUF(): the caller wants @nbytes but fewer than that
+ * remain between argp->p and argp->end.
+ *
+ * The remaining bytes of the current buffer and the first bytes of the
+ * next page are copied into one contiguous scratch buffer: argp->tmp if
+ * it is big enough, otherwise a freshly allocated argp->tmpp (any
+ * previous tmpp is freed).  argp->p/argp->end/argp->pagelen are moved on
+ * to describe the new page.
+ *
+ * Returns a pointer to the contiguous copy, or NULL if the request does
+ * not hold @nbytes, if more than one extra page would be needed, or if
+ * the allocation fails.
+ */
+u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes)
+{
+	/* We want more bytes than seem to be available.
+	 * Maybe we need a new page, maybe we have just run out
+	 */
+	int avail = (char*)argp->end - (char*)argp->p;
+	u32 *p;
+
+	if (avail + argp->pagelen < nbytes)
+		return NULL;
+	if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
+		return NULL;
+	/* ok, we can do it with the current plus the next page */
+	if (nbytes <= sizeof(argp->tmp))
+		p = argp->tmp;
+	else {
+		/* kfree(NULL) is a no-op, so no guard is needed. */
+		kfree(argp->tmpp);
+		p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL);
+		if (!p)
+			return NULL;
+	}
+	memcpy(p, argp->p, avail);
+	/* step to next page */
+	argp->p = page_address(argp->pagelist[0]);
+	argp->pagelist++;
+	/*
+	 * The end of the readable area must be measured from the start of
+	 * the new page (argp->p), not from the scratch buffer `p'; using
+	 * `p' left argp->end pointing at unrelated memory.
+	 */
+	if (argp->pagelen < PAGE_SIZE) {
+		argp->end = argp->p + (argp->pagelen>>2);
+		argp->pagelen = 0;
+	} else {
+		argp->end = argp->p + (PAGE_SIZE>>2);
+		argp->pagelen -= PAGE_SIZE;
+	}
+	memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
+	argp->p += XDR_QUADLEN(nbytes - avail);
+	return p;
+}
+
+/*
+ * Register @p to be released later by calling @release on it.
+ *
+ * A new tmpbuf record is allocated and pushed onto the head of
+ * argp->to_free.  Returns 0 on success or -ENOMEM if the record cannot be
+ * allocated, in which case @p is not registered.
+ */
+static int
+defer_free(struct nfsd4_compoundargs *argp,
+		void (*release)(const void *), void *p)
+{
+	struct tmpbuf *entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (entry == NULL)
+		return -ENOMEM;
+
+	entry->release = release;
+	entry->buf = p;
+	/* Link in at the head of the list. */
+	entry->next = argp->to_free;
+	argp->to_free = entry;
+	return 0;
+}
+
+/*
+ * Preserve @nbytes of decoded data that currently live in one of argp's
+ * scratch buffers, so that a later read_buf() cannot overwrite them.
+ *
+ * If @p is argp->tmp the bytes are copied into a new allocation.
+ * Otherwise @p must be argp->tmpp (anything else is a BUG), and ownership
+ * of that buffer is taken over by clearing argp->tmpp.  Either way the
+ * buffer is queued on argp's deferred-free list.
+ *
+ * Returns the preserved buffer, or NULL on allocation failure.
+ */
+char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
+{
+	if (p == argp->tmp) {
+		u32 *copy = kmalloc(nbytes, GFP_KERNEL);
+
+		if (!copy)
+			return NULL;
+		memcpy(copy, argp->tmp, nbytes);
+		p = copy;
+	} else {
+		if (p != argp->tmpp)
+			BUG();
+		argp->tmpp = NULL;
+	}
+	if (defer_free(argp, kfree, p)) {
+		/*
+		 * Nothing else references the buffer at this point (tmpp
+		 * was cleared above), so free it here on both paths.  The
+		 * old code only freed the freshly copied buffer and leaked
+		 * the former tmpp.
+		 */
+		kfree(p);
+		return NULL;
+	}
+	return (char *)p;
+}
+
+
+/*
+ * Decode an attribute bitmap: a 32-bit word count followed by that many
+ * 32-bit words.  Only the first two words are stored (in bmval[0] and
+ * bmval[1]); both are zeroed first, so absent words read as 0.  Any
+ * further words are consumed from the buffer but not stored.
+ *
+ * Returns 0 on success, or nfserr_bad_xdr if the count exceeds 1000 or
+ * the request is too short.
+ */
+static int
+nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
+{
+ u32 bmlen;
+ DECODE_HEAD;
+
+ bmval[0] = 0;
+ bmval[1] = 0;
+
+ READ_BUF(4);
+ READ32(bmlen);
+ if (bmlen > 1000)
+ goto xdr_error;
+
+ /* bmlen words of 4 bytes each */
+ READ_BUF(bmlen << 2);
+ if (bmlen > 0)
+ READ32(bmval[0]);
+ if (bmlen > 1)
+ READ32(bmval[1]);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode a settable attribute set: the attribute bitmap, the byte length
+ * of the attribute values, then the values in bitmap order.
+ *
+ * @bmval receives the first two bitmap words.
+ * @iattr receives size, mode, owner, group and time values; ia_valid
+ *        records which of them were present.
+ * @acl   receives a newly allocated ACL if the ACL attribute is present
+ *        (queued on argp's deferred-free list), NULL otherwise.
+ *
+ * Returns 0 on success; nfserr_attrnotsupp / nfserr_inval if the bitmap
+ * names unsupported / non-writeable attributes; nfserr_inval for a bad
+ * time value; nfserr_bad_xdr if the request is short, a time-how
+ * discriminant is unknown, or the bytes consumed do not match the
+ * declared length; or the nfserrno() translation of a name-mapping or
+ * allocation failure.
+ */
+static int
+nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr,
+		struct nfs4_acl **acl)
+{
+	int expected_len, len = 0;
+	u32 dummy32;
+	char *buf;
+
+	DECODE_HEAD;
+	iattr->ia_valid = 0;
+	if ((status = nfsd4_decode_bitmap(argp, bmval)))
+		return status;
+
+	/*
+	 * According to spec, unsupported attributes return ERR_NOTSUPP;
+	 * read-only attributes return ERR_INVAL.
+	 */
+	if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1))
+		return nfserr_attrnotsupp;
+	if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1))
+		return nfserr_inval;
+
+	/* Declared byte length of the values; checked against `len' below. */
+	READ_BUF(4);
+	READ32(expected_len);
+
+	if (bmval[0] & FATTR4_WORD0_SIZE) {
+		READ_BUF(8);
+		len += 8;
+		READ64(iattr->ia_size);
+		iattr->ia_valid |= ATTR_SIZE;
+	}
+	if (bmval[0] & FATTR4_WORD0_ACL) {
+		int nace, i;
+		struct nfs4_ace ace;
+
+		READ_BUF(4);
+		len += 4;
+		READ32(nace);
+
+		*acl = nfs4_acl_new();
+		if (*acl == NULL) {
+			status = -ENOMEM;
+			goto out_nfserr;
+		}
+		/*
+		 * If the ACL cannot be queued for deferred release nothing
+		 * would ever free it, so release it here and fail.
+		 */
+		if (defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl)) {
+			nfs4_acl_free(*acl);
+			*acl = NULL;
+			status = -ENOMEM;
+			goto out_nfserr;
+		}
+
+		for (i = 0; i < nace; i++) {
+			/* type, flag, access mask, length of "who" */
+			READ_BUF(16);
+			len += 16;
+			READ32(ace.type);
+			READ32(ace.flag);
+			READ32(ace.access_mask);
+			READ32(dummy32);
+			READ_BUF(dummy32);
+			len += XDR_QUADLEN(dummy32) << 2;
+			READMEM(buf, dummy32);
+			ace.whotype = nfs4_acl_get_whotype(buf, dummy32);
+			status = 0;
+			/* Only named principals need mapping to an id. */
+			if (ace.whotype != NFS4_ACL_WHO_NAMED)
+				ace.who = 0;
+			else if (ace.flag & NFS4_ACE_IDENTIFIER_GROUP)
+				status = nfsd_map_name_to_gid(argp->rqstp,
+						buf, dummy32, &ace.who);
+			else
+				status = nfsd_map_name_to_uid(argp->rqstp,
+						buf, dummy32, &ace.who);
+			if (status)
+				goto out_nfserr;
+			if (nfs4_acl_add_ace(*acl, ace.type, ace.flag,
+				 ace.access_mask, ace.whotype, ace.who) != 0) {
+				status = -ENOMEM;
+				goto out_nfserr;
+			}
+		}
+	} else
+		*acl = NULL;
+	if (bmval[1] & FATTR4_WORD1_MODE) {
+		READ_BUF(4);
+		len += 4;
+		READ32(iattr->ia_mode);
+		iattr->ia_mode &= (S_IFMT | S_IALLUGO);
+		iattr->ia_valid |= ATTR_MODE;
+	}
+	if (bmval[1] & FATTR4_WORD1_OWNER) {
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32);
+		READ_BUF(dummy32);
+		len += (XDR_QUADLEN(dummy32) << 2);
+		READMEM(buf, dummy32);
+		if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+			goto out_nfserr;
+		iattr->ia_valid |= ATTR_UID;
+	}
+	if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32);
+		READ_BUF(dummy32);
+		len += (XDR_QUADLEN(dummy32) << 2);
+		READMEM(buf, dummy32);
+		if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+			goto out_nfserr;
+		iattr->ia_valid |= ATTR_GID;
+	}
+	if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32);
+		switch (dummy32) {
+		case NFS4_SET_TO_CLIENT_TIME:
+			/* We require the high 32 bits of 'seconds' to be 0, and we ignore
+			   all 32 bits of 'nseconds'. */
+			READ_BUF(12);
+			len += 12;
+			READ32(dummy32);
+			if (dummy32)
+				return nfserr_inval;
+			READ32(iattr->ia_atime.tv_sec);
+			READ32(iattr->ia_atime.tv_nsec);
+			if (iattr->ia_atime.tv_nsec >= (u32)1000000000)
+				return nfserr_inval;
+			iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
+			break;
+		case NFS4_SET_TO_SERVER_TIME:
+			iattr->ia_valid |= ATTR_ATIME;
+			break;
+		default:
+			goto xdr_error;
+		}
+	}
+	if (bmval[1] & FATTR4_WORD1_TIME_METADATA) {
+		/* We require the high 32 bits of 'seconds' to be 0, and we ignore
+		   all 32 bits of 'nseconds'. */
+		READ_BUF(12);
+		len += 12;
+		READ32(dummy32);
+		if (dummy32)
+			return nfserr_inval;
+		READ32(iattr->ia_ctime.tv_sec);
+		READ32(iattr->ia_ctime.tv_nsec);
+		if (iattr->ia_ctime.tv_nsec >= (u32)1000000000)
+			return nfserr_inval;
+		iattr->ia_valid |= ATTR_CTIME;
+	}
+	if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
+		READ_BUF(4);
+		len += 4;
+		READ32(dummy32);
+		switch (dummy32) {
+		case NFS4_SET_TO_CLIENT_TIME:
+			/* We require the high 32 bits of 'seconds' to be 0, and we ignore
+			   all 32 bits of 'nseconds'. */
+			READ_BUF(12);
+			len += 12;
+			READ32(dummy32);
+			if (dummy32)
+				return nfserr_inval;
+			READ32(iattr->ia_mtime.tv_sec);
+			READ32(iattr->ia_mtime.tv_nsec);
+			if (iattr->ia_mtime.tv_nsec >= (u32)1000000000)
+				return nfserr_inval;
+			iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
+			break;
+		case NFS4_SET_TO_SERVER_TIME:
+			iattr->ia_valid |= ATTR_MTIME;
+			break;
+		default:
+			goto xdr_error;
+		}
+	}
+	/* The bytes consumed must match what the client declared. */
+	if (len != expected_len)
+		goto xdr_error;
+
+	DECODE_TAIL;
+
+out_nfserr:
+	/* `status' holds a negative errno here; translate it. */
+	status = nfserrno(status);
+	goto out;
+}
+
+/*
+ * Decode ACCESS arguments: a single 32-bit word, stored in
+ * access->ac_req_access.  Returns 0, or nfserr_bad_xdr if the request is
+ * too short.
+ */
+static int
+nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(access->ac_req_access);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode CLOSE arguments: seqid followed by the stateid (generation word,
+ * then the opaque part).  cl_stateowner is cleared before decoding.
+ * Returns 0, or nfserr_bad_xdr if the request is too short.
+ */
+static int
+nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
+{
+ DECODE_HEAD;
+
+ close->cl_stateowner = NULL;
+ READ_BUF(4 + sizeof(stateid_t));
+ READ32(close->cl_seqid);
+ READ32(close->cl_stateid.si_generation);
+ COPYMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t));
+
+ DECODE_TAIL;
+}
+
+
+/*
+ * Decode COMMIT arguments: a 64-bit offset and a 32-bit count.
+ * Returns 0, or nfserr_bad_xdr if the request is too short.
+ */
+static int
+nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
+{
+ DECODE_HEAD;
+
+ /* 8 bytes of offset + 4 bytes of count */
+ READ_BUF(12);
+ READ64(commit->co_offset);
+ READ32(commit->co_count);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode CREATE arguments: the object type, type-specific data (link
+ * target for NF4LNK, two specdata words for NF4BLK/NF4CHR, nothing for
+ * other types), the new object's name, and its initial attributes.
+ *
+ * Returns 0 on success, nfserr_bad_xdr if the request is too short, or
+ * whatever check_filename() / nfsd4_decode_fattr() report.
+ */
+static int
+nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(create->cr_type);
+ switch (create->cr_type) {
+ case NF4LNK:
+ /* length-prefixed link target */
+ READ_BUF(4);
+ READ32(create->cr_linklen);
+ READ_BUF(create->cr_linklen);
+ SAVEMEM(create->cr_linkname, create->cr_linklen);
+ break;
+ case NF4BLK:
+ case NF4CHR:
+ READ_BUF(8);
+ READ32(create->cr_specdata1);
+ READ32(create->cr_specdata2);
+ break;
+ case NF4SOCK:
+ case NF4FIFO:
+ case NF4DIR:
+ default:
+ /* no type-specific data is decoded */
+ break;
+ }
+
+ /* length-prefixed object name */
+ READ_BUF(4);
+ READ32(create->cr_namelen);
+ READ_BUF(create->cr_namelen);
+ SAVEMEM(create->cr_name, create->cr_namelen);
+ if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
+ return status;
+
+ if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl)))
+ goto out;
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode DELEGRETURN arguments: a stateid (generation word, then the
+ * opaque part).  Returns 0, or nfserr_bad_xdr if the request is too short.
+ */
+static inline int
+nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
+{
+ DECODE_HEAD;
+
+ READ_BUF(sizeof(stateid_t));
+ READ32(dr->dr_stateid.si_generation);
+ COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t));
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode GETATTR arguments, which consist solely of the requested
+ * attribute bitmap.  Returns whatever nfsd4_decode_bitmap() returns.
+ */
+static inline int
+nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
+{
+	int status;
+
+	status = nfsd4_decode_bitmap(argp, getattr->ga_bmval);
+	return status;
+}
+
+/*
+ * Decode LINK arguments: a length-prefixed name for the new link.
+ * Returns 0 on success, nfserr_bad_xdr if the request is too short, or
+ * the check_filename() result (nfserr_inval for a rejected name).
+ */
+static int
+nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(link->li_namelen);
+ READ_BUF(link->li_namelen);
+ SAVEMEM(link->li_name, link->li_namelen);
+ if ((status = check_filename(link->li_name, link->li_namelen, nfserr_inval)))
+ return status;
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode LOCK arguments.
+ *
+ * The fixed part is the lock type, reclaim flag, offset, length and the
+ * new-lock-owner flag.  If the flag is set, the open seqid, open stateid,
+ * lock seqid, clientid and owner string follow; otherwise the existing
+ * lock stateid and lock seqid follow.  lk_stateowner is cleared first.
+ *
+ * Returns 0 on success, or nfserr_bad_xdr if the lock type is out of
+ * range or the request is too short.
+ */
+static int
+nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+{
+ DECODE_HEAD;
+
+ lock->lk_stateowner = NULL;
+ /*
+ * type, reclaim(boolean), offset, length, new_lock_owner(boolean)
+ */
+ READ_BUF(28);
+ READ32(lock->lk_type);
+ if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
+ goto xdr_error;
+ READ32(lock->lk_reclaim);
+ READ64(lock->lk_offset);
+ READ64(lock->lk_length);
+ READ32(lock->lk_is_new);
+
+ if (lock->lk_is_new) {
+ /* open seqid, open stateid, lock seqid, clientid, owner length */
+ READ_BUF(36);
+ READ32(lock->lk_new_open_seqid);
+ READ32(lock->lk_new_open_stateid.si_generation);
+
+ COPYMEM(&lock->lk_new_open_stateid.si_opaque, sizeof(stateid_opaque_t));
+ READ32(lock->lk_new_lock_seqid);
+ COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t));
+ READ32(lock->lk_new_owner.len);
+ READ_BUF(lock->lk_new_owner.len);
+ /*
+ * READMEM does not copy: the owner data pointer refers to
+ * wherever READ_BUF left `p'.
+ * NOTE(review): if READ_BUF took the read_buf() path that is a
+ * scratch buffer - confirm its lifetime is long enough.
+ */
+ READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);
+ } else {
+ /* lock stateid, lock seqid */
+ READ_BUF(20);
+ READ32(lock->lk_old_lock_stateid.si_generation);
+ COPYMEM(&lock->lk_old_lock_stateid.si_opaque, sizeof(stateid_opaque_t));
+ READ32(lock->lk_old_lock_seqid);
+ }
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode LOCKT arguments: lock type, offset, length, clientid and the
+ * length-prefixed owner string.
+ *
+ * Returns 0 on success, or nfserr_bad_xdr if the lock type is out of
+ * range or the request is too short.
+ */
+static int
+nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
+{
+ DECODE_HEAD;
+
+ /* type (4) + offset (8) + length (8) + clientid (8) + owner length (4) */
+ READ_BUF(32);
+ READ32(lockt->lt_type);
+ if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
+ goto xdr_error;
+ READ64(lockt->lt_offset);
+ READ64(lockt->lt_length);
+ COPYMEM(&lockt->lt_clientid, 8);
+ READ32(lockt->lt_owner.len);
+ READ_BUF(lockt->lt_owner.len);
+ /* READMEM does not copy; the owner data points at `p'. */
+ READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode LOCKU arguments: lock type, seqid, lock stateid, offset and
+ * length.  lu_stateowner is cleared first.
+ *
+ * Returns 0 on success, or nfserr_bad_xdr if the lock type is out of
+ * range or the request is too short.
+ */
+static int
+nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
+{
+ DECODE_HEAD;
+
+ locku->lu_stateowner = NULL;
+ /* type (4) + seqid (4) + offset (8) + length (8), plus the stateid */
+ READ_BUF(24 + sizeof(stateid_t));
+ READ32(locku->lu_type);
+ if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
+ goto xdr_error;
+ READ32(locku->lu_seqid);
+ READ32(locku->lu_stateid.si_generation);
+ COPYMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t));
+ READ64(locku->lu_offset);
+ READ64(locku->lu_length);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode LOOKUP arguments: a length-prefixed name.
+ * Returns 0 on success, nfserr_bad_xdr if the request is too short, or
+ * the check_filename() result (nfserr_noent is passed as the error for a
+ * rejected name).
+ */
+static int
+nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(lookup->lo_len);
+ READ_BUF(lookup->lo_len);
+ SAVEMEM(lookup->lo_name, lookup->lo_len);
+ if ((status = check_filename(lookup->lo_name, lookup->lo_len, nfserr_noent)))
+ return status;
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode OPEN arguments.
+ *
+ * In wire order: seqid, share access, share deny, clientid and owner
+ * string; the open flag and, for NFS4_OPEN_CREATE, the create mode with
+ * either an attribute set (unchecked/guarded) or an 8-byte verifier
+ * (exclusive); then the claim type with its claim-specific data.
+ *
+ * op_bmval, op_iattr.ia_valid and op_stateowner are reset before
+ * decoding.
+ *
+ * Returns 0 on success, nfserr_bad_xdr for a short request or an unknown
+ * open flag / create mode / claim type, or whatever
+ * nfsd4_decode_fattr() / check_filename() report.
+ */
+static int
+nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+{
+ DECODE_HEAD;
+
+ memset(open->op_bmval, 0, sizeof(open->op_bmval));
+ open->op_iattr.ia_valid = 0;
+ open->op_stateowner = NULL;
+
+ /* seqid, share_access, share_deny, clientid, ownerlen */
+ READ_BUF(16 + sizeof(clientid_t));
+ READ32(open->op_seqid);
+ READ32(open->op_share_access);
+ READ32(open->op_share_deny);
+ COPYMEM(&open->op_clientid, sizeof(clientid_t));
+ READ32(open->op_owner.len);
+
+ /* owner, open_flag */
+ READ_BUF(open->op_owner.len + 4);
+ SAVEMEM(open->op_owner.data, open->op_owner.len);
+ READ32(open->op_create);
+ switch (open->op_create) {
+ case NFS4_OPEN_NOCREATE:
+ break;
+ case NFS4_OPEN_CREATE:
+ READ_BUF(4);
+ READ32(open->op_createmode);
+ switch (open->op_createmode) {
+ case NFS4_CREATE_UNCHECKED:
+ case NFS4_CREATE_GUARDED:
+ /* initial attributes for the new file */
+ if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl)))
+ goto out;
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+ /* 8-byte create verifier */
+ READ_BUF(8);
+ COPYMEM(open->op_verf.data, 8);
+ break;
+ default:
+ goto xdr_error;
+ }
+ break;
+ default:
+ goto xdr_error;
+ }
+
+ /* open_claim */
+ READ_BUF(4);
+ READ32(open->op_claim_type);
+ switch (open->op_claim_type) {
+ case NFS4_OPEN_CLAIM_NULL:
+ case NFS4_OPEN_CLAIM_DELEGATE_PREV:
+ /* length-prefixed file name */
+ READ_BUF(4);
+ READ32(open->op_fname.len);
+ READ_BUF(open->op_fname.len);
+ SAVEMEM(open->op_fname.data, open->op_fname.len);
+ if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+ return status;
+ break;
+ case NFS4_OPEN_CLAIM_PREVIOUS:
+ READ_BUF(4);
+ READ32(open->op_delegate_type);
+ break;
+ case NFS4_OPEN_CLAIM_DELEGATE_CUR:
+ /* delegation stateid, then length-prefixed file name */
+ READ_BUF(sizeof(stateid_t) + 4);
+ COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t));
+ READ32(open->op_fname.len);
+ READ_BUF(open->op_fname.len);
+ SAVEMEM(open->op_fname.data, open->op_fname.len);
+ if ((status = check_filename(open->op_fname.data, open->op_fname.len, nfserr_inval)))
+ return status;
+ break;
+ default:
+ goto xdr_error;
+ }
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode OPEN_CONFIRM arguments: the open stateid followed by the seqid.
+ * oc_stateowner is cleared first.  Returns 0, or nfserr_bad_xdr if the
+ * request is too short.
+ */
+static int
+nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
+{
+ DECODE_HEAD;
+
+ open_conf->oc_stateowner = NULL;
+ READ_BUF(4 + sizeof(stateid_t));
+ READ32(open_conf->oc_req_stateid.si_generation);
+ COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t));
+ READ32(open_conf->oc_seqid);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode OPEN_DOWNGRADE arguments: the open stateid, seqid, and the new
+ * share access and share deny words.  od_stateowner is cleared first.
+ * Returns 0, or nfserr_bad_xdr if the request is too short.
+ */
+static int
+nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
+{
+ DECODE_HEAD;
+
+ open_down->od_stateowner = NULL;
+ /* seqid + share_access + share_deny (12 bytes), plus the stateid */
+ READ_BUF(12 + sizeof(stateid_t));
+ READ32(open_down->od_stateid.si_generation);
+ COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t));
+ READ32(open_down->od_seqid);
+ READ32(open_down->od_share_access);
+ READ32(open_down->od_share_deny);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode PUTFH arguments: a length-prefixed filehandle.
+ * Returns 0 on success, or nfserr_bad_xdr if the length exceeds
+ * NFS4_FHSIZE or the request is too short.
+ */
+static int
+nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(putfh->pf_fhlen);
+ /* Bound the length before it is used to size the next read. */
+ if (putfh->pf_fhlen > NFS4_FHSIZE)
+ goto xdr_error;
+ READ_BUF(putfh->pf_fhlen);
+ SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode READ arguments: the stateid, a 64-bit offset and a 32-bit
+ * length.  Returns 0, or nfserr_bad_xdr if the request is too short.
+ */
+static int
+nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
+{
+ DECODE_HEAD;
+
+ /* stateid, plus offset (8) and length (4) */
+ READ_BUF(sizeof(stateid_t) + 12);
+ READ32(read->rd_stateid.si_generation);
+ COPYMEM(&read->rd_stateid.si_opaque, sizeof(stateid_opaque_t));
+ READ64(read->rd_offset);
+ READ32(read->rd_length);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode READDIR arguments: a 64-bit cookie, the cookie verifier,
+ * dircount, maxcount and the requested attribute bitmap.
+ * Returns 0 on success, nfserr_bad_xdr if the request is too short, or
+ * the nfsd4_decode_bitmap() result.
+ */
+static int
+nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
+{
+ DECODE_HEAD;
+
+ READ_BUF(24);
+ READ64(readdir->rd_cookie);
+ COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data));
+ READ32(readdir->rd_dircount); /* just in case you needed a useless field... */
+ READ32(readdir->rd_maxcount);
+ if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval)))
+ goto out;
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode REMOVE arguments: a length-prefixed name.
+ * Returns 0 on success, nfserr_bad_xdr if the request is too short, or
+ * the check_filename() result (nfserr_noent is passed as the error for a
+ * rejected name).
+ */
+static int
+nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(remove->rm_namelen);
+ READ_BUF(remove->rm_namelen);
+ SAVEMEM(remove->rm_name, remove->rm_namelen);
+ if ((status = check_filename(remove->rm_name, remove->rm_namelen, nfserr_noent)))
+ return status;
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode RENAME arguments: the length-prefixed source name followed by
+ * the length-prefixed target name.
+ *
+ * Returns 0 on success, nfserr_bad_xdr if the request is too short, or
+ * the check_filename() result: nfserr_noent is passed as the error for a
+ * rejected source name and nfserr_inval for a rejected target name.
+ */
+static int
+nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
+{
+ DECODE_HEAD;
+
+ READ_BUF(4);
+ READ32(rename->rn_snamelen);
+ /* source name plus the 4-byte length of the target name */
+ READ_BUF(rename->rn_snamelen + 4);
+ SAVEMEM(rename->rn_sname, rename->rn_snamelen);
+ READ32(rename->rn_tnamelen);
+ READ_BUF(rename->rn_tnamelen);
+ SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
+ if ((status = check_filename(rename->rn_sname, rename->rn_snamelen, nfserr_noent)))
+ return status;
+ if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen, nfserr_inval)))
+ return status;
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode RENEW arguments: a clientid, copied verbatim into *clientid.
+ * Returns 0, or nfserr_bad_xdr if the request is too short.
+ */
+static int
+nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
+{
+ DECODE_HEAD;
+
+ READ_BUF(sizeof(clientid_t));
+ COPYMEM(clientid, sizeof(clientid_t));
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode SETATTR arguments: the stateid followed by the attribute set to
+ * apply.  Returns 0 on success, nfserr_bad_xdr if the request is too
+ * short, or the nfsd4_decode_fattr() result.
+ */
+static int
+nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr)
+{
+ DECODE_HEAD;
+
+ READ_BUF(sizeof(stateid_t));
+ READ32(setattr->sa_stateid.si_generation);
+ COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t));
+ if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl)))
+ goto out;
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode SETCLIENTID arguments: the 8-byte verifier, the length-prefixed
+ * client name, the callback program number, the length-prefixed callback
+ * netid and address strings, and the callback ident.
+ *
+ * Each READ_BUF covers a variable-length string together with the fixed
+ * words that follow it.  Returns 0, or nfserr_bad_xdr if the request is
+ * too short.
+ */
+static int
+nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
+{
+ DECODE_HEAD;
+
+ /* verifier (8) + name length (4) */
+ READ_BUF(12);
+ COPYMEM(setclientid->se_verf.data, 8);
+ READ32(setclientid->se_namelen);
+
+ /* name + callback program (4) + netid length (4) */
+ READ_BUF(setclientid->se_namelen + 8);
+ SAVEMEM(setclientid->se_name, setclientid->se_namelen);
+ READ32(setclientid->se_callback_prog);
+ READ32(setclientid->se_callback_netid_len);
+
+ /* netid + address length (4) */
+ READ_BUF(setclientid->se_callback_netid_len + 4);
+ SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
+ READ32(setclientid->se_callback_addr_len);
+
+ /* address + callback ident (4) */
+ READ_BUF(setclientid->se_callback_addr_len + 4);
+ SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
+ READ32(setclientid->se_callback_ident);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode SETCLIENTID_CONFIRM arguments: an 8-byte clientid followed by
+ * the confirm verifier, both copied verbatim.  Returns 0, or
+ * nfserr_bad_xdr if the request is too short.
+ */
+static int
+nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
+{
+ DECODE_HEAD;
+
+ READ_BUF(8 + sizeof(nfs4_verifier));
+ COPYMEM(&scd_c->sc_clientid, 8);
+ COPYMEM(&scd_c->sc_confirm, sizeof(nfs4_verifier));
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode VERIFY arguments: the attribute bitmap followed by the
+ * length-prefixed attribute values, which are kept in raw XDR form in
+ * ve_attrval rather than decoded.
+ *
+ * Also used for NVERIFY.
+ *
+ * Returns 0 on success, nfserr_bad_xdr if the request is too short, or
+ * the nfsd4_decode_bitmap() result.  The two "#if 0" sections are
+ * compiled out.
+ */
+static int
+nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
+{
+#if 0
+ struct nfsd4_compoundargs save = {
+ .p = argp->p,
+ .end = argp->end,
+ .rqstp = argp->rqstp,
+ };
+ u32 ve_bmval[2];
+ struct iattr ve_iattr; /* request */
+ struct nfs4_acl *ve_acl; /* request */
+#endif
+ DECODE_HEAD;
+
+ if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval)))
+ goto out;
+
+ /* For convenience's sake, we compare raw xdr'd attributes in
+ * nfsd4_proc_verify; however we still decode here just to return
+ * correct error in case of bad xdr. */
+#if 0
+ status = nfsd4_decode_fattr(ve_bmval, &ve_iattr, &ve_acl);
+ if (status == nfserr_inval) {
+ status = nfserrno(status);
+ goto out;
+ }
+#endif
+ READ_BUF(4);
+ READ32(verify->ve_attrlen);
+ READ_BUF(verify->ve_attrlen);
+ SAVEMEM(verify->ve_attrval, verify->ve_attrlen);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode WRITE arguments: the stateid, 64-bit offset, stable_how and
+ * data length, followed by the data itself.
+ *
+ * The data is not copied.  wr_vec[] is filled with one entry for the
+ * remainder of the current buffer and one per following page until
+ * wr_buflen bytes are covered; wr_vlen is the number of entries used.
+ * argp->p / argp->end / argp->pagelist / argp->pagelen are advanced past
+ * the data.
+ *
+ * Returns 0 on success, or nfserr_bad_xdr if the header is short,
+ * stable_how is greater than 2, or the request holds fewer than
+ * wr_buflen data bytes.
+ *
+ * NOTE(review): the loop does not bound `v' against the size of
+ * wr_vec[] - confirm the caller guarantees it cannot overflow.
+ */
+static int
+nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
+{
+ int avail;
+ int v;
+ int len;
+ DECODE_HEAD;
+
+ /* generation (4) + opaque stateid part + offset (8) + stable (4) + length (4) */
+ READ_BUF(sizeof(stateid_opaque_t) + 20);
+ READ32(write->wr_stateid.si_generation);
+ COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t));
+ READ64(write->wr_offset);
+ READ32(write->wr_stable_how);
+ if (write->wr_stable_how > 2)
+ goto xdr_error;
+ READ32(write->wr_buflen);
+
+ /* Sorry .. no magic macros for this.. *
+ * READ_BUF(write->wr_buflen);
+ * SAVEMEM(write->wr_buf, write->wr_buflen);
+ */
+ avail = (char*)argp->end - (char*)argp->p;
+ if (avail + argp->pagelen < write->wr_buflen) {
+ printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);
+ goto xdr_error;
+ }
+ /*
+ * First segment: whatever is left of the current buffer.
+ * NOTE(review): this uses `p', which equals argp->p only when the
+ * READ_BUF above did not go through read_buf() - confirm.
+ */
+ write->wr_vec[0].iov_base = p;
+ write->wr_vec[0].iov_len = avail;
+ v = 0;
+ len = write->wr_buflen;
+ /* `len' counts the bytes that still have to be placed in wr_vec[v]. */
+ while (len > write->wr_vec[v].iov_len) {
+ len -= write->wr_vec[v].iov_len;
+ v++;
+ write->wr_vec[v].iov_base = page_address(argp->pagelist[0]);
+ argp->pagelist++;
+ if (argp->pagelen >= PAGE_SIZE) {
+ write->wr_vec[v].iov_len = PAGE_SIZE;
+ argp->pagelen -= PAGE_SIZE;
+ } else {
+ write->wr_vec[v].iov_len = argp->pagelen;
+ argp->pagelen -= len;
+ }
+ }
+ /* Resume decoding right after the (padded) data in the last segment. */
+ argp->end = (u32*) (write->wr_vec[v].iov_base + write->wr_vec[v].iov_len);
+ argp->p = (u32*) (write->wr_vec[v].iov_base + (XDR_QUADLEN(len) << 2));
+ /* Trim the last segment to the bytes that belong to this write. */
+ write->wr_vec[v].iov_len = len;
+ write->wr_vlen = v+1;
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode RELEASE_LOCKOWNER arguments: a clientid followed by the
+ * length-prefixed owner string.  Returns 0, or nfserr_bad_xdr if the
+ * request is too short.
+ */
+static int
+nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
+{
+ DECODE_HEAD;
+
+ /* clientid (8) + owner length (4) */
+ READ_BUF(12);
+ COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
+ READ32(rlockowner->rl_owner.len);
+ READ_BUF(rlockowner->rl_owner.len);
+ /* READMEM does not copy; the owner data points at `p'. */
+ READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
+
+ DECODE_TAIL;
+}
+
+/*
+ * Decode a whole COMPOUND request: the tag, minor version and operation
+ * count, then each operation's opcode and arguments into argp->ops[].
+ *
+ * argp->ops is switched from the inline argp->iops array to a kmalloc'ed
+ * array when opcnt exceeds the inline capacity.
+ *
+ * Decoding stops at the first operation whose decoder reports an error;
+ * that error is stored in op->status and argp->opcnt is trimmed to
+ * include the failing operation.  A missing opcode is recorded as
+ * OP_WRITE + 1 with status nfserr_bad_xdr.
+ *
+ * Returns 0 once the header has been decoded (per-operation failures are
+ * reported through op->status), or nfserr_bad_xdr if the header is
+ * short, the tag is longer than NFSD4_MAX_TAGLEN, opcnt exceeds 100, or
+ * the ops array cannot be allocated.
+ */
+static int
+nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+{
+	DECODE_HEAD;
+	struct nfsd4_op *op;
+	int i;
+
+	/*
+	 * XXX: According to spec, we should check the tag
+	 * for UTF-8 compliance.  I'm postponing this for
+	 * now because it seems that some clients do use
+	 * binary tags.
+	 */
+	READ_BUF(4);
+	READ32(argp->taglen);
+	/*
+	 * Validate the client-supplied length before it is used to size a
+	 * read: an unchecked value near 2^32 would wrap in "taglen + 8" and
+	 * let SAVEMEM move the cursor far outside the request.
+	 */
+	if (argp->taglen > NFSD4_MAX_TAGLEN)
+		goto xdr_error;
+	/* tag + minorversion (4) + opcnt (4) */
+	READ_BUF(argp->taglen + 8);
+	SAVEMEM(argp->tag, argp->taglen);
+	READ32(argp->minorversion);
+	READ32(argp->opcnt);
+
+	if (argp->opcnt > 100)
+		goto xdr_error;
+
+	if (argp->opcnt > sizeof(argp->iops)/sizeof(argp->iops[0])) {
+		argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
+		if (!argp->ops) {
+			/* Fall back to the inline array so ops stays valid. */
+			argp->ops = argp->iops;
+			printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n");
+			goto xdr_error;
+		}
+	}
+
+	for (i = 0; i < argp->opcnt; i++) {
+		op = &argp->ops[i];
+		op->replay = NULL;
+
+		/*
+		 * We can't use READ_BUF() here because we need to handle
+		 * a missing opcode as an OP_WRITE + 1. So we need to check
+		 * to see if we're truly at the end of our buffer or if there
+		 * is another page we need to flip to.
+		 */
+
+		if (argp->p == argp->end) {
+			if (argp->pagelen < 4) {
+				/* There isn't an opcode still on the wire */
+				op->opnum = OP_WRITE + 1;
+				op->status = nfserr_bad_xdr;
+				argp->opcnt = i+1;
+				break;
+			}
+
+			/*
+			 * False alarm. We just hit a page boundary, but there
+			 * is still data available.  Move pointer across page
+			 * boundary.  *snip from READ_BUF*
+			 *
+			 * The new end is measured from the start of the new
+			 * page (argp->p); the local cursor `p' still refers
+			 * to an earlier read and must not be used here.
+			 */
+			argp->p = page_address(argp->pagelist[0]);
+			argp->pagelist++;
+			if (argp->pagelen < PAGE_SIZE) {
+				argp->end = argp->p + (argp->pagelen>>2);
+				argp->pagelen = 0;
+			} else {
+				argp->end = argp->p + (PAGE_SIZE>>2);
+				argp->pagelen -= PAGE_SIZE;
+			}
+		}
+		op->opnum = ntohl(*argp->p++);
+
+		switch (op->opnum) {
+		case 2: /* Reserved operation */
+			op->opnum = OP_ILLEGAL;
+			if (argp->minorversion == 0)
+				op->status = nfserr_op_illegal;
+			else
+				op->status = nfserr_minor_vers_mismatch;
+			break;
+		case OP_ACCESS:
+			op->status = nfsd4_decode_access(argp, &op->u.access);
+			break;
+		case OP_CLOSE:
+			op->status = nfsd4_decode_close(argp, &op->u.close);
+			break;
+		case OP_COMMIT:
+			op->status = nfsd4_decode_commit(argp, &op->u.commit);
+			break;
+		case OP_CREATE:
+			op->status = nfsd4_decode_create(argp, &op->u.create);
+			break;
+		case OP_DELEGRETURN:
+			op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
+			break;
+		case OP_GETATTR:
+			op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
+			break;
+		case OP_GETFH:
+			op->status = nfs_ok;
+			break;
+		case OP_LINK:
+			op->status = nfsd4_decode_link(argp, &op->u.link);
+			break;
+		case OP_LOCK:
+			op->status = nfsd4_decode_lock(argp, &op->u.lock);
+			break;
+		case OP_LOCKT:
+			op->status = nfsd4_decode_lockt(argp, &op->u.lockt);
+			break;
+		case OP_LOCKU:
+			op->status = nfsd4_decode_locku(argp, &op->u.locku);
+			break;
+		case OP_LOOKUP:
+			op->status = nfsd4_decode_lookup(argp, &op->u.lookup);
+			break;
+		case OP_LOOKUPP:
+			op->status = nfs_ok;
+			break;
+		case OP_NVERIFY:
+			op->status = nfsd4_decode_verify(argp, &op->u.nverify);
+			break;
+		case OP_OPEN:
+			op->status = nfsd4_decode_open(argp, &op->u.open);
+			break;
+		case OP_OPEN_CONFIRM:
+			op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm);
+			break;
+		case OP_OPEN_DOWNGRADE:
+			op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade);
+			break;
+		case OP_PUTFH:
+			op->status = nfsd4_decode_putfh(argp, &op->u.putfh);
+			break;
+		case OP_PUTROOTFH:
+			op->status = nfs_ok;
+			break;
+		case OP_READ:
+			op->status = nfsd4_decode_read(argp, &op->u.read);
+			break;
+		case OP_READDIR:
+			op->status = nfsd4_decode_readdir(argp, &op->u.readdir);
+			break;
+		case OP_READLINK:
+			op->status = nfs_ok;
+			break;
+		case OP_REMOVE:
+			op->status = nfsd4_decode_remove(argp, &op->u.remove);
+			break;
+		case OP_RENAME:
+			op->status = nfsd4_decode_rename(argp, &op->u.rename);
+			break;
+		case OP_RESTOREFH:
+			op->status = nfs_ok;
+			break;
+		case OP_RENEW:
+			op->status = nfsd4_decode_renew(argp, &op->u.renew);
+			break;
+		case OP_SAVEFH:
+			op->status = nfs_ok;
+			break;
+		case OP_SETATTR:
+			op->status = nfsd4_decode_setattr(argp, &op->u.setattr);
+			break;
+		case OP_SETCLIENTID:
+			op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid);
+			break;
+		case OP_SETCLIENTID_CONFIRM:
+			op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm);
+			break;
+		case OP_VERIFY:
+			op->status = nfsd4_decode_verify(argp, &op->u.verify);
+			break;
+		case OP_WRITE:
+			op->status = nfsd4_decode_write(argp, &op->u.write);
+			break;
+		case OP_RELEASE_LOCKOWNER:
+			op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner);
+			break;
+		default:
+			op->opnum = OP_ILLEGAL;
+			op->status = nfserr_op_illegal;
+			break;
+		}
+
+		/* Stop at the first failing op; it is the last one kept. */
+		if (op->status) {
+			argp->opcnt = i+1;
+			break;
+		}
+	}
+
+	DECODE_TAIL;
+}
+/*
+ * END OF "GENERIC" DECODE ROUTINES.
+ */
+
+/*
+ * START OF "GENERIC" ENCODE ROUTINES.
+ * These may look a little ugly since they are imported from a "generic"
+ * set of XDR encode/decode routines which are intended to be shared by
+ * all of our NFSv4 implementations (OpenBSD, MacOS X...).
+ *
+ * If the pain of reading these is too great, it should be a straightforward
+ * task to translate them into Linux-specific versions which are more
+ * consistent with the style used in NFSv2/v3...
+ */
+/* Declares the write cursor `p' used by the encode macros below. */
+#define ENCODE_HEAD u32 *p
+
+/* Store n at the cursor as one 32-bit network-order word and advance. */
+#define WRITE32(n) *p++ = htonl(n)
+/* Store the 64-bit value n as two 32-bit words, high word first. */
+#define WRITE64(n) do { \
+ *p++ = htonl((u32)((n) >> 32)); \
+ *p++ = htonl((u32)(n)); \
+} while (0)
+/*
+ * Copy nbytes from ptr to the cursor and advance by the padded length.
+ * The last word of the destination is zeroed first so that the padding
+ * bytes after the copy are zero.
+ */
+#define WRITEMEM(ptr,nbytes) do { \
+ *(p + XDR_QUADLEN(nbytes) -1) = 0; \
+ memcpy(p, ptr, nbytes); \
+ p += XDR_QUADLEN(nbytes); \
+} while (0)
+/*
+ * Emit the five fields of c as 32-bit words: atomic, then the before and
+ * after ctime as seconds/nanoseconds pairs.
+ */
+#define WRITECINFO(c) do { \
+ *p++ = htonl(c.atomic); \
+ *p++ = htonl(c.before_ctime_sec); \
+ *p++ = htonl(c.before_ctime_nsec); \
+ *p++ = htonl(c.after_ctime_sec); \
+ *p++ = htonl(c.after_ctime_nsec); \
+} while (0)
+
+/*
+ * Load the cursor from resp->p and BUG if nbytes (rounded up to words)
+ * would run past resp->end.  ADJUST_ARGS() writes the cursor back to
+ * resp->p.  Both require `resp' and `p' in the calling function.
+ */
+#define RESERVE_SPACE(nbytes) do { \
+ p = resp->p; \
+ BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \
+} while (0)
+#define ADJUST_ARGS() resp->p = p
+
+/*
+ * Header routine to setup seqid operation replay cache
+ *
+ * Declares the cursor `p' and records in `save' where this operation's
+ * reply starts in the response buffer.
+ */
+#define ENCODE_SEQID_OP_HEAD \
+ u32 *p; \
+ u32 *save; \
+ \
+ save = resp->p;
+
+/*
+ * Routine for encoding the result of a
+ * "seqid-mutating" NFSv4 operation. This is
+ * where seqids are incremented, and the
+ * replay cache is filled.
+ *
+ * Requires `nfserr', `resp' and `save' in the calling function.  The
+ * bytes between `save' and resp->p are copied into the stateowner's
+ * replay buffer together with the status.
+ *
+ * NOTE(review): the expansion ends in "while (0);" - the trailing
+ * semicolon means a use followed by its own ';' yields an extra empty
+ * statement, which breaks an unbraced if/else around it.
+ */
+
+#define ENCODE_SEQID_OP_TAIL(stateowner) do { \
+ if (seqid_mutating_err(nfserr) && stateowner) { \
+ if (stateowner->so_confirmed) \
+ stateowner->so_seqid++; \
+ stateowner->so_replay.rp_status = nfserr; \
+ stateowner->so_replay.rp_buflen = \
+ (((char *)(resp)->p - (char *)save)); \
+ memcpy(stateowner->so_replay.rp_buf, save, \
+ stateowner->so_replay.rp_buflen); \
+ } } while (0);
+
+
+/*
+ * Lookup table of NFSv4 file types with 16 slots; slots with no
+ * corresponding type hold NF4BAD.
+ * NOTE(review): presumably indexed by (mode & S_IFMT) >> 12 - confirm
+ * against the user of this table.
+ */
+static u32 nfs4_ftypes[16] = {
+ NF4BAD, NF4FIFO, NF4CHR, NF4BAD,
+ NF4DIR, NF4BAD, NF4BLK, NF4BAD,
+ NF4REG, NF4BAD, NF4LNK, NF4BAD,
+ NF4SOCK, NF4BAD, NF4LNK, NF4BAD,
+};
+
+/*
+ * Encode an owner, group or ACE "who" name as an XDR opaque at *p.
+ *
+ * The name is produced directly into the buffer one word past *p, leaving
+ * room for the length word, which is filled in afterwards.  On success *p
+ * is advanced and *buflen (bytes) is reduced by the space consumed.
+ *
+ * Returns 0, nfserr_resource if a maximum-size name might not fit, or the
+ * nfs error corresponding to a failed name lookup.
+ */
+static int
+nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
+			u32 **p, int *buflen)
+{
+	u8 *namebuf = (u8 *)(*p + 1);
+	int len;
+
+	/* worst case: IDMAP_NAMESZ bytes of name plus the length word */
+	if (*buflen < (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4)
+		return nfserr_resource;
+
+	if (whotype == NFS4_ACL_WHO_NAMED) {
+		if (group)
+			len = nfsd_map_gid_to_name(rqstp, id, namebuf);
+		else
+			len = nfsd_map_uid_to_name(rqstp, id, namebuf);
+	} else {
+		len = nfs4_acl_write_who(whotype, namebuf);
+	}
+	if (len < 0)
+		return nfserrno(len);
+
+	/* name bytes are already in place, so no source buffer is passed */
+	*p = xdr_encode_opaque(*p, NULL, len);
+	*buflen -= (XDR_QUADLEN(len) << 2) + 4;
+	BUG_ON(*buflen < 0);
+	return 0;
+}
+
+/* Encode @uid as a named user via nfsd4_encode_name(). */
+static inline int
+nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, u32 **p, int *buflen)
+{
+	const int is_group = 0;
+
+	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, is_group,
+				 p, buflen);
+}
+
+/* Encode @gid as a named group via nfsd4_encode_name(). */
+static inline int
+nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, u32 **p, int *buflen)
+{
+	const int is_group = 1;
+
+	return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, is_group,
+				 p, buflen);
+}
+
+/*
+ * Encode the "who" field of an ACE.  All arguments are handed through to
+ * nfsd4_encode_name() unchanged.
+ */
+static inline int
+nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group,
+			u32 **p, int *buflen)
+{
+	int err;
+
+	err = nfsd4_encode_name(rqstp, whotype, id, group, p, buflen);
+	return err;
+}
+
+
+/*
+ * Encode the attributes selected by @bmval for @dentry into @buffer:
+ * a two-word bitmap, the attribute length, then each requested attribute
+ * in bit order.
+ *
+ * Note: @fhp can be NULL; in this case, we might have to compose the filehandle
+ * ourselves.
+ *
+ * @countp is the buffer size in _words_; upon successful return this becomes
+ * replaced with the number of words written.  It is set to 0 when the
+ * buffer is too small (nfserr_resource).
+ *
+ * Returns nfs_ok or an nfs error status.
+ */
+int
+nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
+		struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval,
+		struct svc_rqst *rqstp)
+{
+	u32 bmval0 = bmval[0];
+	u32 bmval1 = bmval[1];
+	struct kstat stat;
+	struct svc_fh tempfh;
+	struct kstatfs statfs;
+	int buflen = *countp << 2;	/* remaining space, in bytes */
+	u32 *attrlenp;
+	u32 dummy;
+	u64 dummy64;
+	u32 *p = buffer;
+	int status;
+	int aclsupport = 0;
+	struct nfs4_acl *acl = NULL;
+
+	BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
+	BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0);
+	BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1);
+
+	status = vfs_getattr(exp->ex_mnt, dentry, &stat);
+	if (status)
+		goto out_nfserr;
+	/*
+	 * Every attribute that reads "statfs" below must be listed here;
+	 * otherwise uninitialized stack contents would be encoded.
+	 */
+	if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+		       FATTR4_WORD0_FILES_TOTAL)) ||
+	    (bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+		       FATTR4_WORD1_SPACE_TOTAL))) {
+		status = vfs_statfs(dentry->d_inode->i_sb, &statfs);
+		if (status)
+			goto out_nfserr;
+	}
+	if ((bmval0 & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) && !fhp) {
+		fh_init(&tempfh, NFS4_FHSIZE);
+		status = fh_compose(&tempfh, exp, dentry, NULL);
+		if (status)
+			goto out;
+		fhp = &tempfh;
+	}
+	if (bmval0 & (FATTR4_WORD0_ACL | FATTR4_WORD0_ACLSUPPORT
+			| FATTR4_WORD0_SUPPORTED_ATTRS)) {
+		status = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
+		aclsupport = (status == 0);
+		if (bmval0 & FATTR4_WORD0_ACL) {
+			/* no ACL support: silently drop the bit from the reply */
+			if (status == -EOPNOTSUPP)
+				bmval0 &= ~FATTR4_WORD0_ACL;
+			else if (status == -EINVAL) {
+				status = nfserr_attrnotsupp;
+				goto out;
+			} else if (status != 0)
+				goto out_nfserr;
+		}
+	}
+	if ((buflen -= 16) < 0)
+		goto out_resource;
+
+	WRITE32(2);
+	WRITE32(bmval0);
+	WRITE32(bmval1);
+	attrlenp = p++;                /* to be backfilled later */
+
+	if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(2);
+		WRITE32(aclsupport ?
+			NFSD_SUPPORTED_ATTRS_WORD0 :
+			NFSD_SUPPORTED_ATTRS_WORD0 & ~FATTR4_WORD0_ACL);
+		WRITE32(NFSD_SUPPORTED_ATTRS_WORD1);
+	}
+	if (bmval0 & FATTR4_WORD0_TYPE) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		dummy = nfs4_ftypes[(stat.mode & S_IFMT) >> 12];
+		if (dummy == NF4BAD)
+			goto out_serverfault;
+		WRITE32(dummy);
+	}
+	if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32( NFS4_FH_NOEXPIRE_WITH_OPEN | NFS4_FH_VOL_RENAME );
+	}
+	if (bmval0 & FATTR4_WORD0_CHANGE) {
+		/*
+		 * Note: This _must_ be consistent with the scheme for writing
+		 * change_info, so any changes made here must be reflected there
+		 * as well.  (See xdr4.h:set_change_info() and the WRITECINFO()
+		 * macro above.)
+		 */
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE32(stat.ctime.tv_sec);
+		WRITE32(stat.ctime.tv_nsec);
+	}
+	if (bmval0 & FATTR4_WORD0_SIZE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64(stat.size);
+	}
+	if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_NAMED_ATTR) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(0);
+	}
+	if (bmval0 & FATTR4_WORD0_FSID) {
+		if ((buflen -= 16) < 0)
+			goto out_resource;
+		if (is_fsid(fhp, rqstp->rq_reffh)) {
+			WRITE64((u64)exp->ex_fsid);
+			WRITE64((u64)0);
+		} else {
+			WRITE32(0);
+			WRITE32(MAJOR(stat.dev));
+			WRITE32(0);
+			WRITE32(MINOR(stat.dev));
+		}
+	}
+	if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(0);
+	}
+	if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(NFSD_LEASE_TIME);
+	}
+	if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(0);
+	}
+	if (bmval0 & FATTR4_WORD0_ACL) {
+		struct nfs4_ace *ace;
+		struct list_head *h;
+
+		if (acl == NULL) {
+			if ((buflen -= 4) < 0)
+				goto out_resource;
+
+			WRITE32(0);
+			goto out_acl;
+		}
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(acl->naces);
+
+		list_for_each(h, &acl->ace_head) {
+			ace = list_entry(h, struct nfs4_ace, l_ace);
+
+			if ((buflen -= 4*3) < 0)
+				goto out_resource;
+			WRITE32(ace->type);
+			WRITE32(ace->flag);
+			WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
+			status = nfsd4_encode_aclname(rqstp, ace->whotype,
+				ace->who, ace->flag & NFS4_ACE_IDENTIFIER_GROUP,
+				&p, &buflen);
+			if (status == nfserr_resource)
+				goto out_resource;
+			if (status)
+				goto out;
+		}
+	}
+out_acl:
+	if (bmval0 & FATTR4_WORD0_ACLSUPPORT) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(aclsupport ?
+			ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
+	}
+	if (bmval0 & FATTR4_WORD0_CANSETTIME) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
+		/* NOTE(review): reports "true"; confirm for the exported fs */
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_FILEHANDLE) {
+		buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4;
+		if (buflen < 0)
+			goto out_resource;
+		WRITE32(fhp->fh_handle.fh_size);
+		WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size);
+	}
+	if (bmval0 & FATTR4_WORD0_FILEID) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) stat.ino);
+	}
+	if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) statfs.f_ffree);
+	}
+	if (bmval0 & FATTR4_WORD0_FILES_FREE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) statfs.f_ffree);
+	}
+	if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) statfs.f_files);
+	}
+	if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64(~(u64)0);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXLINK) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(255);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXNAME) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(~(u32) 0);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXREAD) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) NFSSVC_MAXBLKSIZE);
+	}
+	if (bmval0 & FATTR4_WORD0_MAXWRITE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE64((u64) NFSSVC_MAXBLKSIZE);
+	}
+	if (bmval1 & FATTR4_WORD1_MODE) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(stat.mode & S_IALLUGO);
+	}
+	if (bmval1 & FATTR4_WORD1_NO_TRUNC) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(1);
+	}
+	if (bmval1 & FATTR4_WORD1_NUMLINKS) {
+		if ((buflen -= 4) < 0)
+			goto out_resource;
+		WRITE32(stat.nlink);
+	}
+	if (bmval1 & FATTR4_WORD1_OWNER) {
+		status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
+		if (status == nfserr_resource)
+			goto out_resource;
+		if (status)
+			goto out;
+	}
+	if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
+		status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
+		if (status == nfserr_resource)
+			goto out_resource;
+		if (status)
+			goto out;
+	}
+	if (bmval1 & FATTR4_WORD1_RAWDEV) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		WRITE32((u32) MAJOR(stat.rdev));
+		WRITE32((u32) MINOR(stat.rdev));
+	}
+	if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize;
+		WRITE64(dummy64);
+	}
+	if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize;
+		WRITE64(dummy64);
+	}
+	if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize;
+		WRITE64(dummy64);
+	}
+	if (bmval1 & FATTR4_WORD1_SPACE_USED) {
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		dummy64 = (u64)stat.blocks << 9;
+		WRITE64(dummy64);
+	}
+	if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(0);
+		WRITE32(stat.atime.tv_sec);
+		WRITE32(stat.atime.tv_nsec);
+	}
+	if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(0);
+		WRITE32(1);
+		WRITE32(0);
+	}
+	if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(0);
+		WRITE32(stat.ctime.tv_sec);
+		WRITE32(stat.ctime.tv_nsec);
+	}
+	if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
+		if ((buflen -= 12) < 0)
+			goto out_resource;
+		WRITE32(0);
+		WRITE32(stat.mtime.tv_sec);
+		WRITE32(stat.mtime.tv_nsec);
+	}
+	if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
+		struct dentry *mnt_pnt, *mnt_root;
+
+		if ((buflen -= 8) < 0)
+			goto out_resource;
+		/* at the root of a mount, report the covered directory's inode */
+		mnt_root = exp->ex_mnt->mnt_root;
+		if (mnt_root->d_inode == dentry->d_inode) {
+			mnt_pnt = exp->ex_mnt->mnt_mountpoint;
+			WRITE64((u64) mnt_pnt->d_inode->i_ino);
+		} else
+			WRITE64((u64) stat.ino);
+	}
+	/* backfill the attribute length: bytes written after the length word */
+	*attrlenp = htonl((char *)p - (char *)attrlenp - 4);
+	*countp = p - buffer;
+	status = nfs_ok;
+
+out:
+	nfs4_acl_free(acl);
+	if (fhp == &tempfh)
+		fh_put(&tempfh);
+	return status;
+out_nfserr:
+	status = nfserrno(status);
+	goto out;
+out_resource:
+	*countp = 0;
+	status = nfserr_resource;
+	goto out;
+out_serverfault:
+	status = nfserr_serverfault;
+	goto out;
+}
+
+/*
+ * Look up directory entry @name under the READDIR filehandle and encode
+ * its attributes at @p.  @buflen is passed through to nfsd4_encode_fattr().
+ * If the entry is a mountpoint, the mount is crossed first.
+ *
+ * References taken here on the dentry and the export are dropped before
+ * returning.
+ */
+static int
+nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
+		const char *name, int namlen, u32 *p, int *buflen)
+{
+	struct svc_export *exp = cd->rd_fhp->fh_export;
+	struct dentry *dentry;
+	int nfserr;
+
+	dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
+	if (IS_ERR(dentry))
+		return nfserrno(PTR_ERR(dentry));
+
+	exp_get(exp);
+	/*
+	 * -EAGAIN is the only error returned from
+	 * nfsd_cross_mnt() and it indicates that an
+	 * up-call has been initiated to fill in the export
+	 * options on exp.  When the answer comes back,
+	 * this call will be retried.
+	 */
+	if (d_mountpoint(dentry) &&
+	    nfsd_cross_mnt(cd->rd_rqstp, &dentry, &exp))
+		nfserr = nfserr_dropit;
+	else
+		nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen,
+					    cd->rd_bmval, cd->rd_rqstp);
+
+	dput(dentry);
+	exp_put(exp);
+	return nfserr;
+}
+
+/*
+ * Encode an fattr4 that carries only the rdattr_error attribute, set to
+ * @nfserr (stored as-is, without byte swapping).  @buflen is in words.
+ *
+ * Returns the advanced pointer, or NULL if fewer than 6 words are left.
+ */
+static u32 *
+nfsd4_encode_rdattr_error(u32 *p, int buflen, int nfserr)
+{
+	u32 *lenp;
+
+	if (buflen < 6)
+		return NULL;
+
+	p[0] = htonl(2);				/* bitmap word count */
+	p[1] = htonl(FATTR4_WORD0_RDATTR_ERROR);	/* bmval0 */
+	p[2] = htonl(0);				/* bmval1 */
+	lenp = &p[3];
+	p[4] = nfserr;					/* no htonl */
+	p += 5;
+
+	*lenp = htonl((char *)p - (char *)lenp - 4);
+	return p;
+}
+
+/*
+ * Readdir callback: append one directory entry (present flag, cookie, name
+ * and attributes) to the READDIR reply buffer described by @ccd.
+ *
+ * The cookie of the previously encoded entry is backfilled with @offset,
+ * and the cookie slot of this entry is remembered in cd->offset.
+ *
+ * Returns 0 with cd->common.err == nfs_ok when the entry was encoded (or
+ * skipped), or -EINVAL with the reason stored in cd->common.err.
+ */
+static int
+nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen,
+		    loff_t offset, ino_t ino, unsigned int d_type)
+{
+	struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
+	int buflen;
+	u32 *p = cd->buffer;
+	int nfserr = nfserr_toosmall;
+
+	/* In nfsv4, "." and ".." never make it onto the wire.. */
+	if (name && isdotent(name, namlen)) {
+		cd->common.err = nfs_ok;
+		return 0;
+	}
+
+	if (cd->offset)
+		xdr_encode_hyper(cd->offset, (u64) offset);
+
+	/* words left after: present flag, cookie (2), name length, name */
+	buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
+	if (buflen < 0)
+		goto fail;
+
+	*p++ = xdr_one;                             /* mark entry present */
+	cd->offset = p;                             /* remember pointer */
+	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
+	p = xdr_encode_array(p, name, namlen);      /* name length & name */
+
+	nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, p, &buflen);
+	switch (nfserr) {
+	case nfs_ok:
+		p += buflen;
+		break;
+	case nfserr_resource:
+		nfserr = nfserr_toosmall;
+		goto fail;
+	case nfserr_dropit:
+		goto fail;
+	default:
+		/*
+		 * If the client requested the RDATTR_ERROR attribute,
+		 * we stuff the error code into this attribute
+		 * and continue.  If this attribute was not requested,
+		 * then in accordance with the spec, we fail the
+		 * entire READDIR operation(!)
+		 */
+		if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
+			goto fail;
+		/*
+		 * Encode the error that actually occurred; only report
+		 * "too small" if even the error attribute does not fit.
+		 */
+		p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+		if (p == NULL) {
+			nfserr = nfserr_toosmall;
+			goto fail;
+		}
+	}
+	cd->buflen -= (p - cd->buffer);
+	cd->buffer = p;
+	cd->common.err = nfs_ok;
+	return 0;
+fail:
+	cd->common.err = nfserr;
+	return -EINVAL;
+}
+
+/*
+ * ACCESS result: the supported and the granted access bits.  Nothing is
+ * encoded when the operation failed.
+ */
+static void
+nfsd4_encode_access(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_access *access)
+{
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return;
+
+	RESERVE_SPACE(8);
+	WRITE32(access->ac_supported);
+	WRITE32(access->ac_resp_access);
+	ADJUST_ARGS();
+}
+
+/*
+ * CLOSE result: the updated stateid on success.  The seqid replay
+ * bookkeeping in the tail macro runs in either case.
+ */
+static void
+nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_close *close)
+{
+	ENCODE_SEQID_OP_HEAD;
+
+	if (nfserr)
+		goto out;
+
+	RESERVE_SPACE(sizeof(stateid_t));
+	WRITE32(close->cl_stateid.si_generation);
+	WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t));
+	ADJUST_ARGS();
+out:
+	ENCODE_SEQID_OP_TAIL(close->cl_stateowner);
+}
+
+
+/* COMMIT result: the 8-byte write verifier, on success only. */
+static void
+nfsd4_encode_commit(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_commit *commit)
+{
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return;
+
+	RESERVE_SPACE(8);
+	WRITEMEM(commit->co_verf.data, 8);
+	ADJUST_ARGS();
+}
+
+/*
+ * CREATE result: change_info (20 bytes) followed by a two-word attribute
+ * bitmap (12 bytes), on success only.
+ */
+static void
+nfsd4_encode_create(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_create *create)
+{
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return;
+
+	RESERVE_SPACE(32);
+	WRITECINFO(create->cr_cinfo);
+	WRITE32(2);
+	WRITE32(create->cr_bmval[0]);
+	WRITE32(create->cr_bmval[1]);
+	ADJUST_ARGS();
+}
+
+/*
+ * GETATTR result: encode the requested attributes of the current
+ * filehandle directly at resp->p, keeping COMPOUND_ERR_SLACK_SPACE bytes
+ * of the reply buffer out of the attribute encoder's reach.
+ *
+ * Returns the (possibly updated) operation status.
+ */
+static int
+nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_getattr *getattr)
+{
+	struct svc_fh *fhp = getattr->ga_fhp;
+	int words;
+
+	if (nfserr)
+		return nfserr;
+
+	/* in: words available; out: words written */
+	words = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
+	nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
+				    resp->p, &words, getattr->ga_bmval,
+				    resp->rqstp);
+	if (nfserr)
+		return nfserr;
+
+	resp->p += words;
+	return 0;
+}
+
+/* GETFH result: the filehandle as a length-prefixed opaque, on success only. */
+static void
+nfsd4_encode_getfh(struct nfsd4_compoundres *resp, int nfserr, struct svc_fh *fhp)
+{
+	unsigned int fhsize;
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return;
+
+	fhsize = fhp->fh_handle.fh_size;
+	RESERVE_SPACE(fhsize + 4);
+	WRITE32(fhsize);
+	WRITEMEM(&fhp->fh_handle.fh_base, fhsize);
+	ADJUST_ARGS();
+}
+
+/*
+ * Encode a LOCK4denied structure: offset, length, lock type, then the
+ * conflicting owner (clientid plus opaque owner name).
+ *
+ * Excluding the name itself this takes
+ * 8(offset) + 8(length) + 4(type) + 8(clientid) + 4(namelen) = 32 bytes.
+ *
+ * When a stateowner is attached, the reference on it is dropped here once
+ * its name has been copied out.
+ */
+static void
+nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld)
+{
+	ENCODE_HEAD;
+
+	if (ld->ld_sop)
+		RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop->so_owner.len));
+	else
+		RESERVE_SPACE(32 + XDR_LEN(0));
+
+	WRITE64(ld->ld_start);
+	WRITE64(ld->ld_length);
+	WRITE32(ld->ld_type);
+
+	if (!ld->ld_sop) {
+		/* non - nfsv4 lock in conflict, no clientid nor owner */
+		WRITE64((u64)0);	/* clientid */
+		WRITE32(0);		/* length of owner name */
+	} else {
+		WRITEMEM(&ld->ld_clientid, 8);
+		WRITE32(ld->ld_sop->so_owner.len);
+		WRITEMEM(ld->ld_sop->so_owner.data, ld->ld_sop->so_owner.len);
+		kref_put(&ld->ld_sop->so_ref, nfs4_free_stateowner);
+	}
+	ADJUST_ARGS();
+}
+
+/*
+ * LOCK result: the lock stateid on success, or the conflicting lock when
+ * the status is nfserr_denied.  Other errors carry no body.  The seqid
+ * replay bookkeeping runs in every case.
+ */
+static void
+nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock *lock)
+{
+	ENCODE_SEQID_OP_HEAD;
+
+	if (nfserr) {
+		if (nfserr == nfserr_denied)
+			nfsd4_encode_lock_denied(resp, &lock->lk_denied);
+		goto out;
+	}
+
+	RESERVE_SPACE(4 + sizeof(stateid_t));
+	WRITE32(lock->lk_resp_stateid.si_generation);
+	WRITEMEM(&lock->lk_resp_stateid.si_opaque, sizeof(stateid_opaque_t));
+	ADJUST_ARGS();
+out:
+	ENCODE_SEQID_OP_TAIL(lock->lk_stateowner);
+}
+
+/* LOCKT result: only a denied test has a body (the conflicting lock). */
+static void
+nfsd4_encode_lockt(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lockt *lockt)
+{
+	if (nfserr != nfserr_denied)
+		return;
+
+	nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
+}
+
+/*
+ * LOCKU result: the updated lock stateid on success.  The seqid replay
+ * bookkeeping runs in either case.
+ */
+static void
+nfsd4_encode_locku(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_locku *locku)
+{
+	ENCODE_SEQID_OP_HEAD;
+
+	if (nfserr)
+		goto out;
+
+	RESERVE_SPACE(sizeof(stateid_t));
+	WRITE32(locku->lu_stateid.si_generation);
+	WRITEMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t));
+	ADJUST_ARGS();
+out:
+	ENCODE_SEQID_OP_TAIL(locku->lu_stateowner);
+}
+
+
+/* LINK result: change_info for the target directory, on success only. */
+static void
+nfsd4_encode_link(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_link *link)
+{
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return;
+
+	RESERVE_SPACE(20);
+	WRITECINFO(link->li_cinfo);
+	ADJUST_ARGS();
+}
+
+
+/*
+ * OPEN result: stateid, change_info, result flags, attribute bitmap and
+ * the delegation (if any).  Nothing is encoded on error, but the seqid
+ * replay bookkeeping runs in every case.
+ */
+static void
+nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open *open)
+{
+	ENCODE_SEQID_OP_HEAD;
+
+	if (nfserr)
+		goto out;
+
+	/*
+	 * stateid + change_info (20) + rflags (4) + bitmap (12) +
+	 * delegation type (4) = sizeof(stateid_t) + 40 bytes.
+	 */
+	RESERVE_SPACE(40 + sizeof(stateid_t));
+	WRITE32(open->op_stateid.si_generation);
+	WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t));
+	WRITECINFO(open->op_cinfo);
+	WRITE32(open->op_rflags);
+	WRITE32(2);
+	WRITE32(open->op_bmval[0]);
+	WRITE32(open->op_bmval[1]);
+	WRITE32(open->op_delegate_type);
+	ADJUST_ARGS();
+
+	switch (open->op_delegate_type) {
+	case NFS4_OPEN_DELEGATE_NONE:
+		break;
+	case NFS4_OPEN_DELEGATE_READ:
+		/* stateid + recall flag (4) + one ACE (16) */
+		RESERVE_SPACE(20 + sizeof(stateid_t));
+		WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
+		WRITE32(0);
+
+		/*
+		 * TODO: ACE's in delegations
+		 */
+		WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+		WRITE32(0);
+		WRITE32(0);
+		WRITE32(0);   /* XXX: is NULL principal ok? */
+		ADJUST_ARGS();
+		break;
+	case NFS4_OPEN_DELEGATE_WRITE:
+		/* stateid + recall flag (4) + space limit (12) + one ACE (16) */
+		RESERVE_SPACE(32 + sizeof(stateid_t));
+		WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t));
+		WRITE32(0);
+
+		/*
+		 * TODO: space_limit's in delegations
+		 */
+		WRITE32(NFS4_LIMIT_SIZE);
+		WRITE32(~(u32)0);
+		WRITE32(~(u32)0);
+
+		/*
+		 * TODO: ACE's in delegations
+		 */
+		WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+		WRITE32(0);
+		WRITE32(0);
+		WRITE32(0);   /* XXX: is NULL principal ok? */
+		ADJUST_ARGS();
+		break;
+	default:
+		BUG();
+	}
+	/* XXX save filehandle here */
+out:
+	ENCODE_SEQID_OP_TAIL(open->op_stateowner);
+}
+
+/*
+ * OPEN_CONFIRM result: the confirmed stateid on success.  The seqid replay
+ * bookkeeping runs in either case.
+ */
+static void
+nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_confirm *oc)
+{
+	ENCODE_SEQID_OP_HEAD;
+
+	if (nfserr)
+		goto out;
+
+	RESERVE_SPACE(sizeof(stateid_t));
+	WRITE32(oc->oc_resp_stateid.si_generation);
+	WRITEMEM(&oc->oc_resp_stateid.si_opaque, sizeof(stateid_opaque_t));
+	ADJUST_ARGS();
+out:
+	ENCODE_SEQID_OP_TAIL(oc->oc_stateowner);
+}
+
+/*
+ * OPEN_DOWNGRADE result: the updated stateid on success.  The seqid replay
+ * bookkeeping runs in either case.
+ */
+static void
+nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_downgrade *od)
+{
+	ENCODE_SEQID_OP_HEAD;
+
+	if (nfserr)
+		goto out;
+
+	RESERVE_SPACE(sizeof(stateid_t));
+	WRITE32(od->od_stateid.si_generation);
+	WRITEMEM(&od->od_stateid.si_opaque, sizeof(stateid_opaque_t));
+	ADJUST_ARGS();
+out:
+	ENCODE_SEQID_OP_TAIL(od->od_stateowner);
+}
+
+/*
+ * READ result: performs the read itself, placing file data in response
+ * pages, and encodes the eof flag and byte count in front of it.
+ *
+ * Fails with nfserr_resource if the reply already has page data.  After a
+ * non-empty read, a fresh page becomes the tail and resp->p/resp->end are
+ * redirected to it for the operations that follow.
+ *
+ * Returns the (possibly updated) operation status.
+ */
+static int
+nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_read *read)
+{
+ u32 eof;
+ int v, pn;
+ unsigned long maxcount;
+ long len;
+ ENCODE_HEAD;
+
+ if (nfserr)
+ return nfserr;
+ if (resp->xbuf->page_len)
+ return nfserr_resource;
+
+ RESERVE_SPACE(8); /* eof flag and byte count */
+
+ /* clamp the request to NFSSVC_MAXBLKSIZE */
+ maxcount = NFSSVC_MAXBLKSIZE;
+ if (maxcount > read->rd_length)
+ maxcount = read->rd_length;
+
+ /* take one response page per PAGE_SIZE chunk and build the iovec */
+ len = maxcount;
+ v = 0;
+ while (len > 0) {
+ pn = resp->rqstp->rq_resused;
+ svc_take_page(resp->rqstp);
+ read->rd_iov[v].iov_base = page_address(resp->rqstp->rq_respages[pn]);
+ read->rd_iov[v].iov_len = len < PAGE_SIZE ? len : PAGE_SIZE;
+ v++;
+ len -= PAGE_SIZE;
+ }
+ read->rd_vlen = v;
+
+ /* maxcount is in/out: passed by address and used as the count read */
+ nfserr = nfsd_read(read->rd_rqstp, read->rd_fhp, read->rd_filp,
+ read->rd_offset, read->rd_iov, read->rd_vlen,
+ &maxcount);
+
+ if (nfserr == nfserr_symlink)
+ nfserr = nfserr_inval;
+ if (nfserr)
+ return nfserr;
+ eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size);
+
+ WRITE32(eof);
+ WRITE32(maxcount);
+ ADJUST_ARGS();
+ /* the head ends here; the data follows in the pages */
+ resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
+
+ resp->xbuf->page_len = maxcount;
+
+ /* read zero bytes -> don't set up tail */
+ if(!maxcount)
+ return 0;
+
+ /* set up page for remaining responses */
+ svc_take_page(resp->rqstp);
+ resp->xbuf->tail[0].iov_base =
+ page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
+ resp->xbuf->tail[0].iov_len = 0;
+ resp->p = resp->xbuf->tail[0].iov_base;
+ resp->end = resp->p + PAGE_SIZE/4;
+
+ /*
+ * Data length not a multiple of 4: a zero word at the start of the
+ * tail supplies the pad; the tail iovec is shifted so that it exposes
+ * only the 4 - (maxcount&3) pad bytes of that word.
+ */
+ if (maxcount&3) {
+ *(resp->p)++ = 0;
+ resp->xbuf->tail[0].iov_base += maxcount&3;
+ resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
+ }
+ return 0;
+}
+
+/*
+ * READLINK result: reads the link target into a response page and encodes
+ * its length in front of it.
+ *
+ * Fails with nfserr_resource if the reply already has page data.  On
+ * success a fresh page becomes the tail and resp->p/resp->end are
+ * redirected to it for the operations that follow.
+ *
+ * Returns the (possibly updated) operation status.
+ */
+static int
+nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readlink *readlink)
+{
+ int maxcount;
+ char *page;
+ ENCODE_HEAD;
+
+ if (nfserr)
+ return nfserr;
+ if (resp->xbuf->page_len)
+ return nfserr_resource;
+
+ /* page that receives the link text */
+ svc_take_page(resp->rqstp);
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+
+ maxcount = PAGE_SIZE;
+ RESERVE_SPACE(4);
+
+ /*
+ * XXX: By default, the ->readlink() VFS op will truncate symlinks
+ * if they would overflow the buffer. Is this kosher in NFSv4? If
+ * not, one easy fix is: if ->readlink() precisely fills the buffer,
+ * assume that truncation occurred, and return NFS4ERR_RESOURCE.
+ */
+ /* maxcount is in/out: passed by address and used as the link length */
+ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount);
+ if (nfserr == nfserr_isdir)
+ return nfserr_inval;
+ if (nfserr)
+ return nfserr;
+
+ WRITE32(maxcount);
+ ADJUST_ARGS();
+ /* the head ends here; the link text follows in the page */
+ resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
+
+ /* set up a page for the remaining responses */
+ svc_take_page(resp->rqstp);
+ resp->xbuf->tail[0].iov_base =
+ page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
+ resp->xbuf->tail[0].iov_len = 0;
+ resp->p = resp->xbuf->tail[0].iov_base;
+ resp->end = resp->p + PAGE_SIZE/4;
+
+ resp->xbuf->page_len = maxcount;
+ /*
+ * Length not a multiple of 4: a zero word at the start of the tail
+ * supplies the pad; the tail iovec is shifted so that it exposes only
+ * the 4 - (maxcount&3) pad bytes of that word.
+ */
+ if (maxcount&3) {
+ *(resp->p)++ = 0;
+ resp->xbuf->tail[0].iov_base += maxcount&3;
+ resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
+ }
+ return 0;
+}
+
+/*
+ * READDIR result: writes a zero verifier into the head, then lets
+ * nfsd_readdir() fill one response page with entries through the
+ * nfsd4_encode_dirent() callback, and finally appends the "no more
+ * entries" marker and the eof flag.
+ *
+ * Fails with nfserr_resource if the reply already has page data.  On
+ * error, resp->p is rewound to before the verifier.  On success a fresh
+ * page becomes the tail for the operations that follow.
+ *
+ * Returns the (possibly updated) operation status.
+ */
+static int
+nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readdir *readdir)
+{
+ int maxcount;
+ loff_t offset;
+ u32 *page, *savep;
+ ENCODE_HEAD;
+
+ if (nfserr)
+ return nfserr;
+ if (resp->xbuf->page_len)
+ return nfserr_resource;
+
+ RESERVE_SPACE(8); /* verifier */
+ savep = p;
+
+ /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
+ WRITE32(0);
+ WRITE32(0);
+ ADJUST_ARGS();
+ resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
+
+ /* entries are limited to a single page */
+ maxcount = PAGE_SIZE;
+ if (maxcount > readdir->rd_maxcount)
+ maxcount = readdir->rd_maxcount;
+
+ /*
+ * Convert from bytes to words, account for the two words already
+ * written, make sure to leave two words at the end for the next
+ * pointer and eof field.
+ */
+ maxcount = (maxcount >> 2) - 4;
+ if (maxcount < 0) {
+ nfserr = nfserr_toosmall;
+ goto err_no_verf;
+ }
+
+ svc_take_page(resp->rqstp);
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ readdir->common.err = 0;
+ readdir->buflen = maxcount;
+ readdir->buffer = page;
+ readdir->offset = NULL;
+
+ offset = readdir->rd_cookie;
+ nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
+ &offset,
+ &readdir->common, nfsd4_encode_dirent);
+ /* "too small" is only an error if not even one entry was encoded */
+ if (nfserr == nfs_ok &&
+ readdir->common.err == nfserr_toosmall &&
+ readdir->buffer == page)
+ nfserr = nfserr_toosmall;
+ if (nfserr == nfserr_symlink)
+ nfserr = nfserr_notdir;
+ if (nfserr)
+ goto err_no_verf;
+
+ /* backfill the cookie slot last recorded by the dirent callback */
+ if (readdir->offset)
+ xdr_encode_hyper(readdir->offset, offset);
+
+ p = readdir->buffer;
+ *p++ = 0; /* no more entries */
+ *p++ = htonl(readdir->common.err == nfserr_eof);
+ resp->xbuf->page_len = ((char*)p) - (char*)page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+
+ /* allocate a page for the tail */
+ svc_take_page(resp->rqstp);
+ resp->xbuf->tail[0].iov_base =
+ page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+ resp->rqstp->rq_restailpage = resp->rqstp->rq_resused-1;
+ resp->xbuf->tail[0].iov_len = 0;
+ resp->p = resp->xbuf->tail[0].iov_base;
+ resp->end = resp->p + PAGE_SIZE/4;
+
+ return 0;
+err_no_verf:
+ /* rewind resp->p to before the verifier */
+ p = savep;
+ ADJUST_ARGS();
+ return nfserr;
+}
+
+/* REMOVE result: change_info for the directory, on success only. */
+static void
+nfsd4_encode_remove(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_remove *remove)
+{
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return;
+
+	RESERVE_SPACE(20);
+	WRITECINFO(remove->rm_cinfo);
+	ADJUST_ARGS();
+}
+
+/*
+ * RENAME result: change_info for the source directory followed by
+ * change_info for the target directory, on success only.
+ */
+static void
+nfsd4_encode_rename(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_rename *rename)
+{
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return;
+
+	RESERVE_SPACE(40);
+	WRITECINFO(rename->rn_sinfo);
+	WRITECINFO(rename->rn_tinfo);
+	ADJUST_ARGS();
+}
+
+/*
+ * The SETATTR encode routine is special -- it always encodes a bitmap,
+ * regardless of the error status.  On error the bitmap is all zero;
+ * otherwise it is the bitmap recorded in the setattr arguments.
+ */
+static void
+nfsd4_encode_setattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setattr *setattr)
+{
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(12);
+	WRITE32(2);
+	WRITE32(nfserr ? 0 : setattr->sa_bmval[0]);
+	WRITE32(nfserr ? 0 : setattr->sa_bmval[1]);
+	ADJUST_ARGS();
+}
+
+/*
+ * SETCLIENTID result: clientid and confirm verifier on success.  For
+ * nfserr_clid_inuse two zero words are encoded instead; any other error
+ * has no body.
+ */
+static void
+nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setclientid *scd)
+{
+	ENCODE_HEAD;
+
+	if (nfserr) {
+		if (nfserr != nfserr_clid_inuse)
+			return;
+		RESERVE_SPACE(8);
+		WRITE32(0);
+		WRITE32(0);
+		ADJUST_ARGS();
+		return;
+	}
+
+	RESERVE_SPACE(8 + sizeof(nfs4_verifier));
+	WRITEMEM(&scd->se_clientid, 8);
+	WRITEMEM(&scd->se_confirm, sizeof(nfs4_verifier));
+	ADJUST_ARGS();
+}
+
+/*
+ * WRITE result: byte count, stability level and the 8-byte write verifier,
+ * on success only.
+ */
+static void
+nfsd4_encode_write(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_write *write)
+{
+	ENCODE_HEAD;
+
+	if (nfserr)
+		return;
+
+	RESERVE_SPACE(16);
+	WRITE32(write->wr_bytes_written);
+	WRITE32(write->wr_how_written);
+	WRITEMEM(write->wr_verifier.data, 8);
+	ADJUST_ARGS();
+}
+
+/*
+ * Encode one operation of a COMPOUND reply: the opcode, a slot for the
+ * status, and the operation-specific result body.
+ *
+ * Encoders that can still fail while encoding (GETATTR, READ, READDIR,
+ * READLINK) return a status that replaces op->status; the status slot is
+ * therefore only filled in at the very end.
+ */
+void
+nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+{
+	u32 *status_slot;
+	ENCODE_HEAD;
+
+	RESERVE_SPACE(8);
+	WRITE32(op->opnum);
+	status_slot = p++;	/* to be backfilled at the end */
+	ADJUST_ARGS();
+
+	switch (op->opnum) {
+	/* encoders whose return value becomes the operation status */
+	case OP_GETATTR:
+		op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
+		break;
+	case OP_READ:
+		op->status = nfsd4_encode_read(resp, op->status, &op->u.read);
+		break;
+	case OP_READDIR:
+		op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir);
+		break;
+	case OP_READLINK:
+		op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink);
+		break;
+
+	/* encoders that leave the status alone */
+	case OP_ACCESS:
+		nfsd4_encode_access(resp, op->status, &op->u.access);
+		break;
+	case OP_CLOSE:
+		nfsd4_encode_close(resp, op->status, &op->u.close);
+		break;
+	case OP_COMMIT:
+		nfsd4_encode_commit(resp, op->status, &op->u.commit);
+		break;
+	case OP_CREATE:
+		nfsd4_encode_create(resp, op->status, &op->u.create);
+		break;
+	case OP_GETFH:
+		nfsd4_encode_getfh(resp, op->status, op->u.getfh);
+		break;
+	case OP_LINK:
+		nfsd4_encode_link(resp, op->status, &op->u.link);
+		break;
+	case OP_LOCK:
+		nfsd4_encode_lock(resp, op->status, &op->u.lock);
+		break;
+	case OP_LOCKT:
+		nfsd4_encode_lockt(resp, op->status, &op->u.lockt);
+		break;
+	case OP_LOCKU:
+		nfsd4_encode_locku(resp, op->status, &op->u.locku);
+		break;
+	case OP_OPEN:
+		nfsd4_encode_open(resp, op->status, &op->u.open);
+		break;
+	case OP_OPEN_CONFIRM:
+		nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm);
+		break;
+	case OP_OPEN_DOWNGRADE:
+		nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade);
+		break;
+	case OP_REMOVE:
+		nfsd4_encode_remove(resp, op->status, &op->u.remove);
+		break;
+	case OP_RENAME:
+		nfsd4_encode_rename(resp, op->status, &op->u.rename);
+		break;
+	case OP_SETATTR:
+		nfsd4_encode_setattr(resp, op->status, &op->u.setattr);
+		break;
+	case OP_SETCLIENTID:
+		nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid);
+		break;
+	case OP_WRITE:
+		nfsd4_encode_write(resp, op->status, &op->u.write);
+		break;
+
+	/* operations whose reply is the status alone */
+	case OP_DELEGRETURN:
+	case OP_LOOKUP:
+	case OP_LOOKUPP:
+	case OP_NVERIFY:
+	case OP_PUTFH:
+	case OP_PUTROOTFH:
+	case OP_RENEW:
+	case OP_RESTOREFH:
+	case OP_SAVEFH:
+	case OP_SETCLIENTID_CONFIRM:
+	case OP_VERIFY:
+	case OP_RELEASE_LOCKOWNER:
+	default:
+		break;
+	}
+
+	/*
+	 * Note: We write the status directly, instead of using WRITE32(),
+	 * since it is already in network byte order.
+	 */
+	*status_slot = op->status;
+}
+
+/*
+ * Encode the reply stored in the stateowner reply cache: the opcode, the
+ * cached status, then the cached result bytes copied verbatim.
+ *
+ * XDR note: do not encode rp->rp_buflen: the buffer contains the
+ * previously sent already encoded operation.
+ *
+ * called with nfs4_lock_state() held
+ */
+void
+nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+{
+	ENCODE_HEAD;
+	struct nfs4_replay *cached = op->replay;
+
+	BUG_ON(!cached);
+
+	/* operation header */
+	RESERVE_SPACE(8);
+	WRITE32(op->opnum);
+	*p++ = cached->rp_status;	/* already xdr'ed */
+	ADJUST_ARGS();
+
+	/* cached result body */
+	RESERVE_SPACE(cached->rp_buflen);
+	WRITEMEM(cached->rp_buf, cached->rp_buflen);
+	ADJUST_ARGS();
+}
+
+/*
+ * END OF "GENERIC" ENCODE ROUTINES.
+ */
+
+/* Encoder for a reply with no body: only the reply size check is done. */
+int
+nfs4svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	int ok = xdr_ressize_check(rqstp, p);
+
+	return ok;
+}
+
+/*
+ * Free everything the decoder allocated for a COMPOUND request: a separate
+ * ops array (if it is not the inline one), the temporary buffer, and every
+ * entry on the deferred-free list.
+ */
+void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args)
+{
+	struct tmpbuf *tb;
+
+	if (args->ops != args->iops) {
+		kfree(args->ops);
+		args->ops = args->iops;
+	}
+
+	if (args->tmpp) {
+		kfree(args->tmpp);
+		args->tmpp = NULL;
+	}
+
+	/* unlink each entry before releasing it */
+	for (tb = args->to_free; tb; tb = args->to_free) {
+		args->to_free = tb->next;
+		tb->release(tb->buf);
+		kfree(tb);
+	}
+}
+
+/*
+ * Decode entry point for COMPOUND: point the decoder state at the request
+ * buffers and decode all operations.
+ *
+ * Returns 1 on success and 0 on failure; on failure anything allocated
+ * during decoding is released first.
+ */
+int
+nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundargs *args)
+{
+	/* where the arguments live */
+	args->rqstp = rqstp;
+	args->p = p;
+	args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
+	args->pagelist = rqstp->rq_arg.pages;
+	args->pagelen = rqstp->rq_arg.page_len;
+
+	/* nothing allocated yet; start with the inline ops array */
+	args->ops = args->iops;
+	args->tmpp = NULL;
+	args->to_free = NULL;
+
+	if (nfsd4_decode_compound(args)) {
+		nfsd4_release_compoundargs(args);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Finish a COMPOUND reply: backfill the tag and the operation count at
+ * resp->tagp, then fix up the length of whichever iovec resp->p currently
+ * points into (the tail if the reply has page data, the head otherwise).
+ *
+ * Always returns 1.
+ */
+int
+nfs4svc_encode_compoundres(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundres *resp)
+{
+	struct kvec *last;
+
+	/*
+	 * All that remains is to write the tag and operation count...
+	 */
+	p = resp->tagp;
+	*p++ = htonl(resp->taglen);
+	memcpy(p, resp->tag, resp->taglen);
+	p += XDR_QUADLEN(resp->taglen);
+	*p++ = htonl(resp->opcnt);
+
+	last = rqstp->rq_res.page_len ? &rqstp->rq_res.tail[0]
+				      : &rqstp->rq_res.head[0];
+	last->iov_len = ((char*)resp->p) - (char*)last->iov_base;
+	BUG_ON(last->iov_len > PAGE_SIZE);
+	return 1;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
new file mode 100644
index 000000000000..119e4d4495b8
--- /dev/null
+++ b/fs/nfsd/nfscache.c
@@ -0,0 +1,328 @@
+/*
+ * linux/fs/nfsd/nfscache.c
+ *
+ * Request reply cache. This is currently a global cache, but this may
+ * change in the future and be a per-client cache.
+ *
+ * This code is heavily inspired by the 44BSD implementation, although
+ * it does things a bit differently.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+
+/* Size of reply cache. Common values are:
+ * 4.3BSD: 128
+ * 4.4BSD: 256
+ * Solaris2: 1024
+ * DEC Unix: 512-4096
+ */
+#define CACHESIZE 1024
+#define HASHSIZE 64
+#define REQHASH(xid) ((((xid) >> 24) ^ (xid)) & (HASHSIZE-1))
+
+static struct hlist_head * hash_list;
+static struct list_head lru_head;
+static int cache_disabled = 1;
+
+static int nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
+
+/*
+ * locking for the reply cache:
+ * A cache entry is "single use" if c_state == RC_INPROG
+ * Otherwise, when accessing _prev or _next, the lock must be held.
+ */
+static DEFINE_SPINLOCK(cache_lock);
+
+/*
+ * Set up the reply cache: build the LRU list out of up to CACHESIZE
+ * unused entries and allocate the zeroed hash table.  Running short of
+ * entries is reported but not fatal; the cache is only enabled once
+ * the hash table exists.
+ */
+void
+nfsd_cache_init(void)
+{
+	struct svc_cacherep *rp;
+	int missing;
+
+	INIT_LIST_HEAD(&lru_head);
+	for (missing = CACHESIZE; missing; missing--) {
+		rp = kmalloc(sizeof(*rp), GFP_KERNEL);
+		if (!rp)
+			break;
+		list_add(&rp->c_lru, &lru_head);
+		rp->c_state = RC_UNUSED;
+		rp->c_type = RC_NOCACHE;
+		INIT_HLIST_NODE(&rp->c_hash);
+	}
+
+	if (missing)
+		printk (KERN_ERR "nfsd: cannot allocate all %d cache entries, only got %d\n",
+			CACHESIZE, CACHESIZE-missing);
+
+	hash_list = kmalloc (HASHSIZE * sizeof(struct hlist_head), GFP_KERNEL);
+	if (hash_list) {
+		memset(hash_list, 0, HASHSIZE * sizeof(struct hlist_head));
+		cache_disabled = 0;
+		return;
+	}
+
+	/* no hash table: give the entries back and leave the cache disabled */
+	nfsd_cache_shutdown();
+	printk (KERN_ERR "nfsd: cannot allocate %Zd bytes for hash list\n",
+		HASHSIZE * sizeof(struct hlist_head));
+}
+
+/*
+ * Tear the reply cache down: free every entry on the LRU list (along
+ * with the reply buffer of completed RC_REPLBUFF entries), mark the
+ * cache disabled and drop the hash table.
+ */
+void
+nfsd_cache_shutdown(void)
+{
+	struct svc_cacherep *victim;
+
+	while (!list_empty(&lru_head)) {
+		victim = list_entry(lru_head.next, struct svc_cacherep, c_lru);
+		if (victim->c_type == RC_REPLBUFF && victim->c_state == RC_DONE)
+			kfree(victim->c_replvec.iov_base);
+		list_del(&victim->c_lru);
+		kfree(victim);
+	}
+
+	cache_disabled = 1;
+
+	if (hash_list != NULL)
+		kfree (hash_list);
+	hash_list = NULL;
+}
+
+/*
+ * Unlink a cache entry from wherever it sits on the LRU list and
+ * re-queue it at the tail.
+ */
+static void
+lru_put_end(struct svc_cacherep *rp)
+{
+	list_move_tail(&rp->c_lru, &lru_head);
+}
+
+/*
+ * Re-hash a cache entry: unlink it from whatever chain it is on and
+ * insert it at the head of the chain selected by its current c_xid.
+ */
+static void
+hash_refile(struct svc_cacherep *rp)
+{
+	struct hlist_head *bucket = hash_list + REQHASH(rp->c_xid);
+
+	hlist_del_init(&rp->c_hash);
+	hlist_add_head(&rp->c_hash, bucket);
+}
+
+/*
+ * Try to find an entry matching the current call in the cache. When none
+ * is found, we grab the oldest unlocked entry off the LRU list.
+ * Note that no operation within the loop may sleep.
+ */
+/*
+ * Look the current request up in the reply cache.
+ *
+ * Returns
+ *   RC_DOIT   - the caller should process the request (cache disabled,
+ *               uncacheable type, cache miss, no free entry, or a hit
+ *               that may not or cannot be replayed);
+ *   RC_DROPIT - the same request is still in progress, or the hit is
+ *               younger than RC_DELAY;
+ *   RC_REPLY  - a cached reply has been placed in rqstp->rq_res.
+ *
+ * On a miss the first LRU entry that is not RC_INPROG is claimed for
+ * this request and remembered in rqstp->rq_cacherep.
+ */
+int
+nfsd_cache_lookup(struct svc_rqst *rqstp, int type)
+{
+	struct hlist_node	*hn;
+	struct hlist_head 	*rh;
+	struct svc_cacherep	*rp;
+	u32			xid = rqstp->rq_xid,
+				proto =  rqstp->rq_prot,
+				vers = rqstp->rq_vers,
+				proc = rqstp->rq_proc;
+	unsigned long		age;
+	int rtn;
+
+	rqstp->rq_cacherep = NULL;
+	if (cache_disabled || type == RC_NOCACHE) {
+		nfsdstats.rcnocache++;
+		return RC_DOIT;
+	}
+
+	spin_lock(&cache_lock);
+	rtn = RC_DOIT;
+
+	/* a hit must match xid, proc, proto, vers and address, and be under 120s old */
+	rh = &hash_list[REQHASH(xid)];
+	hlist_for_each_entry(rp, hn, rh, c_hash) {
+		if (rp->c_state != RC_UNUSED &&
+		    xid == rp->c_xid && proc == rp->c_proc &&
+		    proto == rp->c_prot && vers == rp->c_vers &&
+		    time_before(jiffies, rp->c_timestamp + 120*HZ) &&
+		    memcmp((char*)&rqstp->rq_addr, (char*)&rp->c_addr, sizeof(rp->c_addr))==0) {
+			nfsdstats.rchits++;
+			goto found_entry;
+		}
+	}
+	nfsdstats.rcmisses++;
+
+	/* This loop shouldn't take more than a few iterations normally */
+	{
+	int	safe = 0;
+	list_for_each_entry(rp, &lru_head, c_lru) {
+		if (rp->c_state != RC_INPROG)
+			break;
+		if (safe++ > CACHESIZE) {
+			printk("nfsd: loop in repcache LRU list\n");
+			cache_disabled = 1;
+			goto out;
+		}
+	}
+	}
+
+	/*
+	 * This should not happen.  Note that list_for_each_entry() never
+	 * leaves rp NULL: when the loop runs off the end of the list, rp
+	 * is the pseudo-entry that contains lru_head itself, so that is
+	 * what has to be tested for.
+	 */
+	if (&rp->c_lru == &lru_head) {
+		static int	complaints;
+
+		printk(KERN_WARNING "nfsd: all repcache entries locked!\n");
+		if (++complaints > 5) {
+			printk(KERN_WARNING "nfsd: disabling repcache.\n");
+			cache_disabled = 1;
+		}
+		goto out;
+	}
+
+	/* claim the entry for this request */
+	rqstp->rq_cacherep = rp;
+	rp->c_state = RC_INPROG;
+	rp->c_xid = xid;
+	rp->c_proc = proc;
+	rp->c_addr = rqstp->rq_addr;
+	rp->c_prot = proto;
+	rp->c_vers = vers;
+	rp->c_timestamp = jiffies;
+
+	hash_refile(rp);
+
+	/* release any buffer */
+	if (rp->c_type == RC_REPLBUFF) {
+		kfree(rp->c_replvec.iov_base);
+		rp->c_replvec.iov_base = NULL;
+	}
+	rp->c_type = RC_NOCACHE;
+ out:
+	spin_unlock(&cache_lock);
+	return rtn;
+
+found_entry:
+	/* We found a matching entry which is either in progress or done. */
+	age = jiffies - rp->c_timestamp;
+	rp->c_timestamp = jiffies;
+	lru_put_end(rp);
+
+	rtn = RC_DROPIT;
+	/* Request being processed or excessive rexmits */
+	if (rp->c_state == RC_INPROG || age < RC_DELAY)
+		goto out;
+
+	/* From the hall of fame of impractical attacks:
+	 * Is this a user who tries to snoop on the cache? */
+	rtn = RC_DOIT;
+	if (!rqstp->rq_secure && rp->c_secure)
+		goto out;
+
+	/* Compose RPC reply header */
+	switch (rp->c_type) {
+	case RC_NOCACHE:
+		break;
+	case RC_REPLSTAT:
+		svc_putu32(&rqstp->rq_res.head[0], rp->c_replstat);
+		rtn = RC_REPLY;
+		break;
+	case RC_REPLBUFF:
+		if (!nfsd_cache_append(rqstp, &rp->c_replvec))
+			goto out;	/* should not happen */
+		rtn = RC_REPLY;
+		break;
+	default:
+		printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type);
+		rp->c_state = RC_UNUSED;
+	}
+
+	goto out;
+}
+
+/*
+ * Update a cache entry. This is called from nfsd_dispatch when
+ * the procedure has been executed and the complete reply is in
+ * rqstp->rq_res.
+ *
+ * We're copying around data here rather than swapping buffers because
+ * the toplevel loop requires max-sized buffers, which would be a waste
+ * of memory for a cache with a max reply size of 100 bytes (diropokres).
+ *
+ * If we should start to use different types of cache entries tailored
+ * specifically for attrstat and fh's, we may save even more space.
+ *
+ * Also note that a cachetype of RC_NOCACHE can legally be passed when
+ * nfsd failed to encode a reply that otherwise would have been cached.
+ * In this case, nfsd_cache_update is called with statp == NULL.
+ */
+/*
+ * Record the outcome of a request in the cache entry that
+ * nfsd_cache_lookup() attached to rqstp.
+ *
+ * @cachetype: RC_REPLSTAT stores the single status word at *statp,
+ *             RC_REPLBUFF stores a copy of the reply from statp to the
+ *             end of the head iovec.
+ * @statp:     start of the reply data inside rq_res.head[0], or NULL
+ *             when nothing is to be cached.
+ *
+ * The entry is marked RC_UNUSED instead when statp is NULL, when the
+ * reply is longer than 256 bytes, or when the copy cannot be allocated.
+ */
+void
+nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp)
+{
+	struct svc_cacherep *rp;
+	struct kvec	*resv = &rqstp->rq_res.head[0], *cachv;
+	int		len;
+
+	if (!(rp = rqstp->rq_cacherep) || cache_disabled)
+		return;
+
+	/*
+	 * Don't cache XDR failures.  Checked before the length is
+	 * computed so that no pointer arithmetic is done on a NULL statp.
+	 */
+	if (!statp) {
+		rp->c_state = RC_UNUSED;
+		return;
+	}
+
+	/* reply length in 32-bit words, counted from statp */
+	len = resv->iov_len - ((char*)statp - (char*)resv->iov_base);
+	len >>= 2;
+
+	/* Don't cache excessive amounts of data */
+	if (len > (256 >> 2)) {
+		rp->c_state = RC_UNUSED;
+		return;
+	}
+
+	switch (cachetype) {
+	case RC_REPLSTAT:
+		if (len != 1)
+			printk("nfsd: RC_REPLSTAT/reply len %d!\n",len);
+		rp->c_replstat = *statp;
+		break;
+	case RC_REPLBUFF:
+		cachv = &rp->c_replvec;
+		cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
+		if (!cachv->iov_base) {
+			spin_lock(&cache_lock);
+			rp->c_state = RC_UNUSED;
+			spin_unlock(&cache_lock);
+			return;
+		}
+		cachv->iov_len = len << 2;
+		memcpy(cachv->iov_base, statp, len << 2);
+		break;
+	}
+	spin_lock(&cache_lock);
+	lru_put_end(rp);
+	rp->c_secure = rqstp->rq_secure;
+	rp->c_type = cachetype;
+	rp->c_state = RC_DONE;
+	rp->c_timestamp = jiffies;
+	spin_unlock(&cache_lock);
+	return;
+}
+
+/*
+ * Copy cached reply to current reply buffer. Should always fit.
+ * FIXME as reply is in a page, we should just attach the page, and
+ * keep a refcount....
+ */
+static int
+nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
+{
+	struct kvec	*head = &rqstp->rq_res.head[0];
+	char		*dst;
+
+	/* refuse (return 0) if head plus cached data would exceed PAGE_SIZE */
+	if (head->iov_len + data->iov_len > PAGE_SIZE) {
+		printk(KERN_WARNING "nfsd: cached reply too large (%Zd).\n",
+				data->iov_len);
+		return 0;
+	}
+
+	/* append behind what is already in the head iovec; 1 == success */
+	dst = (char*)head->iov_base + head->iov_len;
+	memcpy(dst, data->iov_base, data->iov_len);
+	head->iov_len += data->iov_len;
+	return 1;
+}
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
new file mode 100644
index 000000000000..161afdcb8f7d
--- /dev/null
+++ b/fs/nfsd/nfsctl.c
@@ -0,0 +1,438 @@
+/*
+ * linux/fs/nfsd/nfsctl.c
+ *
+ * Syscall interface to knfsd.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/linkage.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/syscalls.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+
+#include <linux/nfs.h>
+#include <linux/nfsd_idmap.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/nfsd/syscall.h>
+#include <linux/nfsd/interface.h>
+
+#include <asm/uaccess.h>
+
+/*
+ * We have a single directory with the nodes listed below in it.
+ * nfsctl_transaction_write() uses a node's inode number as an index
+ * into write_op[], which is keyed by these values.
+ */
+enum {
+ NFSD_Root = 1,
+ NFSD_Svc,
+ NFSD_Add,
+ NFSD_Del,
+ NFSD_Export,
+ NFSD_Unexport,
+ NFSD_Getfd,
+ NFSD_Getfs,
+ NFSD_List,
+ NFSD_Fh,
+ NFSD_Threads,
+ NFSD_Leasetime,
+};
+
+/*
+ * write() for these nodes.
+ */
+static ssize_t write_svc(struct file *file, char *buf, size_t size);
+static ssize_t write_add(struct file *file, char *buf, size_t size);
+static ssize_t write_del(struct file *file, char *buf, size_t size);
+static ssize_t write_export(struct file *file, char *buf, size_t size);
+static ssize_t write_unexport(struct file *file, char *buf, size_t size);
+static ssize_t write_getfd(struct file *file, char *buf, size_t size);
+static ssize_t write_getfs(struct file *file, char *buf, size_t size);
+static ssize_t write_filehandle(struct file *file, char *buf, size_t size);
+static ssize_t write_threads(struct file *file, char *buf, size_t size);
+static ssize_t write_leasetime(struct file *file, char *buf, size_t size);
+
+/*
+ * Write handler for each transaction node, indexed by the node's enum
+ * value.  Slots without an initializer (NFSD_Root, NFSD_List) stay NULL
+ * and are rejected by nfsctl_transaction_write().
+ */
+static ssize_t (*write_op[])(struct file *, char *, size_t) = {
+ [NFSD_Svc] = write_svc,
+ [NFSD_Add] = write_add,
+ [NFSD_Del] = write_del,
+ [NFSD_Export] = write_export,
+ [NFSD_Unexport] = write_unexport,
+ [NFSD_Getfd] = write_getfd,
+ [NFSD_Getfs] = write_getfs,
+ [NFSD_Fh] = write_filehandle,
+ [NFSD_Threads] = write_threads,
+ [NFSD_Leasetime] = write_leasetime,
+};
+
+/*
+ * write() for the transaction nodes.  The handler is picked from
+ * write_op[] by inode number and runs on the buffer obtained from
+ * simple_transaction_get().  A positive handler result is the length
+ * of the response left in that buffer: it is registered with
+ * simple_transaction_set() and the write reports the full size as
+ * consumed.  Zero and negative results are returned unchanged.
+ */
+static ssize_t nfsctl_transaction_write(struct file *file, const char __user *buf, size_t size, loff_t *pos)
+{
+	ino_t ino =  file->f_dentry->d_inode->i_ino;
+	char *data;
+	ssize_t rv;
+
+	if (ino >= sizeof(write_op)/sizeof(write_op[0]))
+		return -EINVAL;
+	if (!write_op[ino])
+		return -EINVAL;
+
+	data = simple_transaction_get(file, buf, size);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	rv = write_op[ino](file, data, size);
+	if (rv <= 0)
+		return rv;
+
+	simple_transaction_set(file, rv);
+	return size;
+}
+
+/* File operations shared by every node that is served through write_op[]. */
+static struct file_operations transaction_ops = {
+ .write = nfsctl_transaction_write,
+ .read = simple_transaction_read,
+ .release = simple_transaction_release,
+};
+
+extern struct seq_operations nfs_exports_op;
+/* open() for the exports listing: attach the nfs_exports_op seq_file iterator. */
+static int exports_open(struct inode *inode, struct file *file)
+{
+	int err = seq_open(file, &nfs_exports_op);
+
+	return err;
+}
+
+/*
+ * seq_file based operations for the exports listing; used both for the
+ * "exports" node of this filesystem and for the proc entry created in
+ * init_nfsd().
+ */
+static struct file_operations exports_operations = {
+ .open = exports_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+/*----------------------------------------------------------------------------*/
+/*
+ * payload - write methods
+ * If the method has a response, the response should be put in buf,
+ * and the length returned. Otherwise return 0 or an -error.
+ */
+
+/*
+ * ".svc" node: the buffer holds a struct nfsctl_svc whose port and
+ * thread count are handed to nfsd_svc().  Writes shorter than the
+ * structure are rejected with -EINVAL.
+ */
+static ssize_t write_svc(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_svc *req = (struct nfsctl_svc*) buf;
+
+	if (size < sizeof(*req))
+		return -EINVAL;
+	return nfsd_svc(req->svc_port, req->svc_nthreads);
+}
+
+/*
+ * ".add" node: the buffer holds a struct nfsctl_client which is passed
+ * to exp_addclient().  Short writes are rejected with -EINVAL.
+ */
+static ssize_t write_add(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_client *client = (struct nfsctl_client *)buf;
+
+	if (size < sizeof(*client))
+		return -EINVAL;
+	return exp_addclient(client);
+}
+
+/*
+ * ".del" node: the buffer holds a struct nfsctl_client which is passed
+ * to exp_delclient().  Short writes are rejected with -EINVAL.
+ */
+static ssize_t write_del(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_client *client = (struct nfsctl_client *)buf;
+
+	if (size < sizeof(*client))
+		return -EINVAL;
+	return exp_delclient(client);
+}
+
+/*
+ * ".export" node: the buffer holds a struct nfsctl_export which is
+ * passed to exp_export().  Short writes are rejected with -EINVAL.
+ */
+static ssize_t write_export(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_export *export_req = (struct nfsctl_export*)buf;
+
+	if (size < sizeof(*export_req))
+		return -EINVAL;
+	return exp_export(export_req);
+}
+
+/*
+ * ".unexport" node: the buffer holds a struct nfsctl_export which is
+ * passed to exp_unexport().  Short writes are rejected with -EINVAL.
+ */
+static ssize_t write_unexport(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_export *export_req = (struct nfsctl_export*)buf;
+
+	if (size < sizeof(*export_req))
+		return -EINVAL;
+	return exp_unexport(export_req);
+}
+
+/*
+ * ".getfs" node.  The request is a struct nfsctl_fsparm naming a client
+ * address (AF_INET only), a path and a maximum handle size (clamped to
+ * NFS3_FHSIZE).  The client's auth domain is looked up and
+ * exp_rootfh() builds the handle.
+ *
+ * On success the buffer holds a struct knfsd_fh and the return value is
+ * the number of bytes of it that are in use (the bytes in front of
+ * fh_base plus fh_size).  Errors: -EINVAL for a short write,
+ * -EPROTONOSUPPORT for a non-AF_INET address, -EPERM when no auth
+ * domain matches the address, or whatever exp_rootfh() returns.
+ */
+static ssize_t write_getfs(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_fsparm *data;
+	struct sockaddr_in *sin;
+	struct auth_domain *clp;
+	int err = 0;
+	struct knfsd_fh *res;
+
+	if (size < sizeof(*data))
+		return -EINVAL;
+	data = (struct nfsctl_fsparm*)buf;
+	err = -EPROTONOSUPPORT;
+	if (data->gd_addr.sa_family != AF_INET)
+		goto out;
+	sin = (struct sockaddr_in *)&data->gd_addr;
+	if (data->gd_maxlen > NFS3_FHSIZE)
+		data->gd_maxlen = NFS3_FHSIZE;
+
+	/* the response is written over the request, in the same buffer */
+	res = (struct knfsd_fh*)buf;
+
+	exp_readlock();
+	if (!(clp = auth_unix_lookup(sin->sin_addr)))
+		err = -EPERM;
+	else {
+		err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen);
+		auth_domain_put(clp);
+	}
+	exp_readunlock();
+	/* offsetof() rather than casting a null-based member address to int */
+	if (err == 0)
+		err = res->fh_size + (int)offsetof(struct knfsd_fh, fh_base);
+ out:
+	return err;
+}
+
+/*
+ * ".getfd" node.  The request is a struct nfsctl_fdparm naming a client
+ * address (AF_INET only), an NFS version (2..NFSSVC_MAXVERS) and a
+ * path.  On success the buffer receives an NFS_FHSIZE byte handle,
+ * zero padded behind the fh_size bytes produced by exp_rootfh(), and
+ * NFS_FHSIZE is returned.  Errors: -EINVAL, -EPROTONOSUPPORT, -EPERM
+ * when no auth domain matches the address, or the exp_rootfh() result.
+ */
+static ssize_t write_getfd(struct file *file, char *buf, size_t size)
+{
+	struct nfsctl_fdparm *req = (struct nfsctl_fdparm*)buf;
+	struct sockaddr_in *sin;
+	struct auth_domain *clp;
+	struct knfsd_fh fh;
+	int err;
+
+	if (size < sizeof(*req))
+		return -EINVAL;
+	if (req->gd_addr.sa_family != AF_INET)
+		return -EPROTONOSUPPORT;
+	if (req->gd_version < 2 || req->gd_version > NFSSVC_MAXVERS)
+		return -EINVAL;
+
+	sin = (struct sockaddr_in *)&req->gd_addr;
+	exp_readlock();
+	clp = auth_unix_lookup(sin->sin_addr);
+	if (clp) {
+		err = exp_rootfh(clp, req->gd_path, &fh, NFS_FHSIZE);
+		auth_domain_put(clp);
+	} else
+		err = -EPERM;
+	exp_readunlock();
+
+	if (err != 0)
+		return err;
+
+	/* the response replaces the request in the transaction buffer */
+	memset(buf, 0, NFS_FHSIZE);
+	memcpy(buf, &fh.fh_base, fh.fh_size);
+	return NFS_FHSIZE;
+}
+
+/*
+ * "filehandle" node.
+ *
+ * Returns the length of the response left in buf, -EINVAL for a
+ * malformed request (empty, no trailing newline, missing or surplus
+ * words, maxsize below NFS_FHSIZE), -ENOMEM when unix_domain_find()
+ * yields nothing, or the error reported by get_int() / exp_rootfh().
+ */
+static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
+{
+	/* request is:
+	 *   domain path maxsize
+	 * response is
+	 *   filehandle
+	 *
+	 * qword quoting is used, so filehandle will be \x....
+	 */
+	char *dname, *path;
+	int maxsize;
+	char *mesg = buf;
+	int len;
+	struct auth_domain *dom;
+	struct knfsd_fh fh;
+
+	/* an empty write has no last byte to inspect: buf[size-1] would be buf[-1] */
+	if (size == 0)
+		return -EINVAL;
+
+	if (buf[size-1] != '\n')
+		return -EINVAL;
+	buf[size-1] = 0;
+
+	/* the words are unquoted in place, each one into the space it came from */
+	dname = mesg;
+	len = qword_get(&mesg, dname, size);
+	if (len <= 0) return -EINVAL;
+
+	path = dname+len+1;
+	len = qword_get(&mesg, path, size);
+	if (len <= 0) return -EINVAL;
+
+	len = get_int(&mesg, &maxsize);
+	if (len)
+		return len;
+
+	if (maxsize < NFS_FHSIZE)
+		return -EINVAL;
+	if (maxsize > NFS3_FHSIZE)
+		maxsize = NFS3_FHSIZE;
+
+	/* anything after maxsize makes the request invalid */
+	if (qword_get(&mesg, mesg, size)>0)
+		return -EINVAL;
+
+	/* we have all the words, they are in buf.. */
+	dom = unix_domain_find(dname);
+	if (!dom)
+		return -ENOMEM;
+
+	len = exp_rootfh(dom, path, &fh, maxsize);
+	auth_domain_put(dom);
+	if (len)
+		return len;
+
+	/* hex-encode the handle over the request and end the line with '\n' */
+	mesg = buf; len = SIMPLE_TRANSACTION_LIMIT;
+	qword_addhex(&mesg, &len, (char*)&fh.fh_base, fh.fh_size);
+	mesg[-1] = '\n';
+	return mesg - buf;
+}
+
+extern int nfsd_nrthreads(void);
+
+/*
+ * "threads" node.  A non-empty write must carry a non-negative integer,
+ * which is passed to nfsd_svc() as the thread count together with port
+ * 2049.  The reply is the value of nfsd_nrthreads() as a decimal line;
+ * its length is returned.
+ */
+static ssize_t write_threads(struct file *file, char *buf, size_t size)
+{
+	char *cursor = buf;
+
+	if (size > 0) {
+		int newthreads;
+		int rv = get_int(&cursor, &newthreads);
+
+		if (rv)
+			return rv;
+		if (newthreads < 0)
+			return -EINVAL;
+		rv = nfsd_svc(2049, newthreads);
+		if (rv)
+			return rv;
+	}
+	sprintf(buf, "%d\n", nfsd_nrthreads());
+	return strlen(buf);
+}
+
+extern time_t nfs4_leasetime(void);
+
+/*
+ * "nfsv4leasetime" node.  A non-empty write must carry an integer
+ * between 10 and 3600, which is passed to nfs4_reset_lease().  The
+ * reply is the value of nfs4_lease_time() as a decimal line; its
+ * length is returned.
+ */
+static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
+{
+	char *cursor = buf;
+
+	if (size > 0) {
+		int lease;
+		int rv = get_int(&cursor, &lease);
+
+		if (rv)
+			return rv;
+		if (lease < 10 || lease > 3600)
+			return -EINVAL;
+		nfs4_reset_lease(lease);
+	}
+	sprintf(buf, "%ld\n", nfs4_lease_time());
+	return strlen(buf);
+}
+
+/*----------------------------------------------------------------------------*/
+/*
+ * populating the filesystem.
+ */
+
+/*
+ * Populate the superblock through simple_fill_super() from a table
+ * that is indexed by the node enum, so each entry's position matches
+ * the index write_op[] uses for the same node.  The nfsv4leasetime
+ * node only exists when CONFIG_NFSD_V4 is set.
+ */
+static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
+{
+ static struct tree_descr nfsd_files[] = {
+ [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR},
+ [NFSD_Add] = {".add", &transaction_ops, S_IWUSR},
+ [NFSD_Del] = {".del", &transaction_ops, S_IWUSR},
+ [NFSD_Export] = {".export", &transaction_ops, S_IWUSR},
+ [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR},
+ [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_List] = {"exports", &exports_operations, S_IRUGO},
+ [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR},
+ [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
+#ifdef CONFIG_NFSD_V4
+ [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR},
+#endif
+ /* last one */ {""}
+ };
+ /* 0x6e667364 is "nfsd" in ASCII */
+ return simple_fill_super(sb, 0x6e667364, nfsd_files);
+}
+
+/* get_sb hook: obtain the superblock via get_sb_single(), filled by nfsd_fill_super(). */
+static struct super_block *nfsd_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
+{
+	struct super_block *sb;
+
+	sb = get_sb_single(fs_type, flags, data, nfsd_fill_super);
+	return sb;
+}
+
+/* The "nfsd" filesystem type registered by init_nfsd() and removed by exit_nfsd(). */
+static struct file_system_type nfsd_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfsd",
+ .get_sb = nfsd_get_sb,
+ .kill_sb = kill_litter_super,
+};
+
+/*
+ * Module initialisation: bring up the nfsd subsystems, create the
+ * fs/nfs/exports proc entry and register the "nfsd" filesystem.
+ * Returns the result of register_filesystem(); if that fails,
+ * everything set up here is shut down again, in the same order
+ * exit_nfsd() uses.
+ */
+static int __init init_nfsd(void)
+{
+	int retval;
+	printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
+
+	nfsd_stat_init();	/* Statistics */
+	nfsd_cache_init();	/* RPC reply cache */
+	nfsd_export_init();	/* Exports table */
+	nfsd_lockd_init();	/* lockd->nfsd callbacks */
+#ifdef CONFIG_NFSD_V4
+	nfsd_idmap_init();      /* Name to ID mapping */
+#endif /* CONFIG_NFSD_V4 */
+	if (proc_mkdir("fs/nfs", NULL)) {
+		struct proc_dir_entry *entry;
+		entry = create_proc_entry("fs/nfs/exports", 0, NULL);
+		if (entry)
+			entry->proc_fops =  &exports_operations;
+	}
+	retval = register_filesystem(&nfsd_fs_type);
+	if (retval) {
+		nfsd_export_shutdown();
+		nfsd_cache_shutdown();
+		remove_proc_entry("fs/nfs/exports", NULL);
+		remove_proc_entry("fs/nfs", NULL);
+		nfsd_stat_shutdown();
+		nfsd_lockd_shutdown();
+#ifdef CONFIG_NFSD_V4
+		/* the idmapper was initialised above, so undo it here as well */
+		nfsd_idmap_shutdown();
+#endif /* CONFIG_NFSD_V4 */
+	}
+	return retval;
+}
+
+/*
+ * Module exit: shut down everything init_nfsd() set up and unregister
+ * the "nfsd" filesystem.
+ */
+static void __exit exit_nfsd(void)
+{
+ nfsd_export_shutdown();
+ nfsd_cache_shutdown();
+ remove_proc_entry("fs/nfs/exports", NULL);
+ remove_proc_entry("fs/nfs", NULL);
+ nfsd_stat_shutdown();
+ nfsd_lockd_shutdown();
+#ifdef CONFIG_NFSD_V4
+ nfsd_idmap_shutdown();
+#endif /* CONFIG_NFSD_V4 */
+ unregister_filesystem(&nfsd_fs_type);
+}
+
+MODULE_AUTHOR("Olaf Kirch <okir@monad.swb.de>");
+MODULE_LICENSE("GPL");
+module_init(init_nfsd)
+module_exit(exit_nfsd)
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
new file mode 100644
index 000000000000..7a3e397b4ed3
--- /dev/null
+++ b/fs/nfsd/nfsfh.c
@@ -0,0 +1,532 @@
+/*
+ * linux/fs/nfsd/nfsfh.c
+ *
+ * NFS server file handle treatment.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ * Portions Copyright (C) 1999 G. Allen Morris III <gam3@acm.org>
+ * Extensive rewrite by Neil Brown <neilb@cse.unsw.edu.au> Southern-Spring 1999
+ * ... and again Southern-Winter 2001 to support export_operations
+ */
+
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp_lock.h>
+#include <linux/fs.h>
+#include <linux/unistd.h>
+#include <linux/string.h>
+#include <linux/stat.h>
+#include <linux/dcache.h>
+#include <linux/mount.h>
+#include <asm/pgtable.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_FH
+#define NFSD_PARANOIA 1
+/* #define NFSD_DEBUG_VERBOSE 1 */
+
+
+static int nfsd_nr_verified;
+static int nfsd_nr_put;
+
+extern struct export_operations export_op_default;
+
+#define CALL(ops,fun) ((ops->fun)?(ops->fun):export_op_default.fun)
+
+/*
+ * our acceptability function.
+ * if NOSUBTREECHECK, accept anything
+ * if not, require that we can walk up to exp->ex_dentry
+ * doing some checks on the 'x' bits
+ */
+/* Returns 1 if the dentry is acceptable for the export passed in expv, 0 if not. */
+static int nfsd_acceptable(void *expv, struct dentry *dentry)
+{
+ struct svc_export *exp = expv;
+ int rv;
+ struct dentry *tdentry;
+ struct dentry *parent;
+
+ if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
+ return 1;
+
+ /* tdentry always holds exactly one reference, dropped before returning */
+ tdentry = dget(dentry);
+ while (tdentry != exp->ex_dentry && ! IS_ROOT(tdentry)) {
+ /* make sure parents give x permission to user */
+ int err;
+ parent = dget_parent(tdentry);
+ err = permission(parent->d_inode, MAY_EXEC, NULL);
+ if (err < 0) {
+ dput(parent);
+ break;
+ }
+ /* step up: swap the reference on the child for the one on its parent */
+ dput(tdentry);
+ tdentry = parent;
+ }
+ /* the walk only succeeded if it stopped at the export's own dentry */
+ if (tdentry != exp->ex_dentry)
+ dprintk("nfsd_acceptable failed at %p %s\n", tdentry, tdentry->d_name.name);
+ rv = (tdentry == exp->ex_dentry);
+ dput(tdentry);
+ return rv;
+}
+
+/* Type check. The correct error return for type mismatches does not seem to be
+ * generally agreed upon. SunOS seems to use EISDIR if file isn't S_IFREG; a
+ * comment in the NFSv3 spec says this is incorrect (implementation notes for
+ * the write call).
+ */
+static inline int
+nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
+{
+	int fmt = mode & S_IFMT;
+
+	/* type > 0: the object has to be of exactly this type */
+	if (type > 0 && fmt != type) {
+		if (rqstp->rq_vers == 4 && fmt == S_IFLNK)
+			return nfserr_symlink;
+		if (type == S_IFDIR)
+			return nfserr_notdir;
+		if (fmt == S_IFDIR)
+			return nfserr_isdir;
+		return nfserr_inval;
+	}
+
+	/*
+	 * type < 0: the object must not be of type -type
+	 * (used when creating hardlinks - not to a dir)
+	 */
+	if (type < 0 && fmt == -type) {
+		if (rqstp->rq_vers == 4 && fmt == S_IFLNK)
+			return nfserr_symlink;
+		if (type == -S_IFDIR)
+			return nfserr_isdir;
+		return nfserr_notdir;
+	}
+
+	/* type == 0, or the mode satisfies the request */
+	return 0;
+}
+
+/*
+ * Perform sanity checks on the dentry in a client's file handle.
+ *
+ * Note that the file handle dentry may need to be freed even after
+ * an error return.
+ *
+ * This is only called at the start of an nfsproc call, so fhp points to
+ * a svc_fh which is all 0 except for the over-the-wire file handle.
+ */
+/*
+ * Returns 0 on success, otherwise an nfserr_* status.
+ *
+ * rqstp  - the request being processed
+ * fhp    - file handle; if fh_dentry is not yet set it is resolved here
+ *          and fh_dentry/fh_export are filled in
+ * type   - type constraint handed to nfsd_mode_check()
+ * access - access mask handed to nfsd_permission()
+ */
+u32
+fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
+{
+ struct knfsd_fh *fh = &fhp->fh_handle;
+ struct svc_export *exp = NULL;
+ struct dentry *dentry;
+ u32 error = 0;
+
+ dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp));
+
+ /* keep this filehandle for possible reference when encoding attributes */
+ rqstp->rq_reffh = fh;
+
+ if (!fhp->fh_dentry) {
+ __u32 *datap=NULL;
+ __u32 tfh[3]; /* filehandle fragment for oldstyle filehandles */
+ int fileid_type;
+ /* number of 32-bit words of the handle not yet consumed */
+ int data_left = fh->fh_size/4;
+
+ /* malformed handles are "stale" for v2 and "bad handle" for later versions */
+ error = nfserr_stale;
+ if (rqstp->rq_client == NULL)
+ goto out;
+ if (rqstp->rq_vers > 2)
+ error = nfserr_badhandle;
+ if (rqstp->rq_vers == 4 && fh->fh_size == 0)
+ return nfserr_nofilehandle;
+
+ if (fh->fh_version == 1) {
+ int len;
+ datap = fh->fh_auth;
+ if (--data_left<0) goto out;
+ /* only auth type 0 is accepted */
+ switch (fh->fh_auth_type) {
+ case 0: break;
+ default: goto out;
+ }
+ len = key_len(fh->fh_fsid_type) / 4;
+ if (len == 0) goto out;
+ if (fh->fh_fsid_type == 2) {
+ /* deprecated, convert to type 3 */
+ len = 3;
+ fh->fh_fsid_type = 3;
+ fh->fh_fsid[0] = new_encode_dev(MKDEV(ntohl(fh->fh_fsid[0]), ntohl(fh->fh_fsid[1])));
+ fh->fh_fsid[1] = fh->fh_fsid[2];
+ }
+ if ((data_left -= len)<0) goto out;
+ exp = exp_find(rqstp->rq_client, fh->fh_fsid_type, datap, &rqstp->rq_chandle);
+ /* skip the fsid; datap now points at the file id part */
+ datap += len;
+ } else {
+ dev_t xdev;
+ ino_t xino;
+ if (fh->fh_size != NFS_FHSIZE)
+ goto out;
+ /* assume old filehandle format */
+ xdev = old_decode_dev(fh->ofh_xdev);
+ xino = u32_to_ino_t(fh->ofh_xino);
+ mk_fsid_v0(tfh, xdev, xino);
+ exp = exp_find(rqstp->rq_client, 0, tfh, &rqstp->rq_chandle);
+ }
+
+ /* -EAGAIN from the export lookup: drop the request */
+ error = nfserr_dropit;
+ if (IS_ERR(exp) && PTR_ERR(exp) == -EAGAIN)
+ goto out;
+
+ error = nfserr_stale;
+ if (!exp || IS_ERR(exp))
+ goto out;
+
+ /* Check if the request originated from a secure port. */
+ error = nfserr_perm;
+ if (!rqstp->rq_secure && EX_SECURE(exp)) {
+ printk(KERN_WARNING
+ "nfsd: request from insecure port (%u.%u.%u.%u:%d)!\n",
+ NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
+ ntohs(rqstp->rq_addr.sin_port));
+ goto out;
+ }
+
+ /* Set user creds for this exportpoint */
+ error = nfsd_setuser(rqstp, exp);
+ if (error) {
+ error = nfserrno(error);
+ goto out;
+ }
+
+ /*
+ * Look up the dentry using the NFS file handle.
+ */
+ error = nfserr_stale;
+ if (rqstp->rq_vers > 2)
+ error = nfserr_badhandle;
+
+ if (fh->fh_version != 1) {
+ /* old style handle: build a three word file id in tfh */
+ tfh[0] = fh->ofh_ino;
+ tfh[1] = fh->ofh_generation;
+ tfh[2] = fh->ofh_dirino;
+ datap = tfh;
+ data_left = 3;
+ if (fh->ofh_dirino == 0)
+ fileid_type = 1;
+ else
+ fileid_type = 2;
+ } else
+ fileid_type = fh->fh_fileid_type;
+
+ /* file id type 0 refers to the export's own dentry */
+ if (fileid_type == 0)
+ dentry = dget(exp->ex_dentry);
+ else {
+ struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op;
+ dentry = CALL(nop,decode_fh)(exp->ex_mnt->mnt_sb,
+ datap, data_left,
+ fileid_type,
+ nfsd_acceptable, exp);
+ }
+ if (dentry == NULL)
+ goto out;
+ if (IS_ERR(dentry)) {
+ /* -EINVAL keeps the stale/badhandle error chosen above */
+ if (PTR_ERR(dentry) != -EINVAL)
+ error = nfserrno(PTR_ERR(dentry));
+ goto out;
+ }
+#ifdef NFSD_PARANOIA
+ if (S_ISDIR(dentry->d_inode->i_mode) &&
+ (dentry->d_flags & DCACHE_DISCONNECTED)) {
+ printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+ }
+#endif
+
+ fhp->fh_dentry = dentry;
+ fhp->fh_export = exp;
+ nfsd_nr_verified++;
+ } else {
+ /* just rechecking permissions
+ * (e.g. nfsproc_create calls fh_verify, then nfsd_create does as well)
+ */
+ dprintk("nfsd: fh_verify - just checking\n");
+ dentry = fhp->fh_dentry;
+ exp = fhp->fh_export;
+ }
+ /* extra reference on the export for the checks below; dropped again at out: */
+ cache_get(&exp->h);
+
+ error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
+ if (error)
+ goto out;
+
+ /* Finally, check access permissions. */
+ error = nfsd_permission(exp, dentry, access);
+
+#ifdef NFSD_PARANOIA_EXTREME
+ if (error) {
+ printk("fh_verify: %s/%s permission failure, acc=%x, error=%d\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name, access, (error >> 24));
+ }
+#endif
+out:
+ if (exp && !IS_ERR(exp))
+ exp_put(exp);
+ if (error == nfserr_stale)
+ nfsdstats.fh_stale++;
+ return error;
+}
+
+
+/*
+ * Compose a file handle for an NFS reply.
+ *
+ * Note that when first composed, the dentry may not yet have
+ * an inode. In this case a call to fh_update should be made
+ * before the fh goes out on the wire ...
+ */
+/*
+ * Have the filesystem's encode_fh operation (or the default one) write
+ * the file id for dentry at datap.  *maxsize is passed through to
+ * encode_fh, and the encode_fh result is returned.  The export's own
+ * dentry needs no file id: *maxsize is set to 0 and 0 is returned.
+ */
+static inline int _fh_update(struct dentry *dentry, struct svc_export *exp,
+			     __u32 *datap, int *maxsize)
+{
+	struct export_operations *nop = exp->ex_mnt->mnt_sb->s_export_op;
+	int subtree_check;
+
+	if (dentry == exp->ex_dentry) {
+		*maxsize = 0;
+		return 0;
+	}
+
+	subtree_check = !(exp->ex_flags&NFSEXP_NOSUBTREECHECK);
+	return CALL(nop,encode_fh)(dentry, datap, maxsize, subtree_check);
+}
+
+/*
+ * for composing old style file handles
+ */
+static inline void _fh_update_old(struct dentry *dentry,
+				  struct svc_export *exp,
+				  struct knfsd_fh *fh)
+{
+	struct inode *inode = dentry->d_inode;
+
+	fh->ofh_ino = ino_t_to_u32(inode->i_ino);
+	fh->ofh_generation = inode->i_generation;
+
+	/* directories, and exports without subtree checking, get ofh_dirino cleared */
+	if (S_ISDIR(inode->i_mode) || (exp->ex_flags & NFSEXP_NOSUBTREECHECK))
+		fh->ofh_dirino = 0;
+}
+
+/*
+ * Fill in fhp for dentry within export exp.
+ *
+ * Takes a reference on both the dentry and the export and stores them
+ * in fhp.  Returns 0, or nfserr_opnotsupp when the file id type ends up
+ * as 255; in that case the references are already stored in fhp.
+ */
+int
+fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh)
+{
+ /* ref_fh is a reference file handle.
+ * if it is non-null, then we should compose a filehandle which is
+ * of the same version, where possible.
+ * Currently, that means that if ref_fh->fh_handle.fh_version == 0xca
+ * Then create a 32byte filehandle using nfs_fhbase_old
+ *
+ */
+
+ u8 ref_fh_version = 0;
+ u8 ref_fh_fsid_type = 0;
+ struct inode * inode = dentry->d_inode;
+ struct dentry *parent = dentry->d_parent;
+ __u32 *datap;
+ dev_t ex_dev = exp->ex_dentry->d_inode->i_sb->s_dev;
+
+ dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %s/%s, ino=%ld)\n",
+ MAJOR(ex_dev), MINOR(ex_dev),
+ (long) exp->ex_dentry->d_inode->i_ino,
+ parent->d_name.name, dentry->d_name.name,
+ (inode ? inode->i_ino : 0));
+
+ if (ref_fh) {
+ ref_fh_version = ref_fh->fh_handle.fh_version;
+ if (ref_fh_version == 0xca)
+ ref_fh_fsid_type = 0;
+ else
+ ref_fh_fsid_type = ref_fh->fh_handle.fh_fsid_type;
+ /* unknown fsid types fall back to type 0 */
+ if (ref_fh_fsid_type > 3)
+ ref_fh_fsid_type = 0;
+
+ /* make sure ref_fh type works for given export */
+ if (ref_fh_fsid_type == 1 &&
+ !(exp->ex_flags & NFSEXP_FSID)) {
+ /* if we don't have an fsid, we cannot provide one... */
+ ref_fh_fsid_type = 0;
+ }
+ } else if (exp->ex_flags & NFSEXP_FSID)
+ ref_fh_fsid_type = 1;
+
+ if (!old_valid_dev(ex_dev) && ref_fh_fsid_type == 0) {
+ /* for newer device numbers, we must use a newer fsid format */
+ ref_fh_version = 1;
+ ref_fh_fsid_type = 3;
+ }
+ if (old_valid_dev(ex_dev) &&
+ (ref_fh_fsid_type == 2 || ref_fh_fsid_type == 3))
+ /* must use type1 for smaller device numbers */
+ ref_fh_fsid_type = 0;
+
+ /* composing in place over the reference handle: release it first */
+ if (ref_fh == fhp)
+ fh_put(ref_fh);
+
+ if (fhp->fh_locked || fhp->fh_dentry) {
+ printk(KERN_ERR "fh_compose: fh %s/%s not initialized!\n",
+ parent->d_name.name, dentry->d_name.name);
+ }
+ if (fhp->fh_maxsize < NFS_FHSIZE)
+ printk(KERN_ERR "fh_compose: called with maxsize %d! %s/%s\n",
+ fhp->fh_maxsize, parent->d_name.name, dentry->d_name.name);
+
+ fhp->fh_dentry = dget(dentry); /* our internal copy */
+ fhp->fh_export = exp;
+ cache_get(&exp->h);
+
+ if (ref_fh_version == 0xca) {
+ /* old style filehandle please */
+ memset(&fhp->fh_handle.fh_base, 0, NFS_FHSIZE);
+ fhp->fh_handle.fh_size = NFS_FHSIZE;
+ fhp->fh_handle.ofh_dcookie = 0xfeebbaca;
+ fhp->fh_handle.ofh_dev = old_encode_dev(ex_dev);
+ fhp->fh_handle.ofh_xdev = fhp->fh_handle.ofh_dev;
+ fhp->fh_handle.ofh_xino = ino_t_to_u32(exp->ex_dentry->d_inode->i_ino);
+ fhp->fh_handle.ofh_dirino = ino_t_to_u32(parent_ino(dentry));
+ /* a negative dentry has no inode yet; see fh_update() */
+ if (inode)
+ _fh_update_old(dentry, exp, &fhp->fh_handle);
+ } else {
+ int len;
+ fhp->fh_handle.fh_version = 1;
+ fhp->fh_handle.fh_auth_type = 0;
+ datap = fhp->fh_handle.fh_auth+0;
+ fhp->fh_handle.fh_fsid_type = ref_fh_fsid_type;
+ switch (ref_fh_fsid_type) {
+ case 0:
+ /*
+ * fsid_type 0:
+ * 2byte major, 2byte minor, 4byte inode
+ */
+ mk_fsid_v0(datap, ex_dev,
+ exp->ex_dentry->d_inode->i_ino);
+ break;
+ case 1:
+ /* fsid_type 1 == 4 bytes filesystem id */
+ mk_fsid_v1(datap, exp->ex_fsid);
+ break;
+ case 2:
+ /*
+ * fsid_type 2:
+ * 4byte major, 4byte minor, 4byte inode
+ */
+ mk_fsid_v2(datap, ex_dev,
+ exp->ex_dentry->d_inode->i_ino);
+ break;
+ case 3:
+ /*
+ * fsid_type 3:
+ * 4byte devicenumber, 4byte inode
+ */
+ mk_fsid_v3(datap, ex_dev,
+ exp->ex_dentry->d_inode->i_ino);
+ break;
+ }
+ /* 4 bytes in front of the fsid, then the fsid itself */
+ len = key_len(ref_fh_fsid_type);
+ datap += len/4;
+ fhp->fh_handle.fh_size = 4 + len;
+
+ if (inode) {
+ /* words still available for the file id */
+ int size = (fhp->fh_maxsize-len-4)/4;
+ fhp->fh_handle.fh_fileid_type =
+ _fh_update(dentry, exp, datap, &size);
+ fhp->fh_handle.fh_size += size*4;
+ }
+ /* NOTE(review): fh_fileid_type is not assigned above when inode is NULL - confirm callers pass a zeroed handle */
+ if (fhp->fh_handle.fh_fileid_type == 255)
+ return nfserr_opnotsupp;
+ }
+
+ nfsd_nr_verified++;
+ return 0;
+}
+
+/*
+ * Update file handle information after changing a dentry.
+ * This is only called by nfsd_create, nfsd_create_v3 and nfsd_proc_create
+ */
+/*
+ * Returns nfserr_opnotsupp when the new file id type is 255, and 0 in
+ * every other case - including the three complaint paths below, which
+ * only log an error.
+ */
+int
+fh_update(struct svc_fh *fhp)
+{
+ struct dentry *dentry;
+ __u32 *datap;
+
+ if (!fhp->fh_dentry)
+ goto out_bad;
+
+ dentry = fhp->fh_dentry;
+ if (!dentry->d_inode)
+ goto out_negative;
+ if (fhp->fh_handle.fh_version != 1) {
+ _fh_update_old(dentry, fhp->fh_export, &fhp->fh_handle);
+ } else {
+ int size;
+ /* a non-zero file id type means the file id has already been filled in */
+ if (fhp->fh_handle.fh_fileid_type != 0)
+ goto out_uptodate;
+ /* the file id goes right behind the fh_size bytes already in use */
+ datap = fhp->fh_handle.fh_auth+
+ fhp->fh_handle.fh_size/4 -1;
+ size = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
+ fhp->fh_handle.fh_fileid_type =
+ _fh_update(dentry, fhp->fh_export, datap, &size);
+ fhp->fh_handle.fh_size += size*4;
+ if (fhp->fh_handle.fh_fileid_type == 255)
+ return nfserr_opnotsupp;
+ }
+out:
+ return 0;
+
+out_bad:
+ printk(KERN_ERR "fh_update: fh not verified!\n");
+ goto out;
+out_negative:
+ printk(KERN_ERR "fh_update: %s/%s still negative!\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+ goto out;
+out_uptodate:
+ printk(KERN_ERR "fh_update: %s/%s already up-to-date!\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+ goto out;
+}
+
+/*
+ * Release a file handle.
+ */
+void
+fh_put(struct svc_fh *fhp)
+{
+	struct dentry *held = fhp->fh_dentry;
+	struct svc_export *exp_ref = fhp->fh_export;
+
+	/* unlock, then drop the dentry reference and clear the per-handle state */
+	if (held != NULL) {
+		fh_unlock(fhp);
+		fhp->fh_dentry = NULL;
+		dput(held);
+#ifdef CONFIG_NFSD_V3
+		fhp->fh_pre_saved = 0;
+		fhp->fh_post_saved = 0;
+#endif
+		nfsd_nr_put++;
+	}
+
+	/* drop the export reference as well */
+	if (exp_ref != NULL) {
+		svc_export_put(&exp_ref->h, &svc_export_cache);
+		fhp->fh_export = NULL;
+	}
+}
+
+/*
+ * Shorthand for dprintk()'s
+ */
+/*
+ * Render the handle size and the first six words of the handle as
+ * text.  The result lives in a single static buffer, so each call
+ * overwrites the string returned by the previous one.
+ */
+char * SVCFH_fmt(struct svc_fh *fhp)
+{
+	static char buf[80];
+	struct knfsd_fh *handle = &fhp->fh_handle;
+
+	sprintf(buf, "%d: %08x %08x %08x %08x %08x %08x",
+		handle->fh_size,
+		handle->fh_base.fh_pad[0], handle->fh_base.fh_pad[1],
+		handle->fh_base.fh_pad[2], handle->fh_base.fh_pad[3],
+		handle->fh_base.fh_pad[4], handle->fh_base.fh_pad[5]);
+	return buf;
+}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
new file mode 100644
index 000000000000..757f9d208034
--- /dev/null
+++ b/fs/nfsd/nfsproc.c
@@ -0,0 +1,605 @@
+/*
+ * linux/fs/nfsd/nfsproc.c
+ *
+ * Process version 2 NFS requests.
+ *
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/linkage.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/in.h>
+#include <linux/namei.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/cache.h>
+#include <linux/nfsd/xdr.h>
+
+/* Aliases that let these struct tags be written without the `struct' keyword. */
+typedef struct svc_rqst svc_rqst;
+typedef struct svc_buf svc_buf;
+
+/* NOTE(review): presumably selects the debug category used by dprintk() in this file -- confirm against the dprintk definition. */
+#define NFSDDBG_FACILITY NFSDDBG_PROC
+
+
+/*
+ * NULL procedure: ignores its arguments, does no work and reports success.
+ */
+static int
+nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+ return nfs_ok;
+}
+
+/*
+ * Get a file's attributes
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
+		  struct nfsd_attrstat *resp)
+{
+	int status;
+
+	dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
+
+	/* Verify the reply's copy of the handle, not the argument's. */
+	fh_copy(&resp->fh, &argp->fh);
+	status = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	return status;
+}
+
+/*
+ * Set a file's attributes
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp,
+		  struct nfsd_attrstat *resp)
+{
+	int status;
+
+	dprintk("nfsd: SETATTR %s, valid=%x, size=%ld\n",
+		SVCFH_fmt(&argp->fh),
+		argp->attrs.ia_valid, (long) argp->attrs.ia_size);
+
+	/* Apply the attributes through the reply's copy of the handle. */
+	fh_copy(&resp->fh, &argp->fh);
+	status = nfsd_setattr(rqstp, &resp->fh, &argp->attrs, 0, (time_t)0);
+	return status;
+}
+
+/*
+ * Look up a path name component
+ * Note: the dentry in the resp->fh may be negative if the file
+ * doesn't exist yet.
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
+		 struct nfsd_diropres *resp)
+{
+	int status;
+
+	dprintk("nfsd: LOOKUP %s %.*s\n",
+		SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	/* The result handle starts out empty and is filled by the lookup. */
+	fh_init(&resp->fh, NFS_FHSIZE);
+	status = nfsd_lookup(rqstp, &argp->fh, argp->name, argp->len,
+			     &resp->fh);
+
+	/* The directory handle is no longer needed once the lookup is done. */
+	fh_put(&argp->fh);
+	return status;
+}
+
+/*
+ * Read a symlink.
+ */
+static int
+nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp,
+		   struct nfsd_readlinkres *resp)
+{
+	int status;
+
+	dprintk("nfsd: READLINK %s\n", SVCFH_fmt(&argp->fh));
+
+	/* Pass in the maximum length; resp->len is handed to
+	 * nfsd_readlink() by address.
+	 */
+	resp->len = NFS_MAXPATHLEN;
+	status = nfsd_readlink(rqstp, &argp->fh, argp->buffer, &resp->len);
+
+	fh_put(&argp->fh);
+	return status;
+}
+
+/*
+ * Read a portion of a file.
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
+	       struct nfsd_readres *resp)
+{
+	int status;
+
+	dprintk("nfsd: READ %s %d bytes at %d\n",
+		SVCFH_fmt(&argp->fh),
+		argp->count, argp->offset);
+
+	/* Clamp requests above the server's maximum block size and log
+	 * the address of the client that sent them.
+	 */
+	if (argp->count > NFSSVC_MAXBLKSIZE) {
+		printk(KERN_NOTICE
+			"oversized read request from %u.%u.%u.%u:%d (%d bytes)\n",
+			NIPQUAD(rqstp->rq_addr.sin_addr.s_addr),
+			ntohs(rqstp->rq_addr.sin_port),
+			argp->count);
+		argp->count = NFSSVC_MAXBLKSIZE;
+	}
+
+	/* Reserve reply space: 19 words (1 for status, 17 for fattr,
+	 * 1 for the byte count), the payload itself and 4 further bytes.
+	 */
+	svc_reserve(rqstp, (19 << 2) + argp->count + 4);
+
+	resp->count = argp->count;
+	status = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
+			   argp->offset, argp->vec, argp->vlen,
+			   &resp->count);
+	return status;
+}
+
+/*
+ * Write data to a file
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp,
+		struct nfsd_attrstat *resp)
+{
+	/* nfsd_write() receives this flag by address; it starts out as 1. */
+	int stable = 1;
+	int status;
+
+	dprintk("nfsd: WRITE %s %d bytes at %d\n",
+		SVCFH_fmt(&argp->fh),
+		argp->len, argp->offset);
+
+	status = nfsd_write(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
+			    argp->offset, argp->vec, argp->vlen,
+			    argp->len, &stable);
+	return status;
+}
+
+/*
+ * CREATE processing is complicated. The keyword here is `overloaded.'
+ * The parent directory is kept locked between the check for existence
+ * and the actual create() call in compliance with VFS protocols.
+ * N.B. After this call _both_ argp->fh and resp->fh need an fh_put
+ */
+static int
+nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
+ struct nfsd_diropres *resp)
+{
+ svc_fh *dirfhp = &argp->fh;
+ svc_fh *newfhp = &resp->fh;
+ struct iattr *attr = &argp->attrs;
+ struct inode *inode;
+ struct dentry *dchild;
+ int nfserr, type, mode;
+ /* `wanted' is the device number decoded from the size attribute;
+ * it is only used below for char/block special files.
+ */
+ dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size);
+
+ dprintk("nfsd: CREATE %s %.*s\n",
+ SVCFH_fmt(dirfhp), argp->len, argp->name);
+
+ /* First verify the parent file handle */
+ nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC);
+ if (nfserr)
+ goto done; /* must fh_put dirfhp even on error */
+
+ /* Check for MAY_WRITE in nfsd_create if necessary */
+
+ /* An empty name is refused with nfserr_acces, "." and ".." with
+ * nfserr_exist.
+ */
+ nfserr = nfserr_acces;
+ if (!argp->len)
+ goto done;
+ nfserr = nfserr_exist;
+ if (isdotent(argp->name, argp->len))
+ goto done;
+ /* Lock the parent directory and look the name up in it. */
+ fh_lock(dirfhp);
+ dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
+ if (IS_ERR(dchild)) {
+ nfserr = nfserrno(PTR_ERR(dchild));
+ goto out_unlock;
+ }
+ /* Build the child's file handle. A negative dentry (no inode) is
+ * reported as nfserr_noent, which is the only error allowed to
+ * continue past the check below.
+ */
+ fh_init(newfhp, NFS_FHSIZE);
+ nfserr = fh_compose(newfhp, dirfhp->fh_export, dchild, dirfhp);
+ if (!nfserr && !dchild->d_inode)
+ nfserr = nfserr_noent;
+ dput(dchild);
+ if (nfserr) {
+ if (nfserr != nfserr_noent)
+ goto out_unlock;
+ /*
+ * If the new file handle wasn't verified, we can't tell
+ * whether the file exists or not. Time to bail ...
+ */
+ nfserr = nfserr_acces;
+ if (!newfhp->fh_dentry) {
+ printk(KERN_WARNING
+ "nfsd_proc_create: file handle not verified\n");
+ goto out_unlock;
+ }
+ }
+
+ /* NULL here means the target does not exist yet. */
+ inode = newfhp->fh_dentry->d_inode;
+
+ /* Unfudge the mode bits */
+ if (attr->ia_valid & ATTR_MODE) {
+ type = attr->ia_mode & S_IFMT;
+ mode = attr->ia_mode & ~S_IFMT;
+ if (!type) {
+ /* no type, so if target exists, assume same as that,
+ * else assume a file */
+ if (inode) {
+ type = inode->i_mode & S_IFMT;
+ switch(type) {
+ case S_IFCHR:
+ case S_IFBLK:
+ /* reserve rdev for later checking */
+ rdev = inode->i_rdev;
+ attr->ia_valid |= ATTR_SIZE;
+
+ /* FALLTHROUGH */
+ case S_IFIFO:
+ /* this is probably a permission check..
+ * at least IRIX implements perm checking on
+ * echo thing > device-special-file-or-pipe
+ * by doing a CREATE with type==0
+ */
+ nfserr = nfsd_permission(newfhp->fh_export,
+ newfhp->fh_dentry,
+ MAY_WRITE|MAY_LOCAL_ACCESS);
+ /* a read-only filesystem error is tolerated here */
+ if (nfserr && nfserr != nfserr_rofs)
+ goto out_unlock;
+ }
+ } else
+ type = S_IFREG;
+ }
+ } else if (inode) {
+ /* No mode supplied: take type and mode from the existing inode. */
+ type = inode->i_mode & S_IFMT;
+ mode = inode->i_mode & ~S_IFMT;
+ } else {
+ type = S_IFREG;
+ mode = 0; /* ??? */
+ }
+
+ /* From here on the attributes carry only permission bits; the file
+ * type is passed separately in `type'.
+ */
+ attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode = mode;
+
+ /* Special treatment for non-regular files according to the
+ * gospel of sun micro
+ */
+ if (type != S_IFREG) {
+ int is_borc = 0;
+ if (type != S_IFBLK && type != S_IFCHR) {
+ rdev = 0;
+ } else if (type == S_IFCHR && !(attr->ia_valid & ATTR_SIZE)) {
+ /* If you think you've seen the worst, grok this. */
+ type = S_IFIFO;
+ } else {
+ /* Okay, char or block special */
+ /* NOTE(review): is_borc is set but never read in this function. */
+ is_borc = 1;
+ if (!rdev)
+ rdev = wanted;
+ }
+
+ /* we've used the SIZE information, so discard it */
+ attr->ia_valid &= ~ATTR_SIZE;
+
+ /* Make sure the type and device matches */
+ nfserr = nfserr_exist;
+ if (inode && type != (inode->i_mode & S_IFMT))
+ goto out_unlock;
+ }
+
+ nfserr = 0;
+ if (!inode) {
+ /* File doesn't exist. Create it and set attrs */
+ nfserr = nfsd_create(rqstp, dirfhp, argp->name, argp->len,
+ attr, type, rdev, newfhp);
+ } else if (type == S_IFREG) {
+ dprintk("nfsd: existing %s, valid=%x, size=%ld\n",
+ argp->name, attr->ia_valid, (long) attr->ia_size);
+ /* File already exists. We ignore all attributes except
+ * size, so that creat() behaves exactly like
+ * open(..., O_CREAT|O_TRUNC|O_WRONLY).
+ */
+ attr->ia_valid &= ATTR_SIZE;
+ if (attr->ia_valid)
+ nfserr = nfsd_setattr(rqstp, newfhp, attr, 0, (time_t)0);
+ }
+
+out_unlock:
+ /* We don't really need to unlock, as fh_put does it. */
+ fh_unlock(dirfhp);
+
+done:
+ /* Every exit path releases the parent handle; resp->fh is left to
+ * the caller.
+ */
+ fh_put(dirfhp);
+ return nfserr;
+}
+
+static int
+nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
+		 void *resp)
+{
+	int status;
+
+	dprintk("nfsd: REMOVE %s %.*s\n", SVCFH_fmt(&argp->fh),
+		argp->len, argp->name);
+
+	/* A negated S_IFDIR tells nfsd_unlink() the target must not be
+	 * a directory.
+	 */
+	status = nfsd_unlink(rqstp, &argp->fh, -S_IFDIR,
+			     argp->name, argp->len);
+
+	fh_put(&argp->fh);
+	return status;
+}
+
+static int
+nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp,
+		 void *resp)
+{
+	int status;
+
+	/* Two separate debug lines: source, then destination. */
+	dprintk("nfsd: RENAME %s %.*s -> \n",
+		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname);
+	dprintk("nfsd: -> %s %.*s\n",
+		SVCFH_fmt(&argp->tfh), argp->tlen, argp->tname);
+
+	status = nfsd_rename(rqstp,
+			     &argp->ffh, argp->fname, argp->flen,
+			     &argp->tfh, argp->tname, argp->tlen);
+
+	/* Release both directory handles, source first. */
+	fh_put(&argp->ffh);
+	fh_put(&argp->tfh);
+	return status;
+}
+
+static int
+nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp,
+	       void *resp)
+{
+	int status;
+
+	dprintk("nfsd: LINK %s ->\n",
+		SVCFH_fmt(&argp->ffh));
+	dprintk("nfsd: %s %.*s\n",
+		SVCFH_fmt(&argp->tfh),
+		argp->tlen,
+		argp->tname);
+
+	/* nfsd_link() takes the target directory and name first and the
+	 * existing file's handle last.
+	 */
+	status = nfsd_link(rqstp, &argp->tfh, argp->tname, argp->tlen,
+			   &argp->ffh);
+
+	fh_put(&argp->ffh);
+	fh_put(&argp->tfh);
+	return status;
+}
+
+static int
+nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp,
+		  void *resp)
+{
+	struct svc_fh created;
+	int status;
+
+	dprintk("nfsd: SYMLINK %s %.*s -> %.*s\n",
+		SVCFH_fmt(&argp->ffh), argp->flen, argp->fname,
+		argp->tlen, argp->tname);
+
+	/* The new link's handle lives only for this call; it is not
+	 * part of the reply.
+	 */
+	fh_init(&created, NFS_FHSIZE);
+
+	/* Create the link, look up new file and set attrs. */
+	status = nfsd_symlink(rqstp, &argp->ffh, argp->fname, argp->flen,
+			      argp->tname, argp->tlen,
+			      &created, &argp->attrs);
+
+	fh_put(&argp->ffh);
+	fh_put(&created);
+	return status;
+}
+
+/*
+ * Make directory. This operation is not idempotent.
+ * N.B. After this call resp->fh needs an fh_put
+ */
+static int
+nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
+		struct nfsd_diropres *resp)
+{
+	int status;
+
+	dprintk("nfsd: MKDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	/* The reply handle should not hold a dentry yet; warn and carry
+	 * on if it does.
+	 */
+	if (resp->fh.fh_dentry != NULL)
+		printk(KERN_WARNING
+			"nfsd_proc_mkdir: response already verified??\n");
+
+	/* The size attribute is not passed on for a directory. */
+	argp->attrs.ia_valid &= ~ATTR_SIZE;
+
+	fh_init(&resp->fh, NFS_FHSIZE);
+	status = nfsd_create(rqstp, &argp->fh, argp->name, argp->len,
+			     &argp->attrs, S_IFDIR, 0, &resp->fh);
+
+	fh_put(&argp->fh);
+	return status;
+}
+
+/*
+ * Remove a directory
+ */
+static int
+nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp,
+		void *resp)
+{
+	int status;
+
+	dprintk("nfsd: RMDIR %s %.*s\n", SVCFH_fmt(&argp->fh), argp->len, argp->name);
+
+	/* Same helper as REMOVE, called with S_IFDIR as the type argument. */
+	status = nfsd_unlink(rqstp, &argp->fh, S_IFDIR,
+			     argp->name, argp->len);
+
+	fh_put(&argp->fh);
+	return status;
+}
+
+/*
+ * Read a portion of a directory.
+ */
+static int
+nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp,
+ struct nfsd_readdirres *resp)
+{
+ int nfserr, count;
+ loff_t offset;
+
+ dprintk("nfsd: READDIR %s %d bytes at %d\n",
+ SVCFH_fmt(&argp->fh),
+ argp->count, argp->cookie);
+
+ /* Shrink to the client read size */
+ /* (converts the requested byte count to words, minus two) */
+ count = (argp->count >> 2) - 2;
+
+ /* Make sure we've room for the NULL ptr & eof flag */
+ count -= 2;
+ if (count < 0)
+ count = 0;
+
+ /* Set up the encoder state: entries go into the buffer supplied with
+ * the arguments, limited to `count'.
+ */
+ resp->buffer = argp->buffer;
+ resp->offset = NULL;
+ resp->buflen = count;
+ resp->common.err = nfs_ok;
+ /* Read directory and encode entries on the fly */
+ offset = argp->cookie;
+ nfserr = nfsd_readdir(rqstp, &argp->fh, &offset,
+ &resp->common, nfssvc_encode_entry);
+
+ /* Pointer difference between where the buffer pointer ended up and
+ * where it started.
+ */
+ resp->count = resp->buffer - argp->buffer;
+ /* If resp->offset was set during encoding, store the final offset
+ * there in network byte order.
+ */
+ if (resp->offset)
+ *resp->offset = htonl(offset);
+
+ fh_put(&argp->fh);
+ return nfserr;
+}
+
+/*
+ * Get file system info
+ */
+static int
+nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
+		 struct nfsd_statfsres *resp)
+{
+	int status;
+
+	dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh));
+
+	/* The statistics are written straight into the reply structure. */
+	status = nfsd_statfs(rqstp, &argp->fh, &resp->stats);
+
+	fh_put(&argp->fh);
+	return status;
+}
+
+/*
+ * NFSv2 Server procedures.
+ * Only the results of non-idempotent operations are cached.
+ */
+/*
+ * PROC() pastes its arguments onto fixed prefixes, so a handler named
+ * `none' and a release routine named `none' both expand to NULL.
+ */
+#define nfsd_proc_none NULL
+#define nfssvc_release_none NULL
+/* Placeholder so that sizeof(struct nfsd_void) is valid for procedures
+ * whose argument or result type is given as `void'.
+ */
+struct nfsd_void { int dummy; };
+
+/*
+ * Build one positional initializer for struct svc_procedure:
+ * handler, argument decoder, result encoder, release routine,
+ * argument size, result size, a literal 0, cache type, reply size.
+ * NOTE(review): the meaning of the literal 0 depends on the field order
+ * of struct svc_procedure, which is declared elsewhere -- confirm there.
+ */
+#define PROC(name, argt, rest, relt, cache, respsize) \
+ { (svc_procfunc) nfsd_proc_##name, \
+ (kxdrproc_t) nfssvc_decode_##argt, \
+ (kxdrproc_t) nfssvc_encode_##rest, \
+ (kxdrproc_t) nfssvc_release_##relt, \
+ sizeof(struct nfsd_##argt), \
+ sizeof(struct nfsd_##rest), \
+ 0, \
+ cache, \
+ respsize, \
+ }
+
+/* Building blocks for the reply-size column of the procedure table.
+ * NOTE(review): presumably counted in 32-bit XDR words (the table adds
+ * NFS_MAXPATHLEN/4 to them) -- confirm.
+ */
+#define ST 1 /* status */
+#define FH 8 /* filehandle */
+#define AT 18 /* attributes */
+
+/*
+ * NFSv2 procedure table, indexed by procedure number. Slots 3 and 7
+ * have no handler (`none' expands to NULL).
+ *
+ * The last column is the estimated reply size built from ST/FH/AT,
+ * counted in 32-bit words; byte quantities such as NFS_MAXPATHLEN and
+ * NFSSVC_MAXBLKSIZE are therefore divided by 4. For READ this makes the
+ * estimate agree with nfsd_proc_read(), which reserves
+ * (19<<2) + count + 4 bytes, i.e. 20 words plus the payload.
+ */
+static struct svc_procedure nfsd_procedures2[18] = {
+ PROC(null, void, void, none, RC_NOCACHE, ST),
+ PROC(getattr, fhandle, attrstat, fhandle, RC_NOCACHE, ST+AT),
+ PROC(setattr, sattrargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
+ PROC(none, void, void, none, RC_NOCACHE, ST),
+ PROC(lookup, diropargs, diropres, fhandle, RC_NOCACHE, ST+FH+AT),
+ PROC(readlink, readlinkargs, readlinkres, none, RC_NOCACHE, ST+1+NFS_MAXPATHLEN/4),
+ PROC(read, readargs, readres, fhandle, RC_NOCACHE, ST+AT+1+NFSSVC_MAXBLKSIZE/4),
+ PROC(none, void, void, none, RC_NOCACHE, ST),
+ PROC(write, writeargs, attrstat, fhandle, RC_REPLBUFF, ST+AT),
+ PROC(create, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
+ PROC(remove, diropargs, void, none, RC_REPLSTAT, ST),
+ PROC(rename, renameargs, void, none, RC_REPLSTAT, ST),
+ PROC(link, linkargs, void, none, RC_REPLSTAT, ST),
+ PROC(symlink, symlinkargs, void, none, RC_REPLSTAT, ST),
+ PROC(mkdir, createargs, diropres, fhandle, RC_REPLBUFF, ST+FH+AT),
+ PROC(rmdir, diropargs, void, none, RC_REPLSTAT, ST),
+ PROC(readdir, readdirargs, readdirres, none, RC_NOCACHE, 0),
+ PROC(statfs, fhandle, statfsres, none, RC_NOCACHE, ST+5),
+};
+
+
+/*
+ * Version descriptor for NFSv2: ties version number 2 to the 18-entry
+ * procedure table above and to the nfsd_dispatch() entry point.
+ */
+struct svc_version nfsd_version2 = {
+ .vs_vers = 2,
+ .vs_nproc = 18,
+ .vs_proc = nfsd_procedures2,
+ .vs_dispatch = nfsd_dispatch,
+ .vs_xdrsize = NFS2_SVC_XDRSIZE,
+};
+
+/*
+ * Map errnos to NFS errnos.
+ */
+int
+nfserrno (int errno)
+{
+	/*
+	 * Translate a negative kernel errno (0 for success) into the
+	 * matching NFS status code. Values with no entry in the table
+	 * are logged and reported as nfserr_io.
+	 *
+	 * The table is scanned linearly and ends at the entry whose
+	 * nfserr field is -1. Each errno appears once.
+	 */
+	static const struct {
+		int nfserr;
+		int syserr;
+	} nfs_errtbl[] = {
+		{ nfs_ok, 0 },
+		{ nfserr_perm, -EPERM },
+		{ nfserr_noent, -ENOENT },
+		{ nfserr_io, -EIO },
+		{ nfserr_nxio, -ENXIO },
+		{ nfserr_acces, -EACCES },
+		{ nfserr_exist, -EEXIST },
+		{ nfserr_xdev, -EXDEV },
+		{ nfserr_mlink, -EMLINK },
+		{ nfserr_nodev, -ENODEV },
+		{ nfserr_notdir, -ENOTDIR },
+		{ nfserr_isdir, -EISDIR },
+		{ nfserr_inval, -EINVAL },
+		{ nfserr_fbig, -EFBIG },
+		{ nfserr_nospc, -ENOSPC },
+		{ nfserr_rofs, -EROFS },
+		{ nfserr_nametoolong, -ENAMETOOLONG },
+		{ nfserr_notempty, -ENOTEMPTY },
+#ifdef EDQUOT
+		{ nfserr_dquot, -EDQUOT },
+#endif
+		{ nfserr_stale, -ESTALE },
+		{ nfserr_jukebox, -ETIMEDOUT },
+		{ nfserr_dropit, -EAGAIN },
+		{ nfserr_dropit, -ENOMEM },
+		{ nfserr_badname, -ESRCH },
+		{ nfserr_io, -ETXTBSY },
+		{ -1, -EIO }	/* end-of-table marker */
+	};
+	int i;
+
+	for (i = 0; nfs_errtbl[i].nfserr != -1; i++) {
+		if (nfs_errtbl[i].syserr == errno)
+			return nfs_errtbl[i].nfserr;
+	}
+	printk (KERN_INFO "nfsd: non-standard errno: %d\n", errno);
+	return nfserr_io;
+}
+
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
new file mode 100644
index 000000000000..39551657e656
--- /dev/null
+++ b/fs/nfsd/nfssvc.c
@@ -0,0 +1,385 @@
+/*
+ * linux/fs/nfsd/nfssvc.c
+ *
+ * Central processing for nfsd.
+ *
+ * Authors: Olaf Kirch (okir@monad.swb.de)
+ *
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/nfs.h>
+#include <linux/in.h>
+#include <linux/uio.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/fs_struct.h>
+
+#include <linux/sunrpc/types.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/cache.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/stats.h>
+#include <linux/nfsd/cache.h>
+#include <linux/lockd/bind.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_SVC
+
+/* these signals will be delivered to an nfsd thread
+ * when handling a request
+ */
+#define ALLOWED_SIGS (sigmask(SIGKILL))
+/* these signals will be delivered to an nfsd thread
+ * when not handling a request. i.e. when waiting
+ */
+#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
+/* if the last thread dies with SIGHUP, then the exports table is
+ * left unchanged ( like 2.4-{0-9} ). Any other signal will clear
+ * the exports table (like 2.2).
+ */
+#define SIG_NOCLEAN SIGHUP
+
+extern struct svc_program nfsd_program;
+static void nfsd(struct svc_rqst *rqstp);
+/* Time recorded by nfsd_svc() when the service is first created. */
+struct timeval nfssvc_boot;
+/* The running service, or NULL when no service exists. */
+static struct svc_serv *nfsd_serv;
+/* Incremented by each thread before it processes a request and
+ * decremented afterwards.
+ */
+static atomic_t nfsd_busy;
+/* jiffies value of the last update_thread_usage() call; written under
+ * nfsd_call_lock.
+ */
+static unsigned long nfsd_last_call;
+static DEFINE_SPINLOCK(nfsd_call_lock);
+
+/* One entry per nfsd thread, linked into the global nfsd_list below so
+ * that nfsd_svc() can find threads to signal.
+ */
+struct nfsd_list {
+ struct list_head list;
+ struct task_struct *task;
+};
+static struct list_head nfsd_list = LIST_HEAD_INIT(nfsd_list);
+
+/*
+ * Maximum number of nfsd processes
+ */
+#define NFSD_MAXSERVS 8192
+
+int nfsd_nrthreads(void)
+{
+	/* Thread count of the running service, or 0 if there is none. */
+	return nfsd_serv ? nfsd_serv->sv_nrthreads : 0;
+}
+
+/*
+ * Create the NFS service on `port' if it does not exist yet, then move
+ * the number of server threads towards `nrservs': missing threads are
+ * created, surplus threads are sent SIG_NOCLEAN. The request is clamped
+ * to the range 0..NFSD_MAXSERVS. Returns the last value stored in
+ * `error' (negative on failure). Runs under the big kernel lock.
+ */
+int
+nfsd_svc(unsigned short port, int nrservs)
+{
+ int error;
+ int none_left;
+ struct list_head *victim;
+
+ lock_kernel();
+ dprintk("nfsd: creating service\n");
+ /* NOTE(review): this -EINVAL is overwritten by nfsd_racache_init()
+ * below before it can be returned.
+ */
+ error = -EINVAL;
+ if (nrservs <= 0)
+ nrservs = 0;
+ if (nrservs > NFSD_MAXSERVS)
+ nrservs = NFSD_MAXSERVS;
+
+ /* Readahead param cache - will no-op if it already exists */
+ error = nfsd_racache_init(2*nrservs);
+ if (error<0)
+ goto out;
+ error = nfs4_state_init();
+ if (error<0)
+ goto out;
+ if (!nfsd_serv) {
+ /* First call: create the service and its listening sockets. */
+ atomic_set(&nfsd_busy, 0);
+ error = -ENOMEM;
+ nfsd_serv = svc_create(&nfsd_program, NFSD_BUFSIZE);
+ if (nfsd_serv == NULL)
+ goto out;
+ error = svc_makesock(nfsd_serv, IPPROTO_UDP, port);
+ if (error < 0)
+ goto failure;
+
+#ifdef CONFIG_NFSD_TCP
+ error = svc_makesock(nfsd_serv, IPPROTO_TCP, port);
+ if (error < 0)
+ goto failure;
+#endif
+ do_gettimeofday(&nfssvc_boot); /* record boot time */
+ } else
+ /* NOTE(review): presumably a temporary reference for this
+ * call, dropped again by svc_destroy() below -- confirm.
+ */
+ nfsd_serv->sv_nrthreads++;
+ /* Turn the requested total into a delta against the current count
+ * (less one); positive means create, negative means stop.
+ */
+ nrservs -= (nfsd_serv->sv_nrthreads-1);
+ while (nrservs > 0) {
+ nrservs--;
+ /* Each thread holds a module reference; undo it if the thread
+ * could not be created.
+ */
+ __module_get(THIS_MODULE);
+ error = svc_create_thread(nfsd, nfsd_serv);
+ if (error < 0) {
+ module_put(THIS_MODULE);
+ break;
+ }
+ }
+ /* Signal surplus threads, walking the list of registered threads. */
+ victim = nfsd_list.next;
+ while (nrservs < 0 && victim != &nfsd_list) {
+ struct nfsd_list *nl =
+ list_entry(victim,struct nfsd_list, list);
+ victim = victim->next;
+ send_sig(SIG_NOCLEAN, nl->task, 1);
+ nrservs++;
+ }
+ /* Reached on success as well as on socket-creation failure. */
+ failure:
+ none_left = (nfsd_serv->sv_nrthreads == 1);
+ svc_destroy(nfsd_serv); /* Release server */
+ if (none_left) {
+ nfsd_serv = NULL;
+ nfsd_racache_shutdown();
+ nfs4_state_shutdown();
+ }
+ out:
+ unlock_kernel();
+ return error;
+}
+
+/*
+ * Charge the jiffies elapsed since the previous call to the usage
+ * bucket for the given number of busy threads. The bucket is the busy
+ * count scaled to tenths of the total thread count; a bucket of 0 is
+ * not recorded. Serialized by nfsd_call_lock.
+ */
+static inline void
+update_thread_usage(int busy_threads)
+{
+	unsigned long previous;
+	unsigned long elapsed;
+	int bucket;
+
+	spin_lock(&nfsd_call_lock);
+
+	previous = nfsd_last_call;
+	nfsd_last_call = jiffies;
+
+	bucket = busy_threads*10/nfsdstats.th_cnt;
+	if (bucket > 0 && bucket <= 10) {
+		elapsed = nfsd_last_call - previous;
+
+		/* Accumulate, wrapping once at NFSD_USAGE_WRAP. */
+		nfsdstats.th_usage[bucket-1] += elapsed;
+		if (nfsdstats.th_usage[bucket-1] >= NFSD_USAGE_WRAP)
+			nfsdstats.th_usage[bucket-1] -= NFSD_USAGE_WRAP;
+
+		/* The top bucket also counts how often it was hit. */
+		if (bucket == 10)
+			nfsdstats.th_fullcnt++;
+	}
+
+	spin_unlock(&nfsd_call_lock);
+}
+
+/*
+ * This is the NFS server kernel thread
+ */
+static void
+nfsd(struct svc_rqst *rqstp)
+{
+ struct svc_serv *serv = rqstp->rq_server;
+ struct fs_struct *fsp;
+ int err;
+ /* List entry for this thread; it lives on this thread's stack and is
+ * removed from nfsd_list before the function exits.
+ */
+ struct nfsd_list me;
+ sigset_t shutdown_mask, allowed_mask;
+
+ /* Lock module and set up kernel thread */
+ lock_kernel();
+ daemonize("nfsd");
+
+ /* After daemonize() this kernel thread shares current->fs
+ * with the init process. We need to create files with a
+ * umask of 0 instead of init's umask. */
+ fsp = copy_fs_struct(current->fs);
+ if (!fsp) {
+ printk("Unable to start nfsd thread: out of memory\n");
+ goto out;
+ }
+ exit_fs(current);
+ current->fs = fsp;
+ current->fs->umask = 0;
+
+ /* Each mask blocks every signal except the listed ones. */
+ siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS);
+ siginitsetinv(&allowed_mask, ALLOWED_SIGS);
+
+ nfsdstats.th_cnt++;
+
+ lockd_up(); /* start lockd */
+
+ /* Register this thread so nfsd_svc() can signal it. */
+ me.task = current;
+ list_add(&me.list, &nfsd_list);
+
+ unlock_kernel();
+
+ /*
+ * We want less throttling in balance_dirty_pages() so that nfs to
+ * localhost doesn't cause nfsd to lock up due to all the client's
+ * dirty pages.
+ */
+ current->flags |= PF_LESS_THROTTLE;
+
+ /*
+ * The main request loop
+ */
+ for (;;) {
+ /* Block all but the shutdown signals */
+ sigprocmask(SIG_SETMASK, &shutdown_mask, NULL);
+
+ /*
+ * Find a socket with data available and call its
+ * recvfrom routine.
+ */
+ while ((err = svc_recv(serv, rqstp,
+ 60*60*HZ)) == -EAGAIN)
+ ;
+ /* Any other error ends the thread. */
+ if (err < 0)
+ break;
+ /* Account for the time spent at the old busy count before
+ * changing it.
+ */
+ update_thread_usage(atomic_read(&nfsd_busy));
+ atomic_inc(&nfsd_busy);
+
+ /* Lock the export hash tables for reading. */
+ exp_readlock();
+
+ /* Process request with signals blocked. */
+ sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
+
+ svc_process(serv, rqstp);
+
+ /* Unlock export hash tables */
+ exp_readunlock();
+ update_thread_usage(atomic_read(&nfsd_busy));
+ atomic_dec(&nfsd_busy);
+ }
+
+ if (err != -EINTR) {
+ printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
+ } else {
+ unsigned int signo;
+
+ /* Find the first pending, unblocked signal; its number replaces
+ * err and is compared against SIG_NOCLEAN below.
+ */
+ for (signo = 1; signo <= _NSIG; signo++)
+ if (sigismember(&current->pending.signal, signo) &&
+ !sigismember(&current->blocked, signo))
+ break;
+ err = signo;
+ }
+
+ lock_kernel();
+
+ /* Release lockd */
+ lockd_down();
+
+ /* Check if this is last thread */
+ if (serv->sv_nrthreads==1) {
+
+ printk(KERN_WARNING "nfsd: last server has exited\n");
+ if (err != SIG_NOCLEAN) {
+ printk(KERN_WARNING "nfsd: unexporting all filesystems\n");
+ nfsd_export_flush();
+ }
+ nfsd_serv = NULL;
+ nfsd_racache_shutdown(); /* release read-ahead cache */
+ nfs4_state_shutdown();
+ }
+ list_del(&me.list);
+ nfsdstats.th_cnt --;
+
+out:
+ /* NOTE(review): the early `goto out' above arrives here with the
+ * kernel lock still held and without lockd_up() or list
+ * registration -- confirm the exit path copes with that.
+ */
+ /* Release the thread */
+ svc_exit_thread(rqstp);
+
+ /* Release module */
+ module_put_and_exit(0);
+}
+
+/*
+ * Dispatch one decoded RPC call: consult the reply cache, decode the
+ * arguments, run the procedure handler and encode the result.
+ * Returns 0 when the request is dropped (cache lookup says so, or the
+ * handler returned nfserr_dropit) and 1 otherwise; on decode/encode
+ * failure an RPC-level error is stored in *statp.
+ */
+int
+nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp)
+{
+ struct svc_procedure *proc;
+ kxdrproc_t xdr;
+ u32 nfserr;
+ u32 *nfserrp;
+
+ dprintk("nfsd_dispatch: vers %d proc %d\n",
+ rqstp->rq_vers, rqstp->rq_proc);
+ proc = rqstp->rq_procinfo;
+
+ /* Check whether we have this call in the cache. */
+ switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) {
+ case RC_INTR:
+ case RC_DROPIT:
+ return 0;
+ case RC_REPLY:
+ return 1;
+ case RC_DOIT:;
+ /* do it */
+ }
+
+ /* Decode arguments */
+ xdr = proc->pc_decode;
+ if (xdr && !xdr(rqstp, (u32*)rqstp->rq_arg.head[0].iov_base,
+ rqstp->rq_argp)) {
+ dprintk("nfsd: failed to decode arguments!\n");
+ nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+ *statp = rpc_garbage_args;
+ return 1;
+ }
+
+ /* need to grab the location to store the status, as
+ * nfsv4 does some encoding while processing
+ */
+ /* The status word sits at the current end of the reply head; one
+ * u32 is claimed for it here.
+ */
+ nfserrp = rqstp->rq_res.head[0].iov_base
+ + rqstp->rq_res.head[0].iov_len;
+ rqstp->rq_res.head[0].iov_len += sizeof(u32);
+
+ /* Now call the procedure handler, and encode NFS status. */
+ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
+ /* For version 2 a jukebox status is turned into a dropped request. */
+ if (nfserr == nfserr_jukebox && rqstp->rq_vers == 2)
+ nfserr = nfserr_dropit;
+ if (nfserr == nfserr_dropit) {
+ dprintk("nfsd: Dropping request due to malloc failure!\n");
+ nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+ return 0;
+ }
+
+ /* Procedure 0 gets no status word written. */
+ if (rqstp->rq_proc != 0)
+ *nfserrp++ = nfserr;
+
+ /* Encode result.
+ * For NFSv2, additional info is never returned in case of an error.
+ */
+ if (!(nfserr && rqstp->rq_vers == 2)) {
+ xdr = proc->pc_encode;
+ if (xdr && !xdr(rqstp, nfserrp,
+ rqstp->rq_resp)) {
+ /* Failed to encode result. Release cache entry */
+ dprintk("nfsd: failed to encode result!\n");
+ nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
+ *statp = rpc_system_err;
+ return 1;
+ }
+ }
+
+ /* Store reply in cache. */
+ nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
+ return 1;
+}
+
+extern struct svc_version nfsd_version2, nfsd_version3, nfsd_version4;
+
+/* Version table indexed by NFS version number; versions 3 and 4 are
+ * present only when configured, and unlisted slots are NULL.
+ */
+static struct svc_version * nfsd_version[] = {
+ [2] = &nfsd_version2,
+#if defined(CONFIG_NFSD_V3)
+ [3] = &nfsd_version3,
+#endif
+#if defined(CONFIG_NFSD_V4)
+ [4] = &nfsd_version4,
+#endif
+};
+
+#define NFSD_NRVERS (sizeof(nfsd_version)/sizeof(nfsd_version[0]))
+/* RPC program descriptor for the NFS service. */
+struct svc_program nfsd_program = {
+ .pg_prog = NFS_PROGRAM, /* program number */
+ .pg_nvers = NFSD_NRVERS, /* nr of entries in nfsd_version */
+ .pg_vers = nfsd_version, /* version table */
+ .pg_name = "nfsd", /* program name */
+ .pg_class = "nfsd", /* authentication class */
+ .pg_stats = &nfsd_svcstats, /* rpc statistics */
+ .pg_authenticate = &svc_set_client, /* export authentication */
+
+};
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
new file mode 100644
index 000000000000..948b08287c99
--- /dev/null
+++ b/fs/nfsd/nfsxdr.c
@@ -0,0 +1,511 @@
+/*
+ * linux/fs/nfsd/nfsxdr.c
+ *
+ * XDR support for nfsd
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/types.h>
+#include <linux/time.h>
+#include <linux/nfs.h>
+#include <linux/vfs.h>
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/xdr.h>
+#include <linux/mm.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_XDR
+
+
+/* With NFSD_OPTIMIZE_SPACE defined, the `inline' keyword is defined
+ * away for the rest of this file.
+ */
+#ifdef NFSD_OPTIMIZE_SPACE
+# define inline
+#endif
+
+/*
+ * Mapping of S_IF* types to NFS file types
+ *
+ * Indexed by the file-type bits of the mode shifted right by 12, i.e.
+ * (mode & S_IFMT) >> 12, as done in encode_fattr().
+ */
+static u32 nfs_ftypes[] = {
+ NFNON, NFCHR, NFCHR, NFBAD,
+ NFDIR, NFBAD, NFBLK, NFBAD,
+ NFREG, NFBAD, NFLNK, NFBAD,
+ NFSOCK, NFBAD, NFLNK, NFBAD,
+};
+
+
+/*
+ * XDR functions for basic NFS types
+ */
+static inline u32 *
+decode_fh(u32 *p, struct svc_fh *fhp)
+{
+	struct knfsd_fh *handle = &fhp->fh_handle;
+
+	/* Reset the handle, then copy in the fixed-size handle bytes. */
+	fh_init(fhp, NFS_FHSIZE);
+	handle->fh_size = NFS_FHSIZE;
+	memcpy(&handle->fh_base, p, NFS_FHSIZE);
+
+	/* FIXME: Look up export pointer here and verify
+	 * Sun Secure RPC if requested */
+
+	/* Step past the handle, counted in 32-bit words. */
+	return p + NFS_FHSIZE / 4;
+}
+
+static inline u32 *
+encode_fh(u32 *p, struct svc_fh *fhp)
+{
+	u32 *next = p + NFS_FHSIZE / 4;
+
+	/* Copy the fixed-size handle bytes out and return the word
+	 * following them.
+	 */
+	memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE);
+	return next;
+}
+
+/*
+ * Decode a file name and make sure that the path contains
+ * no slashes or null bytes.
+ */
+static inline u32 *
+decode_filename(u32 *p, char **namp, int *lenp)
+{
+	int pos;
+
+	p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXNAMLEN);
+	if (p == NULL)
+		return NULL;
+
+	/* Reject names that contain a NUL byte or a slash. */
+	for (pos = 0; pos < *lenp; pos++) {
+		char c = (*namp)[pos];
+
+		if (c == '\0' || c == '/')
+			return NULL;
+	}
+	return p;
+}
+
+/*
+ * Decode a path name (slashes allowed) and make sure that it contains
+ * no null bytes.
+ */
+static inline u32 *
+decode_pathname(u32 *p, char **namp, int *lenp)
+{
+	int pos;
+
+	p = xdr_decode_string_inplace(p, namp, lenp, NFS_MAXPATHLEN);
+	if (p == NULL)
+		return NULL;
+
+	for (pos = 0; pos < *lenp; pos++) {
+		if ((*namp)[pos] == '\0')
+			return NULL;
+	}
+	return p;
+}
+
+/*
+ * Decode the eight words of an NFSv2 sattr (mode, uid, gid, size, atime
+ * and mtime as seconds/microseconds pairs) into *iap. A word of all ones
+ * means "do not set"; ia_valid gets a flag only for fields actually
+ * supplied. Returns the position after the sattr.
+ */
+static inline u32 *
+decode_sattr(u32 *p, struct iattr *iap)
+{
+ u32 tmp, tmp1;
+
+ iap->ia_valid = 0;
+
+ /* Sun client bug compatibility check: some sun clients seem to
+ * put 0xffff in the mode field when they mean 0xffffffff.
+ * Quoting the 4.4BSD nfs server code: Nah nah nah nah na nah.
+ */
+ if ((tmp = ntohl(*p++)) != (u32)-1 && tmp != 0xffff) {
+ iap->ia_valid |= ATTR_MODE;
+ iap->ia_mode = tmp;
+ }
+ if ((tmp = ntohl(*p++)) != (u32)-1) {
+ iap->ia_valid |= ATTR_UID;
+ iap->ia_uid = tmp;
+ }
+ if ((tmp = ntohl(*p++)) != (u32)-1) {
+ iap->ia_valid |= ATTR_GID;
+ iap->ia_gid = tmp;
+ }
+ if ((tmp = ntohl(*p++)) != (u32)-1) {
+ iap->ia_valid |= ATTR_SIZE;
+ iap->ia_size = tmp;
+ }
+ /* Times are taken only when both halves are supplied; the second
+ * half is multiplied by 1000 to fill tv_nsec.
+ */
+ tmp = ntohl(*p++); tmp1 = ntohl(*p++);
+ if (tmp != (u32)-1 && tmp1 != (u32)-1) {
+ iap->ia_valid |= ATTR_ATIME | ATTR_ATIME_SET;
+ iap->ia_atime.tv_sec = tmp;
+ iap->ia_atime.tv_nsec = tmp1 * 1000;
+ }
+ tmp = ntohl(*p++); tmp1 = ntohl(*p++);
+ if (tmp != (u32)-1 && tmp1 != (u32)-1) {
+ iap->ia_valid |= ATTR_MTIME | ATTR_MTIME_SET;
+ iap->ia_mtime.tv_sec = tmp;
+ iap->ia_mtime.tv_nsec = tmp1 * 1000;
+ /*
+ * Passing the invalid value useconds=1000000 for mtime
+ * is a Sun convention for "set both mtime and atime to
+ * current server time". It's needed to make permissions
+ * checks for the "touch" program across v2 mounts to
+ * Solaris and Irix boxes work correctly. See description of
+ * sattr in section 6.1 of "NFS Illustrated" by
+ * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5
+ */
+ if (tmp1 == 1000000)
+ iap->ia_valid &= ~(ATTR_ATIME_SET|ATTR_MTIME_SET);
+ }
+ return p;
+}
+
+/*
+ * Encode the attributes of the file behind fhp as 17 words at p:
+ * type, mode, nlink, uid, gid, size, blocksize, rdev, blocks, fsid,
+ * fileid, then atime, mtime and ctime as seconds/microseconds pairs.
+ * Returns the position after the last word written.
+ */
+static inline u32 *
+encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+{
+ struct vfsmount *mnt = fhp->fh_export->ex_mnt;
+ struct dentry *dentry = fhp->fh_dentry;
+ struct kstat stat;
+ int type;
+ struct timespec time;
+
+ /* NOTE(review): the return value of vfs_getattr() is not checked. */
+ vfs_getattr(mnt, dentry, &stat);
+ type = (stat.mode & S_IFMT);
+
+ *p++ = htonl(nfs_ftypes[type >> 12]);
+ *p++ = htonl((u32) stat.mode);
+ *p++ = htonl((u32) stat.nlink);
+ *p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid));
+ *p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid));
+
+ /* A symlink's reported size is capped at NFS_MAXPATHLEN. */
+ if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) {
+ *p++ = htonl(NFS_MAXPATHLEN);
+ } else {
+ *p++ = htonl((u32) stat.size);
+ }
+ *p++ = htonl((u32) stat.blksize);
+ /* Only char and block devices carry a device number; everything
+ * else gets all ones.
+ */
+ if (S_ISCHR(type) || S_ISBLK(type))
+ *p++ = htonl(new_encode_dev(stat.rdev));
+ else
+ *p++ = htonl(0xffffffff);
+ *p++ = htonl((u32) stat.blocks);
+ /* fsid: the export's ex_fsid when is_fsid() says so, otherwise the
+ * encoded device number.
+ */
+ if (is_fsid(fhp, rqstp->rq_reffh))
+ *p++ = htonl((u32) fhp->fh_export->ex_fsid);
+ else
+ *p++ = htonl(new_encode_dev(stat.dev));
+ *p++ = htonl((u32) stat.ino);
+ /* Nanoseconds are divided by 1000 for the wire. */
+ *p++ = htonl((u32) stat.atime.tv_sec);
+ *p++ = htonl(stat.atime.tv_nsec ? stat.atime.tv_nsec / 1000 : 0);
+ /* mtime comes from lease_get_mtime() rather than from stat. */
+ lease_get_mtime(dentry->d_inode, &time);
+ *p++ = htonl((u32) time.tv_sec);
+ *p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0);
+ *p++ = htonl((u32) stat.ctime.tv_sec);
+ *p++ = htonl(stat.ctime.tv_nsec ? stat.ctime.tv_nsec / 1000 : 0);
+
+ return p;
+}
+
+
+/*
+ * XDR decode functions
+ */
+/* Decoder for procedures without arguments: nothing is consumed, only
+ * the argument size check is performed.
+ */
+int
+nfssvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+ return xdr_argsize_check(rqstp, p);
+}
+
+/* Decode a lone file handle; returns 0 on failure. */
+int
+nfssvc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/* Decode a file handle followed by a sattr; returns 0 on failure. */
+int
+nfssvc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_sattrargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	p = decode_sattr(p, &args->attrs);
+	if (p == NULL)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/* Decode a directory handle followed by a file name; returns 0 on
+ * failure.
+ */
+int
+nfssvc_decode_diropargs(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_diropargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	p = decode_filename(p, &args->name, &args->len);
+	if (p == NULL)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode READ arguments (handle, offset, count, unused totalcount) and
+ * prepare response pages for the data: one iovec entry per page until
+ * the (clamped) count is covered. Returns 0 on failure.
+ */
+int
+nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p,
+ struct nfsd_readargs *args)
+{
+ unsigned int len;
+ int v,pn;
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+
+ args->offset = ntohl(*p++);
+ len = args->count = ntohl(*p++);
+ p++; /* totalcount - unused */
+
+ /* Only the local length is clamped here; args->count keeps the
+ * value sent by the client.
+ */
+ if (len > NFSSVC_MAXBLKSIZE)
+ len = NFSSVC_MAXBLKSIZE;
+
+ /* set up somewhere to store response.
+ * We take pages, put them on reslist and include in iovec
+ */
+ v=0;
+ while (len > 0) {
+ /* NOTE(review): the result of svc_take_page() is not checked. */
+ pn=rqstp->rq_resused;
+ svc_take_page(rqstp);
+ args->vec[v].iov_base = page_address(rqstp->rq_respages[pn]);
+ args->vec[v].iov_len = len < PAGE_SIZE?len:PAGE_SIZE;
+ len -= args->vec[v].iov_len;
+ v++;
+ }
+ args->vlen = v;
+ return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode WRITE arguments (handle, offset, length) and describe the
+ * payload as an iovec: the first entry is the remainder of the argument
+ * head, later entries are whole argument pages, and the last entry is
+ * trimmed to the remaining length. Returns non-zero when the first
+ * entry is non-empty.
+ */
+int
+nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p,
+ struct nfsd_writeargs *args)
+{
+ unsigned int len;
+ int v;
+ if (!(p = decode_fh(p, &args->fh)))
+ return 0;
+
+ p++; /* beginoffset */
+ args->offset = ntohl(*p++); /* offset */
+ p++; /* totalcount */
+ len = args->len = ntohl(*p++);
+ /* The data starts right here in the head buffer; its first chunk is
+ * whatever remains of the head after the words consumed so far.
+ */
+ args->vec[0].iov_base = (void*)p;
+ args->vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
+ (((void*)p) - rqstp->rq_arg.head[0].iov_base);
+ /* Only the local length is clamped; args->len keeps the client's
+ * value.
+ * NOTE(review): the length is not compared against the amount of data
+ * actually received -- confirm that is checked elsewhere.
+ */
+ if (len > NFSSVC_MAXBLKSIZE)
+ len = NFSSVC_MAXBLKSIZE;
+ v = 0;
+ while (len > args->vec[v].iov_len) {
+ len -= args->vec[v].iov_len;
+ v++;
+ args->vec[v].iov_base = page_address(rqstp->rq_argpages[v]);
+ args->vec[v].iov_len = PAGE_SIZE;
+ }
+ args->vec[v].iov_len = len;
+ args->vlen = v+1;
+ return args->vec[0].iov_len > 0;
+}
+
+/*
+ * Decode directory file handle, file name and initial attributes.
+ * Decoding stops at the first step that fails, in which case 0 is
+ * returned; otherwise the result of xdr_argsize_check() is returned.
+ */
+int
+nfssvc_decode_createargs(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_createargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p)
+		p = decode_filename(p, &args->name, &args->len);
+	if (p)
+		p = decode_sattr(p, &args->attrs);
+	if (!p)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode two (file handle, file name) pairs: the "f" pair into
+ * ffh/fname/flen and the "t" pair into tfh/tname/tlen.
+ * Decoding stops at the first step that fails, in which case 0 is
+ * returned; otherwise the result of xdr_argsize_check() is returned.
+ */
+int
+nfssvc_decode_renameargs(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_renameargs *args)
+{
+	p = decode_fh(p, &args->ffh);
+	if (p)
+		p = decode_filename(p, &args->fname, &args->flen);
+	if (p)
+		p = decode_fh(p, &args->tfh);
+	if (p)
+		p = decode_filename(p, &args->tname, &args->tlen);
+	if (!p)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode the file handle of a READLINK request and take one response
+ * page; args->buffer is pointed at that page.
+ * Returns 0 if the file handle cannot be decoded, otherwise the result
+ * of xdr_argsize_check().
+ */
+int
+nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinkargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	/* the page just taken is the last one in rq_respages */
+	svc_take_page(rqstp);
+	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode file handle ffh, file handle tfh and the file name
+ * tname/tlen, in that order.
+ * Decoding stops at the first step that fails, in which case 0 is
+ * returned; otherwise the result of xdr_argsize_check() is returned.
+ */
+int
+nfssvc_decode_linkargs(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_linkargs *args)
+{
+	p = decode_fh(p, &args->ffh);
+	if (p)
+		p = decode_fh(p, &args->tfh);
+	if (p)
+		p = decode_filename(p, &args->tname, &args->tlen);
+	if (!p)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode file handle ffh, file name fname/flen, path name tname/tlen
+ * and a set of attributes, in that order.
+ * Decoding stops at the first step that fails, in which case 0 is
+ * returned; otherwise the result of xdr_argsize_check() is returned.
+ */
+int
+nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_symlinkargs *args)
+{
+	p = decode_fh(p, &args->ffh);
+	if (p)
+		p = decode_filename(p, &args->fname, &args->flen);
+	if (p)
+		p = decode_pathname(p, &args->tname, &args->tlen);
+	if (p)
+		p = decode_sattr(p, &args->attrs);
+	if (!p)
+		return 0;
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * Decode file handle, cookie and count of a READDIR request.
+ * The count is capped at PAGE_SIZE because the reply is built in the
+ * single response page taken here, which args->buffer points at.
+ * Returns 0 if the file handle cannot be decoded, otherwise the result
+ * of xdr_argsize_check().
+ */
+int
+nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_readdirargs *args)
+{
+	p = decode_fh(p, &args->fh);
+	if (p == NULL)
+		return 0;
+
+	args->cookie = ntohl(*p++);
+	args->count = ntohl(*p++);
+	if (args->count > PAGE_SIZE)
+		args->count = PAGE_SIZE;
+
+	/* the page just taken is the last one in rq_respages */
+	svc_take_page(rqstp);
+	args->buffer = page_address(rqstp->rq_respages[rqstp->rq_resused-1]);
+
+	return xdr_argsize_check(rqstp, p);
+}
+
+/*
+ * XDR encode functions
+ */
+/*
+ * Encoder for replies without a body: nothing is written, the result
+ * of xdr_ressize_check() is returned as is.
+ */
+int nfssvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy)
+{
+	int size_ok = xdr_ressize_check(rqstp, p);
+
+	return size_ok;
+}
+
+/*
+ * Encode the attributes of resp->fh via encode_fattr() and return the
+ * result of xdr_ressize_check().
+ */
+int
+nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_attrstat *resp)
+{
+	u32 *end = encode_fattr(rqstp, p, &resp->fh);
+
+	return xdr_ressize_check(rqstp, end);
+}
+
+/*
+ * Encode the file handle resp->fh followed by its attributes and
+ * return the result of xdr_ressize_check().
+ */
+int
+nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_diropres *resp)
+{
+	u32 *after_fh = encode_fh(p, &resp->fh);
+	u32 *end = encode_fattr(rqstp, after_fh, &resp->fh);
+
+	return xdr_ressize_check(rqstp, end);
+}
+
+/*
+ * Encode a READLINK reply: the length word goes into the head, the
+ * link text itself is accounted for through rq_res.page_len.  When the
+ * length is not a multiple of four, a zeroed word behind the length is
+ * used as the tail to supply the missing padding bytes.
+ * Always returns 1; the result of xdr_ressize_check() is not used.
+ */
+int
+nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_readlinkres *resp)
+{
+	*p++ = htonl(resp->len);
+	xdr_ressize_check(rqstp, p);
+	rqstp->rq_res.page_len = resp->len;
+
+	if ((resp->len & 3) == 0)
+		return 1;
+
+	/* need to pad the tail */
+	rqstp->rq_restailpage = 0;
+	rqstp->rq_res.tail[0].iov_base = p;
+	*p = 0;
+	rqstp->rq_res.tail[0].iov_len = 4 - (resp->len & 3);
+	return 1;
+}
+
+/*
+ * Encode a READ reply: attributes and byte count go into the head, the
+ * data itself is accounted for through rq_res.page_len.  When the
+ * count is not a multiple of four, a zeroed word behind the count is
+ * used as the tail to supply the missing padding bytes.
+ * Always returns 1; the result of xdr_ressize_check() is not used.
+ */
+int
+nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_readres *resp)
+{
+	p = encode_fattr(rqstp, p, &resp->fh);
+	*p++ = htonl(resp->count);
+	xdr_ressize_check(rqstp, p);
+
+	/* now update rqstp->rq_res to reflect data as well */
+	rqstp->rq_res.page_len = resp->count;
+
+	if ((resp->count & 3) == 0)
+		return 1;
+
+	/* need to pad the tail */
+	rqstp->rq_restailpage = 0;
+	rqstp->rq_res.tail[0].iov_base = p;
+	*p = 0;
+	rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
+	return 1;
+}
+
+/*
+ * Finish a READDIR reply.  The entries were already written to the
+ * reply page through resp->buffer; two more words are appended there:
+ * a zero ("no more entries") and a flag that is 1 when the directory
+ * read ended with nfserr_eof.
+ * Always returns 1; the result of xdr_ressize_check() is not used.
+ */
+int
+nfssvc_encode_readdirres(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_readdirres *resp)
+{
+	u32 *tail;
+
+	xdr_ressize_check(rqstp, p);
+
+	tail = resp->buffer;
+	tail[0] = 0; /* no more entries */
+	tail[1] = htonl((resp->common.err == nfserr_eof));
+	tail += 2;
+
+	/* in-page offset of the last byte written, plus one */
+	rqstp->rq_res.page_len = (((unsigned long)tail-1) & ~PAGE_MASK)+1;
+
+	return 1;
+}
+
+/*
+ * Encode a STATFS reply: five 32-bit words holding the maximum
+ * transfer size followed by f_bsize, f_blocks, f_bfree and f_bavail
+ * from resp->stats.  Returns the result of xdr_ressize_check().
+ */
+int
+nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p,
+			struct nfsd_statfsres *resp)
+{
+	struct kstatfs *fsinfo = &resp->stats;
+
+	p[0] = htonl(NFSSVC_MAXBLKSIZE); /* max transfer size */
+	p[1] = htonl(fsinfo->f_bsize);
+	p[2] = htonl(fsinfo->f_blocks);
+	p[3] = htonl(fsinfo->f_bfree);
+	p[4] = htonl(fsinfo->f_bavail);
+
+	return xdr_ressize_check(rqstp, p + 5);
+}
+
+/*
+ * Append one directory entry to the READDIR reply that is being built
+ * at cd->buffer.
+ *
+ * Each entry is written as: a "present" marker, the file id, the name
+ * (length word plus data) and a word for the offset of the following
+ * entry.  That last word is not known yet, so it is filled with ~0 and
+ * its address is kept in cd->offset; the next call overwrites it with
+ * the offset it is given.
+ *
+ * Returns 0 on success.  Returns -EINVAL with cd->common.err set to
+ * nfserr_fbig if the offset does not fit into 32 bits, or to
+ * nfserr_toosmall if the remaining buffer space is insufficient.
+ */
+int
+nfssvc_encode_entry(struct readdir_cd *ccd, const char *name,
+ int namlen, loff_t offset, ino_t ino, unsigned int d_type)
+{
+ struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common);
+ u32 *p = cd->buffer;
+ int buflen, slen;
+
+ /*
+ dprintk("nfsd: entry(%.*s off %ld ino %ld)\n",
+ namlen, name, offset, ino);
+ */
+
+ /* the offset is sent as a single 32-bit word */
+ if (offset > ~((u32) 0)) {
+ cd->common.err = nfserr_fbig;
+ return -EINVAL;
+ }
+ /* back-patch the "next offset" word of the entry written by the previous call */
+ if (cd->offset)
+ *cd->offset = htonl(offset);
+ if (namlen > NFS2_MAXNAMLEN)
+ namlen = NFS2_MAXNAMLEN;/* truncate filename */
+
+ /*
+  * slen is the name length in 32-bit words; the extra 4 matches the
+  * four fixed words written below (marker, file id, name length,
+  * next offset).  Presumably cd->buflen counts words as well -- TODO
+  * confirm against the code that initialises it.
+  */
+ slen = XDR_QUADLEN(namlen);
+ if ((buflen = cd->buflen - slen - 4) < 0) {
+ cd->common.err = nfserr_toosmall;
+ return -EINVAL;
+ }
+ *p++ = xdr_one; /* mark entry present */
+ *p++ = htonl((u32) ino); /* file id */
+ p = xdr_encode_array(p, name, namlen);/* name length & name */
+ cd->offset = p; /* remember pointer */
+ *p++ = ~(u32) 0; /* offset of next entry */
+
+ cd->buflen = buflen;
+ cd->buffer = p;
+ cd->common.err = nfs_ok;
+ return 0;
+}
+
+/*
+ * XDR release functions
+ */
+/*
+ * Release callback: drops the file handle held in the response via
+ * fh_put().  Always returns 1.
+ */
+int nfssvc_release_fhandle(struct svc_rqst *rqstp, u32 *p,
+			   struct nfsd_fhandle *resp)
+{
+	struct svc_fh *held = &resp->fh;
+
+	fh_put(held);
+	return 1;
+}
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
new file mode 100644
index 000000000000..1cf955bcc526
--- /dev/null
+++ b/fs/nfsd/stats.c
@@ -0,0 +1,101 @@
+/*
+ * linux/fs/nfsd/stats.c
+ *
+ * procfs-based user access to knfsd statistics
+ *
+ * /proc/net/rpc/nfsd
+ *
+ * Format:
+ * rc <hits> <misses> <nocache>
+ * Statistics for the reply cache
+ * fh <stale> <total-lookups> <anonlookups> <dir-not-in-dcache> <nondir-not-in-dcache>
+ * statistics for filehandle lookup
+ * io <bytes-read> <bytes-written>
+ * statistics for IO throughput
+ * th <threads> <fullcnt> <10%-20%> <20%-30%> ... <90%-100%> <100%>
+ * time (seconds) when nfsd thread usage above thresholds
+ * and number of times that all threads were in use
+ * ra cache-size <10% <20% <30% ... <100% not-found
+ * number of times that read-ahead entry was found that deep in
+ * the cache.
+ * plus generic RPC stats (see net/sunrpc/stats.c)
+ *
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/stat.h>
+#include <linux/module.h>
+
+#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/stats.h>
+#include <linux/nfsd/nfsd.h>
+#include <linux/nfsd/stats.h>
+
+struct nfsd_stats nfsdstats; /* the counters printed by nfsd_proc_show() */
+struct svc_stat nfsd_svcstats = { /* handed to svc_seq_show() and svc_proc_register() */
+ .program = &nfsd_program,
+};
+
+/*
+ * seq_file show routine: prints the "rc", "fh", "io", "th" and "ra"
+ * lines from nfsdstats and then lets svc_seq_show() add the RPC
+ * statistics.  Always returns 0.
+ */
+static int nfsd_proc_show(struct seq_file *seq, void *v)
+{
+	int slot;
+
+	seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
+		   nfsdstats.rchits,
+		   nfsdstats.rcmisses,
+		   nfsdstats.rcnocache,
+		   nfsdstats.fh_stale,
+		   nfsdstats.fh_lookup,
+		   nfsdstats.fh_anon,
+		   nfsdstats.fh_nocache_dir,
+		   nfsdstats.fh_nocache_nondir,
+		   nfsdstats.io_read,
+		   nfsdstats.io_write);
+
+	/* thread usage: th_usage[] is kept in jiffies, printed as sec.msec */
+	seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
+	for (slot = 0; slot < 10; slot++) {
+		unsigned int jifs = nfsdstats.th_usage[slot];
+		unsigned int sec = jifs / HZ;
+		unsigned int msec = (jifs % HZ)*1000/HZ;
+
+		seq_printf(seq, " %u.%03u", sec, msec);
+	}
+
+	/* newline and ra-cache */
+	seq_printf(seq, "\nra %u", nfsdstats.ra_size);
+	for (slot = 0; slot < 11; slot++)
+		seq_printf(seq, " %u", nfsdstats.ra_depth[slot]);
+	seq_putc(seq, '\n');
+
+	/* show my rpc info */
+	svc_seq_show(seq, &nfsd_svcstats);
+
+	return 0;
+}
+
+/* open method: binds nfsd_proc_show() to the file via single_open() */
+static int nfsd_proc_open(struct inode *inode, struct file *file)
+{
+	int rc = single_open(file, nfsd_proc_show, NULL);
+
+	return rc;
+}
+
+/*
+ * File operations passed to svc_proc_register() in nfsd_stat_init().
+ * Everything except open is the generic seq_file implementation.
+ */
+static struct file_operations nfsd_proc_fops = {
+ .owner = THIS_MODULE,
+ .open = nfsd_proc_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release, /* counterpart of single_open() in nfsd_proc_open() */
+};
+
+/*
+ * Register the statistics file; the return value of
+ * svc_proc_register() is not examined.
+ */
+void nfsd_stat_init(void)
+{
+	svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops);
+}
+
+/* Remove the statistics entry named "nfsd" again. */
+void nfsd_stat_shutdown(void)
+{
+	svc_proc_unregister("nfsd");
+}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
new file mode 100644
index 000000000000..e3e9d217236e
--- /dev/null
+++ b/fs/nfsd/vfs.c
@@ -0,0 +1,1859 @@
+#define MSNFS /* HACK HACK */
+/*
+ * linux/fs/nfsd/vfs.c
+ *
+ * File operations used by nfsd. Some of these have been ripped from
+ * other parts of the kernel because they weren't exported, others
+ * are partial duplicates with added or changed functionality.
+ *
+ * Note that several functions dget() the dentry upon which they want
+ * to act, most notably those that create directory entries. Response
+ * dentry's are dput()'d if necessary in the release callback.
+ * So if you notice code paths that apparently fail to dput() the
+ * dentry, don't worry--they have been taken care of.
+ *
+ * Copyright (C) 1995-1999 Olaf Kirch <okir@monad.swb.de>
+ * Zerocopy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
+ */
+
+#include <linux/config.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mount.h>
+#include <linux/major.h>
+#include <linux/ext2_fs.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/fcntl.h>
+#include <linux/net.h>
+#include <linux/unistd.h>
+#include <linux/slab.h>
+#include <linux/pagemap.h>
+#include <linux/in.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+#include <linux/vfs.h>
+#include <linux/delay.h>
+#include <linux/sunrpc/svc.h>
+#include <linux/nfsd/nfsd.h>
+#ifdef CONFIG_NFSD_V3
+#include <linux/nfs3.h>
+#include <linux/nfsd/xdr3.h>
+#endif /* CONFIG_NFSD_V3 */
+#include <linux/nfsd/nfsfh.h>
+#include <linux/quotaops.h>
+#include <linux/dnotify.h>
+#ifdef CONFIG_NFSD_V4
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr_acl.h>
+#include <linux/xattr.h>
+#include <linux/nfs4.h>
+#include <linux/nfs4_acl.h>
+#include <linux/nfsd_idmap.h>
+#include <linux/security.h>
+#endif /* CONFIG_NFSD_V4 */
+
+#include <asm/uaccess.h>
+
+#define NFSDDBG_FACILITY NFSDDBG_FILEOP
+#define NFSD_PARANOIA
+
+
+/* We must ignore files (but only files) which might have mandatory
+ * locks on them because there is no way to know if the accesser has
+ * the lock.
+ */
+#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i))
+
+/*
+ * This is a cache of readahead params that help us choose the proper
+ * readahead strategy. Initially, we set all readahead parameters to 0
+ * and let the VFS handle things.
+ * If you increase the number of cached files very much, you'll need to
+ * add a hash table here.
+ */
+struct raparms {
+ struct raparms *p_next; /* next entry on the raparm_cache list */
+ unsigned int p_count; /* users: ++ in nfsd_get_raparms(), -- in nfsd_vfs_read() */
+ ino_t p_ino; /* inode number this entry belongs to */
+ dev_t p_dev; /* device of that inode */
+ int p_set; /* non-zero once p_ra holds saved state */
+ struct file_ra_state p_ra; /* readahead state copied from/to file->f_ra */
+};
+
+static struct raparms * raparml; /* NOTE(review): not referenced in this part of the file */
+static struct raparms * raparm_cache; /* head of the list searched by nfsd_get_raparms() */
+
+/*
+ * Called from nfsd_lookup and encode_dirent. Check if we have crossed
+ * a mount point.
+ *
+ * On entry *dpp is the covered dentry and *expp the export it was
+ * reached through; the caller holds a reference on both.
+ *
+ * If the filesystem mounted there is exported to this client and
+ * either the current export has NFSEXP_CROSSMOUNT or the new export is
+ * EX_NOHIDE, *dpp and *expp are replaced by the root of the mounted
+ * filesystem and its export; the references on the old pair are
+ * dropped and the caller now owns references on the new pair.
+ * Otherwise *dpp and *expp are left unchanged.
+ *
+ * Returns nfs_ok in both of these cases.  If the export lookup fails,
+ * the error from exp_get_by_name() is returned as a negative errno
+ * (the original comment names -EAGAIN) and *dpp and *expp are
+ * unchanged.
+ */
+int
+nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+ struct svc_export **expp)
+{
+ struct svc_export *exp = *expp, *exp2 = NULL;
+ struct dentry *dentry = *dpp;
+ /* private references: follow_down() replaces mnt and mounts in place */
+ struct vfsmount *mnt = mntget(exp->ex_mnt);
+ struct dentry *mounts = dget(dentry);
+ int err = nfs_ok;
+
+ /* keep descending while the dentry reached is itself a mount point */
+ while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+
+ exp2 = exp_get_by_name(exp->ex_client, mnt, mounts, &rqstp->rq_chandle);
+ if (IS_ERR(exp2)) {
+ err = PTR_ERR(exp2);
+ dput(mounts);
+ mntput(mnt);
+ goto out;
+ }
+ if (exp2 && ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2))) {
+ /* successfully crossed mount point */
+ exp_put(exp);
+ *expp = exp2;
+ dput(dentry);
+ *dpp = mounts;
+ } else {
+ /* not exported or crossing not allowed: stay on the covered dentry */
+ if (exp2) exp_put(exp2);
+ dput(mounts);
+ }
+ mntput(mnt);
+out:
+ return err;
+}
+
+/*
+ * Look up one component of a pathname.
+ * N.B. After this call _both_ fhp and resfh need an fh_put
+ *
+ * If the lookup would cross a mountpoint, and the mounted filesystem
+ * is exported to the client with NFSEXP_NOHIDE, then the lookup is
+ * accepted as it stands and the mounted directory is
+ * returned. Otherwise the covered directory is returned.
+ * NOTE: this mountpoint crossing is not supported properly by all
+ * clients and is explicitly disallowed for NFSv3
+ * NeilBrown <neilb@cse.unsw.edu.au>
+ *
+ * fhp is the directory to search, name/len the component (not
+ * necessarily NUL-terminated, it is printed with %.*s), resfh receives
+ * the resulting file handle.
+ *
+ * Returns an nfserr status: errors from the helpers are passed through
+ * nfserrno(), and nfserr_noent is returned when the handle could be
+ * composed but the dentry has no inode.
+ */
+int
+nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
+ int len, struct svc_fh *resfh)
+{
+ struct svc_export *exp;
+ struct dentry *dparent;
+ struct dentry *dentry;
+ int err;
+
+ dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
+
+ /* Obtain dentry and export. */
+ err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC);
+ if (err)
+ return err;
+
+ dparent = fhp->fh_dentry;
+ exp = fhp->fh_export;
+ /* own reference: exp may be swapped for another export below, it is dropped at out: */
+ exp_get(exp);
+
+ err = nfserr_acces;
+
+ /* Lookup the name, but don't follow links */
+ if (isdotent(name, len)) {
+ /* "." (len 1) or ".." */
+ if (len==1)
+ dentry = dget(dparent);
+ else if (dparent != exp->ex_dentry) {
+ /* ".." below the export root: plain parent */
+ dentry = dget_parent(dparent);
+ } else if (!EX_NOHIDE(exp))
+ dentry = dget(dparent); /* .. == . just like at / */
+ else {
+ /* checking mountpoint crossing is very different when stepping up */
+ struct svc_export *exp2 = NULL;
+ struct dentry *dp;
+ struct vfsmount *mnt = mntget(exp->ex_mnt);
+ dentry = dget(dparent);
+ /* climb out of every mount whose root we are sitting on */
+ while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
+ ;
+ dp = dget_parent(dentry);
+ dput(dentry);
+ dentry = dp;
+
+ exp2 = exp_parent(exp->ex_client, mnt, dentry,
+ &rqstp->rq_chandle);
+ if (IS_ERR(exp2)) {
+ err = PTR_ERR(exp2);
+ dput(dentry);
+ mntput(mnt);
+ goto out_nfserr;
+ }
+ if (!exp2) {
+ /* nothing exported up there: ".." stays at the export root */
+ dput(dentry);
+ dentry = dget(dparent);
+ } else {
+ exp_put(exp);
+ exp = exp2;
+ }
+ mntput(mnt);
+ }
+ } else {
+ /* NOTE(review): no fh_unlock() in this function -- presumably the caller's fh_put() releases the lock; confirm */
+ fh_lock(fhp);
+ dentry = lookup_one_len(name, dparent, len);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+ goto out_nfserr;
+ /*
+ * check if we have crossed a mount point ...
+ */
+ if (d_mountpoint(dentry)) {
+ if ((err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
+ dput(dentry);
+ goto out_nfserr;
+ }
+ }
+ }
+ /*
+ * Note: we compose the file handle now, but as the
+ * dentry may be negative, it may need to be updated.
+ */
+ err = fh_compose(resfh, exp, dentry, fhp);
+ if (!err && !dentry->d_inode)
+ err = nfserr_noent;
+ dput(dentry);
+out:
+ exp_put(exp);
+ return err;
+
+out_nfserr:
+ /* err holds a negative errno here, convert it to an nfserr status */
+ err = nfserrno(err);
+ goto out;
+}
+
+/*
+ * Set various file attributes.
+ * N.B. After this call fhp needs an fh_put
+ *
+ * iap describes the attributes to change and is modified in place.
+ * When check_guard is non-zero the change is applied only if
+ * guardtime equals the inode's current ctime (in seconds); otherwise
+ * nfserr_notsync is returned.
+ *
+ * Returns an nfserr status.  If iap->ia_valid is empty, nothing is
+ * changed and the result of fh_verify() is returned.
+ */
+int
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+ int check_guard, time_t guardtime)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+ int accmode = MAY_SATTR;
+ int ftype = 0;
+ int imode;
+ int err;
+ int size_change = 0;
+
+ /* changing times or size additionally needs write permission */
+ if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+ accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE;
+ /* a size change is only accepted on regular files */
+ if (iap->ia_valid & ATTR_SIZE)
+ ftype = S_IFREG;
+
+ /* Get inode */
+ err = fh_verify(rqstp, fhp, ftype, accmode);
+ if (err || !iap->ia_valid)
+ goto out;
+
+ dentry = fhp->fh_dentry;
+ inode = dentry->d_inode;
+
+ /* NFSv2 does not differentiate between "set-[ac]time-to-now"
+ * which only requires access, and "set-[ac]time-to-X" which
+ * requires ownership.
+ * So if it looks like it might be "set both to the same time which
+ * is close to now", and if inode_change_ok fails, then we
+ * convert to "set to now" instead of "set to explicit time"
+ *
+ * We only call inode_change_ok as the last test as technically
+ * it is not an interface that we should be using. It is only
+ * valid if the filesystem does not define its own i_op->setattr.
+ */
+#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
+#define MAX_TOUCH_TIME_ERROR (30*60)
+ if ((iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET
+ && iap->ia_mtime.tv_sec == iap->ia_atime.tv_sec
+ ) {
+ /* Looks probable. Now just make sure time is in the right ballpark.
+ * Solaris, at least, doesn't seem to care what the time request is.
+ * We require it be within 30 minutes of now.
+ */
+ time_t delta = iap->ia_atime.tv_sec - get_seconds();
+ if (delta<0) delta = -delta;
+ if (delta < MAX_TOUCH_TIME_ERROR &&
+ inode_change_ok(inode, iap) != 0) {
+ /* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME
+ * this will cause notify_change to set these times to "now"
+ */
+ iap->ia_valid &= ~BOTH_TIME_SET;
+ }
+ }
+
+ /* The size case is special. It changes the file as well as the attributes. */
+ if (iap->ia_valid & ATTR_SIZE) {
+ /* shrinking needs truncate permission on top of write permission */
+ if (iap->ia_size < inode->i_size) {
+ err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE);
+ if (err)
+ goto out;
+ }
+
+ /*
+ * If we are changing the size of the file, then
+ * we need to break all leases.
+ */
+ err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
+ if (err == -EWOULDBLOCK)
+ err = -ETIMEDOUT;
+ if (err) /* ENOMEM or EWOULDBLOCK */
+ goto out_nfserr;
+
+ /* held until after notify_change(); size_change records that it must be dropped */
+ err = get_write_access(inode);
+ if (err)
+ goto out_nfserr;
+
+ size_change = 1;
+ err = locks_verify_truncate(inode, NULL, iap->ia_size);
+ if (err) {
+ put_write_access(inode);
+ goto out_nfserr;
+ }
+ DQUOT_INIT(inode);
+ }
+
+ imode = inode->i_mode;
+ if (iap->ia_valid & ATTR_MODE) {
+ /* take only the permission bits from the request, keep the remaining mode bits of the inode */
+ iap->ia_mode &= S_IALLUGO;
+ imode = iap->ia_mode |= (imode & ~S_IALLUGO);
+ }
+
+ /* Revoke setuid/setgid bit on chown/chgrp */
+ if ((iap->ia_valid & ATTR_UID) && iap->ia_uid != inode->i_uid)
+ iap->ia_valid |= ATTR_KILL_SUID;
+ if ((iap->ia_valid & ATTR_GID) && iap->ia_gid != inode->i_gid)
+ iap->ia_valid |= ATTR_KILL_SGID;
+
+ /* Change the attributes. */
+
+ iap->ia_valid |= ATTR_CTIME;
+
+ /* stays nfserr_notsync if the guard check below fails */
+ err = nfserr_notsync;
+ if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
+ fh_lock(fhp);
+ err = notify_change(dentry, iap);
+ err = nfserrno(err);
+ fh_unlock(fhp);
+ }
+ if (size_change)
+ put_write_access(inode);
+ /* on a sync export, push the changed inode out before replying */
+ if (!err)
+ if (EX_ISSYNC(fhp->fh_export))
+ write_inode_now(inode, 1);
+out:
+ return err;
+
+out_nfserr:
+ /* err holds a negative errno here, convert it to an nfserr status */
+ err = nfserrno(err);
+ goto out;
+}
+
+#if defined(CONFIG_NFSD_V4)
+
+/*
+ * Store one POSIX ACL on the inode behind @dentry as the extended
+ * attribute named @key.
+ *
+ * The ACL is converted with posix_acl_to_xattr() into a temporary
+ * buffer and handed to the filesystem's ->setxattr method while i_sem
+ * is held.  The security module is asked first; if it refuses, the
+ * attribute is not written and its error is returned.
+ *
+ * Returns 0 on success or a negative errno: -ENOMEM if the buffer
+ * cannot be allocated, -EOPNOTSUPP if the inode has no ->setxattr
+ * method, otherwise whatever posix_acl_to_xattr(), the security hook
+ * or ->setxattr reported.
+ */
+static int
+set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
+{
+	struct inode *inode = dentry->d_inode;
+	size_t buflen;
+	char *buf;
+	int len;
+	int error;
+
+	buflen = posix_acl_xattr_size(pacl->a_count);
+	buf = kmalloc(buflen, GFP_KERNEL);
+	error = -ENOMEM;
+	if (buf == NULL)
+		goto out;
+
+	len = posix_acl_to_xattr(pacl, buf, buflen);
+	if (len < 0) {
+		error = len;
+		goto out;
+	}
+
+	error = -EOPNOTSUPP;
+	if (inode->i_op && inode->i_op->setxattr) {
+		down(&inode->i_sem);
+		/* a veto from the security module must stop the update */
+		error = security_inode_setxattr(dentry, key, buf, len, 0);
+		if (!error)
+			error = inode->i_op->setxattr(dentry, key, buf, len, 0);
+		if (!error)
+			security_inode_post_setxattr(dentry, key, buf, len, 0);
+		up(&inode->i_sem);
+	}
+out:
+	/* buf is NULL on the allocation-failure path; kfree() is called with it as before */
+	kfree(buf);
+	return error;
+}
+
+/*
+ * Apply an NFSv4 ACL to the object behind fhp.
+ *
+ * The ACL is converted by nfs4_acl_nfsv4_to_posix() into an access ACL
+ * and a default ACL; each one that comes back non-NULL is stored with
+ * set_nfsv4_acl_one().
+ *
+ * Returns an nfserr status: the result of fh_verify() if that fails,
+ * nfserr_attrnotsupp if the conversion reports -EINVAL, and any other
+ * negative errno translated through nfserrno().
+ * NOTE(review): if storing the default ACL fails, the access ACL that
+ * was already stored is not rolled back.
+ */
+int
+nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct nfs4_acl *acl)
+{
+ int error;
+ struct dentry *dentry;
+ struct inode *inode;
+ struct posix_acl *pacl = NULL, *dpacl = NULL;
+ unsigned int flags = 0;
+
+ /* Get inode */
+ error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR);
+ if (error)
+ goto out;
+
+ dentry = fhp->fh_dentry;
+ inode = dentry->d_inode;
+ if (S_ISDIR(inode->i_mode))
+ flags = NFS4_ACL_DIR;
+
+ error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags);
+ if (error == -EINVAL) {
+ error = nfserr_attrnotsupp;
+ goto out;
+ } else if (error < 0)
+ goto out_nfserr;
+
+ if (pacl) {
+ error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS);
+ if (error < 0)
+ goto out_nfserr;
+ }
+
+ if (dpacl) {
+ error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT);
+ if (error < 0)
+ goto out_nfserr;
+ }
+
+ error = nfs_ok;
+
+out:
+ /* reached on every path, with pacl/dpacl possibly still NULL */
+ posix_acl_release(pacl);
+ posix_acl_release(dpacl);
+ return (error);
+out_nfserr:
+ /* error holds a negative errno here, convert it to an nfserr status */
+ error = nfserrno(error);
+ goto out;
+}
+
+/*
+ * Read the extended attribute @key of @dentry and convert it into a
+ * POSIX ACL.
+ *
+ * The attribute is fetched in two steps: a call with a NULL buffer to
+ * learn the size, then a second call into a buffer of that size.  The
+ * length reported by the second call is the one that gets parsed, so
+ * that bytes which were never filled in are not interpreted should the
+ * attribute have shrunk in between.
+ *
+ * Returns the result of posix_acl_from_xattr() on success, otherwise
+ * an ERR_PTR(): -EOPNOTSUPP if the inode has no ->getxattr method,
+ * the error of the security hook, -ENODATA if the attribute is empty,
+ * -ENOMEM if the buffer cannot be allocated, or the error reported by
+ * ->getxattr.
+ */
+static struct posix_acl *
+_get_posix_acl(struct dentry *dentry, char *key)
+{
+	struct inode *inode = dentry->d_inode;
+	char *buf = NULL;
+	int buflen, len, error = 0;
+	struct posix_acl *pacl = NULL;
+
+	error = -EOPNOTSUPP;
+	if (inode->i_op == NULL)
+		goto out_err;
+	if (inode->i_op->getxattr == NULL)
+		goto out_err;
+
+	error = security_inode_getxattr(dentry, key);
+	if (error)
+		goto out_err;
+
+	/* size probe */
+	buflen = inode->i_op->getxattr(dentry, key, NULL, 0);
+	if (buflen <= 0) {
+		error = buflen < 0 ? buflen : -ENODATA;
+		goto out_err;
+	}
+
+	buf = kmalloc(buflen, GFP_KERNEL);
+	if (buf == NULL) {
+		error = -ENOMEM;
+		goto out_err;
+	}
+
+	/* the real fetch; its return value is the number of valid bytes */
+	len = inode->i_op->getxattr(dentry, key, buf, buflen);
+	if (len <= 0) {
+		error = len < 0 ? len : -ENODATA;
+		goto out_err;
+	}
+
+	pacl = posix_acl_from_xattr(buf, len);
+ out:
+	kfree(buf);
+	return pacl;
+ out_err:
+	pacl = ERR_PTR(error);
+	goto out;
+}
+
+/*
+ * Build the NFSv4 ACL for @dentry from its POSIX ACLs.
+ *
+ * The access ACL is read from the XATTR_NAME_ACL_ACCESS attribute; if
+ * that attribute does not exist (-ENODATA) an ACL is synthesised from
+ * the mode bits.  For directories the default ACL is read as well,
+ * where a missing attribute simply means "no default ACL".
+ *
+ * Returns 0 with *acl set on success.  On failure a negative errno is
+ * returned; *acl is set to NULL only when the final conversion fails
+ * and is left untouched on the earlier error paths.
+ */
+int
+nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl)
+{
+ struct inode *inode = dentry->d_inode;
+ int error = 0;
+ struct posix_acl *pacl = NULL, *dpacl = NULL;
+ unsigned int flags = 0;
+
+ pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS);
+ if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
+ pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
+ if (IS_ERR(pacl)) {
+ error = PTR_ERR(pacl);
+ /* reset so that posix_acl_release() at out: is not handed an ERR_PTR */
+ pacl = NULL;
+ goto out;
+ }
+
+ if (S_ISDIR(inode->i_mode)) {
+ dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT);
+ if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
+ dpacl = NULL;
+ else if (IS_ERR(dpacl)) {
+ error = PTR_ERR(dpacl);
+ dpacl = NULL;
+ goto out;
+ }
+ flags = NFS4_ACL_DIR;
+ }
+
+ *acl = nfs4_acl_posix_to_nfsv4(pacl, dpacl, flags);
+ if (IS_ERR(*acl)) {
+ error = PTR_ERR(*acl);
+ *acl = NULL;
+ }
+ out:
+ posix_acl_release(pacl);
+ posix_acl_release(dpacl);
+ return error;
+}
+
+#endif /* defined(CONFIG_NFSD_V4) */
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * Check server access rights to a file system object
+ */
+/*
+ * One row of an access translation table: the NFS3_ACCESS_* bit a
+ * client may ask about and the MAY_* mask that nfsd_access() passes to
+ * nfsd_permission() to answer it.  Each table ends with a { 0, 0 } row.
+ */
+struct accessmap {
+ u32 access;
+ int how;
+};
+/* table used by nfsd_access() for regular files */
+static struct accessmap nfs3_regaccess[] = {
+ { NFS3_ACCESS_READ, MAY_READ },
+ { NFS3_ACCESS_EXECUTE, MAY_EXEC },
+ { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC },
+ { NFS3_ACCESS_EXTEND, MAY_WRITE },
+
+ { 0, 0 }
+};
+
+/* table used by nfsd_access() for directories */
+static struct accessmap nfs3_diraccess[] = {
+ { NFS3_ACCESS_READ, MAY_READ },
+ { NFS3_ACCESS_LOOKUP, MAY_EXEC },
+ { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC },
+ { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE },
+ { NFS3_ACCESS_DELETE, MAY_REMOVE },
+
+ { 0, 0 }
+};
+
+/* table used by nfsd_access() for everything else */
+static struct accessmap nfs3_anyaccess[] = {
+ /* Some clients - Solaris 2.6 at least, make an access call
+ * to the server to check for access for things like /dev/null
+ * (which really, the server doesn't care about). So
+ * We provide simple access checking for them, looking
+ * mainly at mode bits, and we make sure to ignore read-only
+ * filesystem checks
+ */
+ { NFS3_ACCESS_READ, MAY_READ },
+ { NFS3_ACCESS_EXECUTE, MAY_EXEC },
+ { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_LOCAL_ACCESS },
+ { NFS3_ACCESS_EXTEND, MAY_WRITE|MAY_LOCAL_ACCESS },
+
+ { 0, 0 }
+};
+
+/*
+ * Work out which of the access rights asked for in *access the caller
+ * actually has on the object behind fhp.
+ *
+ * On success *access is replaced by the subset of requested bits that
+ * nfsd_permission() granted, and, if supported is not NULL, *supported
+ * receives the requested bits that exist in the table for this kind of
+ * object.  Bits refused with nfserr_rofs, nfserr_acces or nfserr_perm
+ * are simply left out of *access.
+ *
+ * Returns the result of fh_verify(), or the first other error from
+ * nfsd_permission(); in the latter case *access and *supported are not
+ * written.
+ */
+int
+nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported)
+{
+ struct accessmap *map;
+ struct svc_export *export;
+ struct dentry *dentry;
+ u32 query, result = 0, sresult = 0;
+ unsigned int error;
+
+ error = fh_verify(rqstp, fhp, 0, MAY_NOP);
+ if (error)
+ goto out;
+
+ export = fhp->fh_export;
+ dentry = fhp->fh_dentry;
+
+ /* pick the translation table by file type */
+ if (S_ISREG(dentry->d_inode->i_mode))
+ map = nfs3_regaccess;
+ else if (S_ISDIR(dentry->d_inode->i_mode))
+ map = nfs3_diraccess;
+ else
+ map = nfs3_anyaccess;
+
+
+ query = *access;
+ /* the tables are terminated by a row whose access field is 0 */
+ for (; map->access; map++) {
+ if (map->access & query) {
+ unsigned int err2;
+
+ sresult |= map->access;
+
+ err2 = nfsd_permission(export, dentry, map->how);
+ switch (err2) {
+ case nfs_ok:
+ result |= map->access;
+ break;
+
+ /* the following error codes just mean the access was not allowed,
+ * rather than an error occurred */
+ case nfserr_rofs:
+ case nfserr_acces:
+ case nfserr_perm:
+ /* simply don't "or" in the access bit. */
+ break;
+ default:
+ error = err2;
+ goto out;
+ }
+ }
+ }
+ *access = result;
+ if (supported)
+ *supported = sresult;
+
+ out:
+ return error;
+}
+#endif /* CONFIG_NFSD_V3 */
+
+
+
+/*
+ * Open an existing file or directory.
+ * The access argument indicates the type of open (read/write/lock)
+ * N.B. After this call fhp needs an fh_put
+ *
+ * type is handed to fh_verify() as the expected file type.  On success
+ * *filp holds the opened file; it is opened O_WRONLY when access
+ * contains MAY_WRITE and O_RDONLY otherwise, always with O_LARGEFILE.
+ *
+ * Returns an nfserr status.  nfserr_perm is returned for a write to an
+ * append-only inode, for a regular file with mandatory locking and for
+ * an inode without file operations.
+ */
+int
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+ int access, struct file **filp)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+ int flags = O_RDONLY|O_LARGEFILE, err;
+
+ /*
+ * If we get here, then the client has already done an "open",
+ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+ * in case a chmod has now revoked permission.
+ */
+ err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE);
+ if (err)
+ goto out;
+
+ dentry = fhp->fh_dentry;
+ inode = dentry->d_inode;
+
+ /* Disallow write access to files with the append-only bit set
+ * or any access when mandatory locking enabled
+ */
+ err = nfserr_perm;
+ if (IS_APPEND(inode) && (access & MAY_WRITE))
+ goto out;
+ if (IS_ISMNDLK(inode))
+ goto out;
+
+ if (!inode->i_fop)
+ goto out;
+
+ /*
+ * Check to see if there are any leases on this file.
+ * This may block while leases are broken.
+ * NOTE(review): O_NONBLOCK is passed and -EWOULDBLOCK is turned
+ * into -ETIMEDOUT below, so "may block" looks outdated -- confirm.
+ */
+ err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0));
+ if (err == -EWOULDBLOCK)
+ err = -ETIMEDOUT;
+ if (err) /* NOMEM or WOULDBLOCK */
+ goto out_nfserr;
+
+ if (access & MAY_WRITE) {
+ flags = O_WRONLY|O_LARGEFILE;
+
+ DQUOT_INIT(inode);
+ }
+ /* dentry_open() is handed its own references on the dentry and the mount */
+ *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags);
+ if (IS_ERR(*filp))
+ err = PTR_ERR(*filp);
+out_nfserr:
+ /* err is 0 or a negative errno here */
+ if (err)
+ err = nfserrno(err);
+out:
+ return err;
+}
+
+/*
+ * Close a file.
+ */
+/* Counterpart of nfsd_open(): gives the file back with fput(). */
+void nfsd_close(struct file *filp)
+{
+	fput(filp);
+}
+
+/*
+ * Sync a file
+ * As this calls fsync (not fdatasync) there is no need for a write_inode
+ * after it.
+ */
+/*
+ * Write out the pages of dp's inode: start writeback on its mapping,
+ * call the ->fsync method of fop if there is one, then wait for the
+ * writeback to finish.  filp may be NULL (see nfsd_sync_dir()).
+ * The return values of all three steps are discarded.
+ */
+static inline void nfsd_dosync(struct file *filp, struct dentry *dp,
+			       struct file_operations *fop)
+{
+	struct inode *inode = dp->d_inode;
+	int (*sync_fn) (struct file *, struct dentry *, int);
+
+	filemap_fdatawrite(inode->i_mapping);
+
+	if (fop) {
+		sync_fn = fop->fsync;
+		if (sync_fn)
+			sync_fn(filp, dp, 0);
+	}
+
+	filemap_fdatawait(inode->i_mapping);
+}
+
+
+/* Sync an open file through nfsd_dosync() while holding the inode's i_sem. */
+static void nfsd_sync(struct file *filp)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct inode *inode = dentry->d_inode;
+
+	dprintk("nfsd: sync file %s\n", dentry->d_name.name);
+
+	down(&inode->i_sem);
+	nfsd_dosync(filp, dentry, filp->f_op);
+	up(&inode->i_sem);
+}
+
+/*
+ * Sync a directory through nfsd_dosync().  There is no open file, so
+ * NULL is passed for it and the file operations are taken from the
+ * inode.  Unlike nfsd_sync(), i_sem is not taken here.
+ */
+static void nfsd_sync_dir(struct dentry *dp)
+{
+	struct file_operations *dir_fops = dp->d_inode->i_fop;
+
+	nfsd_dosync(NULL, dp, dir_fops);
+}
+
+/*
+ * Obtain the readahead parameters for the file
+ * specified by (dev, ino).
+ *
+ * The list at raparm_cache is searched front to back.  On a hit the
+ * entry is returned; on a miss the unused entry (p_count == 0) nearest
+ * the end of the list is re-labelled for this file with p_set cleared.
+ * Either way the entry is moved to the front of the list and its
+ * p_count is incremented; the caller has to decrement it again when
+ * done (nfsd_vfs_read() does so under ra_lock).
+ *
+ * Returns NULL when there is no match and no unused entry.
+ *
+ * Statistics: nfsdstats.ra_depth[] has one bucket per tenth of the
+ * list for hits (0..9) and bucket 10 for misses.
+ */
+static DEFINE_SPINLOCK(ra_lock);
+
+static inline struct raparms *
+nfsd_get_raparms(dev_t dev, ino_t ino)
+{
+	struct raparms *ra, **rap, **frap = NULL;
+	int depth = 0;
+
+	spin_lock(&ra_lock);
+	for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) {
+		if (ra->p_ino == ino && ra->p_dev == dev)
+			goto found;
+		depth++;
+		if (ra->p_count == 0)
+			frap = rap;
+	}
+	/*
+	 * Miss.  depth == ra_size makes depth*10/ra_size below exactly 10,
+	 * the "not-found" bucket.  (ra_size*11/10 yields index 11, one
+	 * past the last bucket, whenever ra_size is a multiple of 10.)
+	 */
+	depth = nfsdstats.ra_size;
+	if (!frap) {
+		spin_unlock(&ra_lock);
+		return NULL;
+	}
+	/* recycle the unused entry for this file */
+	rap = frap;
+	ra = *frap;
+	ra->p_dev = dev;
+	ra->p_ino = ino;
+	ra->p_set = 0;
+found:
+	/* move the entry to the front unless it is there already */
+	if (rap != &raparm_cache) {
+		*rap = ra->p_next;
+		ra->p_next = raparm_cache;
+		raparm_cache = ra;
+	}
+	ra->p_count++;
+	nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++;
+	spin_unlock(&ra_lock);
+	return ra;
+}
+
+/*
+ * Grab and keep cached pages associated with a file in the svc_rqst
+ * so that they can be passed to the network sendmsg/sendpage routines
+ * directly. They will be released after the sending has completed.
+ */
+/*
+ * Actor passed to ->sendfile by nfsd_vfs_read().  Rather than copying
+ * data it records the page in rqstp->rq_respages (taking a reference)
+ * and accounts for the bytes in rq_res.page_len.  A page identical to
+ * the one recorded last is not recorded a second time.
+ *
+ * At most desc->count bytes are accepted; desc->count and
+ * desc->written are updated and the accepted byte count is returned.
+ */
+static int
+nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset , unsigned long size)
+{
+	struct svc_rqst *rqstp = desc->arg.data;
+	unsigned long remaining = desc->count;
+	int first_chunk = (rqstp->rq_res.page_len == 0);
+
+	if (size > remaining)
+		size = remaining;
+
+	/* a new page needs a reference and a slot in rq_respages */
+	if (first_chunk || page != rqstp->rq_respages[rqstp->rq_resused-1]) {
+		get_page(page);
+		rqstp->rq_respages[rqstp->rq_resused++] = page;
+	}
+
+	if (first_chunk) {
+		/* the very first chunk also fixes where the data starts */
+		rqstp->rq_res.page_base = offset;
+		rqstp->rq_res.page_len = size;
+	} else {
+		rqstp->rq_res.page_len += size;
+	}
+
+	desc->count = remaining - size;
+	desc->written += size;
+	return size;
+}
+
+/*
+ * Read from an already opened file on behalf of nfsd_read().
+ *
+ * *count is the number of bytes wanted on entry and the number of
+ * bytes read on successful return.  vec/vlen describe the destination
+ * and are used only when the file has no ->sendfile method; with
+ * ->sendfile the pages are attached to rqstp by nfsd_read_actor().
+ *
+ * Returns 0 on success, otherwise an nfserr status (nfserr_perm when
+ * the MSNFS lock check refuses the read).
+ */
+static inline int
+nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+{
+ struct inode *inode;
+ struct raparms *ra;
+ mm_segment_t oldfs;
+ int err;
+
+ err = nfserr_perm;
+ inode = file->f_dentry->d_inode;
+#ifdef MSNFS
+ if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+ (!lock_may_read(inode, offset, *count)))
+ goto out;
+#endif
+
+ /* Get readahead parameters */
+ ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
+
+ /* continue from the readahead state saved by an earlier read of this file */
+ if (ra && ra->p_set)
+ file->f_ra = ra->p_ra;
+
+ if (file->f_op->sendfile) {
+ /* NOTE(review): presumably hands back the pages the decoder reserved for a copying read -- confirm */
+ svc_pushback_unused_pages(rqstp);
+ err = file->f_op->sendfile(file, &offset, *count,
+ nfsd_read_actor, rqstp);
+ } else {
+ /* vec holds kernel addresses, hence KERNEL_DS around the call and the __user cast */
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
+ set_fs(oldfs);
+ }
+
+ /* Write back readahead params */
+ if (ra) {
+ spin_lock(&ra_lock);
+ ra->p_ra = file->f_ra;
+ ra->p_set = 1;
+ ra->p_count--;
+ spin_unlock(&ra_lock);
+ }
+
+ /* err is the byte count on success, a negative errno on failure */
+ if (err >= 0) {
+ nfsdstats.io_read += err;
+ *count = err;
+ err = 0;
+ dnotify_parent(file->f_dentry, DN_ACCESS);
+ } else
+ err = nfserrno(err);
+out:
+ return err;
+}
+
+/*
+ * Write to an already opened file on behalf of nfsd_write().
+ *
+ * vec/vlen describe the data, cnt is the byte count used for the
+ * MSNFS lock check and for the I/O statistics.  *stablep is the
+ * stability level asked for; it is raised to 2 (FILE_SYNC) here when
+ * the file has no ->fsync method.
+ *
+ * Returns 0 on success, otherwise an nfserr status (nfserr_perm when
+ * the MSNFS lock check refuses the write).
+ */
+static inline int
+nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+ loff_t offset, struct kvec *vec, int vlen,
+ unsigned long cnt, int *stablep)
+{
+ struct svc_export *exp;
+ struct dentry *dentry;
+ struct inode *inode;
+ mm_segment_t oldfs;
+ int err = 0;
+ int stable = *stablep;
+
+ err = nfserr_perm;
+
+#ifdef MSNFS
+ if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+ (!lock_may_write(file->f_dentry->d_inode, offset, cnt)))
+ goto out;
+#endif
+
+ dentry = file->f_dentry;
+ inode = dentry->d_inode;
+ exp = fhp->fh_export;
+
+ /*
+ * Request sync writes if
+ * - the sync export option has been set, or
+ * - the client requested O_SYNC behavior (NFSv3 feature).
+ * - The file system doesn't support fsync().
+ * When gathered writes have been configured for this volume,
+ * flushing the data to disk is handled separately below.
+ */
+
+ if (file->f_op->fsync == 0) {/* COMMIT3 cannot work */
+ stable = 2;
+ *stablep = 2; /* FILE_SYNC */
+ }
+
+ /* an export without the sync option overrides everything decided above */
+ if (!EX_ISSYNC(exp))
+ stable = 0;
+ if (stable && !EX_WGATHER(exp))
+ file->f_flags |= O_SYNC;
+
+ /* Write the data. */
+ /* vec holds kernel addresses, hence KERNEL_DS around the call and the __user cast */
+ oldfs = get_fs(); set_fs(KERNEL_DS);
+ err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
+ set_fs(oldfs);
+ /* NOTE(review): the statistic is advanced by cnt, not by the byte count vfs_writev() returned */
+ if (err >= 0) {
+ nfsdstats.io_write += cnt;
+ dnotify_parent(file->f_dentry, DN_MODIFY);
+ }
+
+ /* clear setuid/setgid flag after write */
+ if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) {
+ struct iattr ia;
+ ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID;
+
+ down(&inode->i_sem);
+ notify_change(dentry, &ia);
+ up(&inode->i_sem);
+ }
+
+ if (err >= 0 && stable) {
+ /* inode and device of the previous stable write by any caller; updated without locking */
+ static ino_t last_ino;
+ static dev_t last_dev;
+
+ /*
+ * Gathered writes: If another process is currently
+ * writing to the file, there's a high chance
+ * this is another nfsd (triggered by a bulk write
+ * from a client's biod). Rather than syncing the
+ * file with each write request, we sleep for 10 msec.
+ *
+ * I don't know if this roughly approximates
+ * C. Juszak's idea of gathered writes, but it's a
+ * nice and simple solution (IMHO), and it seems to
+ * work:-)
+ */
+ if (EX_WGATHER(exp)) {
+ if (atomic_read(&inode->i_writecount) > 1
+ || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+ dprintk("nfsd: write defer %d\n", current->pid);
+ msleep(10);
+ dprintk("nfsd: write resume %d\n", current->pid);
+ }
+
+ if (inode->i_state & I_DIRTY) {
+ dprintk("nfsd: write sync %d\n", current->pid);
+ nfsd_sync(file);
+ }
+#if 0
+ wake_up(&inode->i_wait);
+#endif
+ }
+ last_ino = inode->i_ino;
+ last_dev = inode->i_sb->s_dev;
+ }
+
+ dprintk("nfsd: write complete err=%d\n", err);
+ /* err is the byte count on success, a negative errno on failure */
+ if (err >= 0)
+ err = 0;
+ else
+ err = nfserrno(err);
+out:
+ return err;
+}
+
+/*
+ * Read data from a file. count must contain the requested read count
+ * on entry. On return, *count contains the number of bytes actually read.
+ *
+ * If the caller passes an already-open file, only an access check
+ * (MAY_READ with owner override) is made before reading.  Otherwise the
+ * file is opened here for the duration of the read and closed again.
+ *
+ * Returns 0 or the error produced by the permission check, the open,
+ * or the read helper.
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+		loff_t offset, struct kvec *vec, int vlen,
+		unsigned long *count)
+{
+	int err;
+
+	if (file) {
+		/* Caller owns the open file: check access, then read. */
+		err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
+				MAY_READ|MAY_OWNER_OVERRIDE);
+		if (!err)
+			err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
+		return err;
+	}
+
+	/* No file supplied: open, read, close. */
+	err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file);
+	if (err)
+		return err;
+
+	err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
+	nfsd_close(file);
+	return err;
+}
+
+/*
+ * Write data to a file.
+ * The stable flag requests synchronous writes; stablep is handed on
+ * unchanged to nfsd_vfs_write.
+ *
+ * If the caller passes an already-open file, only an access check
+ * (MAY_WRITE with owner override) is made before writing.  Otherwise the
+ * file is opened here and closed again; in that case a zero-length
+ * request (cnt == 0) performs the open and close only.
+ *
+ * Returns 0 or the error produced by the permission check, the open,
+ * or the write helper.
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+		loff_t offset, struct kvec *vec, int vlen, unsigned long cnt,
+		int *stablep)
+{
+	int err;
+
+	if (file) {
+		/* Caller owns the open file: check access, then write. */
+		err = nfsd_permission(fhp->fh_export, fhp->fh_dentry,
+				MAY_WRITE|MAY_OWNER_OVERRIDE);
+		if (!err)
+			err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
+					cnt, stablep);
+		return err;
+	}
+
+	/* No file supplied: open, write (if there is anything), close. */
+	err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
+	if (err)
+		return err;
+
+	if (cnt)
+		err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen,
+				cnt, stablep);
+	nfsd_close(file);
+	return err;
+}
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * Commit all pending writes to stable storage.
+ * Strictly speaking, we could sync just the indicated file region here,
+ * but there's currently no way we can ask the VFS to do so.
+ *
+ * Unfortunately we cannot lock the file to make sure we return full WCC
+ * data to the client, as locking happens lower down in the filesystem.
+ *
+ * offset/count are only range-checked (offset + count must not wrap
+ * a 64-bit value); the whole file is synced.  The sync is performed only
+ * when EX_ISSYNC() holds for the export; in that case a file without
+ * an fsync operation yields nfserr_notsupp.
+ */
+int
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		loff_t offset, unsigned long count)
+{
+	struct file *file;
+	int err;
+
+	/* Reject ranges whose end would overflow 64 bits. */
+	if ((u64)count > ~(u64)offset)
+		return nfserr_inval;
+
+	err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file);
+	if (err != 0)
+		return err;
+
+	if (EX_ISSYNC(fhp->fh_export)) {
+		if (!file->f_op || !file->f_op->fsync)
+			err = nfserr_notsupp;
+		else
+			nfsd_sync(file);
+	}
+
+	nfsd_close(file);
+	return err;
+}
+#endif /* CONFIG_NFSD_V3 */
+
+/*
+ * Create a file system object of the given type in the directory fhp.
+ * S_IFREG goes through vfs_create, S_IFDIR through vfs_mkdir, and
+ * S_IFCHR/S_IFBLK/S_IFIFO/S_IFSOCK through vfs_mknod with rdev.
+ *
+ * If the response fh has been verified (resfhp->fh_dentry is set), the
+ * parent directory should already be locked.  Otherwise the parent is
+ * locked here, the name is looked up and resfhp is composed from it.
+ * Note that the parent directory is left locked.
+ *
+ * iap supplies the mode (0 if ATTR_MODE is not set); any attributes
+ * other than uid/gid/mode that remain valid are applied afterwards
+ * with nfsd_setattr.
+ *
+ * Returns 0 or an NFS status code (nfserr_*); host errnos from the VFS
+ * are converted with nfserrno().
+ *
+ * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp
+ */
+int
+nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
+		char *fname, int flen, struct iattr *iap,
+		int type, dev_t rdev, struct svc_fh *resfhp)
+{
+	struct dentry *dentry, *dchild = NULL;
+	struct inode *dirp;
+	int err;
+
+	err = nfserr_perm;
+	if (!flen)
+		goto out;
+	err = nfserr_exist;
+	if (isdotent(fname, flen))
+		goto out;
+
+	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+	if (err)
+		goto out;
+
+	dentry = fhp->fh_dentry;
+	dirp = dentry->d_inode;
+
+	err = nfserr_notdir;
+	if (!dirp->i_op || !dirp->i_op->lookup)
+		goto out;
+	/*
+	 * Check whether the response file handle has been verified yet.
+	 * If it has, the parent directory should already be locked.
+	 */
+	if (!resfhp->fh_dentry) {
+		/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
+		fh_lock(fhp);
+		dchild = lookup_one_len(fname, dentry, flen);
+		err = PTR_ERR(dchild);
+		if (IS_ERR(dchild))
+			goto out_nfserr;
+		err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+		if (err)
+			goto out;
+	} else {
+		/* called from nfsd_proc_create */
+		dchild = dget(resfhp->fh_dentry);
+		if (!fhp->fh_locked) {
+			/* not actually possible */
+			printk(KERN_ERR
+				"nfsd_create: parent %s/%s not locked!\n",
+				dentry->d_parent->d_name.name,
+				dentry->d_name.name);
+			/*
+			 * This path leaves through "out", which does not
+			 * run nfserrno(), so it must carry an NFS status
+			 * rather than a raw host errno.
+			 */
+			err = nfserr_io;
+			goto out;
+		}
+	}
+	/*
+	 * Make sure the child dentry is still negative ...
+	 */
+	err = nfserr_exist;
+	if (dchild->d_inode) {
+		dprintk("nfsd_create: dentry %s/%s not negative!\n",
+			dentry->d_name.name, dchild->d_name.name);
+		goto out;
+	}
+
+	if (!(iap->ia_valid & ATTR_MODE))
+		iap->ia_mode = 0;
+	iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
+
+	/*
+	 * Dispatch on the requested type.  From here until the check
+	 * below, err holds a host errno (negative) or 0.
+	 */
+	err = nfserr_perm;
+	switch (type) {
+	case S_IFREG:
+		err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+		break;
+	case S_IFDIR:
+		err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+		break;
+	case S_IFCHR:
+	case S_IFBLK:
+	case S_IFIFO:
+	case S_IFSOCK:
+		err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
+		break;
+	default:
+		printk("nfsd: bad file type %o in nfsd_create\n", type);
+		err = -EINVAL;
+	}
+	if (err < 0)
+		goto out_nfserr;
+
+	if (EX_ISSYNC(fhp->fh_export)) {
+		nfsd_sync_dir(dentry);
+		write_inode_now(dchild->d_inode, 1);
+	}
+
+
+	/* Set file attributes. Mode has already been set and
+	 * setting uid/gid works only for root. Irix appears to
+	 * send along the gid when it tries to implement setgid
+	 * directories via NFS.
+	 */
+	err = 0;
+	if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0)
+		err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+	/*
+	 * Update the file handle to get the new inode info.
+	 */
+	if (!err)
+		err = fh_update(resfhp);
+out:
+	if (dchild && !IS_ERR(dchild))
+		dput(dchild);
+	return err;
+
+out_nfserr:
+	/* err is a host errno here; translate it to an NFS status. */
+	err = nfserrno(err);
+	goto out;
+}
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * NFSv3 version of nfsd_create
+ *
+ * Creates a regular file named fname in directory fhp according to
+ * createmode (NFS3_CREATE_UNCHECKED, NFS3_CREATE_GUARDED or
+ * NFS3_CREATE_EXCLUSIVE) and composes resfhp for it.
+ *
+ * For NFS3_CREATE_EXCLUSIVE the two verifier words are stored in the new
+ * file's mtime, atime and mode, so that a retransmitted request can be
+ * recognised by comparing those fields on an existing file.
+ *
+ * If truncp is non-NULL and an UNCHECKED create hits an existing regular
+ * file, no attributes are set here; *truncp instead reports whether the
+ * request asked for size 0.
+ *
+ * Returns 0 or an NFS status code; host errnos from lookup_one_len and
+ * vfs_create are converted with nfserrno().
+ */
+int
+nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char *fname, int flen, struct iattr *iap,
+ struct svc_fh *resfhp, int createmode, u32 *verifier,
+ int *truncp)
+{
+ struct dentry *dentry, *dchild = NULL;
+ struct inode *dirp;
+ int err;
+ __u32 v_mtime=0, v_atime=0;
+ int v_mode=0;
+
+ err = nfserr_perm;
+ if (!flen)
+ goto out;
+ err = nfserr_exist;
+ if (isdotent(fname, flen))
+ goto out;
+ if (!(iap->ia_valid & ATTR_MODE))
+ iap->ia_mode = 0;
+ err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+ if (err)
+ goto out;
+
+ dentry = fhp->fh_dentry;
+ dirp = dentry->d_inode;
+
+ /* Get all the sanity checks out of the way before
+ * we lock the parent. */
+ err = nfserr_notdir;
+ if(!dirp->i_op || !dirp->i_op->lookup)
+ goto out;
+ fh_lock(fhp);
+
+ /*
+ * Compose the response file handle.
+ */
+ dchild = lookup_one_len(fname, dentry, flen);
+ err = PTR_ERR(dchild);
+ if (IS_ERR(dchild))
+ goto out_nfserr;
+
+ err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
+ if (err)
+ goto out;
+
+ if (createmode == NFS3_CREATE_EXCLUSIVE) {
+ /* while the verifier would fit in mtime+atime,
+ * solaris7 gets confused (bugid 4218508) if these have
+ * the high bit set, so we use the mode as well
+ */
+ /* Low 31 bits of each word go into the timestamps; the two
+ * high bits are shifted down into mode bits 6 and 8.
+ */
+ v_mtime = verifier[0]&0x7fffffff;
+ v_atime = verifier[1]&0x7fffffff;
+ v_mode = S_IFREG
+ | ((verifier[0]&0x80000000) >> (32-7)) /* u+x */
+ | ((verifier[1]&0x80000000) >> (32-9)) /* u+r */
+ ;
+ }
+
+ if (dchild->d_inode) {
+ /* The name already exists: decide per createmode. */
+ err = 0;
+
+ switch (createmode) {
+ case NFS3_CREATE_UNCHECKED:
+ if (! S_ISREG(dchild->d_inode->i_mode))
+ err = nfserr_exist;
+ else if (truncp) {
+ /* in nfsv4, we need to treat this case a little
+ * differently. we don't want to truncate the
+ * file now; this would be wrong if the OPEN
+ * fails for some other reason. furthermore,
+ * if the size is nonzero, we should ignore it
+ * according to spec!
+ */
+ *truncp = (iap->ia_valid & ATTR_SIZE) && !iap->ia_size;
+ }
+ else {
+ /* Only a requested size is applied to the existing file. */
+ iap->ia_valid &= ATTR_SIZE;
+ goto set_attr;
+ }
+ break;
+ case NFS3_CREATE_EXCLUSIVE:
+ /* Same verifier stored in an empty file: treat as a
+ * replay of the original request and succeed (err == 0).
+ */
+ if ( dchild->d_inode->i_mtime.tv_sec == v_mtime
+ && dchild->d_inode->i_atime.tv_sec == v_atime
+ && dchild->d_inode->i_mode == v_mode
+ && dchild->d_inode->i_size == 0 )
+ break;
+ /* fallthru */
+ case NFS3_CREATE_GUARDED:
+ err = nfserr_exist;
+ }
+ goto out;
+ }
+
+ /* Negative dentry: actually create the file. */
+ err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
+ if (err < 0)
+ goto out_nfserr;
+
+ if (EX_ISSYNC(fhp->fh_export)) {
+ nfsd_sync_dir(dentry);
+ /* setattr will sync the child (or not) */
+ }
+
+ /*
+ * Update the filehandle to get the new inode info.
+ */
+ err = fh_update(resfhp);
+ if (err)
+ goto out;
+
+ if (createmode == NFS3_CREATE_EXCLUSIVE) {
+ /* Cram the verifier into atime/mtime/mode */
+ iap->ia_valid = ATTR_MTIME|ATTR_ATIME
+ | ATTR_MTIME_SET|ATTR_ATIME_SET
+ | ATTR_MODE;
+ /* XXX someone who knows this better please fix it for nsec */
+ iap->ia_mtime.tv_sec = v_mtime;
+ iap->ia_atime.tv_sec = v_atime;
+ iap->ia_mtime.tv_nsec = 0;
+ iap->ia_atime.tv_nsec = 0;
+ iap->ia_mode = v_mode;
+ }
+
+ /* Set file attributes.
+ * Mode has already been set but we might need to reset it
+ * for CREATE_EXCLUSIVE
+ * Irix appears to send along the gid when it tries to
+ * implement setgid directories via NFS. Clear out all that cruft.
+ */
+ set_attr:
+ if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0)
+ err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0);
+
+ /* NOTE(review): "out" is also reached from paths that never called
+ * fh_lock(); presumably fh_unlock() is a no-op on an unlocked handle
+ * - confirm against its definition.
+ */
+ out:
+ fh_unlock(fhp);
+ if (dchild && !IS_ERR(dchild))
+ dput(dchild);
+ return err;
+
+ /* err holds a host errno here; convert it to an NFS status. */
+ out_nfserr:
+ err = nfserrno(err);
+ goto out;
+}
+#endif /* CONFIG_NFSD_V3 */
+
+/*
+ * Read a symlink. On entry, *lenp must contain the maximum path length that
+ * fits into the buffer. On return, it contains the true length.
+ *
+ * Returns 0 on success, nfserr_inval if the inode has no readlink
+ * operation, the fh_verify error, or the converted host errno from the
+ * filesystem's readlink.  *lenp is only written on success.
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
+{
+	struct dentry *dentry;
+	struct inode *inode;
+	mm_segment_t oldfs;
+	int err;
+
+	err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP);
+	if (err)
+		return err;
+
+	dentry = fhp->fh_dentry;
+	inode = dentry->d_inode;
+
+	if (!inode->i_op || !inode->i_op->readlink)
+		return nfserr_inval;
+
+	touch_atime(fhp->fh_export->ex_mnt, dentry);
+
+	/* N.B. Why does this call need a get_fs()??
+	 * Remove the set_fs and watch the fireworks:-) --okir
+	 */
+	oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	err = inode->i_op->readlink(dentry, buf, *lenp);
+	set_fs(oldfs);
+
+	if (err < 0)
+		return nfserrno(err);
+
+	/* A non-negative result is the length of the link text. */
+	*lenp = err;
+	return 0;
+}
+
+/*
+ * Create a symlink and look up its inode
+ *
+ * fname/flen name the new link inside directory fhp; path/plen give the
+ * link target.  iap may be NULL; only its mode (if ATTR_MODE is set) is
+ * used.  resfhp is composed from the looked-up dentry whether or not the
+ * symlink call succeeded; a symlink error takes precedence over a
+ * compose error in the return value.
+ *
+ * Returns 0 or an NFS status code.
+ * N.B. After this call _both_ fhp and resfhp need an fh_put
+ */
+int
+nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ char *fname, int flen,
+ char *path, int plen,
+ struct svc_fh *resfhp,
+ struct iattr *iap)
+{
+ struct dentry *dentry, *dnew;
+ int err, cerr;
+ umode_t mode;
+
+ err = nfserr_noent;
+ if (!flen || !plen)
+ goto out;
+ err = nfserr_exist;
+ if (isdotent(fname, flen))
+ goto out;
+
+ err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE);
+ if (err)
+ goto out;
+ fh_lock(fhp);
+ dentry = fhp->fh_dentry;
+ dnew = lookup_one_len(fname, dentry, flen);
+ err = PTR_ERR(dnew);
+ /* NOTE(review): this exit does not call fh_unlock(); presumably the
+ * caller's fh_put releases the lock - confirm.
+ */
+ if (IS_ERR(dnew))
+ goto out_nfserr;
+
+ mode = S_IALLUGO;
+ /* Only the MODE ATTRibute is even vaguely meaningful */
+ if (iap && (iap->ia_valid & ATTR_MODE))
+ mode = iap->ia_mode & S_IALLUGO;
+
+ /* vfs_symlink is handed a C string; if the byte after the target is
+ * not already NUL, work on a NUL-terminated private copy.
+ */
+ if (unlikely(path[plen] != 0)) {
+ char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
+ if (path_alloced == NULL)
+ err = -ENOMEM;
+ else {
+ strncpy(path_alloced, path, plen);
+ path_alloced[plen] = 0;
+ err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode);
+ kfree(path_alloced);
+ }
+ } else
+ err = vfs_symlink(dentry->d_inode, dnew, path, mode);
+
+ /* err is a host errno up to here; convert it on failure. */
+ if (!err) {
+ if (EX_ISSYNC(fhp->fh_export))
+ nfsd_sync_dir(dentry);
+ } else
+ err = nfserrno(err);
+ fh_unlock(fhp);
+
+ /* Compose the response handle even after a failed symlink. */
+ cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
+ dput(dnew);
+ if (err==0) err = cerr;
+out:
+ return err;
+
+out_nfserr:
+ err = nfserrno(err);
+ goto out;
+}
+
+/*
+ * Create a hardlink
+ *
+ * ffhp is the directory that receives the new name (name/len); tfhp is
+ * the existing object being linked to.
+ *
+ * Returns 0 or an NFS status code.  For NFS version 2 requests a
+ * cross-device failure (-EXDEV) is reported as nfserr_acces.
+ * N.B. After this call _both_ ffhp and tfhp need an fh_put
+ */
+int
+nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
+ char *name, int len, struct svc_fh *tfhp)
+{
+ struct dentry *ddir, *dnew, *dold;
+ struct inode *dirp, *dest;
+ int err;
+
+ err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE);
+ if (err)
+ goto out;
+ /* NOTE(review): the negative type presumably means "anything but a
+ * directory" - confirm against fh_verify.
+ */
+ err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP);
+ if (err)
+ goto out;
+
+ err = nfserr_perm;
+ if (!len)
+ goto out;
+ err = nfserr_exist;
+ if (isdotent(name, len))
+ goto out;
+
+ fh_lock(ffhp);
+ ddir = ffhp->fh_dentry;
+ dirp = ddir->d_inode;
+
+ dnew = lookup_one_len(name, ddir, len);
+ err = PTR_ERR(dnew);
+ /* NOTE(review): this exit does not call fh_unlock(); presumably the
+ * caller's fh_put releases the lock - confirm.
+ */
+ if (IS_ERR(dnew))
+ goto out_nfserr;
+
+ dold = tfhp->fh_dentry;
+ dest = dold->d_inode;
+
+ err = vfs_link(dold, dirp, dnew);
+ if (!err) {
+ /* Sync exports: flush the directory and the linked inode. */
+ if (EX_ISSYNC(ffhp->fh_export)) {
+ nfsd_sync_dir(ddir);
+ write_inode_now(dest, 1);
+ }
+ } else {
+ /* Version 2 requests get nfserr_acces for a cross-device link. */
+ if (err == -EXDEV && rqstp->rq_vers == 2)
+ err = nfserr_acces;
+ else
+ err = nfserrno(err);
+ }
+
+ fh_unlock(ffhp);
+ dput(dnew);
+out:
+ return err;
+
+out_nfserr:
+ err = nfserrno(err);
+ goto out;
+}
+
+/*
+ * Rename a file
+ *
+ * Moves fname in directory ffhp to tname in directory tfhp.  Both
+ * directories must live on the same superblock; otherwise the result
+ * is nfserr_xdev (nfserr_acces for NFS version 2 requests).
+ *
+ * Both directories are locked with lock_rename() for the duration of
+ * the operation and the pre/post WCC data of both handles is filled in
+ * by hand.
+ *
+ * Returns 0 or an NFS status code.  After the directories are locked,
+ * err carries a host errno until it is converted at out_nfserr.
+ * N.B. After this call _both_ ffhp and tfhp need an fh_put
+ */
+int
+nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
+		struct svc_fh *tfhp, char *tname, int tlen)
+{
+	struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap;
+	struct inode *fdir, *tdir;
+	int err;
+
+	err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE);
+	if (err)
+		goto out;
+	err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE);
+	if (err)
+		goto out;
+
+	fdentry = ffhp->fh_dentry;
+	fdir = fdentry->d_inode;
+
+	tdentry = tfhp->fh_dentry;
+	tdir = tdentry->d_inode;
+
+	err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
+	if (fdir->i_sb != tdir->i_sb)
+		goto out;
+
+	err = nfserr_perm;
+	if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
+		goto out;
+
+	/* cannot use fh_lock as we need deadlock protective ordering
+	 * so do it by hand */
+	trap = lock_rename(tdentry, fdentry);
+	ffhp->fh_locked = tfhp->fh_locked = 1;
+	fill_pre_wcc(ffhp);
+	fill_pre_wcc(tfhp);
+
+	odentry = lookup_one_len(fname, fdentry, flen);
+	err = PTR_ERR(odentry);
+	if (IS_ERR(odentry))
+		goto out_nfserr;
+
+	err = -ENOENT;
+	if (!odentry->d_inode)
+		goto out_dput_old;
+	/* the source must not be the dentry returned by lock_rename() */
+	err = -EINVAL;
+	if (odentry == trap)
+		goto out_dput_old;
+
+	ndentry = lookup_one_len(tname, tdentry, tlen);
+	err = PTR_ERR(ndentry);
+	if (IS_ERR(ndentry))
+		goto out_dput_old;
+	/* neither may the target be that dentry */
+	err = -ENOTEMPTY;
+	if (ndentry == trap)
+		goto out_dput_new;
+
+#ifdef MSNFS
+	if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+		((atomic_read(&odentry->d_count) > 1)
+		 || (atomic_read(&ndentry->d_count) > 1))) {
+		/*
+		 * Must be a host errno: err goes through nfserrno()
+		 * at out_nfserr, which translates -EPERM into the NFS
+		 * "not permitted" status.
+		 */
+		err = -EPERM;
+	} else
+#endif
+	err = vfs_rename(fdir, odentry, tdir, ndentry);
+	if (!err && EX_ISSYNC(tfhp->fh_export)) {
+		nfsd_sync_dir(tdentry);
+		nfsd_sync_dir(fdentry);
+	}
+
+ out_dput_new:
+	dput(ndentry);
+ out_dput_old:
+	dput(odentry);
+ out_nfserr:
+	if (err)
+		err = nfserrno(err);
+
+	/* we cannot rely on fh_unlock on the two filehandles,
+	 * as that would do the wrong thing if the two directories
+	 * were the same, so again we do it by hand
+	 */
+	fill_post_wcc(ffhp);
+	fill_post_wcc(tfhp);
+	unlock_rename(tdentry, fdentry);
+	ffhp->fh_locked = tfhp->fh_locked = 0;
+
+out:
+	return err;
+}
+
+/*
+ * Unlink a file or directory
+ *
+ * Removes fname from directory fhp.  type selects the operation:
+ * S_IFDIR means rmdir, any other non-zero value means unlink, and 0
+ * means "decide from the file type of the object found".
+ *
+ * Returns 0 or an NFS status code; host errnos are converted with
+ * nfserrno() at out_nfserr.
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
+		char *fname, int flen)
+{
+	struct dentry *dentry, *rdentry;
+	struct inode *dirp;
+	int err;
+
+	err = nfserr_acces;
+	if (!flen || isdotent(fname, flen))
+		goto out;
+	err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE);
+	if (err)
+		goto out;
+
+	fh_lock(fhp);
+	dentry = fhp->fh_dentry;
+	dirp = dentry->d_inode;
+
+	rdentry = lookup_one_len(fname, dentry, flen);
+	err = PTR_ERR(rdentry);
+	if (IS_ERR(rdentry))
+		goto out_nfserr;
+
+	if (!rdentry->d_inode) {
+		/* nothing by that name */
+		dput(rdentry);
+		err = nfserr_noent;
+		goto out;
+	}
+
+	if (!type)
+		type = rdentry->d_inode->i_mode & S_IFMT;
+
+	if (type != S_IFDIR) { /* It's UNLINK */
+#ifdef MSNFS
+		if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
+			(atomic_read(&rdentry->d_count) > 1)) {
+			/*
+			 * Must be a host errno: a non-zero err is passed
+			 * through nfserrno() at out_nfserr, which turns
+			 * -EPERM into the NFS "not permitted" status.
+			 */
+			err = -EPERM;
+		} else
+#endif
+		err = vfs_unlink(dirp, rdentry);
+	} else { /* It's RMDIR */
+		err = vfs_rmdir(dirp, rdentry);
+	}
+
+	dput(rdentry);
+
+	if (err)
+		goto out_nfserr;
+	if (EX_ISSYNC(fhp->fh_export))
+		nfsd_sync_dir(dentry);
+
+out:
+	return err;
+
+out_nfserr:
+	err = nfserrno(err);
+	goto out;
+}
+
+/*
+ * Read entries from a directory.
+ * The NFSv3/4 verifier we ignore for now.
+ *
+ * The directory is opened, positioned at *offsetp and fed to
+ * vfs_readdir with func as the per-entry callback and cdp as its
+ * cookie.  On the way out *offsetp is set to the file position reported
+ * by vfs_llseek.  nfserr_eof and nfserr_toosmall are reported as nfs_ok;
+ * the original value stays available in cdp->err.
+ */
+int
+nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
+		struct readdir_cd *cdp, encode_dent_fn func)
+{
+	struct file *file;
+	loff_t pos;
+	int err;
+
+	pos = *offsetp;
+
+	err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file);
+	if (err)
+		return err;
+
+	pos = vfs_llseek(file, pos, 0);
+	if (pos < 0) {
+		err = nfserrno((int)pos);
+		goto out_close;
+	}
+
+	/*
+	 * Read the directory entries. This silly loop is necessary because
+	 * readdir() is not guaranteed to fill up the entire buffer, but
+	 * may choose to do less.
+	 */
+	for (;;) {
+		cdp->err = nfserr_eof; /* will be cleared on successful read */
+		err = vfs_readdir(file, (filldir_t) func, cdp);
+		if (err < 0 || cdp->err != nfs_ok)
+			break;
+	}
+
+	err = err ? nfserrno(err) : cdp->err;
+	*offsetp = vfs_llseek(file, 0, 1);
+
+	if (err == nfserr_eof || err == nfserr_toosmall)
+		err = nfs_ok; /* can still be found in ->err */
+out_close:
+	nfsd_close(file);
+	return err;
+}
+
+/*
+ * Get file system stats
+ *
+ * Verifies fhp, then fills *stat from the superblock of the handle's
+ * inode.  Returns the fh_verify error, nfserr_io if vfs_statfs fails,
+ * or 0.
+ * N.B. After this call fhp needs an fh_put
+ */
+int
+nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
+{
+	int err;
+
+	err = fh_verify(rqstp, fhp, 0, MAY_NOP);
+	if (err)
+		return err;
+
+	if (vfs_statfs(fhp->fh_dentry->d_inode->i_sb, stat))
+		return nfserr_io;
+
+	return 0;
+}
+
+/*
+ * Check for a user's access permissions to this inode.
+ *
+ * exp is the export the dentry was reached through, acc a mask of MAY_*
+ * bits.  Returns 0 if access is granted, otherwise an NFS status code
+ * (nfserr_rofs, nfserr_perm, or the converted result of permission()).
+ */
+int
+nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc)
+{
+ struct inode *inode = dentry->d_inode;
+ int err;
+
+ /* MAY_NOP: nothing to check. */
+ if (acc == MAY_NOP)
+ return 0;
+#if 0
+ dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
+ acc,
+ (acc & MAY_READ)? " read" : "",
+ (acc & MAY_WRITE)? " write" : "",
+ (acc & MAY_EXEC)? " exec" : "",
+ (acc & MAY_SATTR)? " sattr" : "",
+ (acc & MAY_TRUNC)? " trunc" : "",
+ (acc & MAY_LOCK)? " lock" : "",
+ (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "",
+ inode->i_mode,
+ IS_IMMUTABLE(inode)? " immut" : "",
+ IS_APPEND(inode)? " append" : "",
+ IS_RDONLY(inode)? " ro" : "");
+ dprintk(" owner %d/%d user %d/%d\n",
+ inode->i_uid, inode->i_gid, current->fsuid, current->fsgid);
+#endif
+
+ /* Normally we reject any write/sattr etc access on a read-only file
+ * system. But if it is IRIX doing check on write-access for a
+ * device special file, we ignore rofs.
+ */
+ /* Both the read-only and the immutable test are skipped when
+ * MAY_LOCAL_ACCESS is set.
+ */
+ if (!(acc & MAY_LOCAL_ACCESS))
+ if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) {
+ if (EX_RDONLY(exp) || IS_RDONLY(inode))
+ return nfserr_rofs;
+ if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode))
+ return nfserr_perm;
+ }
+ /* Truncating an append-only inode is never allowed. */
+ if ((acc & MAY_TRUNC) && IS_APPEND(inode))
+ return nfserr_perm;
+
+ if (acc & MAY_LOCK) {
+ /* If we cannot rely on authentication in NLM requests,
+ * just allow locks, otherwise require read permission, or
+ * ownership
+ */
+ if (exp->ex_flags & NFSEXP_NOAUTHNLM)
+ return 0;
+ else
+ acc = MAY_READ | MAY_OWNER_OVERRIDE;
+ }
+ /*
+ * The file owner always gets access permission for accesses that
+ * would normally be checked at open time. This is to make
+ * file access work even when the client has done a fchmod(fd, 0).
+ *
+ * However, `cp foo bar' should fail nevertheless when bar is
+ * readonly. A sensible way to do this might be to reject all
+ * attempts to truncate a read-only file, because a creat() call
+ * always implies file truncation.
+ * ... but this isn't really fair. A process may reasonably call
+ * ftruncate on an open file descriptor on a file with perm 000.
+ * We must trust the client to do permission checking - using "ACCESS"
+ * with NFSv3.
+ */
+ if ((acc & MAY_OWNER_OVERRIDE) &&
+ inode->i_uid == current->fsuid)
+ return 0;
+
+ /* Only the read/write/exec bits are passed on to permission(). */
+ err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL);
+
+ /* Allow read access to binaries even when mode 111 */
+ if (err == -EACCES && S_ISREG(inode->i_mode) &&
+ acc == (MAY_READ | MAY_OWNER_OVERRIDE))
+ err = permission(inode, MAY_EXEC, NULL);
+
+ /* permission() reports a host errno; convert it for the caller. */
+ return err? nfserrno(err) : 0;
+}
+
+/*
+ * Release the readahead parameter cache, if one was set up.
+ * Both raparml and raparm_cache are reset to NULL afterwards.
+ */
+void
+nfsd_racache_shutdown(void)
+{
+	if (raparm_cache == NULL)
+		return;
+
+	dprintk("nfsd: freeing readahead buffers.\n");
+	kfree(raparml);
+	raparml = NULL;
+	raparm_cache = NULL;
+}
+/*
+ * Initialize readahead param cache
+ *
+ * Allocates and zeroes cache_size raparms entries and chains them
+ * through p_next (the last entry keeps a NULL p_next).  Does nothing
+ * and returns 0 if a cache is already present.  Returns -ENOMEM when
+ * the allocation fails.
+ */
+int
+nfsd_racache_init(int cache_size)
+{
+	int i;
+
+	if (raparm_cache)
+		return 0;
+
+	raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL);
+	if (raparml == NULL) {
+		printk(KERN_WARNING
+			"nfsd: Could not allocate memory read-ahead cache.\n");
+		return -ENOMEM;
+	}
+
+	dprintk("nfsd: allocating %d readahead buffers.\n",
+		cache_size);
+	memset(raparml, 0, sizeof(struct raparms) * cache_size);
+
+	/* Link every entry to its successor. */
+	for (i = 0; i < cache_size - 1; i++)
+		raparml[i].p_next = &raparml[i + 1];
+	raparm_cache = raparml;
+
+	nfsdstats.ra_size = cache_size;
+	return 0;
+}