diff options
-rw-r--r-- | fs/nfsd/netns.h | 6 | ||||
-rw-r--r-- | fs/nfsd/nfs4proc.c | 135 | ||||
-rw-r--r-- | fs/nfsd/nfs4state.c | 71 | ||||
-rw-r--r-- | fs/nfsd/nfsd.h | 4 | ||||
-rw-r--r-- | fs/nfsd/nfssvc.c | 3 | ||||
-rw-r--r-- | include/linux/nfs_ssc.h | 14 |
6 files changed, 229 insertions, 4 deletions
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index a75abeb1e698..935c1028c217 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -176,6 +176,12 @@ struct nfsd_net { unsigned int longest_chain_cachesize; struct shrinker nfsd_reply_cache_shrinker; + + /* tracking server-to-server copy mounts */ + spinlock_t nfsd_ssc_lock; + struct list_head nfsd_ssc_mount_list; + wait_queue_head_t nfsd_ssc_waitq; + /* utsname taken from the process that starts the server */ char nfsd_name[UNX_MAXNODENAME+1]; }; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 48c0992cb65c..0bd71c6da81d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -55,6 +55,13 @@ module_param(inter_copy_offload_enable, bool, 0644); MODULE_PARM_DESC(inter_copy_offload_enable, "Enable inter server to server copy offload. Default: false"); +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +static int nfsd4_ssc_umount_timeout = 900000; /* default to 15 mins */ +module_param(nfsd4_ssc_umount_timeout, int, 0644); +MODULE_PARM_DESC(nfsd4_ssc_umount_timeout, + "idle msecs before unmount export from source server"); +#endif + #ifdef CONFIG_NFSD_V4_SECURITY_LABEL #include <linux/security.h> @@ -1166,6 +1173,81 @@ extern void nfs_sb_deactive(struct super_block *sb); #define NFSD42_INTERSSC_MOUNTOPS "vers=4.2,addr=%s,sec=sys" /* + * setup a work entry in the ssc delayed unmount list. + */ +static int nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, + struct nfsd4_ssc_umount_item **retwork, struct vfsmount **ss_mnt) +{ + struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd4_ssc_umount_item *work = NULL; + struct nfsd4_ssc_umount_item *tmp; + DEFINE_WAIT(wait); + + *ss_mnt = NULL; + *retwork = NULL; + work = kzalloc(sizeof(*work), GFP_KERNEL); +try_again: + spin_lock(&nn->nfsd_ssc_lock); + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { + if (strncmp(ni->nsui_ipaddr, ipaddr, sizeof(ni->nsui_ipaddr))) + continue; + /* found a match */ + if (ni->nsui_busy) { + /* wait - and try again */ + prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, + TASK_INTERRUPTIBLE); + spin_unlock(&nn->nfsd_ssc_lock); + + /* allow 20secs for mount/unmount for now - revisit */ + if (signal_pending(current) || + (schedule_timeout(20*HZ) == 0)) { + kfree(work); + return nfserr_eagain; + } + finish_wait(&nn->nfsd_ssc_waitq, &wait); + goto try_again; + } + *ss_mnt = ni->nsui_vfsmount; + refcount_inc(&ni->nsui_refcnt); + spin_unlock(&nn->nfsd_ssc_lock); + kfree(work); + + /* return vfsmount in ss_mnt */ + return 0; + } + if (work) { + strncpy(work->nsui_ipaddr, ipaddr, sizeof(work->nsui_ipaddr)); + refcount_set(&work->nsui_refcnt, 2); + work->nsui_busy = true; + list_add_tail(&work->nsui_list, &nn->nfsd_ssc_mount_list); + *retwork = work; + } + spin_unlock(&nn->nfsd_ssc_lock); + return 0; +} + +static void nfsd4_ssc_update_dul_work(struct nfsd_net *nn, + struct nfsd4_ssc_umount_item *work, struct vfsmount *ss_mnt) +{ + /* set nsui_vfsmount, clear busy flag and wakeup waiters */ + spin_lock(&nn->nfsd_ssc_lock); + work->nsui_vfsmount = ss_mnt; + work->nsui_busy = false; + wake_up_all(&nn->nfsd_ssc_waitq); + spin_unlock(&nn->nfsd_ssc_lock); +} + +static void nfsd4_ssc_cancel_dul_work(struct nfsd_net *nn, + struct nfsd4_ssc_umount_item *work) +{ + spin_lock(&nn->nfsd_ssc_lock); + list_del(&work->nsui_list); + wake_up_all(&nn->nfsd_ssc_waitq); + spin_unlock(&nn->nfsd_ssc_lock); + kfree(work); +} + +/* * Support one copy source server for now. */ static __be32 @@ -1181,6 +1263,8 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, char *ipaddr, *dev_name, *raw_data; int len, raw_len; __be32 status = nfserr_inval; + struct nfsd4_ssc_umount_item *work = NULL; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); naddr = &nss->u.nl4_addr; tmp_addrlen = rpc_uaddr2sockaddr(SVC_NET(rqstp), naddr->addr, @@ -1229,12 +1313,23 @@ nfsd4_interssc_connect(struct nl4_server *nss, struct svc_rqst *rqstp, goto out_free_rawdata; snprintf(dev_name, len + 5, "%s%s%s:/", startsep, ipaddr, endsep); + status = nfsd4_ssc_setup_dul(nn, ipaddr, &work, &ss_mnt); + if (status) + goto out_free_devname; + if (ss_mnt) + goto out_done; + /* Use an 'internal' mount: SB_KERNMOUNT -> MNT_INTERNAL */ ss_mnt = vfs_kern_mount(type, SB_KERNMOUNT, dev_name, raw_data); module_put(type->owner); - if (IS_ERR(ss_mnt)) + if (IS_ERR(ss_mnt)) { + if (work) + nfsd4_ssc_cancel_dul_work(nn, work); goto out_free_devname; - + } + if (work) + nfsd4_ssc_update_dul_work(nn, work, ss_mnt); +out_done: status = 0; *mount = ss_mnt; @@ -1301,10 +1396,42 @@ static void nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, struct nfsd_file *dst) { + bool found = false; + long timeout; + struct nfsd4_ssc_umount_item *tmp; + struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd_net *nn = net_generic(dst->nf_net, nfsd_net_id); + nfs42_ssc_close(src->nf_file); - fput(src->nf_file); nfsd_file_put(dst); - mntput(ss_mnt); + fput(src->nf_file); + + if (!nn) { + mntput(ss_mnt); + return; + } + spin_lock(&nn->nfsd_ssc_lock); + timeout = msecs_to_jiffies(nfsd4_ssc_umount_timeout); + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { + if (ni->nsui_vfsmount->mnt_sb == ss_mnt->mnt_sb) { + list_del(&ni->nsui_list); + /* + * vfsmount can be shared by multiple exports, + * decrement refcnt. If the count drops to 1 it + * will be unmounted when nsui_expire expires. + */ + refcount_dec(&ni->nsui_refcnt); + ni->nsui_expire = jiffies + timeout; + list_add_tail(&ni->nsui_list, &nn->nfsd_ssc_mount_list); + found = true; + break; + } + } + spin_unlock(&nn->nfsd_ssc_lock); + if (!found) { + mntput(ss_mnt); + return; + } } #else /* CONFIG_NFSD_V4_2_INTER_SSC */ diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b2573d3ecd3c..99dfc668f605 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -44,6 +44,7 @@ #include <linux/jhash.h> #include <linux/string_helpers.h> #include <linux/fsnotify.h> +#include <linux/nfs_ssc.h> #include "xdr4.h" #include "xdr4cb.h" #include "vfs.h" @@ -5480,6 +5481,69 @@ static bool state_expired(struct laundry_time *lt, time64_t last_refresh) return false; } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +void nfsd4_ssc_init_umount_work(struct nfsd_net *nn) +{ + spin_lock_init(&nn->nfsd_ssc_lock); + INIT_LIST_HEAD(&nn->nfsd_ssc_mount_list); + init_waitqueue_head(&nn->nfsd_ssc_waitq); +} +EXPORT_SYMBOL_GPL(nfsd4_ssc_init_umount_work); + +/* + * This is called when nfsd is being shutdown, after all inter_ssc + * cleanup were done, to destroy the ssc delayed unmount list. + */ +static void nfsd4_ssc_shutdown_umount(struct nfsd_net *nn) +{ + struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd4_ssc_umount_item *tmp; + + spin_lock(&nn->nfsd_ssc_lock); + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { + list_del(&ni->nsui_list); + spin_unlock(&nn->nfsd_ssc_lock); + mntput(ni->nsui_vfsmount); + kfree(ni); + spin_lock(&nn->nfsd_ssc_lock); + } + spin_unlock(&nn->nfsd_ssc_lock); +} + +static void nfsd4_ssc_expire_umount(struct nfsd_net *nn) +{ + bool do_wakeup = false; + struct nfsd4_ssc_umount_item *ni = 0; + struct nfsd4_ssc_umount_item *tmp; + + spin_lock(&nn->nfsd_ssc_lock); + list_for_each_entry_safe(ni, tmp, &nn->nfsd_ssc_mount_list, nsui_list) { + if (time_after(jiffies, ni->nsui_expire)) { + if (refcount_read(&ni->nsui_refcnt) > 1) + continue; + + /* mark being unmount */ + ni->nsui_busy = true; + spin_unlock(&nn->nfsd_ssc_lock); + mntput(ni->nsui_vfsmount); + spin_lock(&nn->nfsd_ssc_lock); + + /* waiters need to start from begin of list */ + list_del(&ni->nsui_list); + kfree(ni); + + /* wakeup ssc_connect waiters */ + do_wakeup = true; + continue; + } + break; + } + if (do_wakeup) + wake_up_all(&nn->nfsd_ssc_waitq); + spin_unlock(&nn->nfsd_ssc_lock); +} +#endif + static time64_t nfs4_laundromat(struct nfsd_net *nn) { @@ -5589,6 +5653,10 @@ nfs4_laundromat(struct nfsd_net *nn) list_del_init(&nbl->nbl_lru); free_blocked_lock(nbl); } +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + /* service the server-to-server copy delayed unmount list */ + nfsd4_ssc_expire_umount(nn); +#endif out: return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT); } @@ -7506,6 +7574,9 @@ nfs4_state_shutdown_net(struct net *net) nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + nfsd4_ssc_shutdown_umount(nn); +#endif } void diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 14dbfa75059d..9664303afdaf 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -484,6 +484,10 @@ static inline bool nfsd_attrs_supported(u32 minorversion, const u32 *bmval) extern int nfsd4_is_junction(struct dentry *dentry); extern int register_cld_notifier(void); extern void unregister_cld_notifier(void); +#ifdef CONFIG_NFSD_V4_2_INTER_SSC +extern void nfsd4_ssc_init_umount_work(struct nfsd_net *nn); +#endif + #else /* CONFIG_NFSD_V4 */ static inline int nfsd4_is_junction(struct dentry *dentry) { diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index dd5d69921676..ccb59e91011b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -403,6 +403,9 @@ static int nfsd_startup_net(struct net *net, const struct cred *cred) if (ret) goto out_filecache; +#ifdef CONFIG_NFSD_V4_2_INTER_SSC + nfsd4_ssc_init_umount_work(nn); +#endif nn->nfsd_net_up = true; return 0; diff --git a/include/linux/nfs_ssc.h b/include/linux/nfs_ssc.h index f5ba0fbff72f..222ae8883e85 100644 --- a/include/linux/nfs_ssc.h +++ b/include/linux/nfs_ssc.h @@ -8,6 +8,7 @@ */ #include <linux/nfs_fs.h> +#include <linux/sunrpc/svc.h> extern struct nfs_ssc_client_ops_tbl nfs_ssc_client_tbl; @@ -52,6 +53,19 @@ static inline void nfs42_ssc_close(struct file *filep) if (nfs_ssc_client_tbl.ssc_nfs4_ops) (*nfs_ssc_client_tbl.ssc_nfs4_ops->sco_close)(filep); } + +struct nfsd4_ssc_umount_item { + struct list_head nsui_list; + bool nsui_busy; + /* + * nsui_refcnt inited to 2, 1 on list and 1 for consumer. Entry + * is removed when refcnt drops to 1 and nsui_expire expires. + */ + refcount_t nsui_refcnt; + unsigned long nsui_expire; + struct vfsmount *nsui_vfsmount; + char nsui_ipaddr[RPC_MAX_ADDRBUFLEN]; +}; #endif /* |