diff options
Diffstat (limited to 'net/core/sock.c')
-rw-r--r-- | net/core/sock.c | 140 |
1 files changed, 78 insertions, 62 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 02f8dfe320b7..9e5b71fda6ec 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -89,6 +89,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> @@ -111,11 +113,11 @@ #include <linux/init.h> #include <linux/highmem.h> #include <linux/user_namespace.h> -#include <linux/jump_label.h> +#include <linux/static_key.h> #include <linux/memcontrol.h> +#include <linux/prefetch.h> #include <asm/uaccess.h> -#include <asm/system.h> #include <linux/netdevice.h> #include <net/protocol.h> @@ -141,7 +143,7 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM -int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) +int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { struct proto *proto; int ret = 0; @@ -149,7 +151,7 @@ int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) mutex_lock(&proto_list_mutex); list_for_each_entry(proto, &proto_list, node) { if (proto->init_cgroup) { - ret = proto->init_cgroup(cgrp, ss); + ret = proto->init_cgroup(memcg, ss); if (ret) goto out; } @@ -160,19 +162,19 @@ int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) out: list_for_each_entry_continue_reverse(proto, &proto_list, node) if (proto->destroy_cgroup) - proto->destroy_cgroup(cgrp, ss); + proto->destroy_cgroup(memcg); mutex_unlock(&proto_list_mutex); return ret; } -void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) +void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg) { struct proto *proto; mutex_lock(&proto_list_mutex); list_for_each_entry_reverse(proto, &proto_list, node) if (proto->destroy_cgroup) - proto->destroy_cgroup(cgrp, ss); + proto->destroy_cgroup(memcg); mutex_unlock(&proto_list_mutex); } #endif @@ -184,7 +186,7 @@ void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss) static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -struct jump_label_key memcg_socket_limit_enabled; +struct static_key memcg_socket_limit_enabled; EXPORT_SYMBOL(memcg_socket_limit_enabled); /* @@ -259,7 +261,9 @@ static struct lock_class_key af_callback_keys[AF_MAX]; /* Run time adjustable parameters. */ __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; +EXPORT_SYMBOL(sysctl_wmem_max); __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; +EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; @@ -295,9 +299,8 @@ static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) *timeo_p = 0; if (warned < 10 && net_ratelimit()) { warned++; - printk(KERN_INFO "sock_set_timeout: `%s' (pid %d) " - "tries to set negative timeout\n", - current->comm, task_pid_nr(current)); + pr_info("%s: `%s' (pid %d) tries to set negative timeout\n", + __func__, current->comm, task_pid_nr(current)); } return 0; } @@ -315,8 +318,8 @@ static void sock_warn_obsolete_bsdism(const char *name) static char warncomm[TASK_COMM_LEN]; if (strcmp(warncomm, current->comm) && warned < 5) { strcpy(warncomm, current->comm); - printk(KERN_WARNING "process `%s' is using obsolete " - "%s SO_BSDCOMPAT\n", warncomm, name); + pr_warn("process `%s' is using obsolete %s SO_BSDCOMPAT\n", + warncomm, name); warned++; } } @@ -390,7 +393,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) skb->dev = NULL; - if (sk_rcvqueues_full(sk, skb)) { + if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { atomic_inc(&sk->sk_drops); goto discard_and_relse; } @@ -407,7 +410,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); - } else if (sk_add_backlog(sk, skb)) { + } else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) { bh_unlock_sock(sk); atomic_inc(&sk->sk_drops); goto discard_and_relse; @@ -562,7 +565,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, sock_valbool_flag(sk, SOCK_DBG, valbool); break; case SO_REUSEADDR: - sk->sk_reuse = valbool; + sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE); break; case SO_TYPE: case SO_PROTOCOL: @@ -578,23 +581,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_SNDBUF: /* Don't error on this BSD doesn't and if you think - about it this is right. Otherwise apps have to - play 'guess the biggest size' games. RCVBUF/SNDBUF - are treated in BSD as hints */ - - if (val > sysctl_wmem_max) - val = sysctl_wmem_max; + * about it this is right. Otherwise apps have to + * play 'guess the biggest size' games. RCVBUF/SNDBUF + * are treated in BSD as hints + */ + val = min_t(u32, val, sysctl_wmem_max); set_sndbuf: sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - if ((val * 2) < SOCK_MIN_SNDBUF) - sk->sk_sndbuf = SOCK_MIN_SNDBUF; - else - sk->sk_sndbuf = val * 2; - - /* - * Wake up sending tasks if we - * upped the value. - */ + sk->sk_sndbuf = max_t(u32, val * 2, SOCK_MIN_SNDBUF); + /* Wake up sending tasks if we upped the value. */ sk->sk_write_space(sk); break; @@ -607,12 +602,11 @@ set_sndbuf: case SO_RCVBUF: /* Don't error on this BSD doesn't and if you think - about it this is right. Otherwise apps have to - play 'guess the biggest size' games. RCVBUF/SNDBUF - are treated in BSD as hints */ - - if (val > sysctl_rmem_max) - val = sysctl_rmem_max; + * about it this is right. Otherwise apps have to + * play 'guess the biggest size' games. RCVBUF/SNDBUF + * are treated in BSD as hints + */ + val = min_t(u32, val, sysctl_rmem_max); set_rcvbuf: sk->sk_userlocks |= SOCK_RCVBUF_LOCK; /* @@ -630,10 +624,7 @@ set_rcvbuf: * returning the value we actually used in getsockopt * is the most desirable behavior. */ - if ((val * 2) < SOCK_MIN_RCVBUF) - sk->sk_rcvbuf = SOCK_MIN_RCVBUF; - else - sk->sk_rcvbuf = val * 2; + sk->sk_rcvbuf = max_t(u32, val * 2, SOCK_MIN_RCVBUF); break; case SO_RCVBUFFORCE: @@ -793,6 +784,17 @@ set_rcvbuf: sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool); break; + case SO_PEEK_OFF: + if (sock->ops->set_peek_off) + sock->ops->set_peek_off(sk, val); + else + ret = -EOPNOTSUPP; + break; + + case SO_NOFCS: + sock_valbool_flag(sk, SOCK_NOFCS, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -811,8 +813,8 @@ void cred_to_ucred(struct pid *pid, const struct cred *cred, if (cred) { struct user_namespace *current_ns = current_user_ns(); - ucred->uid = user_ns_map_uid(current_ns, cred, cred->euid); - ucred->gid = user_ns_map_gid(current_ns, cred, cred->egid); + ucred->uid = from_kuid(current_ns, cred->euid); + ucred->gid = from_kgid(current_ns, cred->egid); } } EXPORT_SYMBOL_GPL(cred_to_ucred); @@ -848,7 +850,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_BROADCAST: - v.val = !!sock_flag(sk, SOCK_BROADCAST); + v.val = sock_flag(sk, SOCK_BROADCAST); break; case SO_SNDBUF: @@ -864,7 +866,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_KEEPALIVE: - v.val = !!sock_flag(sk, SOCK_KEEPOPEN); + v.val = sock_flag(sk, SOCK_KEEPOPEN); break; case SO_TYPE: @@ -886,7 +888,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_OOBINLINE: - v.val = !!sock_flag(sk, SOCK_URGINLINE); + v.val = sock_flag(sk, SOCK_URGINLINE); break; case SO_NO_CHECK: @@ -899,7 +901,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_LINGER: lv = sizeof(v.ling); - v.ling.l_onoff = !!sock_flag(sk, SOCK_LINGER); + v.ling.l_onoff = sock_flag(sk, SOCK_LINGER); v.ling.l_linger = sk->sk_lingertime / HZ; break; @@ -965,7 +967,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_PASSCRED: - v.val = test_bit(SOCK_PASSCRED, &sock->flags) ? 1 : 0; + v.val = !!test_bit(SOCK_PASSCRED, &sock->flags); break; case SO_PEERCRED: @@ -1000,7 +1002,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_PASSSEC: - v.val = test_bit(SOCK_PASSSEC, &sock->flags) ? 1 : 0; + v.val = !!test_bit(SOCK_PASSSEC, &sock->flags); break; case SO_PEERSEC: @@ -1011,13 +1013,22 @@ int sock_getsockopt(struct socket *sock, int level, int optname, break; case SO_RXQ_OVFL: - v.val = !!sock_flag(sk, SOCK_RXQ_OVFL); + v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; case SO_WIFI_STATUS: - v.val = !!sock_flag(sk, SOCK_WIFI_STATUS); + v.val = sock_flag(sk, SOCK_WIFI_STATUS); break; + case SO_PEEK_OFF: + if (!sock->ops->set_peek_off) + return -EOPNOTSUPP; + + v.val = sk->sk_peek_off; + break; + case SO_NOFCS: + v.val = sock_flag(sk, SOCK_NOFCS); + break; default: return -ENOPROTOOPT; } @@ -1228,8 +1239,8 @@ static void __sk_free(struct sock *sk) sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP); if (atomic_read(&sk->sk_omem_alloc)) - printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", - __func__, atomic_read(&sk->sk_omem_alloc)); + pr_debug("%s: optmem leakage (%d bytes) detected\n", + __func__, atomic_read(&sk->sk_omem_alloc)); if (sk->sk_peer_cred) put_cred(sk->sk_peer_cred); @@ -1515,7 +1526,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { - if ((unsigned)size <= sysctl_optmem_max && + if ((unsigned int)size <= sysctl_optmem_max && atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { void *mem; /* First do the add, to avoid the race if kmalloc @@ -1581,6 +1592,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, gfp_t gfp_mask; long timeo; int err; + int npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + + err = -EMSGSIZE; + if (npages > MAX_SKB_FRAGS) + goto failure; gfp_mask = sk->sk_allocation; if (gfp_mask & __GFP_WAIT) @@ -1599,14 +1615,12 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { skb = alloc_skb(header_len, gfp_mask); if (skb) { - int npages; int i; /* No pages, we're done... */ if (!data_len) break; - npages = (data_len + (PAGE_SIZE - 1)) >> PAGE_SHIFT; skb->truesize += data_len; skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { @@ -1693,6 +1707,7 @@ static void __release_sock(struct sock *sk) do { struct sk_buff *next = skb->next; + prefetch(next); WARN_ON_ONCE(skb_dst_is_noref(skb)); skb->next = NULL; sk_backlog_rcv(sk, skb); @@ -2092,6 +2107,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndmsg_page = NULL; sk->sk_sndmsg_off = 0; + sk->sk_peek_off = -1; sk->sk_peer_pid = NULL; sk->sk_peer_cred = NULL; @@ -2412,7 +2428,7 @@ static void assign_proto_idx(struct proto *prot) prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { - printk(KERN_ERR "PROTO_INUSE_NR exhausted\n"); + pr_err("PROTO_INUSE_NR exhausted\n"); return; } @@ -2442,8 +2458,8 @@ int proto_register(struct proto *prot, int alloc_slab) NULL); if (prot->slab == NULL) { - printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", - prot->name); + pr_crit("%s: Can't create sock SLAB cache!\n", + prot->name); goto out; } @@ -2457,8 +2473,8 @@ int proto_register(struct proto *prot, int alloc_slab) SLAB_HWCACHE_ALIGN, NULL); if (prot->rsk_prot->slab == NULL) { - printk(KERN_CRIT "%s: Can't create request sock SLAB cache!\n", - prot->name); + pr_crit("%s: Can't create request sock SLAB cache!\n", + prot->name); goto out_free_request_sock_slab_name; } } @@ -2556,7 +2572,7 @@ static char proto_method_implemented(const void *method) } static long sock_prot_memory_allocated(struct proto *proto) { - return proto->memory_allocated != NULL ? proto_memory_allocated(proto): -1L; + return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L; } static char *sock_prot_memory_pressure(struct proto *proto) |