diff options
Diffstat (limited to 'net/sched')
31 files changed, 1154 insertions, 687 deletions
diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0566e4606a4a..f32bcb094915 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -231,7 +231,7 @@ override: if (ret != ACT_P_CREATED) return ret; - police->tcfp_t_c = ktime_to_ns(ktime_get()); + police->tcfp_t_c = ktime_get_ns(); police->tcf_index = parm->index ? parm->index : tcf_hash_new_index(hinfo); h = tcf_hash(police->tcf_index, POL_TAB_MASK); @@ -279,7 +279,7 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, return police->tcfp_result; } - now = ktime_to_ns(ktime_get()); + now = ktime_get_ns(); toks = min_t(s64, now - police->tcfp_t_c, police->tcfp_burst); if (police->peak_present) { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c28b0d327b12..e547efdaba9d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -117,7 +117,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; - spinlock_t *root_lock; struct tcmsg *t; u32 protocol; u32 prio; @@ -125,7 +124,8 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n) u32 parent; struct net_device *dev; struct Qdisc *q; - struct tcf_proto **back, **chain; + struct tcf_proto __rcu **back; + struct tcf_proto __rcu **chain; struct tcf_proto *tp; const struct tcf_proto_ops *tp_ops; const struct Qdisc_class_ops *cops; @@ -197,7 +197,9 @@ replay: goto errout; /* Check the chain for existence of proto-tcf with this priority */ - for (back = chain; (tp = *back) != NULL; back = &tp->next) { + for (back = chain; + (tp = rtnl_dereference(*back)) != NULL; + back = &tp->next) { if (tp->prio >= prio) { if (tp->prio == prio) { if (!nprio || @@ -209,8 +211,6 @@ replay: } } - root_lock = qdisc_root_sleeping_lock(q); - if (tp == NULL) { /* Proto-tcf does not exist, create new one */ @@ -259,7 +259,8 @@ replay: } tp->ops = tp_ops; tp->protocol = protocol; - tp->prio = nprio ? : TC_H_MAJ(tcf_auto_prio(*back)); + tp->prio = nprio ? : + TC_H_MAJ(tcf_auto_prio(rtnl_dereference(*back))); tp->q = q; tp->classify = tp_ops->classify; tp->classid = parent; @@ -280,9 +281,9 @@ replay: if (fh == 0) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { - spin_lock_bh(root_lock); - *back = tp->next; - spin_unlock_bh(root_lock); + struct tcf_proto *next = rtnl_dereference(tp->next); + + RCU_INIT_POINTER(*back, next); tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); tcf_destroy(tp); @@ -322,10 +323,8 @@ replay: n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE); if (err == 0) { if (tp_created) { - spin_lock_bh(root_lock); - tp->next = *back; - *back = tp; - spin_unlock_bh(root_lock); + RCU_INIT_POINTER(tp->next, rtnl_dereference(*back)); + rcu_assign_pointer(*back, tp); } tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); } else { @@ -420,7 +419,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) int s_t; struct net_device *dev; struct Qdisc *q; - struct tcf_proto *tp, **chain; + struct tcf_proto *tp, __rcu **chain; struct tcmsg *tcm = nlmsg_data(cb->nlh); unsigned long cl = 0; const struct Qdisc_class_ops *cops; @@ -454,7 +453,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; - for (tp = *chain, t = 0; tp; tp = tp->next, t++) { + for (tp = rtnl_dereference(*chain), t = 0; + tp; tp = rtnl_dereference(tp->next), t++) { if (t < s_t) continue; if (TC_H_MAJ(tcm->tcm_info) && diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0ae1813e3e90..1937298d6775 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -24,6 +24,7 @@ struct basic_head { u32 hgenerator; struct list_head flist; + struct rcu_head rcu; }; struct basic_filter { @@ -31,17 +32,19 @@ struct basic_filter { struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_result res; + struct tcf_proto *tp; struct list_head link; + struct rcu_head rcu; }; static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { int r; - struct basic_head *head = tp->root; + struct basic_head *head = rcu_dereference_bh(tp->root); struct basic_filter *f; - list_for_each_entry(f, &head->flist, link) { + list_for_each_entry_rcu(f, &head->flist, link) { if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; *res = f->res; @@ -56,7 +59,7 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long basic_get(struct tcf_proto *tp, u32 handle) { unsigned long l = 0UL; - struct basic_head *head = tp->root; + struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; if (head == NULL) @@ -81,12 +84,15 @@ static int basic_init(struct tcf_proto *tp) if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->flist); - tp->root = head; + rcu_assign_pointer(tp->root, head); return 0; } -static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f) +static void basic_delete_filter(struct rcu_head *head) { + struct basic_filter *f = container_of(head, struct basic_filter, rcu); + struct tcf_proto *tp = f->tp; + tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(tp, &f->exts); tcf_em_tree_destroy(tp, &f->ematches); @@ -95,27 +101,26 @@ static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f) static void basic_destroy(struct tcf_proto *tp) { - struct basic_head *head = tp->root; + struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; list_for_each_entry_safe(f, n, &head->flist, link) { - list_del(&f->link); - basic_delete_filter(tp, f); + list_del_rcu(&f->link); + call_rcu(&f->rcu, basic_delete_filter); } - kfree(head); + RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); } static int basic_delete(struct tcf_proto *tp, unsigned long arg) { - struct basic_head *head = tp->root; + struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *t, *f = (struct basic_filter *) arg; list_for_each_entry(t, &head->flist, link) if (t == f) { - tcf_tree_lock(tp); - list_del(&t->link); - tcf_tree_unlock(tp); - basic_delete_filter(tp, t); + list_del_rcu(&t->link); + call_rcu(&t->rcu, basic_delete_filter); return 0; } @@ -152,6 +157,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &f->exts, &e); tcf_em_tree_change(tp, &f->ematches, &t); + f->tp = tp; return 0; errout: @@ -164,9 +170,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, struct nlattr **tca, unsigned long *arg, bool ovr) { int err; - struct basic_head *head = tp->root; + struct basic_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_BASIC_MAX + 1]; - struct basic_filter *f = (struct basic_filter *) *arg; + struct basic_filter *fold = (struct basic_filter *) *arg; + struct basic_filter *fnew; if (tca[TCA_OPTIONS] == NULL) return -EINVAL; @@ -176,22 +183,23 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - if (f != NULL) { - if (handle && f->handle != handle) + if (fold != NULL) { + if (handle && fold->handle != handle) return -EINVAL; - return basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); } err = -ENOBUFS; - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (f == NULL) + fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); + if (fnew == NULL) goto errout; - tcf_exts_init(&f->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); + tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); err = -EINVAL; - if (handle) - f->handle = handle; - else { + if (handle) { + fnew->handle = handle; + } else if (fold) { + fnew->handle = fold->handle; + } else { unsigned int i = 0x80000000; do { if (++head->hgenerator == 0x7FFFFFFF) @@ -203,29 +211,31 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, goto errout; } - f->handle = head->hgenerator; + fnew->handle = head->hgenerator; } - err = basic_set_parms(net, tp, f, base, tb, tca[TCA_RATE], ovr); + err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); if (err < 0) goto errout; - tcf_tree_lock(tp); - list_add(&f->link, &head->flist); - tcf_tree_unlock(tp); - *arg = (unsigned long) f; + *arg = (unsigned long)fnew; + + if (fold) { + list_replace_rcu(&fold->link, &fnew->link); + call_rcu(&fold->rcu, basic_delete_filter); + } else { + list_add_rcu(&fnew->link, &head->flist); + } return 0; errout: - if (*arg == 0UL && f) - kfree(f); - + kfree(fnew); return err; } static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct basic_head *head = tp->root; + struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 0e30d58149da..4e3f5bfc0b26 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -27,6 +27,7 @@ MODULE_DESCRIPTION("TC BPF based classifier"); struct cls_bpf_head { struct list_head plist; u32 hgen; + struct rcu_head rcu; }; struct cls_bpf_prog { @@ -37,6 +38,8 @@ struct cls_bpf_prog { struct list_head link; u32 handle; u16 bpf_len; + struct tcf_proto *tp; + struct rcu_head rcu; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { @@ -49,11 +52,11 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct cls_bpf_head *head = tp->root; + struct cls_bpf_head *head = rcu_dereference_bh(tp->root); struct cls_bpf_prog *prog; int ret; - list_for_each_entry(prog, &head->plist, link) { + list_for_each_entry_rcu(prog, &head->plist, link) { int filter_res = BPF_PROG_RUN(prog->filter, skb); if (filter_res == 0) @@ -81,8 +84,8 @@ static int cls_bpf_init(struct tcf_proto *tp) if (head == NULL) return -ENOBUFS; - INIT_LIST_HEAD(&head->plist); - tp->root = head; + INIT_LIST_HEAD_RCU(&head->plist); + rcu_assign_pointer(tp->root, head); return 0; } @@ -98,18 +101,22 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog) kfree(prog); } +static void __cls_bpf_delete_prog(struct rcu_head *rcu) +{ + struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu); + + cls_bpf_delete_prog(prog->tp, prog); +} + static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) { - struct cls_bpf_head *head = tp->root; + struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog, *todel = (struct cls_bpf_prog *) arg; list_for_each_entry(prog, &head->plist, link) { if (prog == todel) { - tcf_tree_lock(tp); - list_del(&prog->link); - tcf_tree_unlock(tp); - - cls_bpf_delete_prog(tp, prog); + list_del_rcu(&prog->link); + call_rcu(&prog->rcu, __cls_bpf_delete_prog); return 0; } } @@ -119,27 +126,28 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) static void cls_bpf_destroy(struct tcf_proto *tp) { - struct cls_bpf_head *head = tp->root; + struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog, *tmp; list_for_each_entry_safe(prog, tmp, &head->plist, link) { - list_del(&prog->link); - cls_bpf_delete_prog(tp, prog); + list_del_rcu(&prog->link); + call_rcu(&prog->rcu, __cls_bpf_delete_prog); } - kfree(head); + RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); } static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) { - struct cls_bpf_head *head = tp->root; + struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; unsigned long ret = 0UL; if (head == NULL) return 0UL; - list_for_each_entry(prog, &head->plist, link) { + list_for_each_entry_rcu(prog, &head->plist, link) { if (prog->handle == handle) { ret = (unsigned long) prog; break; @@ -158,10 +166,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct sock_filter *bpf_ops, *bpf_old; + struct sock_filter *bpf_ops; struct tcf_exts exts; struct sock_fprog_kern tmp; - struct bpf_prog *fp, *fp_old; + struct bpf_prog *fp; u16 bpf_size, bpf_len; u32 classid; int ret; @@ -197,26 +205,15 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if (ret) goto errout_free; - tcf_tree_lock(tp); - fp_old = prog->filter; - bpf_old = prog->bpf_ops; - prog->bpf_len = bpf_len; prog->bpf_ops = bpf_ops; prog->filter = fp; prog->res.classid = classid; - tcf_tree_unlock(tp); tcf_bind_filter(tp, &prog->res, base); tcf_exts_change(tp, &prog->exts, &exts); - if (fp_old) - bpf_prog_destroy(fp_old); - if (bpf_old) - kfree(bpf_old); - return 0; - errout_free: kfree(bpf_ops); errout: @@ -244,9 +241,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, u32 handle, struct nlattr **tca, unsigned long *arg, bool ovr) { - struct cls_bpf_head *head = tp->root; - struct cls_bpf_prog *prog = (struct cls_bpf_prog *) *arg; + struct cls_bpf_head *head = rtnl_dereference(tp->root); + struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg; struct nlattr *tb[TCA_BPF_MAX + 1]; + struct cls_bpf_prog *prog; int ret; if (tca[TCA_OPTIONS] == NULL) @@ -256,18 +254,19 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (ret < 0) return ret; - if (prog != NULL) { - if (handle && prog->handle != handle) - return -EINVAL; - return cls_bpf_modify_existing(net, tp, prog, base, tb, - tca[TCA_RATE], ovr); - } - prog = kzalloc(sizeof(*prog), GFP_KERNEL); - if (prog == NULL) + if (!prog) return -ENOBUFS; tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); + + if (oldprog) { + if (handle && oldprog->handle != handle) { + ret = -EINVAL; + goto errout; + } + } + if (handle == 0) prog->handle = cls_bpf_grab_new_handle(tp, head); else @@ -281,16 +280,17 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (ret < 0) goto errout; - tcf_tree_lock(tp); - list_add(&prog->link, &head->plist); - tcf_tree_unlock(tp); + if (oldprog) { + list_replace_rcu(&prog->link, &oldprog->link); + call_rcu(&oldprog->rcu, __cls_bpf_delete_prog); + } else { + list_add_rcu(&prog->link, &head->plist); + } *arg = (unsigned long) prog; - return 0; errout: - if (*arg == 0UL && prog) - kfree(prog); + kfree(prog); return ret; } @@ -339,10 +339,10 @@ nla_put_failure: static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct cls_bpf_head *head = tp->root; + struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; - list_for_each_entry(prog, &head->plist, link) { + list_for_each_entry_rcu(prog, &head->plist, link) { if (arg->count < arg->skip) goto skip; if (arg->fn(tp, (unsigned long) prog, arg) < 0) { diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index cacf01bd04f0..15c34d4ccd9e 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -22,17 +22,17 @@ struct cls_cgroup_head { u32 handle; struct tcf_exts exts; struct tcf_ematch_tree ematches; + struct tcf_proto *tp; + struct rcu_head rcu; }; static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct cls_cgroup_head *head = tp->root; + struct cls_cgroup_head *head = rcu_dereference_bh(tp->root); u32 classid; - rcu_read_lock(); classid = task_cls_state(current)->classid; - rcu_read_unlock(); /* * Due to the nature of the classifier it is required to ignore all @@ -80,13 +80,25 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void cls_cgroup_destroy_rcu(struct rcu_head *root) +{ + struct cls_cgroup_head *head = container_of(root, + struct cls_cgroup_head, + rcu); + + tcf_exts_destroy(head->tp, &head->exts); + tcf_em_tree_destroy(head->tp, &head->ematches); + kfree(head); +} + static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg, bool ovr) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; - struct cls_cgroup_head *head = tp->root; + struct cls_cgroup_head *head = rtnl_dereference(tp->root); + struct cls_cgroup_head *new; struct tcf_ematch_tree t; struct tcf_exts e; int err; @@ -94,53 +106,60 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (!tca[TCA_OPTIONS]) return -EINVAL; - if (head == NULL) { - if (!handle) - return -EINVAL; - - head = kzalloc(sizeof(*head), GFP_KERNEL); - if (head == NULL) - return -ENOBUFS; + if (!head && !handle) + return -EINVAL; - tcf_exts_init(&head->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); - head->handle = handle; + if (head && handle != head->handle) + return -ENOENT; - tcf_tree_lock(tp); - tp->root = head; - tcf_tree_unlock(tp); - } + new = kzalloc(sizeof(*head), GFP_KERNEL); + if (!new) + return -ENOBUFS; - if (handle != head->handle) - return -ENOENT; + tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + if (head) + new->handle = head->handle; + else + new->handle = handle; + new->tp = tp; err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], cgroup_policy); if (err < 0) - return err; + goto errout; tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) - return err; + goto errout; err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); - if (err < 0) - return err; + if (err < 0) { + tcf_exts_destroy(tp, &e); + goto errout; + } - tcf_exts_change(tp, &head->exts, &e); - tcf_em_tree_change(tp, &head->ematches, &t); + tcf_exts_change(tp, &new->exts, &e); + tcf_em_tree_change(tp, &new->ematches, &t); + rcu_assign_pointer(tp->root, new); + if (head) + call_rcu(&head->rcu, cls_cgroup_destroy_rcu); return 0; +errout: + kfree(new); + return err; } static void cls_cgroup_destroy(struct tcf_proto *tp) { - struct cls_cgroup_head *head = tp->root; + struct cls_cgroup_head *head = rtnl_dereference(tp->root); if (head) { tcf_exts_destroy(tp, &head->exts); tcf_em_tree_destroy(tp, &head->ematches); - kfree(head); + RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); } } @@ -151,7 +170,7 @@ static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg) static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct cls_cgroup_head *head = tp->root; + struct cls_cgroup_head *head = rtnl_dereference(tp->root); if (arg->count < arg->skip) goto skip; @@ -167,7 +186,7 @@ skip: static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct cls_cgroup_head *head = tp->root; + struct cls_cgroup_head *head = rtnl_dereference(tp->root); unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 35be16f7c192..95736fa479f3 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -34,12 +34,14 @@ struct flow_head { struct list_head filters; + struct rcu_head rcu; }; struct flow_filter { struct list_head list; struct tcf_exts exts; struct tcf_ematch_tree ematches; + struct tcf_proto *tp; struct timer_list perturb_timer; u32 perturb_period; u32 handle; @@ -54,6 +56,7 @@ struct flow_filter { u32 divisor; u32 baseclass; u32 hashrnd; + struct rcu_head rcu; }; static inline u32 addr_fold(void *addr) @@ -276,14 +279,14 @@ static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct flow_head *head = tp->root; + struct flow_head *head = rcu_dereference_bh(tp->root); struct flow_filter *f; u32 keymask; u32 classid; unsigned int n, key; int r; - list_for_each_entry(f, &head->filters, list) { + list_for_each_entry_rcu(f, &head->filters, list) { u32 keys[FLOW_KEY_MAX + 1]; struct flow_keys flow_keys; @@ -346,13 +349,23 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; +static void flow_destroy_filter(struct rcu_head *head) +{ + struct flow_filter *f = container_of(head, struct flow_filter, rcu); + + del_timer_sync(&f->perturb_timer); + tcf_exts_destroy(f->tp, &f->exts); + tcf_em_tree_destroy(f->tp, &f->ematches); + kfree(f); +} + static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, unsigned long *arg, bool ovr) { - struct flow_head *head = tp->root; - struct flow_filter *f; + struct flow_head *head = rtnl_dereference(tp->root); + struct flow_filter *fold, *fnew; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FLOW_MAX + 1]; struct tcf_exts e; @@ -401,20 +414,42 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto err1; - f = (struct flow_filter *)*arg; - if (f != NULL) { + err = -ENOBUFS; + fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); + if (!fnew) + goto err2; + + fold = (struct flow_filter *)*arg; + if (fold) { err = -EINVAL; - if (f->handle != handle && handle) + if (fold->handle != handle && handle) goto err2; - mode = f->mode; + /* Copy fold into fnew */ + fnew->handle = fold->handle; + fnew->keymask = fold->keymask; + fnew->tp = fold->tp; + + fnew->handle = fold->handle; + fnew->nkeys = fold->nkeys; + fnew->keymask = fold->keymask; + fnew->mode = fold->mode; + fnew->mask = fold->mask; + fnew->xor = fold->xor; + fnew->rshift = fold->rshift; + fnew->addend = fold->addend; + fnew->divisor = fold->divisor; + fnew->baseclass = fold->baseclass; + fnew->hashrnd = fold->hashrnd; + + mode = fold->mode; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) goto err2; if (mode == FLOW_MODE_HASH) - perturb_period = f->perturb_period; + perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) goto err2; @@ -444,83 +479,70 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (TC_H_MIN(baseclass) == 0) baseclass = TC_H_MAKE(baseclass, 1); - err = -ENOBUFS; - f = kzalloc(sizeof(*f), GFP_KERNEL); - if (f == NULL) - goto err2; - - f->handle = handle; - f->mask = ~0U; - tcf_exts_init(&f->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); - - get_random_bytes(&f->hashrnd, 4); - f->perturb_timer.function = flow_perturbation; - f->perturb_timer.data = (unsigned long)f; - init_timer_deferrable(&f->perturb_timer); + fnew->handle = handle; + fnew->mask = ~0U; + fnew->tp = tp; + get_random_bytes(&fnew->hashrnd, 4); + tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); } - tcf_exts_change(tp, &f->exts, &e); - tcf_em_tree_change(tp, &f->ematches, &t); + fnew->perturb_timer.function = flow_perturbation; + fnew->perturb_timer.data = (unsigned long)fnew; + init_timer_deferrable(&fnew->perturb_timer); - tcf_tree_lock(tp); + tcf_exts_change(tp, &fnew->exts, &e); + tcf_em_tree_change(tp, &fnew->ematches, &t); if (tb[TCA_FLOW_KEYS]) { - f->keymask = keymask; - f->nkeys = nkeys; + fnew->keymask = keymask; + fnew->nkeys = nkeys; } - f->mode = mode; + fnew->mode = mode; if (tb[TCA_FLOW_MASK]) - f->mask = nla_get_u32(tb[TCA_FLOW_MASK]); + fnew->mask = nla_get_u32(tb[TCA_FLOW_MASK]); if (tb[TCA_FLOW_XOR]) - f->xor = nla_get_u32(tb[TCA_FLOW_XOR]); + fnew->xor = nla_get_u32(tb[TCA_FLOW_XOR]); if (tb[TCA_FLOW_RSHIFT]) - f->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]); + fnew->rshift = nla_get_u32(tb[TCA_FLOW_RSHIFT]); if (tb[TCA_FLOW_ADDEND]) - f->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]); + fnew->addend = nla_get_u32(tb[TCA_FLOW_ADDEND]); if (tb[TCA_FLOW_DIVISOR]) - f->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]); + fnew->divisor = nla_get_u32(tb[TCA_FLOW_DIVISOR]); if (baseclass) - f->baseclass = baseclass; + fnew->baseclass = baseclass; - f->perturb_period = perturb_period; - del_timer(&f->perturb_timer); + fnew->perturb_period = perturb_period; if (perturb_period) - mod_timer(&f->perturb_timer, jiffies + perturb_period); + mod_timer(&fnew->perturb_timer, jiffies + perturb_period); if (*arg == 0) - list_add_tail(&f->list, &head->filters); + list_add_tail_rcu(&fnew->list, &head->filters); + else + list_replace_rcu(&fnew->list, &fold->list); - tcf_tree_unlock(tp); + *arg = (unsigned long)fnew; - *arg = (unsigned long)f; + if (fold) + call_rcu(&fold->rcu, flow_destroy_filter); return 0; err2: tcf_em_tree_destroy(tp, &t); + kfree(fnew); err1: tcf_exts_destroy(tp, &e); return err; } -static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f) -{ - del_timer_sync(&f->perturb_timer); - tcf_exts_destroy(tp, &f->exts); - tcf_em_tree_destroy(tp, &f->ematches); - kfree(f); -} - static int flow_delete(struct tcf_proto *tp, unsigned long arg) { struct flow_filter *f = (struct flow_filter *)arg; - tcf_tree_lock(tp); - list_del(&f->list); - tcf_tree_unlock(tp); - flow_destroy_filter(tp, f); + list_del_rcu(&f->list); + call_rcu(&f->rcu, flow_destroy_filter); return 0; } @@ -532,28 +554,29 @@ static int flow_init(struct tcf_proto *tp) if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->filters); - tp->root = head; + rcu_assign_pointer(tp->root, head); return 0; } static void flow_destroy(struct tcf_proto *tp) { - struct flow_head *head = tp->root; + struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f, *next; list_for_each_entry_safe(f, next, &head->filters, list) { - list_del(&f->list); - flow_destroy_filter(tp, f); + list_del_rcu(&f->list); + call_rcu(&f->rcu, flow_destroy_filter); } - kfree(head); + RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); } static unsigned long flow_get(struct tcf_proto *tp, u32 handle) { - struct flow_head *head = tp->root; + struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; - list_for_each_entry(f, &head->filters, list) + list_for_each_entry_rcu(f, &head->filters, list) if (f->handle == handle) return (unsigned long)f; return 0; @@ -626,10 +649,10 @@ nla_put_failure: static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct flow_head *head = tp->root; + struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; - list_for_each_entry(f, &head->filters, list) { + list_for_each_entry_rcu(f, &head->filters, list) { if (arg->count < arg->skip) goto skip; if (arg->fn(tp, (unsigned long)f, arg) < 0) { diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 861b03ccfed0..2650285620ee 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -33,17 +33,20 @@ struct fw_head { u32 mask; - struct fw_filter *ht[HTSIZE]; + struct fw_filter __rcu *ht[HTSIZE]; + struct rcu_head rcu; }; struct fw_filter { - struct fw_filter *next; + struct fw_filter __rcu *next; u32 id; struct tcf_result res; #ifdef CONFIG_NET_CLS_IND int ifindex; #endif /* CONFIG_NET_CLS_IND */ struct tcf_exts exts; + struct tcf_proto *tp; + struct rcu_head rcu; }; static u32 fw_hash(u32 handle) @@ -56,14 +59,16 @@ static u32 fw_hash(u32 handle) static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct fw_head *head = tp->root; + struct fw_head *head = rcu_dereference_bh(tp->root); struct fw_filter *f; int r; u32 id = skb->mark; if (head != NULL) { id &= head->mask; - for (f = head->ht[fw_hash(id)]; f; f = f->next) { + + for (f = rcu_dereference_bh(head->ht[fw_hash(id)]); f; + f = rcu_dereference_bh(f->next)) { if (f->id == id) { *res = f->res; #ifdef CONFIG_NET_CLS_IND @@ -92,13 +97,14 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long fw_get(struct tcf_proto *tp, u32 handle) { - struct fw_head *head = tp->root; + struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; if (head == NULL) return 0; - for (f = head->ht[fw_hash(handle)]; f; f = f->next) { + f = rtnl_dereference(head->ht[fw_hash(handle)]); + for (; f; f = rtnl_dereference(f->next)) { if (f->id == handle) return (unsigned long)f; } @@ -114,8 +120,11 @@ static int fw_init(struct tcf_proto *tp) return 0; } -static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) +static void fw_delete_filter(struct rcu_head *head) { + struct fw_filter *f = container_of(head, struct fw_filter, rcu); + struct tcf_proto *tp = f->tp; + tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(tp, &f->exts); kfree(f); @@ -123,7 +132,7 @@ static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) static void fw_destroy(struct tcf_proto *tp) { - struct fw_head *head = tp->root; + struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; int h; @@ -131,29 +140,33 @@ static void fw_destroy(struct tcf_proto *tp) return; for (h = 0; h < HTSIZE; h++) { - while ((f = head->ht[h]) != NULL) { - head->ht[h] = f->next; - fw_delete_filter(tp, f); + while ((f = rtnl_dereference(head->ht[h])) != NULL) { + RCU_INIT_POINTER(head->ht[h], + rtnl_dereference(f->next)); + call_rcu(&f->rcu, fw_delete_filter); } } - kfree(head); + RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); } static int fw_delete(struct tcf_proto *tp, unsigned long arg) { - struct fw_head *head = tp->root; + struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = (struct fw_filter *)arg; - struct fw_filter **fp; + struct fw_filter __rcu **fp; + struct fw_filter *pfp; if (head == NULL || f == NULL) goto out; - for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { - if (*fp == f) { - tcf_tree_lock(tp); - *fp = f->next; - tcf_tree_unlock(tp); - fw_delete_filter(tp, f); + fp = &head->ht[fw_hash(f->id)]; + + for (pfp = rtnl_dereference(*fp); pfp; + fp = &pfp->next, pfp = rtnl_dereference(*fp)) { + if (pfp == f) { + RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); + call_rcu(&f->rcu, fw_delete_filter); return 0; } } @@ -171,7 +184,7 @@ static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr) { - struct fw_head *head = tp->root; + struct fw_head *head = rtnl_dereference(tp->root); struct tcf_exts e; u32 mask; int err; @@ -220,7 +233,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, struct nlattr **tca, unsigned long *arg, bool ovr) { - struct fw_head *head = tp->root; + struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = (struct fw_filter *) *arg; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FW_MAX + 1]; @@ -233,10 +246,44 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - if (f != NULL) { + if (f) { + struct fw_filter *pfp, *fnew; + struct fw_filter __rcu **fp; + if (f->id != handle && handle) return -EINVAL; - return fw_change_attrs(net, tp, f, tb, tca, base, ovr); + + fnew = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); + if (!fnew) + return -ENOBUFS; + + fnew->id = f->id; + fnew->res = f->res; +#ifdef CONFIG_NET_CLS_IND + fnew->ifindex = f->ifindex; +#endif /* CONFIG_NET_CLS_IND */ + fnew->tp = f->tp; + + tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE); + + err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr); + if (err < 0) { + kfree(fnew); + return err; + } + + fp = &head->ht[fw_hash(fnew->id)]; + for (pfp = rtnl_dereference(*fp); pfp; + fp = &pfp->next, pfp = rtnl_dereference(*fp)) + if (pfp == f) + break; + + RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); + rcu_assign_pointer(*fp, fnew); + call_rcu(&f->rcu, fw_delete_filter); + + *arg = (unsigned long)fnew; + return err; } if (!handle) @@ -252,9 +299,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return -ENOBUFS; head->mask = mask; - tcf_tree_lock(tp); - tp->root = head; - tcf_tree_unlock(tp); + rcu_assign_pointer(tp->root, head); } f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL); @@ -263,15 +308,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); f->id = handle; + f->tp = tp; err = fw_change_attrs(net, tp, f, tb, tca, base, ovr); if (err < 0) goto errout; - f->next = head->ht[fw_hash(handle)]; - tcf_tree_lock(tp); - head->ht[fw_hash(handle)] = f; - tcf_tree_unlock(tp); + RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]); + rcu_assign_pointer(head->ht[fw_hash(handle)], f); *arg = (unsigned long)f; return 0; @@ -283,7 +327,7 @@ errout: static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct fw_head *head = tp->root; + struct fw_head *head = rtnl_dereference(tp->root); int h; if (head == NULL) @@ -295,7 +339,8 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) for (h = 0; h < HTSIZE; h++) { struct fw_filter *f; - for (f = head->ht[h]; f; f = f->next) { + for (f = rtnl_dereference(head->ht[h]); f; + f = rtnl_dereference(f->next)) { if (arg->count < arg->skip) { arg->count++; continue; @@ -312,7 +357,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct fw_head *head = tp->root; + struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = (struct fw_filter *)fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index dd9fc2523c76..ba96deacf27c 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -29,25 +29,26 @@ * are mutually exclusive. * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" */ - struct route4_fastmap { - struct route4_filter *filter; - u32 id; - int iif; + struct route4_filter *filter; + u32 id; + int iif; }; struct route4_head { - struct route4_fastmap fastmap[16]; - struct route4_bucket *table[256 + 1]; + struct route4_fastmap fastmap[16]; + struct route4_bucket __rcu *table[256 + 1]; + struct rcu_head rcu; }; struct route4_bucket { /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ - struct route4_filter *ht[16 + 16 + 1]; + struct route4_filter __rcu *ht[16 + 16 + 1]; + struct rcu_head rcu; }; struct route4_filter { - struct route4_filter *next; + struct route4_filter __rcu *next; u32 id; int iif; @@ -55,6 +56,8 @@ struct route4_filter { struct tcf_exts exts; u32 handle; struct route4_bucket *bkt; + struct tcf_proto *tp; + struct rcu_head rcu; }; #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) @@ -64,14 +67,13 @@ static inline int route4_fastmap_hash(u32 id, int iif) return id & 0xF; } +static DEFINE_SPINLOCK(fastmap_lock); static void -route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) +route4_reset_fastmap(struct route4_head *head) { - spinlock_t *root_lock = qdisc_root_sleeping_lock(q); - - spin_lock_bh(root_lock); + spin_lock_bh(&fastmap_lock); memset(head->fastmap, 0, sizeof(head->fastmap)); - spin_unlock_bh(root_lock); + spin_unlock_bh(&fastmap_lock); } static void @@ -80,9 +82,12 @@ route4_set_fastmap(struct route4_head *head, u32 id, int iif, { int h = route4_fastmap_hash(id, iif); + /* fastmap updates must look atomic to aling id, iff, filter */ + spin_lock_bh(&fastmap_lock); head->fastmap[h].id = id; head->fastmap[h].iif = iif; head->fastmap[h].filter = f; + spin_unlock_bh(&fastmap_lock); } static inline int route4_hash_to(u32 id) @@ -123,7 +128,7 @@ static inline int route4_hash_wild(void) static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct route4_head *head = tp->root; + struct route4_head *head = rcu_dereference_bh(tp->root); struct dst_entry *dst; struct route4_bucket *b; struct route4_filter *f; @@ -141,32 +146,43 @@ static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, iif = inet_iif(skb); h = route4_fastmap_hash(id, iif); + + spin_lock(&fastmap_lock); if (id == head->fastmap[h].id && iif == head->fastmap[h].iif && (f = head->fastmap[h].filter) != NULL) { - if (f == ROUTE4_FAILURE) + if (f == ROUTE4_FAILURE) { + spin_unlock(&fastmap_lock); goto failure; + } *res = f->res; + spin_unlock(&fastmap_lock); return 0; } + spin_unlock(&fastmap_lock); h = route4_hash_to(id); restart: - b = head->table[h]; + b = rcu_dereference_bh(head->table[h]); if (b) { - for (f = b->ht[route4_hash_from(id)]; f; f = f->next) + for (f = rcu_dereference_bh(b->ht[route4_hash_from(id)]); + f; + f = rcu_dereference_bh(f->next)) if (f->id == id) ROUTE4_APPLY_RESULT(); - for (f = b->ht[route4_hash_iif(iif)]; f; f = f->next) + for (f = rcu_dereference_bh(b->ht[route4_hash_iif(iif)]); + f; + f = rcu_dereference_bh(f->next)) if (f->iif == iif) ROUTE4_APPLY_RESULT(); - for (f = b->ht[route4_hash_wild()]; f; f = f->next) + for (f = rcu_dereference_bh(b->ht[route4_hash_wild()]); + f; + f = rcu_dereference_bh(f->next)) ROUTE4_APPLY_RESULT(); - } if (h < 256) { h = 256; @@ -213,7 +229,7 @@ static inline u32 from_hash(u32 id) static unsigned long route4_get(struct tcf_proto *tp, u32 handle) { - struct route4_head *head = tp->root; + struct route4_head *head = rtnl_dereference(tp->root); struct route4_bucket *b; struct route4_filter *f; unsigned int h1, h2; @@ -229,9 +245,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) if (h2 > 32) return 0; - b = head->table[h1]; + b = rtnl_dereference(head->table[h1]); if (b) { - for (f = b->ht[h2]; f; f = f->next) + for (f = rtnl_dereference(b->ht[h2]); + f; + f = rtnl_dereference(f->next)) if (f->handle == handle) return (unsigned long)f; } @@ -248,8 +266,11 @@ static int route4_init(struct tcf_proto *tp) } static void -route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) +route4_delete_filter(struct rcu_head *head) { + struct route4_filter *f = container_of(head, struct route4_filter, rcu); + struct tcf_proto *tp = f->tp; + tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(tp, &f->exts); kfree(f); @@ -257,7 +278,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) static void route4_destroy(struct tcf_proto *tp) { - struct route4_head *head = tp->root; + struct route4_head *head = rtnl_dereference(tp->root); int h1, h2; if (head == NULL) @@ -266,28 +287,35 @@ static void route4_destroy(struct tcf_proto *tp) for (h1 = 0; h1 <= 256; h1++) { struct route4_bucket *b; - b = head->table[h1]; + b = rtnl_dereference(head->table[h1]); if (b) { for (h2 = 0; h2 <= 32; h2++) { struct route4_filter *f; - while ((f = b->ht[h2]) != NULL) { - b->ht[h2] = f->next; - route4_delete_filter(tp, f); + while ((f = rtnl_dereference(b->ht[h2])) != NULL) { + struct route4_filter *next; + + next = rtnl_dereference(f->next); + RCU_INIT_POINTER(b->ht[h2], next); + call_rcu(&f->rcu, route4_delete_filter); } } - kfree(b); + RCU_INIT_POINTER(head->table[h1], NULL); + kfree_rcu(b, rcu); } } - kfree(head); + RCU_INIT_POINTER(tp->root, NULL); + kfree_rcu(head, rcu); } static int route4_delete(struct tcf_proto *tp, unsigned long arg) { - struct route4_head *head = tp->root; - struct route4_filter **fp, *f = (struct route4_filter *)arg; - unsigned int h = 0; + struct route4_head *head = rtnl_dereference(tp->root); + struct route4_filter *f = (struct route4_filter *)arg; + struct route4_filter __rcu **fp; + struct route4_filter *nf; struct route4_bucket *b; + unsigned int h = 0; int i; if (!head || !f) @@ -296,27 +324,35 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) h = f->handle; b = f->bkt; - for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) { - if (*fp == f) { - tcf_tree_lock(tp); - *fp = f->next; - tcf_tree_unlock(tp); + fp = &b->ht[from_hash(h >> 16)]; + for (nf = rtnl_dereference(*fp); nf; + fp = &nf->next, nf = rtnl_dereference(*fp)) { + if (nf == f) { + /* unlink it */ + RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); - route4_reset_fastmap(tp->q, head, f->id); - route4_delete_filter(tp, f); + /* Remove any fastmap lookups that might ref filter + * notice we unlink'd the filter so we can't get it + * back in the fastmap. + */ + route4_reset_fastmap(head); - /* Strip tree */ + /* Delete it */ + call_rcu(&f->rcu, route4_delete_filter); - for (i = 0; i <= 32; i++) - if (b->ht[i]) + /* Strip RTNL protected tree */ + for (i = 0; i <= 32; i++) { + struct route4_filter *rt; + + rt = rtnl_dereference(b->ht[i]); + if (rt) return 0; + } /* OK, session has no flows */ - tcf_tree_lock(tp); - head->table[to_hash(h)] = NULL; - tcf_tree_unlock(tp); + RCU_INIT_POINTER(head->table[to_hash(h)], NULL); + kfree_rcu(b, rcu); - kfree(b); return 0; } } @@ -380,26 +416,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, } h1 = to_hash(nhandle); - b = head->table[h1]; + b = rtnl_dereference(head->table[h1]); if (!b) { err = -ENOBUFS; b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); if (b == NULL) goto errout; - tcf_tree_lock(tp); - head->table[h1] = b; - tcf_tree_unlock(tp); + rcu_assign_pointer(head->table[h1], b); } else { unsigned int h2 = from_hash(nhandle >> 16); err = -EEXIST; - for (fp = b->ht[h2]; fp; fp = fp->next) + for (fp = rtnl_dereference(b->ht[h2]); + fp; + fp = rtnl_dereference(fp->next)) if (fp->handle == f->handle) goto errout; } - tcf_tree_lock(tp); if (tb[TCA_ROUTE4_TO]) f->id = to; @@ -410,7 +445,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, f->handle = nhandle; f->bkt = b; - tcf_tree_unlock(tp); + f->tp = tp; if (tb[TCA_ROUTE4_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_ROUTE4_CLASSID]); @@ -431,14 +466,15 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, struct nlattr **tca, unsigned long *arg, bool ovr) { - struct route4_head *head = tp->root; - struct route4_filter *f, *f1, **fp; + struct route4_head *head = rtnl_dereference(tp->root); + struct route4_filter __rcu **fp; + struct route4_filter *fold, *f1, *pfp, *f = NULL; struct route4_bucket *b; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_ROUTE4_MAX + 1]; unsigned int h, th; - u32 old_handle = 0; int err; + bool new = true; if (opt == NULL) return handle ? -EINVAL : 0; @@ -447,70 +483,70 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - f = (struct route4_filter *)*arg; - if (f) { - if (f->handle != handle && handle) + fold = (struct route4_filter *)*arg; + if (fold && handle && fold->handle != handle) return -EINVAL; - if (f->bkt) - old_handle = f->handle; - - err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], 0, ovr); - if (err < 0) - return err; - - goto reinsert; - } - err = -ENOBUFS; if (head == NULL) { head = kzalloc(sizeof(struct route4_head), GFP_KERNEL); if (head == NULL) goto errout; - - tcf_tree_lock(tp); - tp->root = head; - tcf_tree_unlock(tp); + rcu_assign_pointer(tp->root, head); } f = kzalloc(sizeof(struct route4_filter), GFP_KERNEL); - if (f == NULL) + if (!f) goto errout; tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + if (fold) { + f->id = fold->id; + f->iif = fold->iif; + f->res = fold->res; + f->handle = fold->handle; + + f->tp = fold->tp; + f->bkt = fold->bkt; + new = false; + } + err = route4_set_parms(net, tp, base, f, handle, head, tb, - tca[TCA_RATE], 1, ovr); + tca[TCA_RATE], new, ovr); if (err < 0) goto errout; -reinsert: h = from_hash(f->handle >> 16); - for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next) + fp = &f->bkt->ht[h]; + for (pfp = rtnl_dereference(*fp); + (f1 = rtnl_dereference(*fp)) != NULL; + fp = &f1->next) if (f->handle < f1->handle) break; - f->next = f1; - tcf_tree_lock(tp); - *fp = f; + rcu_assign_pointer(f->next, f1); + rcu_assign_pointer(*fp, f); - if (old_handle && f->handle != old_handle) { - th = to_hash(old_handle); - h = from_hash(old_handle >> 16); - b = head->table[th]; + if (fold && fold->handle && f->handle != fold->handle) { + th = to_hash(fold->handle); + h = from_hash(fold->handle >> 16); + b = rtnl_dereference(head->table[th]); if (b) { - for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { - if (*fp == f) { + fp = &b->ht[h]; + for (pfp = rtnl_dereference(*fp); pfp; + fp = &pfp->next, pfp = rtnl_dereference(*fp)) { + if (pfp == f) { *fp = f->next; break; } } } } - tcf_tree_unlock(tp); - route4_reset_fastmap(tp->q, head, f->id); + route4_reset_fastmap(head); *arg = (unsigned long)f; + if (fold) + call_rcu(&fold->rcu, route4_delete_filter); return 0; errout: @@ -520,7 +556,7 @@ errout: static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct route4_head *head = tp->root; + struct route4_head *head = rtnl_dereference(tp->root); unsigned int h, h1; if (head == NULL) @@ -530,13 +566,15 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) return; for (h = 0; h <= 256; h++) { - struct route4_bucket *b = head->table[h]; + struct route4_bucket *b = rtnl_dereference(head->table[h]); if (b) { for (h1 = 0; h1 <= 32; h1++) { struct route4_filter *f; - for (f = b->ht[h1]; f; f = f->next) { + for (f = rtnl_dereference(b->ht[h1]); + f; + f = rtnl_dereference(f->next)) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 1020e233a5d6..b044c208b133 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -70,31 +70,34 @@ struct rsvp_head { u32 tmap[256/32]; u32 hgenerator; u8 tgenerator; - struct rsvp_session *ht[256]; + struct rsvp_session __rcu *ht[256]; + struct rcu_head rcu; }; struct rsvp_session { - struct rsvp_session *next; - __be32 dst[RSVP_DST_LEN]; - struct tc_rsvp_gpi dpi; - u8 protocol; - u8 tunnelid; + struct rsvp_session __rcu *next; + __be32 dst[RSVP_DST_LEN]; + struct tc_rsvp_gpi dpi; + u8 protocol; + u8 tunnelid; /* 16 (src,sport) hash slots, and one wildcard source slot */ - struct rsvp_filter *ht[16 + 1]; + struct rsvp_filter __rcu *ht[16 + 1]; + struct rcu_head rcu; }; struct rsvp_filter { - struct rsvp_filter *next; - __be32 src[RSVP_DST_LEN]; - struct tc_rsvp_gpi spi; - u8 tunnelhdr; + struct rsvp_filter __rcu *next; + __be32 src[RSVP_DST_LEN]; + struct tc_rsvp_gpi spi; + u8 tunnelhdr; - struct tcf_result res; - struct tcf_exts exts; + struct tcf_result res; + struct tcf_exts exts; - u32 handle; - struct rsvp_session *sess; + u32 handle; + struct rsvp_session *sess; + struct rcu_head rcu; }; static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) @@ -128,7 +131,7 @@ static inline unsigned int hash_src(__be32 *src) static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht; + struct rsvp_head *head = rcu_dereference_bh(tp->root); struct rsvp_session *s; struct rsvp_filter *f; unsigned int h1, h2; @@ -169,7 +172,8 @@ restart: h1 = hash_dst(dst, protocol, tunnelid); h2 = hash_src(src); - for (s = sht[h1]; s; s = s->next) { + for (s = rcu_dereference_bh(head->ht[h1]); s; + s = rcu_dereference_bh(s->next)) { if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] && protocol == s->protocol && !(s->dpi.mask & @@ -181,7 +185,8 @@ restart: #endif tunnelid == s->tunnelid) { - for (f = s->ht[h2]; f; f = f->next) { + for (f = rcu_dereference_bh(s->ht[h2]); f; + f = rcu_dereference_bh(f->next)) { if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] && !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key)) #if RSVP_DST_LEN == 4 @@ -205,7 +210,8 @@ matched: } /* And wildcard bucket... */ - for (f = s->ht[16]; f; f = f->next) { + for (f = rcu_dereference_bh(s->ht[16]); f; + f = rcu_dereference_bh(f->next)) { *res = f->res; RSVP_APPLY_RESULT(); goto matched; @@ -218,7 +224,7 @@ matched: static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) { - struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht; + struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_session *s; struct rsvp_filter *f; unsigned int h1 = handle & 0xFF; @@ -227,8 +233,10 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) if (h2 > 16) return 0; - for (s = sht[h1]; s; s = s->next) { - for (f = s->ht[h2]; f; f = f->next) { + for (s = rtnl_dereference(head->ht[h1]); s; + s = rtnl_dereference(s->next)) { + for (f = rtnl_dereference(s->ht[h2]); f; + f = rtnl_dereference(f->next)) { if (f->handle == handle) return (unsigned long)f; } @@ -246,7 +254,7 @@ static int rsvp_init(struct tcf_proto *tp) data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL); if (data) { - tp->root = data; + rcu_assign_pointer(tp->root, data); return 0; } return -ENOBUFS; @@ -257,53 +265,54 @@ rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) { tcf_unbind_filter(tp, &f->res); tcf_exts_destroy(tp, &f->exts); - kfree(f); + kfree_rcu(f, rcu); } static void rsvp_destroy(struct tcf_proto *tp) { - struct rsvp_head *data = xchg(&tp->root, NULL); - struct rsvp_session **sht; + struct rsvp_head *data = rtnl_dereference(tp->root); int h1, h2; if (data == NULL) return; - sht = data->ht; + RCU_INIT_POINTER(tp->root, NULL); for (h1 = 0; h1 < 256; h1++) { struct rsvp_session *s; - while ((s = sht[h1]) != NULL) { - sht[h1] = s->next; + while ((s = rtnl_dereference(data->ht[h1])) != NULL) { + RCU_INIT_POINTER(data->ht[h1], s->next); for (h2 = 0; h2 <= 16; h2++) { struct rsvp_filter *f; - while ((f = s->ht[h2]) != NULL) { - s->ht[h2] = f->next; + while ((f = rtnl_dereference(s->ht[h2])) != NULL) { + rcu_assign_pointer(s->ht[h2], f->next); rsvp_delete_filter(tp, f); } } - kfree(s); + kfree_rcu(s, rcu); } } - kfree(data); + kfree_rcu(data, rcu); } static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) { - struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg; + struct rsvp_head *head = rtnl_dereference(tp->root); + struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg; + struct rsvp_filter __rcu **fp; unsigned int h = f->handle; - struct rsvp_session **sp; - struct rsvp_session *s = f->sess; + struct rsvp_session __rcu **sp; + struct rsvp_session *nsp, *s = f->sess; int i; - for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) { - if (*fp == f) { - tcf_tree_lock(tp); - *fp = f->next; - tcf_tree_unlock(tp); + fp = &s->ht[(h >> 8) & 0xFF]; + for (nfp = rtnl_dereference(*fp); nfp; + fp = &nfp->next, nfp = rtnl_dereference(*fp)) { + if (nfp == f) { + RCU_INIT_POINTER(*fp, f->next); rsvp_delete_filter(tp, f); /* Strip tree */ @@ -313,14 +322,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) return 0; /* OK, session has no flows */ - for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF]; - *sp; sp = &(*sp)->next) { - if (*sp == s) { - tcf_tree_lock(tp); - *sp = s->next; - tcf_tree_unlock(tp); - - kfree(s); + sp = &head->ht[h & 0xFF]; + for (nsp = rtnl_dereference(*sp); nsp; + sp = &nsp->next, nsp = rtnl_dereference(*sp)) { + if (nsp == s) { + RCU_INIT_POINTER(*sp, s->next); + kfree_rcu(s, rcu); return 0; } } @@ -333,7 +340,7 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) { - struct rsvp_head *data = tp->root; + struct rsvp_head *data = rtnl_dereference(tp->root); int i = 0xFFFF; while (i-- > 0) { @@ -361,7 +368,7 @@ static int tunnel_bts(struct rsvp_head *data) static void tunnel_recycle(struct rsvp_head *data) { - struct rsvp_session **sht = data->ht; + struct rsvp_session __rcu **sht = data->ht; u32 tmap[256/32]; int h1, h2; @@ -369,11 +376,13 @@ static void tunnel_recycle(struct rsvp_head *data) for (h1 = 0; h1 < 256; h1++) { struct rsvp_session *s; - for (s = sht[h1]; s; s = s->next) { + for (s = rtnl_dereference(sht[h1]); s; + s = rtnl_dereference(s->next)) { for (h2 = 0; h2 <= 16; h2++) { struct rsvp_filter *f; - for (f = s->ht[h2]; f; f = f->next) { + for (f = rtnl_dereference(s->ht[h2]); f; + f = rtnl_dereference(f->next)) { if (f->tunnelhdr == 0) continue; data->tgenerator = f->res.classid; @@ -417,9 +426,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct nlattr **tca, unsigned long *arg, bool ovr) { - struct rsvp_head *data = tp->root; - struct rsvp_filter *f, **fp; - struct rsvp_session *s, **sp; + struct rsvp_head *data = rtnl_dereference(tp->root); + struct rsvp_filter *f, *nfp; + struct rsvp_filter __rcu **fp; + struct rsvp_session *nsp, *s; + struct rsvp_session __rcu **sp; struct tc_rsvp_pinfo *pinfo = NULL; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_RSVP_MAX + 1]; @@ -499,7 +510,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, goto errout; } - for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) { + for (sp = &data->ht[h1]; + (s = rtnl_dereference(*sp)) != NULL; + sp = &s->next) { if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && pinfo && pinfo->protocol == s->protocol && memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && @@ -521,12 +534,16 @@ insert: tcf_exts_change(tp, &f->exts, &e); - for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) - if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask) + fp = &s->ht[h2]; + for (nfp = rtnl_dereference(*fp); nfp; + fp = &nfp->next, nfp = rtnl_dereference(*fp)) { + __u32 mask = nfp->spi.mask & f->spi.mask; + + if (mask != f->spi.mask) break; - f->next = *fp; - wmb(); - *fp = f; + } + RCU_INIT_POINTER(f->next, nfp); + rcu_assign_pointer(*fp, f); *arg = (unsigned long)f; return 0; @@ -546,13 +563,14 @@ insert: s->protocol = pinfo->protocol; s->tunnelid = pinfo->tunnelid; } - for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) { - if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask) + sp = &data->ht[h1]; + for (nsp = rtnl_dereference(*sp); nsp; + sp = &nsp->next, nsp = rtnl_dereference(*sp)) { + if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask) break; } - s->next = *sp; - wmb(); - *sp = s; + RCU_INIT_POINTER(s->next, nsp); + rcu_assign_pointer(*sp, s); goto insert; @@ -565,7 +583,7 @@ errout2: static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) { - struct rsvp_head *head = tp->root; + struct rsvp_head *head = rtnl_dereference(tp->root); unsigned int h, h1; if (arg->stop) @@ -574,11 +592,13 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) for (h = 0; h < 256; h++) { struct rsvp_session *s; - for (s = head->ht[h]; s; s = s->next) { + for (s = rtnl_dereference(head->ht[h]); s; + s = rtnl_dereference(s->next)) { for (h1 = 0; h1 <= 16; h1++) { struct rsvp_filter *f; - for (f = s->ht[h1]; f; f = f->next) { + for (f = rtnl_dereference(s->ht[h1]); f; + f = rtnl_dereference(f->next)) { if (arg->count < arg->skip) { arg->count++; continue; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 3e9f76413b3b..5054fae33a48 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -32,19 +32,21 @@ struct tcindex_filter_result { struct tcindex_filter { u16 key; struct tcindex_filter_result result; - struct tcindex_filter *next; + struct tcindex_filter __rcu *next; + struct rcu_head rcu; }; struct tcindex_data { struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */ - struct tcindex_filter **h; /* imperfect hash; only used if !perfect; - NULL if unused */ + struct tcindex_filter __rcu **h; /* imperfect hash; */ + struct tcf_proto *tp; u16 mask; /* AND key with mask */ - int shift; /* shift ANDed key to the right */ - int hash; /* hash table size; 0 if undefined */ - int alloc_hash; /* allocated size */ - int fall_through; /* 0: only classify if explicit match */ + u32 shift; /* shift ANDed key to the right */ + u32 hash; /* hash table size; 0 if undefined */ + u32 alloc_hash; /* allocated size */ + u32 fall_through; /* 0: only classify if explicit match */ + struct rcu_head rcu; }; static inline int @@ -56,13 +58,18 @@ tcindex_filter_is_set(struct tcindex_filter_result *r) static struct tcindex_filter_result * tcindex_lookup(struct tcindex_data *p, u16 key) { - struct tcindex_filter *f; + if (p->perfect) { + struct tcindex_filter_result *f = p->perfect + key; + + return tcindex_filter_is_set(f) ? f : NULL; + } else if (p->h) { + struct tcindex_filter __rcu **fp; + struct tcindex_filter *f; - if (p->perfect) - return tcindex_filter_is_set(p->perfect + key) ? - p->perfect + key : NULL; - else if (p->h) { - for (f = p->h[key % p->hash]; f; f = f->next) + fp = &p->h[key % p->hash]; + for (f = rcu_dereference_bh_rtnl(*fp); + f; + fp = &f->next, f = rcu_dereference_bh_rtnl(*fp)) if (f->key == key) return &f->result; } @@ -74,7 +81,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key) static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rcu_dereference_bh(tp->root); struct tcindex_filter_result *f; int key = (skb->tc_index & p->mask) >> p->shift; @@ -99,7 +106,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r; pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); @@ -129,49 +136,59 @@ static int tcindex_init(struct tcf_proto *tp) p->hash = DEFAULT_HASH_SIZE; p->fall_through = 1; - tp->root = p; + rcu_assign_pointer(tp->root, p); return 0; } - static int -__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock) +tcindex_delete(struct tcf_proto *tp, unsigned long arg) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; + struct tcindex_filter __rcu **walk; struct tcindex_filter *f = NULL; - pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f); + pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p); if (p->perfect) { if (!r->res.class) return -ENOENT; } else { int i; - struct tcindex_filter **walk = NULL; - for (i = 0; i < p->hash; i++) - for (walk = p->h+i; *walk; walk = &(*walk)->next) - if (&(*walk)->result == r) + for (i = 0; i < p->hash; i++) { + walk = p->h + i; + for (f = rtnl_dereference(*walk); f; + walk = &f->next, f = rtnl_dereference(*walk)) { + if (&f->result == r) goto found; + } + } return -ENOENT; found: - f = *walk; - if (lock) - tcf_tree_lock(tp); - *walk = f->next; - if (lock) - tcf_tree_unlock(tp); + rcu_assign_pointer(*walk, rtnl_dereference(f->next)); } tcf_unbind_filter(tp, &r->res); tcf_exts_destroy(tp, &r->exts); - kfree(f); + if (f) + kfree_rcu(f, rcu); return 0; } -static int tcindex_delete(struct tcf_proto *tp, unsigned long arg) +static int tcindex_destroy_element(struct tcf_proto *tp, + unsigned long arg, + struct tcf_walker *walker) +{ + return tcindex_delete(tp, arg); +} + +static void __tcindex_destroy(struct rcu_head *head) { - return __tcindex_delete(tp, arg, 1); + struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + + kfree(p->perfect); + kfree(p->h); + kfree(p); } static inline int @@ -194,6 +211,14 @@ static void tcindex_filter_result_init(struct tcindex_filter_result *r) tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } +static void __tcindex_partial_destroy(struct rcu_head *head) +{ + struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + + kfree(p->perfect); + kfree(p); +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, @@ -203,7 +228,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; - struct tcindex_data cp; + struct tcindex_data *cp, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_exts e; @@ -212,84 +237,118 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - memcpy(&cp, p, sizeof(cp)); - tcindex_filter_result_init(&new_filter_result); + /* tcindex_data attributes must look atomic to classifier/lookup so + * allocate new tcindex data and RCU assign it onto root. Keeping + * perfect hash and hash pointers from old data. + */ + cp = kzalloc(sizeof(*cp), GFP_KERNEL); + if (!cp) { + err = -ENOMEM; + goto errout; + } + cp->mask = p->mask; + cp->shift = p->shift; + cp->hash = p->hash; + cp->alloc_hash = p->alloc_hash; + cp->fall_through = p->fall_through; + cp->tp = tp; + + if (p->perfect) { + cp->perfect = kmemdup(p->perfect, + sizeof(*r) * cp->hash, GFP_KERNEL); + if (!cp->perfect) + goto errout; + balloc = 1; + } + cp->h = p->h; + + tcindex_filter_result_init(&new_filter_result); tcindex_filter_result_init(&cr); if (old_r) cr.res = r->res; if (tb[TCA_TCINDEX_HASH]) - cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); if (tb[TCA_TCINDEX_MASK]) - cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); + cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); if (tb[TCA_TCINDEX_SHIFT]) - cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); err = -EBUSY; + /* Hash already allocated, make sure that we still meet the * requirements for the allocated hash. */ - if (cp.perfect) { - if (!valid_perfect_hash(&cp) || - cp.hash > cp.alloc_hash) - goto errout; - } else if (cp.h && cp.hash != cp.alloc_hash) - goto errout; + if (cp->perfect) { + if (!valid_perfect_hash(cp) || + cp->hash > cp->alloc_hash) + goto errout_alloc; + } else if (cp->h && cp->hash != cp->alloc_hash) { + goto errout_alloc; + } err = -EINVAL; if (tb[TCA_TCINDEX_FALL_THROUGH]) - cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); + cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp.hash) { + if (!cp->hash) { /* Hash not specified, use perfect hash if the upper limit * of the hashing index is below the threshold. */ - if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) - cp.hash = (cp.mask >> cp.shift) + 1; + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; else - cp.hash = DEFAULT_HASH_SIZE; + cp->hash = DEFAULT_HASH_SIZE; } - if (!cp.perfect && !cp.h) - cp.alloc_hash = cp.hash; + if (!cp->perfect && cp->h) + cp->alloc_hash = cp->hash; /* Note: this could be as restrictive as if (handle & ~(mask >> shift)) * but then, we'd fail handles that may become valid after some future * mask change. While this is extremely unlikely to ever matter, * the check below is safer (and also more backwards-compatible). */ - if (cp.perfect || valid_perfect_hash(&cp)) - if (handle >= cp.alloc_hash) - goto errout; + if (cp->perfect || valid_perfect_hash(cp)) + if (handle >= cp->alloc_hash) + goto errout_alloc; err = -ENOMEM; - if (!cp.perfect && !cp.h) { - if (valid_perfect_hash(&cp)) { + if (!cp->perfect && !cp->h) { + if (valid_perfect_hash(cp)) { int i; - cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL); - if (!cp.perfect) - goto errout; - for (i = 0; i < cp.hash; i++) - tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT, + cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL); + if (!cp->perfect) + goto errout_alloc; + for (i = 0; i < cp->hash; i++) + tcf_exts_init(&cp->perfect[i].exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); balloc = 1; } else { - cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL); - if (!cp.h) - goto errout; + struct tcindex_filter __rcu **hash; + + hash = kcalloc(cp->hash, + sizeof(struct tcindex_filter *), + GFP_KERNEL); + + if (!hash) + goto errout_alloc; + + cp->h = hash; balloc = 2; } } - if (cp.perfect) - r = cp.perfect + handle; + if (cp->perfect) + r = cp->perfect + handle; else - r = tcindex_lookup(&cp, handle) ? : &new_filter_result; + r = tcindex_lookup(cp, handle) ? : &new_filter_result; if (r == &new_filter_result) { f = kzalloc(sizeof(*f), GFP_KERNEL); @@ -307,33 +366,41 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, else tcf_exts_change(tp, &cr.exts, &e); - tcf_tree_lock(tp); if (old_r && old_r != r) tcindex_filter_result_init(old_r); - memcpy(p, &cp, sizeof(cp)); + oldp = p; r->res = cr.res; + rcu_assign_pointer(tp->root, cp); if (r == &new_filter_result) { - struct tcindex_filter **fp; + struct tcindex_filter *nfp; + struct tcindex_filter __rcu **fp; f->key = handle; f->result = new_filter_result; f->next = NULL; - for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next) - /* nothing */; - *fp = f; + + fp = cp->h + (handle % cp->hash); + for (nfp = rtnl_dereference(*fp); + nfp; + fp = &nfp->next, nfp = rtnl_dereference(*fp)) + ; /* nothing */ + + rcu_assign_pointer(*fp, f); } - tcf_tree_unlock(tp); + if (oldp) + call_rcu(&oldp->rcu, __tcindex_partial_destroy); return 0; errout_alloc: if (balloc == 1) - kfree(cp.perfect); + kfree(cp->perfect); else if (balloc == 2) - kfree(cp.h); + kfree(cp->h); errout: + kfree(cp); tcf_exts_destroy(tp, &e); return err; } @@ -345,7 +412,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; int err; @@ -364,10 +431,9 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr); } - static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter *f, *next; int i; @@ -390,8 +456,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) if (!p->h) return; for (i = 0; i < p->hash; i++) { - for (f = p->h[i]; f; f = next) { - next = f->next; + for (f = rtnl_dereference(p->h[i]); f; f = next) { + next = rtnl_dereference(f->next); if (walker->count >= walker->skip) { if (walker->fn(tp, (unsigned long) &f->result, walker) < 0) { @@ -404,17 +470,9 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } - -static int tcindex_destroy_element(struct tcf_proto *tp, - unsigned long arg, struct tcf_walker *walker) -{ - return __tcindex_delete(tp, arg, 0); -} - - static void tcindex_destroy(struct tcf_proto *tp) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); @@ -422,17 +480,16 @@ static void tcindex_destroy(struct tcf_proto *tp) walker.skip = 0; walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); - kfree(p->perfect); - kfree(p->h); - kfree(p); - tp->root = NULL; + + RCU_INIT_POINTER(tp->root, NULL); + call_rcu(&p->rcu, __tcindex_destroy); } static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -455,15 +512,18 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, nla_nest_end(skb, nest); } else { if (p->perfect) { - t->tcm_handle = r-p->perfect; + t->tcm_handle = r - p->perfect; } else { struct tcindex_filter *f; + struct tcindex_filter __rcu **fp; int i; t->tcm_handle = 0; for (i = 0; !t->tcm_handle && i < p->hash; i++) { - for (f = p->h[i]; !t->tcm_handle && f; - f = f->next) { + fp = &p->h[i]; + for (f = rtnl_dereference(*fp); + !t->tcm_handle && f; + fp = &f->next, f = rtnl_dereference(*fp)) { if (&f->result == r) t->tcm_handle = f->key; } diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 70c0be8d0121..ef97a646ee94 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -36,6 +36,7 @@ #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> +#include <linux/percpu.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/bitmap.h> @@ -44,40 +45,49 @@ #include <net/pkt_cls.h> struct tc_u_knode { - struct tc_u_knode *next; + struct tc_u_knode __rcu *next; u32 handle; - struct tc_u_hnode *ht_up; + struct tc_u_hnode __rcu *ht_up; struct tcf_exts exts; #ifdef CONFIG_NET_CLS_IND int ifindex; #endif u8 fshift; struct tcf_result res; - struct tc_u_hnode *ht_down; + struct tc_u_hnode __rcu *ht_down; #ifdef CONFIG_CLS_U32_PERF - struct tc_u32_pcnt *pf; + struct tc_u32_pcnt __percpu *pf; #endif #ifdef CONFIG_CLS_U32_MARK - struct tc_u32_mark mark; + u32 val; + u32 mask; + u32 __percpu *pcpu_success; #endif + struct tcf_proto *tp; + struct rcu_head rcu; + /* The 'sel' field MUST be the last field in structure to allow for + * tc_u32_keys allocated at end of structure. + */ struct tc_u32_sel sel; }; struct tc_u_hnode { - struct tc_u_hnode *next; + struct tc_u_hnode __rcu *next; u32 handle; u32 prio; struct tc_u_common *tp_c; int refcnt; unsigned int divisor; - struct tc_u_knode *ht[1]; + struct tc_u_knode __rcu *ht[1]; + struct rcu_head rcu; }; struct tc_u_common { - struct tc_u_hnode *hlist; + struct tc_u_hnode __rcu *hlist; struct Qdisc *q; int refcnt; u32 hgenerator; + struct rcu_head rcu; }; static inline unsigned int u32_hash_fold(__be32 key, @@ -96,7 +106,7 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct unsigned int off; } stack[TC_U32_MAXDEPTH]; - struct tc_u_hnode *ht = tp->root; + struct tc_u_hnode *ht = rcu_dereference_bh(tp->root); unsigned int off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; @@ -108,23 +118,23 @@ static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct int i, r; next_ht: - n = ht->ht[sel]; + n = rcu_dereference_bh(ht->ht[sel]); next_knode: if (n) { struct tc_u32_key *key = n->sel.keys; #ifdef CONFIG_CLS_U32_PERF - n->pf->rcnt += 1; + __this_cpu_inc(n->pf->rcnt); j = 0; #endif #ifdef CONFIG_CLS_U32_MARK - if ((skb->mark & n->mark.mask) != n->mark.val) { - n = n->next; + if ((skb->mark & n->mask) != n->val) { + n = rcu_dereference_bh(n->next); goto next_knode; } else { - n->mark.success++; + __this_cpu_inc(*n->pcpu_success); } #endif @@ -139,37 +149,39 @@ next_knode: if (!data) goto out; if ((*data ^ key->val) & key->mask) { - n = n->next; + n = rcu_dereference_bh(n->next); goto next_knode; } #ifdef CONFIG_CLS_U32_PERF - n->pf->kcnts[j] += 1; + __this_cpu_inc(n->pf->kcnts[j]); j++; #endif } - if (n->ht_down == NULL) { + + ht = rcu_dereference_bh(n->ht_down); + if (!ht) { check_terminal: if (n->sel.flags & TC_U32_TERMINAL) { *res = n->res; #ifdef CONFIG_NET_CLS_IND if (!tcf_match_indev(skb, n->ifindex)) { - n = n->next; + n = rcu_dereference_bh(n->next); goto next_knode; } #endif #ifdef CONFIG_CLS_U32_PERF - n->pf->rhit += 1; + __this_cpu_inc(n->pf->rhit); #endif r = tcf_exts_exec(skb, &n->exts, res); if (r < 0) { - n = n->next; + n = rcu_dereference_bh(n->next); goto next_knode; } return r; } - n = n->next; + n = rcu_dereference_bh(n->next); goto next_knode; } @@ -180,7 +192,7 @@ check_terminal: stack[sdepth].off = off; sdepth++; - ht = n->ht_down; + ht = rcu_dereference_bh(n->ht_down); sel = 0; if (ht->divisor) { __be32 *data, hdata; @@ -222,7 +234,7 @@ check_terminal: /* POP */ if (sdepth--) { n = stack[sdepth].knode; - ht = n->ht_up; + ht = rcu_dereference_bh(n->ht_up); off = stack[sdepth].off; goto check_terminal; } @@ -239,7 +251,9 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; - for (ht = tp_c->hlist; ht; ht = ht->next) + for (ht = rtnl_dereference(tp_c->hlist); + ht; + ht = rtnl_dereference(ht->next)) if (ht->handle == handle) break; @@ -256,7 +270,9 @@ u32_lookup_key(struct tc_u_hnode *ht, u32 handle) if (sel > ht->divisor) goto out; - for (n = ht->ht[sel]; n; n = n->next) + for (n = rtnl_dereference(ht->ht[sel]); + n; + n = rtnl_dereference(n->next)) if (n->handle == handle) break; out: @@ -270,7 +286,7 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle) struct tc_u_common *tp_c = tp->data; if (TC_U32_HTID(handle) == TC_U32_ROOT) - ht = tp->root; + ht = rtnl_dereference(tp->root); else ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); @@ -291,6 +307,9 @@ static u32 gen_new_htid(struct tc_u_common *tp_c) { int i = 0x800; + /* hgenerator only used inside rtnl lock it is safe to increment + * without read _copy_ update semantics + */ do { if (++tp_c->hgenerator == 0x7FF) tp_c->hgenerator = 1; @@ -326,41 +345,78 @@ static int u32_init(struct tcf_proto *tp) } tp_c->refcnt++; - root_ht->next = tp_c->hlist; - tp_c->hlist = root_ht; + RCU_INIT_POINTER(root_ht->next, tp_c->hlist); + rcu_assign_pointer(tp_c->hlist, root_ht); root_ht->tp_c = tp_c; - tp->root = root_ht; + rcu_assign_pointer(tp->root, root_ht); tp->data = tp_c; return 0; } -static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n) +static int u32_destroy_key(struct tcf_proto *tp, + struct tc_u_knode *n, + bool free_pf) { tcf_unbind_filter(tp, &n->res); tcf_exts_destroy(tp, &n->exts); if (n->ht_down) n->ht_down->refcnt--; #ifdef CONFIG_CLS_U32_PERF - kfree(n->pf); + if (free_pf) + free_percpu(n->pf); +#endif +#ifdef CONFIG_CLS_U32_MARK + if (free_pf) + free_percpu(n->pcpu_success); #endif kfree(n); return 0; } +/* u32_delete_key_rcu should be called when free'ing a copied + * version of a tc_u_knode obtained from u32_init_knode(). When + * copies are obtained from u32_init_knode() the statistics are + * shared between the old and new copies to allow readers to + * continue to update the statistics during the copy. To support + * this the u32_delete_key_rcu variant does not free the percpu + * statistics. + */ +static void u32_delete_key_rcu(struct rcu_head *rcu) +{ + struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); + + u32_destroy_key(key->tp, key, false); +} + +/* u32_delete_key_freepf_rcu is the rcu callback variant + * that free's the entire structure including the statistics + * percpu variables. Only use this if the key is not a copy + * returned by u32_init_knode(). See u32_delete_key_rcu() + * for the variant that should be used with keys return from + * u32_init_knode() + */ +static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) +{ + struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); + + u32_destroy_key(key->tp, key, true); +} + static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) { - struct tc_u_knode **kp; - struct tc_u_hnode *ht = key->ht_up; + struct tc_u_knode __rcu **kp; + struct tc_u_knode *pkp; + struct tc_u_hnode *ht = rtnl_dereference(key->ht_up); if (ht) { - for (kp = &ht->ht[TC_U32_HASH(key->handle)]; *kp; kp = &(*kp)->next) { - if (*kp == key) { - tcf_tree_lock(tp); - *kp = key->next; - tcf_tree_unlock(tp); + kp = &ht->ht[TC_U32_HASH(key->handle)]; + for (pkp = rtnl_dereference(*kp); pkp; + kp = &pkp->next, pkp = rtnl_dereference(*kp)) { + if (pkp == key) { + RCU_INIT_POINTER(*kp, key->next); - u32_destroy_key(tp, key); + call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } } @@ -369,16 +425,16 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } -static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) +static void u32_clear_hnode(struct tc_u_hnode *ht) { struct tc_u_knode *n; unsigned int h; for (h = 0; h <= ht->divisor; h++) { - while ((n = ht->ht[h]) != NULL) { - ht->ht[h] = n->next; - - u32_destroy_key(tp, n); + while ((n = rtnl_dereference(ht->ht[h])) != NULL) { + RCU_INIT_POINTER(ht->ht[h], + rtnl_dereference(n->next)); + call_rcu(&n->rcu, u32_delete_key_freepf_rcu); } } } @@ -386,28 +442,31 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) { struct tc_u_common *tp_c = tp->data; - struct tc_u_hnode **hn; + struct tc_u_hnode __rcu **hn; + struct tc_u_hnode *phn; WARN_ON(ht->refcnt); - u32_clear_hnode(tp, ht); + u32_clear_hnode(ht); - for (hn = &tp_c->hlist; *hn; hn = &(*hn)->next) { - if (*hn == ht) { - *hn = ht->next; - kfree(ht); + hn = &tp_c->hlist; + for (phn = rtnl_dereference(*hn); + phn; + hn = &phn->next, phn = rtnl_dereference(*hn)) { + if (phn == ht) { + RCU_INIT_POINTER(*hn, ht->next); + kfree_rcu(ht, rcu); return 0; } } - WARN_ON(1); return -ENOENT; } static void u32_destroy(struct tcf_proto *tp) { struct tc_u_common *tp_c = tp->data; - struct tc_u_hnode *root_ht = tp->root; + struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); WARN_ON(root_ht == NULL); @@ -419,17 +478,16 @@ static void u32_destroy(struct tcf_proto *tp) tp->q->u32_node = NULL; - for (ht = tp_c->hlist; ht; ht = ht->next) { + for (ht = rtnl_dereference(tp_c->hlist); + ht; + ht = rtnl_dereference(ht->next)) { ht->refcnt--; - u32_clear_hnode(tp, ht); + u32_clear_hnode(ht); } - while ((ht = tp_c->hlist) != NULL) { - tp_c->hlist = ht->next; - - WARN_ON(ht->refcnt != 0); - - kfree(ht); + while ((ht = rtnl_dereference(tp_c->hlist)) != NULL) { + RCU_INIT_POINTER(tp_c->hlist, ht->next); + kfree_rcu(ht, rcu); } kfree(tp_c); @@ -441,6 +499,7 @@ static void u32_destroy(struct tcf_proto *tp) static int u32_delete(struct tcf_proto *tp, unsigned long arg) { struct tc_u_hnode *ht = (struct tc_u_hnode *)arg; + struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); if (ht == NULL) return 0; @@ -448,7 +507,7 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) if (TC_U32_KEY(ht->handle)) return u32_delete_key(tp, (struct tc_u_knode *)ht); - if (tp->root == ht) + if (root_ht == ht) return -EINVAL; if (ht->refcnt == 1) { @@ -471,7 +530,9 @@ static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) if (!bitmap) return handle | 0xFFF; - for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next) + for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]); + n; + n = rtnl_dereference(n->next)) set_bit(TC_U32_NODE(n->handle), bitmap); i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800); @@ -521,10 +582,8 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, ht_down->refcnt++; } - tcf_tree_lock(tp); - ht_old = n->ht_down; - n->ht_down = ht_down; - tcf_tree_unlock(tp); + ht_old = rtnl_dereference(n->ht_down); + rcu_assign_pointer(n->ht_down, ht_down); if (ht_old) ht_old->refcnt--; @@ -551,6 +610,82 @@ errout: return err; } +static void u32_replace_knode(struct tcf_proto *tp, + struct tc_u_common *tp_c, + struct tc_u_knode *n) +{ + struct tc_u_knode __rcu **ins; + struct tc_u_knode *pins; + struct tc_u_hnode *ht; + + if (TC_U32_HTID(n->handle) == TC_U32_ROOT) + ht = rtnl_dereference(tp->root); + else + ht = u32_lookup_ht(tp_c, TC_U32_HTID(n->handle)); + + ins = &ht->ht[TC_U32_HASH(n->handle)]; + + /* The node must always exist for it to be replaced if this is not the + * case then something went very wrong elsewhere. + */ + for (pins = rtnl_dereference(*ins); ; + ins = &pins->next, pins = rtnl_dereference(*ins)) + if (pins->handle == n->handle) + break; + + RCU_INIT_POINTER(n->next, pins->next); + rcu_assign_pointer(*ins, n); +} + +static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, + struct tc_u_knode *n) +{ + struct tc_u_knode *new; + struct tc_u32_sel *s = &n->sel; + + new = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), + GFP_KERNEL); + + if (!new) + return NULL; + + RCU_INIT_POINTER(new->next, n->next); + new->handle = n->handle; + RCU_INIT_POINTER(new->ht_up, n->ht_up); + +#ifdef CONFIG_NET_CLS_IND + new->ifindex = n->ifindex; +#endif + new->fshift = n->fshift; + new->res = n->res; + RCU_INIT_POINTER(new->ht_down, n->ht_down); + + /* bump reference count as long as we hold pointer to structure */ + if (new->ht_down) + new->ht_down->refcnt++; + +#ifdef CONFIG_CLS_U32_PERF + /* Statistics may be incremented by readers during update + * so we must keep them in tact. When the node is later destroyed + * a special destroy call must be made to not free the pf memory. + */ + new->pf = n->pf; +#endif + +#ifdef CONFIG_CLS_U32_MARK + new->val = n->val; + new->mask = n->mask; + /* Similarly success statistics must be moved as pointers */ + new->pcpu_success = n->pcpu_success; +#endif + new->tp = tp; + memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); + + tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE); + + return new; +} + static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, @@ -564,6 +699,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, struct nlattr *tb[TCA_U32_MAX + 1]; u32 htid; int err; +#ifdef CONFIG_CLS_U32_PERF + size_t size; +#endif if (opt == NULL) return handle ? -EINVAL : 0; @@ -574,11 +712,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, n = (struct tc_u_knode *)*arg; if (n) { + struct tc_u_knode *new; + if (TC_U32_KEY(n->handle) == 0) return -EINVAL; - return u32_set_parms(net, tp, base, n->ht_up, n, tb, - tca[TCA_RATE], ovr); + new = u32_init_knode(tp, n); + if (!new) + return -ENOMEM; + + err = u32_set_parms(net, tp, base, + rtnl_dereference(n->ht_up), new, tb, + tca[TCA_RATE], ovr); + + if (err) { + u32_destroy_key(tp, new, false); + return err; + } + + u32_replace_knode(tp, tp_c, new); + call_rcu(&n->rcu, u32_delete_key_rcu); + return 0; } if (tb[TCA_U32_DIVISOR]) { @@ -601,8 +755,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; - ht->next = tp_c->hlist; - tp_c->hlist = ht; + RCU_INIT_POINTER(ht->next, tp_c->hlist); + rcu_assign_pointer(tp_c->hlist, ht); *arg = (unsigned long)ht; return 0; } @@ -610,7 +764,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_U32_HASH]) { htid = nla_get_u32(tb[TCA_U32_HASH]); if (TC_U32_HTID(htid) == TC_U32_ROOT) { - ht = tp->root; + ht = rtnl_dereference(tp->root); htid = ht->handle; } else { ht = u32_lookup_ht(tp->data, TC_U32_HTID(htid)); @@ -618,7 +772,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } } else { - ht = tp->root; + ht = rtnl_dereference(tp->root); htid = ht->handle; } @@ -642,46 +796,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -ENOBUFS; #ifdef CONFIG_CLS_U32_PERF - n->pf = kzalloc(sizeof(struct tc_u32_pcnt) + s->nkeys*sizeof(u64), GFP_KERNEL); - if (n->pf == NULL) { + size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); + n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); + if (!n->pf) { kfree(n); return -ENOBUFS; } #endif memcpy(&n->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); - n->ht_up = ht; + RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); + n->tp = tp; #ifdef CONFIG_CLS_U32_MARK + n->pcpu_success = alloc_percpu(u32); + if (!n->pcpu_success) { + err = -ENOMEM; + goto errout; + } + if (tb[TCA_U32_MARK]) { struct tc_u32_mark *mark; mark = nla_data(tb[TCA_U32_MARK]); - memcpy(&n->mark, mark, sizeof(struct tc_u32_mark)); - n->mark.success = 0; + n->val = mark->val; + n->mask = mark->mask; } #endif err = u32_set_parms(net, tp, base, ht, n, tb, tca[TCA_RATE], ovr); if (err == 0) { - struct tc_u_knode **ins; - for (ins = &ht->ht[TC_U32_HASH(handle)]; *ins; ins = &(*ins)->next) - if (TC_U32_NODE(handle) < TC_U32_NODE((*ins)->handle)) + struct tc_u_knode __rcu **ins; + struct tc_u_knode *pins; + + ins = &ht->ht[TC_U32_HASH(handle)]; + for (pins = rtnl_dereference(*ins); pins; + ins = &pins->next, pins = rtnl_dereference(*ins)) + if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle)) break; - n->next = *ins; - tcf_tree_lock(tp); - *ins = n; - tcf_tree_unlock(tp); + RCU_INIT_POINTER(n->next, pins); + rcu_assign_pointer(*ins, n); *arg = (unsigned long)n; return 0; } + +#ifdef CONFIG_CLS_U32_MARK + free_percpu(n->pcpu_success); +errout: +#endif + #ifdef CONFIG_CLS_U32_PERF - kfree(n->pf); + free_percpu(n->pf); #endif kfree(n); return err; @@ -697,7 +867,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->stop) return; - for (ht = tp_c->hlist; ht; ht = ht->next) { + for (ht = rtnl_dereference(tp_c->hlist); + ht; + ht = rtnl_dereference(ht->next)) { if (ht->prio != tp->prio) continue; if (arg->count >= arg->skip) { @@ -708,7 +880,9 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } arg->count++; for (h = 0; h <= ht->divisor; h++) { - for (n = ht->ht[h]; n; n = n->next) { + for (n = rtnl_dereference(ht->ht[h]); + n; + n = rtnl_dereference(n->next)) { if (arg->count < arg->skip) { arg->count++; continue; @@ -727,6 +901,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode *)fh; + struct tc_u_hnode *ht_up, *ht_down; struct nlattr *nest; if (n == NULL) @@ -745,11 +920,18 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor)) goto nla_put_failure; } else { +#ifdef CONFIG_CLS_U32_PERF + struct tc_u32_pcnt *gpf; + int cpu; +#endif + if (nla_put(skb, TCA_U32_SEL, sizeof(n->sel) + n->sel.nkeys*sizeof(struct tc_u32_key), &n->sel)) goto nla_put_failure; - if (n->ht_up) { + + ht_up = rtnl_dereference(n->ht_up); + if (ht_up) { u32 htid = n->handle & 0xFFFFF000; if (nla_put_u32(skb, TCA_U32_HASH, htid)) goto nla_put_failure; @@ -757,14 +939,28 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (n->res.classid && nla_put_u32(skb, TCA_U32_CLASSID, n->res.classid)) goto nla_put_failure; - if (n->ht_down && - nla_put_u32(skb, TCA_U32_LINK, n->ht_down->handle)) + + ht_down = rtnl_dereference(n->ht_down); + if (ht_down && + nla_put_u32(skb, TCA_U32_LINK, ht_down->handle)) goto nla_put_failure; #ifdef CONFIG_CLS_U32_MARK - if ((n->mark.val || n->mark.mask) && - nla_put(skb, TCA_U32_MARK, sizeof(n->mark), &n->mark)) - goto nla_put_failure; + if ((n->val || n->mask)) { + struct tc_u32_mark mark = {.val = n->val, + .mask = n->mask, + .success = 0}; + int cpum; + + for_each_possible_cpu(cpum) { + __u32 cnt = *per_cpu_ptr(n->pcpu_success, cpum); + + mark.success += cnt; + } + + if (nla_put(skb, TCA_U32_MARK, sizeof(mark), &mark)) + goto nla_put_failure; + } #endif if (tcf_exts_dump(skb, &n->exts) < 0) @@ -779,10 +975,29 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, } #endif #ifdef CONFIG_CLS_U32_PERF + gpf = kzalloc(sizeof(struct tc_u32_pcnt) + + n->sel.nkeys * sizeof(u64), + GFP_KERNEL); + if (!gpf) + goto nla_put_failure; + + for_each_possible_cpu(cpu) { + int i; + struct tc_u32_pcnt *pf = per_cpu_ptr(n->pf, cpu); + + gpf->rcnt += pf->rcnt; + gpf->rhit += pf->rhit; + for (i = 0; i < n->sel.nkeys; i++) + gpf->kcnts[i] += pf->kcnts[i]; + } + if (nla_put(skb, TCA_U32_PCNT, sizeof(struct tc_u32_pcnt) + n->sel.nkeys*sizeof(u64), - n->pf)) + gpf)) { + kfree(gpf); goto nla_put_failure; + } + kfree(gpf); #endif } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 58bed7599db7..ca6248345937 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1781,7 +1781,7 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, __be16 protocol = skb->protocol; int err; - for (; tp; tp = tp->next) { + for (; tp; tp = rcu_dereference_bh(tp->next)) { if (tp->protocol != protocol && tp->protocol != htons(ETH_P_ALL)) continue; @@ -1833,15 +1833,15 @@ void tcf_destroy(struct tcf_proto *tp) { tp->ops->destroy(tp); module_put(tp->ops->owner); - kfree(tp); + kfree_rcu(tp, rcu); } -void tcf_destroy_chain(struct tcf_proto **fl) +void tcf_destroy_chain(struct tcf_proto __rcu **fl) { struct tcf_proto *tp; - while ((tp = *fl) != NULL) { - *fl = tp->next; + while ((tp = rtnl_dereference(*fl)) != NULL) { + RCU_INIT_POINTER(*fl, tp->next); tcf_destroy(tp); } } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 8449b337f9e3..c398f9c3dbdd 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -41,7 +41,7 @@ struct atm_flow_data { struct Qdisc *q; /* FIFO, TBF, etc. */ - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */ void (*old_pop)(struct atm_vcc *vcc, struct sk_buff *skb); /* chaining */ @@ -273,7 +273,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, error = -ENOBUFS; goto err_out; } - flow->filter_list = NULL; + RCU_INIT_POINTER(flow->filter_list, NULL); flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (!flow->q) flow->q = &noop_qdisc; @@ -311,7 +311,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow); if (list_empty(&flow->list)) return -EINVAL; - if (flow->filter_list || flow == &p->link) + if (rcu_access_pointer(flow->filter_list) || flow == &p->link) return -EBUSY; /* * Reference count must be 2: one for "keepalive" (set at class @@ -345,7 +345,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_proto **atm_tc_find_tcf(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch, + unsigned long cl) { struct atm_qdisc_data *p = qdisc_priv(sch); struct atm_flow_data *flow = (struct atm_flow_data *)cl; @@ -369,11 +370,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) flow = NULL; if (TC_H_MAJ(skb->priority) != sch->handle || !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) { + struct tcf_proto *fl; + list_for_each_entry(flow, &p->flows, list) { - if (flow->filter_list) { - result = tc_classify_compat(skb, - flow->filter_list, - &res); + fl = rcu_dereference_bh(flow->filter_list); + if (fl) { + result = tc_classify_compat(skb, fl, &res); if (result < 0) continue; flow = (struct atm_flow_data *)res.class; @@ -544,7 +546,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) if (!p->link.q) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - p->link.filter_list = NULL; + RCU_INIT_POINTER(p->link.filter_list, NULL); p->link.vcc = NULL; p->link.sock = NULL; p->link.classid = sch->handle; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 762a04bb8f6d..a3244a800501 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -133,7 +133,7 @@ struct cbq_class { struct gnet_stats_rate_est64 rate_est; struct tc_cbq_xstats xstats; - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; int refcnt; int filters; @@ -221,6 +221,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) struct cbq_class **defmap; struct cbq_class *cl = NULL; u32 prio = skb->priority; + struct tcf_proto *fl; struct tcf_result res; /* @@ -235,11 +236,12 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) int result = 0; defmap = head->defaults; + fl = rcu_dereference_bh(head->filter_list); /* * Step 2+n. Apply classifier. */ - if (!head->filter_list || - (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) + result = tc_classify_compat(skb, fl, &res); + if (!fl || result < 0) goto fallback; cl = (void *)res.class; @@ -1954,7 +1956,8 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) return 0; } -static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg) +static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch, + unsigned long arg) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *cl = (struct cbq_class *)arg; diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index fb666d1e4de3..8abc2625c3a1 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -57,7 +57,7 @@ struct choke_sched_data { /* Variables */ struct red_vars vars; - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; struct { u32 prob_drop; /* Early probability drops */ u32 prob_mark; /* Early probability marks */ @@ -203,9 +203,11 @@ static bool choke_classify(struct sk_buff *skb, { struct choke_sched_data *q = qdisc_priv(sch); struct tcf_result res; + struct tcf_proto *fl; int result; - result = tc_classify(skb, q->filter_list, &res); + fl = rcu_dereference_bh(q->filter_list); + result = tc_classify(skb, fl, &res); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { @@ -259,7 +261,7 @@ static bool choke_match_random(const struct choke_sched_data *q, return false; oskb = choke_peek_random(q, pidx); - if (q->filter_list) + if (rcu_access_pointer(q->filter_list)) return choke_get_classid(nskb) == choke_get_classid(oskb); return choke_match_flow(oskb, nskb); @@ -267,11 +269,11 @@ static bool choke_match_random(const struct choke_sched_data *q, static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) { + int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; struct choke_sched_data *q = qdisc_priv(sch); const struct red_parms *p = &q->parms; - int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - if (q->filter_list) { + if (rcu_access_pointer(q->filter_list)) { /* If using external classifiers, get result and record it. */ if (!choke_classify(skb, sch, &ret)) goto other_drop; /* Packet was eaten by filter */ @@ -564,7 +566,8 @@ static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent, return 0; } -static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **choke_find_tcf(struct Qdisc *sch, + unsigned long cl) { struct choke_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 7bbbfe112192..d8b5ccfd248a 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -35,7 +35,7 @@ struct drr_class { struct drr_sched { struct list_head active; - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; struct Qdisc_class_hash clhash; }; @@ -184,7 +184,8 @@ static void drr_put_class(struct Qdisc *sch, unsigned long arg) drr_destroy_class(sch, cl); } -static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **drr_tcf_chain(struct Qdisc *sch, + unsigned long cl) { struct drr_sched *q = qdisc_priv(sch); @@ -319,6 +320,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, struct drr_sched *q = qdisc_priv(sch); struct drr_class *cl; struct tcf_result res; + struct tcf_proto *fl; int result; if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { @@ -328,7 +330,8 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, q->filter_list, &res); + fl = rcu_dereference_bh(q->filter_list); + result = tc_classify(skb, fl, &res); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 49d6ef338b55..485e456c8139 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -37,7 +37,7 @@ struct dsmark_qdisc_data { struct Qdisc *q; - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; u8 *mask; /* "owns" the array */ u8 *value; u16 indices; @@ -186,8 +186,8 @@ ignore: } } -static inline struct tcf_proto **dsmark_find_tcf(struct Qdisc *sch, - unsigned long cl) +static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch, + unsigned long cl) { struct dsmark_qdisc_data *p = qdisc_priv(sch); return &p->filter_list; @@ -229,7 +229,8 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) skb->tc_index = TC_H_MIN(skb->priority); else { struct tcf_result res; - int result = tc_classify(skb, p->filter_list, &res); + struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); + int result = tc_classify(skb, fl, &res); pr_debug("result %d class 0x%04x\n", result, res.classid); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index ba32c2b005d0..e12f997e1b4c 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -416,7 +416,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) static struct sk_buff *fq_dequeue(struct Qdisc *sch) { struct fq_sched_data *q = qdisc_priv(sch); - u64 now = ktime_to_ns(ktime_get()); + u64 now = ktime_get_ns(); struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; @@ -787,7 +787,7 @@ nla_put_failure: static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); - u64 now = ktime_to_ns(ktime_get()); + u64 now = ktime_get_ns(); struct tc_fq_qd_stats st = { .gc_flows = q->stat_gc_flows, .highprio_packets = q->stat_internal_packets, diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 063b726bf1f8..105cf5557630 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -52,7 +52,7 @@ struct fq_codel_flow { }; /* please try to keep this structure <= 64 bytes */ struct fq_codel_sched_data { - struct tcf_proto *filter_list; /* optional external classifier */ + struct tcf_proto __rcu *filter_list; /* optional external classifier */ struct fq_codel_flow *flows; /* Flows table [flows_cnt] */ u32 *backlogs; /* backlog table [flows_cnt] */ u32 flows_cnt; /* number of flows */ @@ -77,13 +77,15 @@ static unsigned int fq_codel_hash(const struct fq_codel_sched_data *q, hash = jhash_3words((__force u32)keys.dst, (__force u32)keys.src ^ keys.ip_proto, (__force u32)keys.ports, q->perturbation); - return ((u64)hash * q->flows_cnt) >> 32; + + return reciprocal_scale(hash, q->flows_cnt); } static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct fq_codel_sched_data *q = qdisc_priv(sch); + struct tcf_proto *filter; struct tcf_result res; int result; @@ -92,11 +94,12 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, TC_H_MIN(skb->priority) <= q->flows_cnt) return TC_H_MIN(skb->priority); - if (!q->filter_list) + filter = rcu_dereference(q->filter_list); + if (!filter) return fq_codel_hash(q, skb) + 1; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, q->filter_list, &res); + result = tc_classify(skb, filter, &res); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { @@ -495,7 +498,8 @@ static void fq_codel_put(struct Qdisc *q, unsigned long cl) { } -static struct tcf_proto **fq_codel_find_tcf(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **fq_codel_find_tcf(struct Qdisc *sch, + unsigned long cl) { struct fq_codel_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index fc04fe93c2da..11b28f651ad1 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -63,15 +63,18 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) if (unlikely(skb)) { /* check the reason of requeuing without tx lock first */ - txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; } else skb = NULL; } else { - if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) + if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) { skb = q->dequeue(q); + if (skb) + skb = validate_xmit_skb(skb, qdisc_dev(q)); + } } return skb; @@ -90,7 +93,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, * detect it by checking xmit owner and drop the packet when * deadloop is detected. Return OK to try the next skb. */ - kfree_skb(skb); + kfree_skb_list(skb); net_warn_ratelimited("Dead loop on netdevice %s, fix it urgently!\n", dev_queue->dev->name); ret = qdisc_qlen(q); @@ -107,9 +110,9 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, } /* - * Transmit one skb, and handle the return status as required. Holding the - * __QDISC___STATE_RUNNING bit guarantees that only one CPU can execute this - * function. + * Transmit possibly several skbs, and handle the return status as + * required. Holding the __QDISC___STATE_RUNNING bit guarantees that + * only one CPU can execute this function. * * Returns to the caller: * 0 - queue is empty or throttled. @@ -126,7 +129,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) - ret = dev_hard_start_xmit(skb, dev, txq); + skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); @@ -183,10 +186,12 @@ static inline int qdisc_restart(struct Qdisc *q) skb = dequeue_skb(q); if (unlikely(!skb)) return 0; + WARN_ON_ONCE(skb_dst_is_noref(skb)); + root_lock = qdisc_lock(q); dev = qdisc_dev(q); - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(dev, skb); return sch_direct_xmit(skb, q, dev, txq, root_lock); } @@ -518,7 +523,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - skb_queue_head_init(band2list(priv, prio)); + __skb_queue_head_init(band2list(priv, prio)); /* Can by-pass the queue discipline */ qdisc->flags |= TCQ_F_CAN_BYPASS; @@ -616,7 +621,7 @@ void qdisc_reset(struct Qdisc *qdisc) ops->reset(qdisc); if (qdisc->gso_skb) { - kfree_skb(qdisc->gso_skb); + kfree_skb_list(qdisc->gso_skb); qdisc->gso_skb = NULL; qdisc->q.qlen = 0; } @@ -652,7 +657,7 @@ void qdisc_destroy(struct Qdisc *qdisc) module_put(ops->owner); dev_put(qdisc_dev(qdisc)); - kfree_skb(qdisc->gso_skb); + kfree_skb_list(qdisc->gso_skb); /* * gen_estimator est_timer() might access qdisc->q.lock, * wait a RCU grace period before freeing qdisc. @@ -778,7 +783,7 @@ static void dev_deactivate_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; struct Qdisc *qdisc; - qdisc = dev_queue->qdisc; + qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); @@ -871,7 +876,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, { struct Qdisc *qdisc = _qdisc; - dev_queue->qdisc = qdisc; + rcu_assign_pointer(dev_queue->qdisc, qdisc); dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index ec8aeaac1dd7..04b0de4c68b5 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -116,7 +116,7 @@ struct hfsc_class { struct gnet_stats_queue qstats; struct gnet_stats_rate_est64 rate_est; unsigned int level; /* class level in hierarchy */ - struct tcf_proto *filter_list; /* filter list */ + struct tcf_proto __rcu *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ struct hfsc_sched *sched; /* scheduler data */ @@ -1161,7 +1161,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; head = &q->root; - tcf = q->root.filter_list; + tcf = rcu_dereference_bh(q->root.filter_list); while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { @@ -1185,7 +1185,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) return cl; /* hit leaf class */ /* apply inner filter chain */ - tcf = cl->filter_list; + tcf = rcu_dereference_bh(cl->filter_list); head = cl; } @@ -1285,7 +1285,7 @@ hfsc_unbind_tcf(struct Qdisc *sch, unsigned long arg) cl->filter_cnt--; } -static struct tcf_proto ** +static struct tcf_proto __rcu ** hfsc_tcf_chain(struct Qdisc *sch, unsigned long arg) { struct hfsc_sched *q = qdisc_priv(sch); diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9f949abcacef..14408f262143 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -103,7 +103,7 @@ struct htb_class { u32 prio; /* these two are used only by leaves... */ int quantum; /* but stored for parent-to-leaf return */ - struct tcf_proto *filter_list; /* class attached filters */ + struct tcf_proto __rcu *filter_list; /* class attached filters */ int filter_cnt; int refcnt; /* usage count of this class */ @@ -153,7 +153,7 @@ struct htb_sched { int rate2quantum; /* quant = rate / rate2quantum */ /* filters for qdisc itself */ - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; #define HTB_WARN_TOOMANYEVENTS 0x1 unsigned int warned; /* only one warning */ @@ -223,9 +223,9 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, if (cl->level == 0) return cl; /* Start with inner filter chain if a non-leaf class is selected */ - tcf = cl->filter_list; + tcf = rcu_dereference_bh(cl->filter_list); } else { - tcf = q->filter_list; + tcf = rcu_dereference_bh(q->filter_list); } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; @@ -251,7 +251,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, return cl; /* we hit leaf; return it */ /* we have got inner class; apply inner filter chain */ - tcf = cl->filter_list; + tcf = rcu_dereference_bh(cl->filter_list); } /* classification failed; try to use default class */ cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); @@ -895,7 +895,7 @@ ok: if (!sch->q.qlen) goto fin; - q->now = ktime_to_ns(ktime_get()); + q->now = ktime_get_ns(); start_at = jiffies; next_event = q->now + 5LLU * NSEC_PER_SEC; @@ -1044,7 +1044,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); INIT_WORK(&q->work, htb_work_func); - skb_queue_head_init(&q->direct_queue); + __skb_queue_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); @@ -1225,7 +1225,7 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl, parent->un.leaf.q = new_q ? new_q : &noop_qdisc; parent->tokens = parent->buffer; parent->ctokens = parent->cbuffer; - parent->t_c = ktime_to_ns(ktime_get()); + parent->t_c = ktime_get_ns(); parent->cmode = HTB_CAN_SEND; } @@ -1455,7 +1455,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->tokens = PSCHED_TICKS2NS(hopt->buffer); cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer); cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */ - cl->t_c = ktime_to_ns(ktime_get()); + cl->t_c = ktime_get_ns(); cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ @@ -1519,11 +1519,12 @@ failure: return err; } -static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) +static struct tcf_proto __rcu **htb_find_tcf(struct Qdisc *sch, + unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list; + struct tcf_proto __rcu **fl = cl ? &cl->filter_list : &q->filter_list; return fl; } diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 62871c14e1f9..b351125f3849 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -17,7 +17,7 @@ struct ingress_qdisc_data { - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; }; /* ------------------------- Class/flow operations ------------------------- */ @@ -46,7 +46,8 @@ static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) { } -static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **ingress_find_tcf(struct Qdisc *sch, + unsigned long cl) { struct ingress_qdisc_data *p = qdisc_priv(sch); @@ -59,9 +60,10 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct ingress_qdisc_data *p = qdisc_priv(sch); struct tcf_result res; + struct tcf_proto *fl = rcu_dereference_bh(p->filter_list); int result; - result = tc_classify(skb, p->filter_list, &res); + result = tc_classify(skb, fl, &res); qdisc_bstats_update(sch, skb); switch (result) { diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 6749e2f540d0..37e7d25d21f1 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -231,7 +231,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) memset(&sch->qstats, 0, sizeof(sch->qstats)); for (i = 0; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); spin_lock_bh(qdisc_lock(qdisc)); sch->q.qlen += qdisc->q.qlen; sch->bstats.bytes += qdisc->bstats.bytes; @@ -340,7 +340,9 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, spin_unlock_bh(d->lock); for (i = tc.offset; i < tc.offset + tc.count; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + struct netdev_queue *q = netdev_get_tx_queue(dev, i); + + qdisc = rtnl_dereference(q->qdisc); spin_lock_bh(qdisc_lock(qdisc)); bstats.bytes += qdisc->bstats.bytes; bstats.packets += qdisc->bstats.packets; diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index afb050a735fa..c0466c1840f3 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -31,7 +31,7 @@ struct multiq_sched_data { u16 bands; u16 max_bands; u16 curband; - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; struct Qdisc **queues; }; @@ -42,10 +42,11 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) struct multiq_sched_data *q = qdisc_priv(sch); u32 band; struct tcf_result res; + struct tcf_proto *fl = rcu_dereference_bh(q->filter_list); int err; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - err = tc_classify(skb, q->filter_list, &res); + err = tc_classify(skb, fl, &res); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: @@ -388,7 +389,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } -static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **multiq_find_tcf(struct Qdisc *sch, + unsigned long cl) { struct multiq_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 79359b69ad8d..03ef99e52a5c 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -24,7 +24,7 @@ struct prio_sched_data { int bands; - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; }; @@ -36,11 +36,13 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) struct prio_sched_data *q = qdisc_priv(sch); u32 band = skb->priority; struct tcf_result res; + struct tcf_proto *fl; int err; *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; if (TC_H_MAJ(skb->priority) != sch->handle) { - err = tc_classify(skb, q->filter_list, &res); + fl = rcu_dereference_bh(q->filter_list); + err = tc_classify(skb, fl, &res); #ifdef CONFIG_NET_CLS_ACT switch (err) { case TC_ACT_STOLEN: @@ -50,7 +52,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) return NULL; } #endif - if (!q->filter_list || err < 0) { + if (!fl || err < 0) { if (TC_H_MAJ(band)) band = 0; return q->queues[q->prio2band[band & TC_PRIO_MAX]]; @@ -351,7 +353,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) } } -static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **prio_find_tcf(struct Qdisc *sch, + unsigned long cl) { struct prio_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8056fb4e618a..602ea01a4ddd 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -181,7 +181,7 @@ struct qfq_group { }; struct qfq_sched { - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; struct Qdisc_class_hash clhash; u64 oldV, V; /* Precise virtual times. */ @@ -576,7 +576,8 @@ static void qfq_put_class(struct Qdisc *sch, unsigned long arg) qfq_destroy_class(sch, cl); } -static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **qfq_tcf_chain(struct Qdisc *sch, + unsigned long cl) { struct qfq_sched *q = qdisc_priv(sch); @@ -704,6 +705,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl; struct tcf_result res; + struct tcf_proto *fl; int result; if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { @@ -714,7 +716,8 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, q->filter_list, &res); + fl = rcu_dereference_bh(q->filter_list); + result = tc_classify(skb, fl, &res); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 9b0f7093d970..1562fb2b3f46 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -55,7 +55,7 @@ struct sfb_bins { struct sfb_sched_data { struct Qdisc *qdisc; - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; unsigned long rehash_interval; unsigned long warmup_time; /* double buffering warmup time in jiffies */ u32 max; @@ -253,13 +253,13 @@ static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q) return false; } -static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q, +static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, int *qerr, u32 *salt) { struct tcf_result res; int result; - result = tc_classify(skb, q->filter_list, &res); + result = tc_classify(skb, fl, &res); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { @@ -281,6 +281,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) struct sfb_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; + struct tcf_proto *fl; int i; u32 p_min = ~0; u32 minqlen = ~0; @@ -306,9 +307,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) } } - if (q->filter_list) { + fl = rcu_dereference_bh(q->filter_list); + if (fl) { /* If using external classifiers, get result and record it. */ - if (!sfb_classify(skb, q, &ret, &salt)) + if (!sfb_classify(skb, fl, &ret, &salt)) goto other_drop; keys.src = salt; keys.dst = 0; @@ -660,7 +662,8 @@ static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) } } -static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **sfb_find_tcf(struct Qdisc *sch, + unsigned long cl) { struct sfb_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 1af2f73906d0..80c36bd54abc 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -125,7 +125,7 @@ struct sfq_sched_data { u8 cur_depth; /* depth of longest slot */ u8 flags; unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ - struct tcf_proto *filter_list; + struct tcf_proto __rcu *filter_list; sfq_index *ht; /* Hash table ('divisor' slots) */ struct sfq_slot *slots; /* Flows table ('maxflows' entries) */ @@ -187,6 +187,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, { struct sfq_sched_data *q = qdisc_priv(sch); struct tcf_result res; + struct tcf_proto *fl; int result; if (TC_H_MAJ(skb->priority) == sch->handle && @@ -194,13 +195,14 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, TC_H_MIN(skb->priority) <= q->divisor) return TC_H_MIN(skb->priority); - if (!q->filter_list) { + fl = rcu_dereference_bh(q->filter_list); + if (!fl) { skb_flow_dissect(skb, &sfq_skb_cb(skb)->keys); return sfq_hash(q, skb) + 1; } *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; - result = tc_classify(skb, q->filter_list, &res); + result = tc_classify(skb, fl, &res); if (result >= 0) { #ifdef CONFIG_NET_CLS_ACT switch (result) { @@ -310,11 +312,6 @@ static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb) slot->skblist_prev = skb; } -#define slot_queue_walk(slot, skb) \ - for (skb = slot->skblist_next; \ - skb != (struct sk_buff *)slot; \ - skb = skb->next) - static unsigned int sfq_drop(struct Qdisc *sch) { struct sfq_sched_data *q = qdisc_priv(sch); @@ -841,7 +838,8 @@ static void sfq_put(struct Qdisc *q, unsigned long cl) { } -static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl) +static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch, + unsigned long cl) { struct sfq_sched_data *q = qdisc_priv(sch); diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 18ff63433709..0c39b754083b 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -239,7 +239,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) s64 ptoks = 0; unsigned int len = qdisc_pkt_len(skb); - now = ktime_to_ns(ktime_get()); + now = ktime_get_ns(); toks = min_t(s64, now - q->t_c, q->buffer); if (tbf_peak_present(q)) { @@ -292,7 +292,7 @@ static void tbf_reset(struct Qdisc *sch) qdisc_reset(q->qdisc); sch->q.qlen = 0; - q->t_c = ktime_to_ns(ktime_get()); + q->t_c = ktime_get_ns(); q->tokens = q->buffer; q->ptokens = q->mtu; qdisc_watchdog_cancel(&q->watchdog); @@ -431,7 +431,7 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - q->t_c = ktime_to_ns(ktime_get()); + q->t_c = ktime_get_ns(); qdisc_watchdog_init(&q->watchdog, sch); q->qdisc = &noop_qdisc; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index bd33793b527e..5cd291bd00e4 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -96,11 +96,14 @@ teql_dequeue(struct Qdisc *sch) struct teql_sched_data *dat = qdisc_priv(sch); struct netdev_queue *dat_queue; struct sk_buff *skb; + struct Qdisc *q; skb = __skb_dequeue(&dat->q); dat_queue = netdev_get_tx_queue(dat->m->dev, 0); + q = rcu_dereference_bh(dat_queue->qdisc); + if (skb == NULL) { - struct net_device *m = qdisc_dev(dat_queue->qdisc); + struct net_device *m = qdisc_dev(q); if (m) { dat->m->slaves = sch; netif_wake_queue(m); @@ -108,7 +111,7 @@ teql_dequeue(struct Qdisc *sch) } else { qdisc_bstats_update(sch, skb); } - sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; + sch->q.qlen = dat->q.qlen + q->q.qlen; return skb; } @@ -157,9 +160,9 @@ teql_destroy(struct Qdisc *sch) txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - root_lock = qdisc_root_sleeping_lock(txq->qdisc); + root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc)); spin_lock_bh(root_lock); - qdisc_reset(txq->qdisc); + qdisc_reset(rtnl_dereference(txq->qdisc)); spin_unlock_bh(root_lock); } } @@ -266,7 +269,7 @@ static inline int teql_resolve(struct sk_buff *skb, struct dst_entry *dst = skb_dst(skb); int res; - if (txq->qdisc == &noop_qdisc) + if (rcu_access_pointer(txq->qdisc) == &noop_qdisc) return -ENODEV; if (!dev->header_ops || !dst) @@ -301,7 +304,6 @@ restart: do { struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); - const struct net_device_ops *slave_ops = slave->netdev_ops; if (slave_txq->qdisc_sleeping != q) continue; @@ -317,8 +319,8 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { - txq_trans_update(slave_txq); + netdev_start_xmit(skb, slave, slave_txq, false) == + NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); |