From faf61c3319ea336ed47acd6ca86faaaa3a8f4937 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 23 Aug 2008 13:28:27 +0200 Subject: dccp: Silently ignore options with nonsensical lengths This updates the option-parsing code with regard to RFC 4340, 5.8: "[..] options with nonsensical lengths (length byte less than two or more than the remaining space in the options portion of the header) MUST be ignored, and any option space following an option with nonsensical length MUST likewise be ignored." Hence in the following cases erratic options will be ignored: 1. The type byte of a multi-byte option is the last byte of the header options (i.e. effective option length of 1). 2. The value of the length byte is less than the minimum 2. This has been changed from previously 3: although no multi-byte option with a length less than 3 yet exists (cf. table 3 in 5.8), a length of 2 is valid. (The switch-statement in dccp_parse has further per-option length checks.) 3. The option length exceeds the length of the remaining option space. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index dc7c158a2f4b..4284f0856047 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -81,11 +81,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, /* Check if this isn't a single byte option */ if (opt > DCCPO_MAX_RESERVED) { if (opt_ptr == opt_end) - goto out_invalid_option; + goto out_nonsensical_length; len = *opt_ptr++; - if (len < 3) - goto out_invalid_option; + if (len < 2) + goto out_nonsensical_length; /* * Remove the type and len fields, leaving * just the value size @@ -95,7 +95,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, opt_ptr += len; if (opt_ptr > opt_end) - goto out_invalid_option; + goto out_nonsensical_length; } /* @@ -283,6 +283,8 @@ ignore_option: if (mandatory) goto out_invalid_option; +out_nonsensical_length: + /* RFC 4340, 5.8: ignore option and all remaining option space */ return 0; out_invalid_option: -- cgit v1.2.3 From eac7726bf5cd24440d84b166e0813668d1bf3224 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 23 Aug 2008 13:28:27 +0200 Subject: dccp: Fill in the Data fields for "Option Error" Resets This updates the use of the `out_invalid_option' label, which produces a Reset (code 5, "Option Error"), to fill in the Data1...Data3 fields as specified in RFC 4340, 5.6. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 4284f0856047..0809b63cb055 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -291,6 +291,9 @@ out_invalid_option: DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len); + DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt; + DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0; + DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0; return -1; } -- cgit v1.2.3 From 5591d286281fdfb57914f5fad3ca001d44ce8fc6 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Limit feature negotiation to connection setup phase This patch starts the new implementation of feature negotiation: 1. Although it is theoretically possible to perform feature negotiation at any time (and RFC 4340 supports this), in practice this is prohibitively complex, as it requires to put traffic on hold for each new negotiation. 2. As a byproduct of restricting feature negotiation to connection setup, the feature-negotiation retransmit timer is no longer required. This part is now mapped onto the protocol-level retransmission. Details indicating why timers are no longer needed can be found on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/feature_negotiation/\ implementation_notes.html This patch disables anytime negotiation, subsequent patches work out full feature negotiation support for connection setup. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 0809b63cb055..67a171a1268c 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -489,7 +489,6 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb) { - struct dccp_sock *dp = dccp_sk(sk); struct dccp_minisock *dmsk = dccp_msk(sk); struct dccp_opt_pend *opt, *next; int change = 0; @@ -530,23 +529,6 @@ static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb) } } - /* Retransmit timer. - * If this is the master listening sock, we don't set a timer on it. It - * should be fine because if the dude doesn't receive our RESPONSE - * [which will contain the CHANGE] he will send another REQUEST which - * will "retrnasmit" the change. - */ - if (change && dp->dccps_role != DCCP_ROLE_LISTEN) { - dccp_pr_debug("reset feat negotiation timer %p\n", sk); - - /* XXX don't reset the timer on re-transmissions. I.e. reset it - * only when sending new stuff i guess. Currently the timer - * never backs off because on re-transmission it just resets it! - */ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); - } - return 0; } -- cgit v1.2.3 From 17c30b40ed79e9f3955e884632c8f01e577b204a Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Deprecate Ack Ratio sysctl This patch deprecates the Ack Ratio sysctl, since * Ack Ratio is entirely ignored by CCID-3 and CCID-4, * Ack Ratio currently doesn't work in CCID-2 (i.e. is always set to 1); * even if it would work in CCID-2, there is no point for a user to change it: - Ack Ratio is constrained by cwnd (RFC 4341, 6.1.2), - if Ack Ratio > cwnd, the system resorts to spurious RTO timeouts (since waiting for Acks which will never arrive in this window), - cwnd is not a user-configurable value. The only reasonable place for Ack Ratio is to print it for debugging. It is planned to do this later on, as part of e.g. dccp_probe. With this patch Ack Ratio is now under full control of feature negotiation: * Ack Ratio is resolved as a dependency of the selected CCID; * if the chosen CCID supports it (i.e. CCID == CCID-2), Ack Ratio is set to the default of 2, following RFC 4340, 11.3 - "New connections start with Ack Ratio 2 for both endpoints"; * what happens then is part of another patch set, since it concerns the dynamic update of Ack Ratio while the connection is in full flight. Thanks to Tomasz Grobelny for discussion leading up to this patch. Signed-off-by: Gerrit Renker Acked-by: Arnaldo Carvalho de Melo --- net/dccp/options.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 67a171a1268c..515ad45013ad 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -26,7 +26,6 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; -int sysctl_dccp_feat_ack_ratio = DCCPF_INITIAL_ACK_RATIO; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -- cgit v1.2.3 From b9aaac1c538a9c86e8ef3be2579a13ff55580908 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Increase the scope of variable-length htonl/ntohl functions This extends the scope of two available functions, encode|decode_value_var, to work up to 6 (8) bytes, to match maximum requirements in the RFC. These functions are going to be used both by general option processing and feature negotiation code, hence declarations have been put into feat.h. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Acked-by: Arnaldo Carvalho de Melo --- net/dccp/options.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 515ad45013ad..9cb0ff894052 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -29,16 +29,20 @@ int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; -static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) +u64 dccp_decode_value_var(const u8 *bf, const u8 len) { - u32 value = 0; + u64 value = 0; + if (len >= DCCP_OPTVAL_MAXLEN) + value += ((u64)*bf++) << 40; + if (len > 4) + value += ((u64)*bf++) << 32; if (len > 3) - value += *bf++ << 24; + value += ((u64)*bf++) << 24; if (len > 2) - value += *bf++ << 16; + value += ((u64)*bf++) << 16; if (len > 1) - value += *bf++ << 8; + value += ((u64)*bf++) << 8; if (len > 0) value += *bf; @@ -298,9 +302,12 @@ out_invalid_option: EXPORT_SYMBOL_GPL(dccp_parse_options); -static void dccp_encode_value_var(const u32 value, unsigned char *to, - const unsigned int len) +void dccp_encode_value_var(const u64 value, u8 *to, const u8 len) { + if (len >= DCCP_OPTVAL_MAXLEN) + *to++ = (value & 0xFF0000000000ull) >> 40; + if (len > 4) + *to++ = (value & 0xFF00000000ull) >> 32; if (len > 3) *to++ = (value & 0xFF000000) >> 24; if (len > 2) -- cgit v1.2.3 From d0440ee6f6903fcde6ed4efb88c910de1dfa18e5 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Support for Mandatory options Support for Mandatory options is provided by this patch, which will be used by subsequent feature-negotiation patches. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Acked-by: Arnaldo Carvalho de Melo --- net/dccp/options.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 9cb0ff894052..676d53065de9 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -467,6 +467,21 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, return 0; } +/** + * dccp_insert_option_mandatory - Mandatory option (5.8.2) + * Note that since we are using skb_push, this function needs to be called + * _after_ inserting the option it is supposed to influence (stack order). + */ +int dccp_insert_option_mandatory(struct sk_buff *skb) +{ + if (DCCP_SKB_CB(skb)->dccpd_opt_len >= DCCP_MAX_OPT_LEN) + return -1; + + DCCP_SKB_CB(skb)->dccpd_opt_len++; + *skb_push(skb, 1) = DCCPO_MANDATORY; + return 0; +} + static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, u8 *val, u8 len) { -- cgit v1.2.3 From cf9ddf73b9ba21a5cd6d3fcb0a45cfa9ec452033 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Header option insertion routine for feature-negotiation The patch extends existing code: * Confirm options divide into the confirmed value plus an optional preference list for SP values. Previously only the preference list was echoed for SP values, now the confirmed value is added as per RFC 4340, 6.1; * length and sanity checks are added to avoid illegal memory (or NULL) access. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/options.c | 91 +++++++++++++++++++----------------------------------- 1 file changed, 31 insertions(+), 60 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 676d53065de9..bfa1cb8f3ef1 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -482,23 +482,46 @@ int dccp_insert_option_mandatory(struct sk_buff *skb) return 0; } -static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, - u8 *val, u8 len) +/** + * dccp_insert_fn_opt - Insert single Feature-Negotiation option into @skb + * @type: %DCCPO_CHANGE_L, %DCCPO_CHANGE_R, %DCCPO_CONFIRM_L, %DCCPO_CONFIRM_R + * @feat: one out of %dccp_feature_numbers + * @val: NN value or SP array (preferred element first) to copy + * @len: true length of @val in bytes (excluding first element repetition) + * @repeat_first: whether to copy the first element of @val twice + * The last argument is used to construct Confirm options, where the preferred + * value and the preference list appear separately (RFC 4340, 6.3.1). Preference + * lists are kept such that the preferred entry is always first, so we only need + * to copy twice, and avoid the overhead of cloning into a bigger array. + */ +int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, + u8 *val, u8 len, bool repeat_first) { - u8 *to; + u8 tot_len, *to; - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 3 > DCCP_MAX_OPT_LEN) { - DCCP_WARN("packet too small for feature %d option!\n", feat); + /* take the `Feature' field and possible repetition into account */ + if (len > (DCCP_SINGLE_OPT_MAXLEN - 2)) { + DCCP_WARN("length %u for feature %u too large\n", len, feat); return -1; } - DCCP_SKB_CB(skb)->dccpd_opt_len += len + 3; + if (unlikely(val == NULL || len == 0)) + len = repeat_first = 0; + tot_len = 3 + repeat_first + len; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + tot_len > DCCP_MAX_OPT_LEN) { + DCCP_WARN("packet too small for feature %d option!\n", feat); + return -1; + } + DCCP_SKB_CB(skb)->dccpd_opt_len += tot_len; - to = skb_push(skb, len + 3); + to = skb_push(skb, tot_len); *to++ = type; - *to++ = len + 3; + *to++ = tot_len; *to++ = feat; + if (repeat_first) + *to++ = *val; if (len) memcpy(to, val, len); @@ -508,51 +531,6 @@ static int dccp_insert_feat_opt(struct sk_buff *skb, u8 type, u8 feat, return 0; } -static int dccp_insert_options_feat(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_minisock *dmsk = dccp_msk(sk); - struct dccp_opt_pend *opt, *next; - int change = 0; - - /* confirm any options [NN opts] */ - list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) { - dccp_insert_feat_opt(skb, opt->dccpop_type, - opt->dccpop_feat, opt->dccpop_val, - opt->dccpop_len); - /* fear empty confirms */ - if (opt->dccpop_val) - kfree(opt->dccpop_val); - kfree(opt); - } - INIT_LIST_HEAD(&dmsk->dccpms_conf); - - /* see which features we need to send */ - list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) { - /* see if we need to send any confirm */ - if (opt->dccpop_sc) { - dccp_insert_feat_opt(skb, opt->dccpop_type + 1, - opt->dccpop_feat, - opt->dccpop_sc->dccpoc_val, - opt->dccpop_sc->dccpoc_len); - - BUG_ON(!opt->dccpop_sc->dccpoc_val); - kfree(opt->dccpop_sc->dccpoc_val); - kfree(opt->dccpop_sc); - opt->dccpop_sc = NULL; - } - - /* any option not confirmed, re-send it */ - if (!opt->dccpop_conf) { - dccp_insert_feat_opt(skb, opt->dccpop_type, - opt->dccpop_feat, opt->dccpop_val, - opt->dccpop_len); - change++; - } - } - - return 0; -} - /* The length of all options needs to be a multiple of 4 (5.8) */ static void dccp_insert_option_padding(struct sk_buff *skb) { @@ -589,13 +567,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) dp->dccps_hc_rx_insert_options = 0; } - /* Feature negotiation */ - /* Data packets can't do feat negotiation */ - if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA && - DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATAACK && - dccp_insert_options_feat(sk, skb)) - return -1; - /* * Obtain RTT sample from Request/Response exchange. * This is currently used in CCID 3 initialisation. -- cgit v1.2.3 From f8a644c07e6f38b2c3cbaf99990e867d670d207b Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Integrate feature-negotiation insertion code The patch implements insertion of feature negotiation at the server (listening and request socket) and the client (connecting socket). In dccp_insert_options(), several statements have been grouped together now to achieve (I hope) better efficiency by reducing the number of tests each packet has to go through: - Ack Vectors are sent if the packet is neither a Data or a Request packet; - a previous issue is corrected - feature negotiation options are allowed on DataAck packets (5.8). Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/options.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index bfa1cb8f3ef1..0e277114cb23 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -554,11 +554,25 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) dccp_insert_option_ndp(sk, skb)) return -1; - if (!dccp_packet_without_ack(skb)) { - if (dmsk->dccpms_send_ack_vector && - dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && - dccp_insert_option_ackvec(sk, skb)) + if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { + + /* Feature Negotiation */ + if (dccp_feat_insert_opts(dp, NULL, skb)) return -1; + + if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST) { + /* + * Obtain RTT sample from Request/Response exchange. + * This is currently used in CCID 3 initialisation. + */ + if (dccp_insert_option_timestamp(sk, skb)) + return -1; + + } else if (dmsk->dccpms_send_ack_vector && + dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && + dccp_insert_option_ackvec(sk, skb)) { + return -1; + } } if (dp->dccps_hc_rx_insert_options) { @@ -567,14 +581,6 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) dp->dccps_hc_rx_insert_options = 0; } - /* - * Obtain RTT sample from Request/Response exchange. - * This is currently used in CCID 3 initialisation. - */ - if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_REQUEST && - dccp_insert_option_timestamp(sk, skb)) - return -1; - if (dp->dccps_timestamp_echo != 0 && dccp_insert_option_timestamp_echo(dp, NULL, skb)) return -1; @@ -587,6 +593,9 @@ int dccp_insert_options_rsk(struct dccp_request_sock *dreq, struct sk_buff *skb) { DCCP_SKB_CB(skb)->dccpd_opt_len = 0; + if (dccp_feat_insert_opts(NULL, dreq, skb)) + return -1; + if (dreq->dreq_timestamp_echo != 0 && dccp_insert_option_timestamp_echo(NULL, dreq, skb)) return -1; -- cgit v1.2.3 From 5a146b97d5e93db2df075c0d820f492bb996d0e3 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Process incoming Change feature-negotiation options This adds/replaces code for processing incoming ChangeL/R options. The main difference is that: * mandatory FN options are now interpreted inside the function (there are too many individual cases to do this externally); * the function returns an appropriate Reset code or 0, which is then used to fill in the data for the Reset packet. Old code, which is no longer used or referenced, has been removed. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 0e277114cb23..fb8466ea467a 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -135,22 +135,13 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, (unsigned long long)opt_recv->dccpor_ndp); break; case DCCPO_CHANGE_L: - /* fall through */ case DCCPO_CHANGE_R: if (pkt_type == DCCP_PKT_DATA) break; - if (len < 2) - goto out_invalid_option; - rc = dccp_feat_change_recv(sk, opt, *value, value + 1, - len - 1); - /* - * When there is a change error, change_recv is - * responsible for dealing with it. i.e. reply with an - * empty confirm. - * If the change was mandatory, then we need to die. - */ - if (rc && mandatory) - goto out_invalid_option; + rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, + *value, value + 1, len - 1); + if (rc) + goto out_featneg_failed; break; case DCCPO_CONFIRM_L: /* fall through */ @@ -292,8 +283,10 @@ out_nonsensical_length: out_invalid_option: DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; - DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len); + rc = DCCP_RESET_CODE_OPTION_ERROR; +out_featneg_failed: + DCCP_WARN("DCCP(%p): Option %d (len=%d) error=%u\n", sk, opt, len, rc); + DCCP_SKB_CB(skb)->dccpd_reset_code = rc; DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt; DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0; DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0; -- cgit v1.2.3 From d2150b7bff3d397692cf0dc890f198d23564de5f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Processing Confirm options Analogous to the previous patch, this adds code to interpret incoming Confirm feature-negotiation options. Both functions operate on the feature-negotiation list of either the request_sock (server) or the dccp_sock (client). Thanks to Wei Yongjun for pointing out that it is overly restrictive to check the entire list of confirmed SP values. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/options.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index fb8466ea467a..3a9a22f0ac1a 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -134,26 +134,14 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, dccp_pr_debug("%s opt: NDP count=%llu\n", dccp_role(sk), (unsigned long long)opt_recv->dccpor_ndp); break; - case DCCPO_CHANGE_L: - case DCCPO_CHANGE_R: - if (pkt_type == DCCP_PKT_DATA) + case DCCPO_CHANGE_L ... DCCPO_CONFIRM_R: + if (pkt_type == DCCP_PKT_DATA) /* RFC 4340, 6 */ break; rc = dccp_feat_parse_options(sk, dreq, mandatory, opt, *value, value + 1, len - 1); if (rc) goto out_featneg_failed; break; - case DCCPO_CONFIRM_L: - /* fall through */ - case DCCPO_CONFIRM_R: - if (pkt_type == DCCP_PKT_DATA) - break; - if (len < 2) /* FIXME this disallows empty confirm */ - goto out_invalid_option; - if (dccp_feat_confirm_recv(sk, opt, *value, - value + 1, len - 1)) - goto out_invalid_option; - break; case DCCPO_ACK_VECTOR_0: case DCCPO_ACK_VECTOR_1: if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ -- cgit v1.2.3 From 68e074bfcef269bc61006c2740d7f89ccbbd93d7 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Remove manual influence on NDP Count feature Updating the NDP count feature is handled automatically now: * for CCID-2 it is disabled, since the code does not use NDP counts; * for CCID-3 it is enabled, as NDP counts are used to determine loss lengths. Allowing the user to change NDP values leads to unpredictable and failing behaviour, since it is then possible to disable NDP counts even when they are needed (e.g. in CCID-3). This means that only those user settings are sensible that agree with the values for Send NDP Count implied by the choice of CCID. But those settings are already activated by the feature negotiation (CCID dependency tracking), hence this form of support is redundant. At startup the initialisation of the NDP count feature is with the default value of 0, which is done implicitly by the zeroing-out of the socket when it is allocated. If the choice of CCID or feature negotiation enables NDP count, this will then be updated via the NDP activation handler. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/options.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 3a9a22f0ac1a..6b0704497e83 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -27,7 +27,6 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; -int sysctl_dccp_feat_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT; u64 dccp_decode_value_var(const u8 *bf, const u8 len) { @@ -531,8 +530,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_opt_len = 0; - if (dmsk->dccpms_send_ndp_count && - dccp_insert_option_ndp(sk, skb)) + if (dp->dccps_send_ndp_count && dccp_insert_option_ndp(sk, skb)) return -1; if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { -- cgit v1.2.3 From b235dc4abbc1356284bd0dc730efa711f394e0e2 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Phase out the use of boolean Ack Vector sysctl This removes the use of the sysctl and the minisock variable for the Send Ack Vector feature, which is now handled fully dynamically via feature negotiation; i.e. when CCID2 is enabled, Ack Vectors are automatically enabled (as per RFC 4341, 4.). Using a sysctl in parallel to this implementation would open the door to crashes, since much of the code relies on tests of the boolean minisock / sysctl variable. Thus, this patch replaces all tests of type if (dccp_msk(sk)->dccpms_send_ack_vector) /* ... */ with if (dp->dccps_hc_rx_ackvec != NULL) /* ... */ The dccps_hc_rx_ackvec is allocated by the dccp_hdlr_ackvec() when feature negotiation concluded that Ack Vectors are to be used on the half-connection. Otherwise, it is NULL (due to dccp_init_sock/dccp_create_openreq_child), so that the test is a valid one. The activation handler for Ack Vectors is called as soon as the feature negotiation has concluded at the * server when the Ack marking the transition RESPOND => OPEN arrives; * client after it has sent its ACK, marking the transition REQUEST => PARTOPEN. Adding the sequence number of the Response packet to the Ack Vector has been removed, since (a) connection establishment implies that the Response has been received; (b) the CCIDs only look at packets received in the (PART)OPEN state, i.e. this entry will always be ignored; (c) it can not be used for anything useful - to detect loss for instance, only packets received after the loss can serve as pseudo-dupacks. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/options.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 6b0704497e83..aca309e16632 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -26,7 +26,6 @@ int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; -int sysctl_dccp_feat_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR; u64 dccp_decode_value_var(const u8 *bf, const u8 len) { @@ -145,8 +144,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, case DCCPO_ACK_VECTOR_1: if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ break; - - if (dccp_msk(sk)->dccpms_send_ack_vector && + if (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) goto out_invalid_option; break; @@ -526,7 +524,6 @@ static void dccp_insert_option_padding(struct sk_buff *skb) int dccp_insert_options(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); DCCP_SKB_CB(skb)->dccpd_opt_len = 0; @@ -547,7 +544,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (dccp_insert_option_timestamp(sk, skb)) return -1; - } else if (dmsk->dccpms_send_ack_vector && + } else if (dp->dccps_hc_rx_ackvec != NULL && dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && dccp_insert_option_ackvec(sk, skb)) { return -1; -- cgit v1.2.3 From 0a4822679d94e2b0117aeead06a19fad59533905 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Initialisation and type-checking of feature sysctls This patch takes care of initialising and type-checking sysctls related to feature negotiation. Type checking is important since some of the sysctls now directly act on the feature-negotiation process. The sysctls are initialised with the known default values for each feature. For the type-checking the value constraints from RFC 4340 are used: * Sequence Window uses the specified Wmin=32, the maximum is ulong (4 bytes), tested and confirmed that it works up to 4294967295 - for Gbps speed; * Ack Ratio is between 0 .. 0xffff (2-byte unsigned integer); * CCIDs are between 0 .. 255; * request_retries, retries1, retries2 also between 0..255 for good measure; * tx_qlen is checked to be non-negative; * sync_ratelimit remains as before. Further changes: ---------------- Performed s@sysctl_dccp_feat@sysctl_dccp@g since the sysctls are now in feat.c. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/options.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index aca309e16632..5906e96eedde 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -23,10 +23,6 @@ #include "dccp.h" #include "feat.h" -int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; -int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; -int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; - u64 dccp_decode_value_var(const u8 *bf, const u8 len) { u64 value = 0; -- cgit v1.2.3 From 76f738a7950b559a23ab3c692c99a02f35a54f7f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Debugging functions for feature negotiation Since all feature-negotiation processing now takes place in feat.c, functions for producing verbose debugging output are concentrated there. New functions to print out values, entry records, and options are provided, and also a macro is defined to not always have the function name in the output line. Thanks a lot to Wei Yongjun and Giuseppe Galeota for help with errors in an earlier revision of this patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald --- net/dccp/options.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 5906e96eedde..fd51cc70c63e 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -498,10 +498,6 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, *to++ = *val; if (len) memcpy(to, val, len); - - dccp_pr_debug("%s(%s (%d), ...), length %d\n", - dccp_feat_typename(type), - dccp_feat_name(feat), feat, len); return 0; } -- cgit v1.2.3 From 3306c781ff13aea89606435c134ec84e3c608681 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Add packet type information to CCID-specific option parsing This patch ... 1. adds packet type information to ccid_hc_{rx,tx}_parse_options(). This is necessary, since table 3 in RFC 4340, 5.8 leaves it to the CCIDs to state which options may (not) appear on what packet type. 2. adds such a check for CCID-3's {Loss Event, Receive} Rate as specified in RFC 4340 8.3 ("Receive Rate options MUST NOT be sent on DCCP-Data packets") and 8.5 ("Loss Event Rate options MUST NOT be sent on DCCP-Data packets"). 3. removes an unused argument `idx' from ccid_hc_{rx,tx}_parse_options(). This is also no longer necessary, since the CCID-specific option-parsing routines are passed every single parameter of the type-length-value option encoding. Also added documentation and made argument naming scheme consistent. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index fd51cc70c63e..b102774694c6 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -226,23 +226,15 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", dccp_role(sk), elapsed_time); break; - case 128 ... 191: { - const u16 idx = value - options; - + case 128 ... 191: if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, - opt, len, idx, - value) != 0) + pkt_type, opt, value, len)) goto out_invalid_option; - } break; - case 192 ... 255: { - const u16 idx = value - options; - + case 192 ... 255: if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, - opt, len, idx, - value) != 0) + pkt_type, opt, value, len)) goto out_invalid_option; - } break; default: DCCP_CRIT("DCCP(%p): option %d(len=%d) not " -- cgit v1.2.3 From f10ecaee6dc2c6d56783462b2a82e98bc81b55f4 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Replace magic CCID-specific numbers by symbolic constants The constants DCCPO_{MIN,MAX}_CCID_SPECIFIC are nowhere used in the code, but instead for the CCID-specific options numbers are used. This patch unifies the use of CCID-specific option numbers, by adding symbolic names reflecting the definitions in RFC 4340, 10.3. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index b102774694c6..e1c94e2b8be0 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -96,18 +96,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, } /* - * CCID-Specific Options (from RFC 4340, sec. 10.3): - * - * Option numbers 128 through 191 are for options sent from the - * HC-Sender to the HC-Receiver; option numbers 192 through 255 - * are for options sent from the HC-Receiver to the HC-Sender. - * * CCID-specific options are ignored during connection setup, as * negotiation may still be in progress (see RFC 4340, 10.3). * The same applies to Ack Vectors, as these depend on the CCID. - * */ - if (dreq != NULL && (opt >= 128 || + if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC || opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) goto ignore_option; @@ -226,12 +219,12 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", dccp_role(sk), elapsed_time); break; - case 128 ... 191: + case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC: if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, pkt_type, opt, value, len)) goto out_invalid_option; break; - case 192 ... 255: + case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, pkt_type, opt, value, len)) goto out_invalid_option; -- cgit v1.2.3 From 2975abd251d795810932b20354729ba236d95bf9 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp: Schedule an Ack when receiving timestamps This schedules an Ack when receiving a timestamp, exploiting the existing inet_csk_schedule_ack() function, saving one case in the `dccp_ack_pending()' function. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index e1c94e2b8be0..9fe0510f4022 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -163,6 +163,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, dccp_role(sk), ntohl(opt_val), (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); + /* schedule an Ack in case this sender is quiescent */ + inet_csk_schedule_ack(sk); break; case DCCPO_TIMESTAMP_ECHO: if (len != 4 && len != 6 && len != 8) -- cgit v1.2.3 From 4829007c7bc689cbc290fc09eccbe90bd52c2a5e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Separate internals of Ack Vectors from option-parsing code This patch * separates Ack Vector housekeeping code from option-insertion code; * shifts option-specific code from ackvec.c into options.c; * introduces a dedicated routine to take care of the Ack Vector records; * simplifies the dccp_ackvec_insert_avr() routine: the BUG_ON was redundant, since the list is automatically arranged in descending order of ack_seqno. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 9fe0510f4022..392d7db31342 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -428,6 +428,66 @@ static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, return 0; } +static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + /* Figure out how many options do we need to represent the ackvec */ + const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN); + u16 len = av->av_vec_len + 2 * nr_opts; + u8 i, nonce = 0; + const unsigned char *tail, *from; + unsigned char *to; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + return -1; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + len = av->av_vec_len; + from = av->av_buf + av->av_buf_head; + tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN; + + for (i = 0; i < nr_opts; ++i) { + int copylen = len; + + if (len > DCCP_SINGLE_OPT_MAXLEN) + copylen = DCCP_SINGLE_OPT_MAXLEN; + + /* + * RFC 4340, 12.2: Encode the Nonce Echo for this Ack Vector via + * its type; ack_nonce is the sum of all individual buf_nonce's. + */ + nonce ^= av->av_buf_nonce[i]; + + *to++ = DCCPO_ACK_VECTOR_0 + av->av_buf_nonce[i]; + *to++ = copylen + 2; + + /* Check if buf_head wraps */ + if (from + copylen > tail) { + const u16 tailsize = tail - from; + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + copylen -= tailsize; + from = av->av_buf; + } + + memcpy(to, from, copylen); + from += copylen; + to += copylen; + len -= copylen; + } + /* + * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. + */ + if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce)) + return -ENOBUFS; + return 0; +} + /** * dccp_insert_option_mandatory - Mandatory option (5.8.2) * Note that since we are using skb_push, this function needs to be called -- cgit v1.2.3 From d7dc7e5f49299739e610ea8febf9ea91a4dc1ae9 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Implementation of circular Ack Vector buffer with overflow handling This completes the implementation of a circular buffer for Ack Vectors, by extending the current (linear array-based) implementation. The changes are: (a) An `overflow' flag to deal with the case of overflow. As before, dynamic growth of the buffer will not be supported; but code will be added to deal robustly with overflowing Ack Vector buffers. (b) A `tail_seqno' field. When naively implementing the algorithm of Appendix A in RFC 4340, problems arise whenever subsequent Ack Vector records overlap, which can bring the entire run length calculation completely out of synch. (This is documented on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/\ ack_vectors/tracking_tail_ackno/ .) (c) The buffer lengthi is now computed dynamically (i.e. current fill level), as the span between head to tail. As a result, dccp_ackvec_pending() is now simpler - the #ifdef is no longer necessary since buf_empty is always true when IP_DCCP_ACKVEC is not configured. Note on overflow handling: ------------------------- The Ack Vector code previously simply started to drop packets when the Ack Vector buffer overflowed. This means that the userspace application will not be able to receive, only because of an Ack Vector storage problem. Furthermore, overflow may be transient, so that applications may later recover from the overflow. Recovering from dropped packets is more difficult (e.g. video key frames). Hence the patch uses a different policy: when the buffer overflows, the oldest entries are subsequently overwritten. This has a higher chance of recovery. Details are on http://www.erg.abdn.ac.uk/users/gerrit/dccp/notes/ack_vectors/ Signed-off-by: Gerrit Renker --- net/dccp/options.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 392d7db31342..3163ae980f16 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -432,9 +432,10 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + const u16 buflen = dccp_ackvec_buflen(av); /* Figure out how many options do we need to represent the ackvec */ - const u8 nr_opts = DIV_ROUND_UP(av->av_vec_len, DCCP_SINGLE_OPT_MAXLEN); - u16 len = av->av_vec_len + 2 * nr_opts; + const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); + u16 len = buflen + 2 * nr_opts; u8 i, nonce = 0; const unsigned char *tail, *from; unsigned char *to; @@ -445,7 +446,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_opt_len += len; to = skb_push(skb, len); - len = av->av_vec_len; + len = buflen; from = av->av_buf + av->av_buf_head; tail = av->av_buf + DCCPAV_MAX_ACKVEC_LEN; @@ -583,8 +584,7 @@ int dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (dccp_insert_option_timestamp(sk, skb)) return -1; - } else if (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_pending(dp->dccps_hc_rx_ackvec) && + } else if (dccp_ackvec_pending(sk) && dccp_insert_option_ackvec(sk, skb)) { return -1; } -- cgit v1.2.3 From 68b1de15765f2b0e0925e692dab2b2fa2abd93fc Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Algorithm to update buffer state This provides a routine to consistently update the buffer state when the peer acknowledges receipt of Ack Vectors; updating state in the list of Ack Vectors as well as in the circular buffer. While based on RFC 4340, several additional (and necessary) precautions were added to protect the consistency of the buffer state. These additions are essential, since analysis and experience showed that the basic algorithm was insufficient for this task (which lead to problems that were hard to debug). The algorithm now * deals with HC-sender acknowledging to HC-receiver and vice versa, * keeps track of the last unacknowledged but received seqno in tail_ackno, * has special cases to reset the overflow condition when appropriate, * is protected against receiving older information (would mess up buffer state). Note: The older code performed an unnecessary step, where the sender cleared Ack Vector state by parsing the Ack Vector received by the HC-receiver. Doing this was entirely redundant, since * the receiver always puts the full acknowledgment window (groups 2,3 in 11.4.2) into the Ack Vectors it sends; hence the HC-receiver is only interested in the highest state that the HC-sender received; * this means that the acknowledgment number on the (Data)Ack from the HC-sender is sufficient; and work done in parsing earlier state is not necessary, since the later state subsumes the earlier one (see also RFC 4340, A.4). This older interface (dccp_ackvec_parse()) is therefore removed. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 3163ae980f16..b11d7b7167f0 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -54,7 +54,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, struct dccp_sock *dp = dccp_sk(sk); const struct dccp_hdr *dh = dccp_hdr(skb); const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; - u64 ackno = DCCP_SKB_CB(skb)->dccpd_ack_seq; unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); unsigned char *opt_ptr = options; const unsigned char *opt_end = (unsigned char *)dh + @@ -133,9 +132,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, case DCCPO_ACK_VECTOR_1: if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ break; - if (dp->dccps_hc_rx_ackvec != NULL && - dccp_ackvec_parse(sk, skb, &ackno, opt, value, len)) - goto out_invalid_option; + dccp_pr_debug("%s Ack Vector (len=%u)\n", dccp_role(sk), + len); break; case DCCPO_TIMESTAMP: if (len != 4) -- cgit v1.2.3 From c2f42077bd06f300ae959204f3c007f820f5e769 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Schedule Sync as out-of-band mechanism The problem with Ack Vectors is that i) their length is variable and can in principle grow quite large, ii) it is hard to predict exactly how large they will be. Due to the second point it seems not a good idea to reduce the MPS; in particular when on average there is enough room for the Ack Vector and an increase in length is momentarily due to some burst loss, after which the Ack Vector returns to its normal/average length. The solution taken by this patch is to subtract a minimum-expected Ack Vector length from the MPS (previous patch), and to defer any larger Ack Vectors onto a separate Sync - but only if indeed there is no space left on the skb. This patch provides the infrastructure to schedule Sync-packets for transporting (urgent) out-of-band data. Its signalling is quicker than scheduling an Ack, since it does not need to wait for new application data. It can thus serve other parts of the DCCP code as well. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index b11d7b7167f0..791e07853a79 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -430,6 +430,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const u16 buflen = dccp_ackvec_buflen(av); /* Figure out how many options do we need to represent the ackvec */ const u8 nr_opts = DIV_ROUND_UP(buflen, DCCP_SINGLE_OPT_MAXLEN); @@ -438,10 +439,25 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) const unsigned char *tail, *from; unsigned char *to; - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + if (dcb->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + DCCP_WARN("Lacking space for %u bytes on %s packet\n", len, + dccp_packet_name(dcb->dccpd_type)); return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; + } + /* + * Since Ack Vectors are variable-length, we can not always predict + * their size. To catch exception cases where the space is running out + * on the skb, a separate Sync is scheduled to carry the Ack Vector. + */ + if (len > DCCPAV_MIN_OPTLEN && + len + dcb->dccpd_opt_len + skb->len > dp->dccps_mss_cache) { + DCCP_WARN("No space left for Ack Vector (%u) on skb (%u+%u), " + "MPS=%u ==> reduce payload size?\n", len, skb->len, + dcb->dccpd_opt_len, dp->dccps_mss_cache); + dp->dccps_sync_scheduled = 1; + return 0; + } + dcb->dccpd_opt_len += len; to = skb_push(skb, len); len = buflen; @@ -482,7 +498,7 @@ static int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) /* * Each sent Ack Vector is recorded in the list, as per A.2 of RFC 4340. */ - if (dccp_ackvec_update_records(av, DCCP_SKB_CB(skb)->dccpd_seq, nonce)) + if (dccp_ackvec_update_records(av, dcb->dccpd_seq, nonce)) return -ENOBUFS; return 0; } -- cgit v1.2.3 From c8bf462bc567c3dcb083ff95cc13060dd06f138c Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Thu, 4 Sep 2008 07:30:19 +0200 Subject: dccp ccid-2: Separate option parsing from CCID processing This patch replaces an almost identical replication of code: large parts of dccp_parse_options() re-appeared as ccid2_ackvector() in ccid2.c. Apart from the duplication, this caused two more problems: 1. CCIDs should not need to be concerned with parsing header options; 2. one can not assume that Ack Vectors appear as a contiguous area within an skb, it is legal to insert other options and/or padding in between. The current code would throw an error and stop reading in such a case. The patch provides a new data structure and associated list housekeeping. Only small changes were necessary to integrate with CCID-2: data structure initialisation, adapt list traversal routine, and add call to the provided cleanup routine. The latter also lead to fixing the following BUG: CCID-2 so far ignored Ack Vectors on all packets other than Ack/DataAck, which is incorrect, since Ack Vectors can be present on any packet that has an Ack field. Details: -------- * received Ack Vectors are parsed by dccp_parse_options() alone, which passes the result on to the CCID-specific routine ccid_hc_tx_parse_options(); * CCIDs interested in using/decoding Ack Vector information will add code to fetch parsed Ack Vectors via this interface; * a data structure, `struct dccp_ackvec_parsed' is provided as interface; * this structure arranges Ack Vectors of the same skb into a FIFO order; * a doubly-linked list is used to keep the required FIFO code small. Signed-off-by: Gerrit Renker --- net/dccp/options.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'net/dccp/options.c') diff --git a/net/dccp/options.c b/net/dccp/options.c index 791e07853a79..e5a32979d7d7 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -128,13 +128,6 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, if (rc) goto out_featneg_failed; break; - case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: - if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ - break; - dccp_pr_debug("%s Ack Vector (len=%u)\n", dccp_role(sk), - len); - break; case DCCPO_TIMESTAMP: if (len != 4) goto out_invalid_option; @@ -224,6 +217,16 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, pkt_type, opt, value, len)) goto out_invalid_option; break; + case DCCPO_ACK_VECTOR_0: + case DCCPO_ACK_VECTOR_1: + if (dccp_packet_without_ack(skb)) /* RFC 4340, 11.4 */ + break; + /* + * Ack vectors are processed by the TX CCID if it is + * interested. The RX CCID need not parse Ack Vectors, + * since it is only interested in clearing old state. + * Fall through. + */ case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, pkt_type, opt, value, len)) -- cgit v1.2.3