summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@linux-foundation.org>2007-03-22 12:10:58 -0700
committerDavid S. Miller <davem@sunset.davemloft.net>2007-04-25 22:27:19 -0700
commit7e58886b45bc4a309aeaa8178ef89ff767daaf7f (patch)
treecd375c5d4a6ec5584531992f8bdbcae775659ed6
parent22b9a0a3a49ab1a856e0853b3f3dd2abd156bd7c (diff)
downloadlinux-7e58886b45bc4a309aeaa8178ef89ff767daaf7f.tar.gz
linux-7e58886b45bc4a309aeaa8178ef89ff767daaf7f.tar.bz2
linux-7e58886b45bc4a309aeaa8178ef89ff767daaf7f.zip
[TCP]: cubic optimization
Use willy's work in optimizing cube root by having table for small values. Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp_cubic.c50
1 files changed, 39 insertions, 11 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0e6cdfeb207a..15c580375002 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -91,23 +91,51 @@ static void bictcp_init(struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-/*
- * calculate the cubic root of x using Newton-Raphson
+/* calculate the cubic root of x using a table lookup followed by one
+ * Newton-Raphson iteration.
+ * Avg err ~= 0.195%
*/
static u32 cubic_root(u64 a)
{
- u32 x;
-
- /* Initial estimate is based on:
- * cbrt(x) = exp(log(x) / 3)
+ u32 x, b, shift;
+ /*
+ * cbrt(x) MSB values for x MSB values in [0..63].
+ * Precomputed then refined by hand - Willy Tarreau
+ *
+ * For x in [0..63],
+ * v = cbrt(x << 18) - 1
+ * cbrt(x) = (v[x] + 10) >> 6
*/
- x = 1u << (fls64(a)/3);
+ static const u8 v[] = {
+ /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118,
+ /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156,
+ /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179,
+ /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199,
+ /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215,
+ /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229,
+ /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242,
+ /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254,
+ };
+
+ b = fls64(a);
+ if (b < 7) {
+ /* a in [0..63] */
+ return ((u32)v[(u32)a] + 35) >> 6;
+ }
+
+ b = ((b * 84) >> 8) - 1;
+ shift = (a >> (b * 3));
- /* converges to 32 bits in 3 iterations */
- x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
- x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
- x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
+ x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
+ /*
+ * Newton-Raphson iteration
+ * 2
+ * x = ( 2 * x + a / x ) / 3
+ * k+1 k k
+ */
+ x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
+ x = ((x * 341) >> 10);
return x;
}