diff options
author | David S. Miller <davem@davemloft.net> | 2019-04-25 23:52:29 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-04-25 23:52:29 -0400 |
commit | 8b4483658364f05b2e32845c8f445cdfd9452286 (patch) | |
tree | 4734ef83378b1e90475ec4776ac72dfe16a7249b /arch/x86/crypto/poly1305-sse2-x86_64.S | |
parent | c049d56eb219661c9ae48d596c3e633973f89d1f (diff) | |
parent | cd8dead0c39457e58ec1d36db93aedca811d48f1 (diff) | |
download | linux-8b4483658364f05b2e32845c8f445cdfd9452286.tar.gz linux-8b4483658364f05b2e32845c8f445cdfd9452286.tar.bz2 linux-8b4483658364f05b2e32845c8f445cdfd9452286.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Two easy cases of overlapping changes.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/x86/crypto/poly1305-sse2-x86_64.S')
-rw-r--r-- | arch/x86/crypto/poly1305-sse2-x86_64.S | 22 |
1 files changed, 14 insertions, 8 deletions
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index e6add74d78a5..6f0be7a86964 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx @@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2) paddq t2,t1 movq t1,d4 + # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 -> + # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small + # amount. Careful: we must not assume the carry bits 'd0 >> 26', + # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit + # integers. It's true in a single-block implementation, but not here. + # d1 += d0 >> 26 mov d0,%rax shr $26,%rax @@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2) # h0 += (d4 >> 26) * 5 mov d4,%rax shr $26,%rax - lea (%eax,%eax,4),%eax - add %eax,%ebx + lea (%rax,%rax,4),%rax + add %rax,%rbx # h4 = d4 & 0x3ffffff mov d4,%rax and $0x3ffffff,%eax mov %eax,h4 # h1 += h0 >> 26 - mov %ebx,%eax - shr $26,%eax + mov %rbx,%rax + shr $26,%rax add %eax,h1 # h0 = h0 & 0x3ffffff andl $0x3ffffff,%ebx |