summaryrefslogtreecommitdiffstats
path: root/arch/x86/crypto/poly1305-sse2-x86_64.S
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-04-25 23:52:29 -0400
committerDavid S. Miller <davem@davemloft.net>2019-04-25 23:52:29 -0400
commit8b4483658364f05b2e32845c8f445cdfd9452286 (patch)
tree4734ef83378b1e90475ec4776ac72dfe16a7249b /arch/x86/crypto/poly1305-sse2-x86_64.S
parentc049d56eb219661c9ae48d596c3e633973f89d1f (diff)
parentcd8dead0c39457e58ec1d36db93aedca811d48f1 (diff)
downloadlinux-8b4483658364f05b2e32845c8f445cdfd9452286.tar.gz
linux-8b4483658364f05b2e32845c8f445cdfd9452286.tar.bz2
linux-8b4483658364f05b2e32845c8f445cdfd9452286.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Two easy cases of overlapping changes. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch/x86/crypto/poly1305-sse2-x86_64.S')
-rw-r--r--arch/x86/crypto/poly1305-sse2-x86_64.S22
1 files changed, 14 insertions, 8 deletions
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
index e6add74d78a5..6f0be7a86964 100644
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -253,16 +253,16 @@ ENTRY(poly1305_block_sse2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx
@@ -524,6 +524,12 @@ ENTRY(poly1305_2block_sse2)
paddq t2,t1
movq t1,d4
+ # Now do a partial reduction mod (2^130)-5, carrying h0 -> h1 -> h2 ->
+ # h3 -> h4 -> h0 -> h1 to get h0,h2,h3,h4 < 2^26 and h1 < 2^26 + a small
+ # amount. Careful: we must not assume the carry bits 'd0 >> 26',
+ # 'd1 >> 26', 'd2 >> 26', 'd3 >> 26', and '(d4 >> 26) * 5' fit in 32-bit
+ # integers. It's true in a single-block implementation, but not here.
+
# d1 += d0 >> 26
mov d0,%rax
shr $26,%rax
@@ -562,16 +568,16 @@ ENTRY(poly1305_2block_sse2)
# h0 += (d4 >> 26) * 5
mov d4,%rax
shr $26,%rax
- lea (%eax,%eax,4),%eax
- add %eax,%ebx
+ lea (%rax,%rax,4),%rax
+ add %rax,%rbx
# h4 = d4 & 0x3ffffff
mov d4,%rax
and $0x3ffffff,%eax
mov %eax,h4
# h1 += h0 >> 26
- mov %ebx,%eax
- shr $26,%eax
+ mov %rbx,%rax
+ shr $26,%rax
add %eax,h1
# h0 = h0 & 0x3ffffff
andl $0x3ffffff,%ebx