From 7552eb210c1681747bfac6030af9f74d25330595 Mon Sep 17 00:00:00 2001 From: Arthur Heymans Date: Wed, 30 Nov 2022 23:24:31 +0100 Subject: lib/lzmadecode: Allow for 8 byte reads on 64bit This adds an optimization to lzma decode to also read from the boot medium in chunks of 8 bytes if that is the general purpose register length instead of always 4 bytes. It depends on the cache / memory / spi controller whether this is faster, but it's likely to be either the same or faster. TESTED - google/vilboz: cached boot medium 64bit before - 32bit - 64bit after load FSP-M: 35,674 - 35,595 - 34,690 load ramstage: 42,134 - 43,378 - 40,882 load FSP-S: 24,954 - 25,496 - 24,368 - foxconn/g41m: uncached boot medium for testing 64bit before - 32bit - 64bit after load ramstage: 51,164 - 51,872 - 51,894 Change-Id: I890c075307c0aec877618d9902ea352ae42a3bfa Signed-off-by: Arthur Heymans Reviewed-on: https://review.coreboot.org/c/coreboot/+/70175 Tested-by: build bot (Jenkins) Reviewed-by: Lean Sheng Tan Reviewed-by: Julius Werner --- src/lib/lzmadecode.c | 18 +++++++++--------- src/lib/lzmadecode.h | 4 +++- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/lib/lzmadecode.c b/src/lib/lzmadecode.c index cb868290aaaa..5c6baa4160bf 100644 --- a/src/lib/lzmadecode.c +++ b/src/lib/lzmadecode.c @@ -35,15 +35,15 @@ #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 -/* Use 32-bit reads whenever possible to avoid bad flash performance. Fall back - * to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim +/* Use sizeof(SizeT) sized reads whenever possible to avoid bad flash performance. Fall back + * to byte reads for last sizeof(SizeT) bytes since RC_TEST returns an error when BufferLim * is *reached* (not surpassed!), meaning we can't allow that to happen while * there are still bytes to decode from the algorithm's point of view. */ #define RC_READ_BYTE \ - (look_ahead_ptr < 4 ? 
look_ahead.raw[look_ahead_ptr++] \ - : ((((uintptr_t) Buffer & 3) \ - || ((SizeT) (BufferLim - Buffer) <= 4)) ? (*Buffer++) \ - : ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), \ + (look_ahead_ptr < sizeof(SizeT) ? look_ahead.raw[look_ahead_ptr++] \ + : ((((uintptr_t) Buffer & (sizeof(SizeT) - 1)) \ + || ((SizeT) (BufferLim - Buffer) <= sizeof(SizeT))) ? (*Buffer++) \ + : ((look_ahead.dw = *(SizeT *)Buffer), (Buffer += sizeof(SizeT)), \ (look_ahead_ptr = 1), look_ahead.raw[0]))) #define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \ @@ -207,10 +207,10 @@ int LzmaDecode(CLzmaDecoderState *vs, int len = 0; const Byte *Buffer; const Byte *BufferLim; - int look_ahead_ptr = 4; + int look_ahead_ptr = sizeof(SizeT); union { - Byte raw[4]; - UInt32 dw; + Byte raw[sizeof(SizeT)]; + SizeT dw; } look_ahead; UInt32 Range; UInt32 Code; diff --git a/src/lib/lzmadecode.h b/src/lib/lzmadecode.h index 9ed352a564a7..5498061762a8 100644 --- a/src/lib/lzmadecode.h +++ b/src/lib/lzmadecode.h @@ -22,10 +22,12 @@ #ifndef __LZMADECODE_H #define __LZMADECODE_H +#include <types.h> + typedef unsigned char Byte; typedef unsigned short UInt16; typedef unsigned int UInt32; -typedef UInt32 SizeT; +typedef size_t SizeT; #define CProb UInt16 -- cgit v1.2.3