summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArthur Heymans <arthur@aheymans.xyz>2022-11-30 23:24:31 +0100
committerLean Sheng Tan <sheng.tan@9elements.com>2024-02-21 16:19:05 +0000
commit7552eb210c1681747bfac6030af9f74d25330595 (patch)
tree0c2d40efda2474f4edaedb7d77a8a42ce30ecf6a
parentf317068fc3138162a59b802773795edfe3e8a8ef (diff)
downloadcoreboot-7552eb210c1681747bfac6030af9f74d25330595.tar.gz
coreboot-7552eb210c1681747bfac6030af9f74d25330595.tar.bz2
coreboot-7552eb210c1681747bfac6030af9f74d25330595.zip
lib/lzmadecode: Allow for 8 byte reads on 64bit
This adds an optimization to the LZMA decoder so that it reads from the boot medium in chunks of 8 bytes when that is the general-purpose register width, instead of always reading 4 bytes at a time. Whether this is faster depends on the cache / memory / SPI controller, but it is likely to be either the same or faster.

TESTED
- google/vilboz: cached boot medium
                  64bit before - 32bit  - 64bit after
  load FSP-M:     35,674       - 35,595 - 34,690
  load ramstage:  42,134       - 43,378 - 40,882
  load FSP-S:     24,954       - 25,496 - 24,368
- foxconn/g41m: uncached boot medium for testing
                  64bit before - 32bit  - 64bit after
  load ramstage:  51,164       - 51,872 - 51,894

Change-Id: I890c075307c0aec877618d9902ea352ae42a3bfa
Signed-off-by: Arthur Heymans <arthur@aheymans.xyz>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/70175
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Lean Sheng Tan <sheng.tan@9elements.com>
Reviewed-by: Julius Werner <jwerner@chromium.org>
-rw-r--r--src/lib/lzmadecode.c18
-rw-r--r--src/lib/lzmadecode.h4
2 files changed, 12 insertions, 10 deletions
diff --git a/src/lib/lzmadecode.c b/src/lib/lzmadecode.c
index cb868290aaaa..5c6baa4160bf 100644
--- a/src/lib/lzmadecode.c
+++ b/src/lib/lzmadecode.c
@@ -35,15 +35,15 @@
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5
-/* Use 32-bit reads whenever possible to avoid bad flash performance. Fall back
- * to byte reads for last 4 bytes since RC_TEST returns an error when BufferLim
+/* Use sizeof(SizeT) sized reads whenever possible to avoid bad flash performance. Fall back
+ * to byte reads for last sizeof(SizeT) bytes since RC_TEST returns an error when BufferLim
* is *reached* (not surpassed!), meaning we can't allow that to happen while
* there are still bytes to decode from the algorithm's point of view. */
#define RC_READ_BYTE \
- (look_ahead_ptr < 4 ? look_ahead.raw[look_ahead_ptr++] \
- : ((((uintptr_t) Buffer & 3) \
- || ((SizeT) (BufferLim - Buffer) <= 4)) ? (*Buffer++) \
- : ((look_ahead.dw = *(UInt32 *)Buffer), (Buffer += 4), \
+ (look_ahead_ptr < sizeof(SizeT) ? look_ahead.raw[look_ahead_ptr++] \
+ : ((((uintptr_t) Buffer & (sizeof(SizeT) - 1)) \
+ || ((SizeT) (BufferLim - Buffer) <= sizeof(SizeT))) ? (*Buffer++) \
+ : ((look_ahead.dw = *(SizeT *)Buffer), (Buffer += sizeof(SizeT)), \
(look_ahead_ptr = 1), look_ahead.raw[0])))
#define RC_INIT2 Code = 0; Range = 0xFFFFFFFF; \
@@ -207,10 +207,10 @@ int LzmaDecode(CLzmaDecoderState *vs,
int len = 0;
const Byte *Buffer;
const Byte *BufferLim;
- int look_ahead_ptr = 4;
+ int look_ahead_ptr = sizeof(SizeT);
union {
- Byte raw[4];
- UInt32 dw;
+ Byte raw[sizeof(SizeT)];
+ SizeT dw;
} look_ahead;
UInt32 Range;
UInt32 Code;
diff --git a/src/lib/lzmadecode.h b/src/lib/lzmadecode.h
index 9ed352a564a7..5498061762a8 100644
--- a/src/lib/lzmadecode.h
+++ b/src/lib/lzmadecode.h
@@ -22,10 +22,12 @@
#ifndef __LZMADECODE_H
#define __LZMADECODE_H
+#include <types.h>
+
typedef unsigned char Byte;
typedef unsigned short UInt16;
typedef unsigned int UInt32;
-typedef UInt32 SizeT;
+typedef size_t SizeT;
#define CProb UInt16