From b5265c813ce4efbfa2e46fd27cdf9a7f44a35d2e Mon Sep 17 00:00:00 2001 From: Dave Rodgman Date: Thu, 11 Jun 2020 17:34:54 -0700 Subject: lib/lzo: fix ambiguous encoding bug in lzo-rle In some rare cases, for input data over 32 KB, lzo-rle could encode two different inputs to the same compressed representation, so that decompression is then ambiguous (i.e. data may be corrupted - although zram is not affected because it operates over 4 KB pages). This modifies the compressor without changing the decompressor or the bitstream format, such that: - there is no change to how data produced by the old compressor is decompressed - an old decompressor will correctly decode data from the updated compressor - performance and compression ratio are not affected - we avoid introducing a new bitstream format In testing over 12.8M real-world files totalling 903 GB, three files were affected by this bug. I also constructed 37M semi-random 64 KB files totalling 2.27 TB, and saw no affected files. Finally I tested over files constructed to contain each of the ~1024 possible bad input sequences; for all of these cases, updated lzo-rle worked correctly. There is no significant impact to performance or compression ratio. Signed-off-by: Dave Rodgman Signed-off-by: Andrew Morton Cc: Mark Rutland Cc: Dave Rodgman Cc: Willy Tarreau Cc: Sergey Senozhatsky Cc: Markus F.X.J. Oberhumer Cc: Minchan Kim Cc: Nitin Gupta Cc: Chao Yu Cc: Link: http://lkml.kernel.org/r/20200507100203.29785-1-dave.rodgman@arm.com Signed-off-by: Linus Torvalds --- lib/lzo/lzo1x_compress.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'lib') diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c index 717c940112f9..8ad5ba2b86e2 100644 --- a/lib/lzo/lzo1x_compress.c +++ b/lib/lzo/lzo1x_compress.c @@ -268,6 +268,19 @@ m_len_done: *op++ = (M4_MARKER | ((m_off >> 11) & 8) | (m_len - 2)); else { + if (unlikely(((m_off & 0x403f) == 0x403f) + && (m_len >= 261) + && (m_len <= 264)) + && likely(bitstream_version)) { + // Under lzo-rle, block copies + // for 261 <= length <= 264 and + // (distance & 0x80f3) == 0x80f3 + // can result in ambiguous + // output. Adjust length + // to 260 to prevent ambiguity. + ip -= m_len - 260; + m_len = 260; + } m_len -= M4_MAX_LEN; *op++ = (M4_MARKER | ((m_off >> 11) & 8)); while (unlikely(m_len > 255)) { -- cgit v1.2.3