#!/bin/sh
#
# SPDX-License-Identifier: GPL-2.0-only

# DESCR: Check for non-ASCII and unprintable characters

# Force the C locale so that bracket-expression ranges such as ' -~' are
# evaluated byte-wise, independent of the caller's environment.
LC_ALL=C
export LC_ALL

# Basic regular expressions (plain grep): alternation must be spelled '\|'.
# Only paths matching INCLUDED_FILES are checked.
INCLUDED_FILES='\.[chsS]$\|\.asl$\|\.cb$\|\.inc$\|Kconfig\|\.ld$\|\.txt\|\.hex'
# Paths matching either of these are skipped.
EXCLUDED_DIRS='^payloads/external/\|^src/vendorcode/\|^Documentation/'
EXCLUDED_FILES='to-wiki/towiki\.sh$\|vga/vga_font\|video/font\|PDCurses.*x11'
# Offending lines containing any of these phrases (case-insensitive) are
# tolerated.
EXCLUDED_PHRASES='Copyright\|Ported to\|Intel®\|°C\|°F\|Athlon™\|Copyright.*©'

# Matches any byte that is neither TAB nor in the range space (0x20) to
# ~ (0x7E). Built with printf so the TAB is explicit instead of an invisible
# literal tab character inside the pattern.
NON_PRINTABLE="$(printf '[^\t -~]')"

# Do nothing (and succeed) if git isn't present or the code isn't in a git
# repo. Otherwise:
# 1. Get the list of tracked files containing a byte outside the range
#    TAB (0x09) to ~ (0x7E), and keep only the included, non-excluded paths.
# 2. Find any characters that aren't TAB, or space (0x20) to ~ (0x7E).
#    LF (0x0A) isn't included, as it ends the grep line.
# 3. Remove common phrases and names that have been found.
# 4. Run the result through grep again to highlight the issues that were
#    found. Without this step, the characters can be difficult to see.
#
# NOTE(review): xargs -I runs grep on one file at a time, so matches are
# printed as "line:text" without the file name. Adding the name would also
# expose it to the EXCLUDED_PHRASES filter, so it is left unchanged here.
if command -v git >/dev/null 2>&1 &&
	[ "$(git rev-parse --is-inside-work-tree 2>/dev/null)" = "true" ]
then
	git grep -lP "[^\t-~]" |
		grep "$INCLUDED_FILES" |
		grep -v "$EXCLUDED_DIRS" |
		grep -v "$EXCLUDED_FILES" |
		xargs -I % \
			grep -n "$NON_PRINTABLE" % |
		grep -iv "$EXCLUDED_PHRASES" |
		grep --color='auto' "$NON_PRINTABLE"
fi