## @file
# Check a patch for various format issues
#
# Copyright (c) 2015 - 2021, Intel Corporation. All rights reserved.
# Copyright (C) 2020, Red Hat, Inc.
# Copyright (c) 2020 - 2023, Arm Limited. All rights reserved.
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
#
from __future__ import print_function
VersionNumber = '0.1'
__copyright__ = "Copyright (c) 2015 - 2016, Intel Corporation All rights reserved."
import email
import argparse
import os
import re
import subprocess
import sys
import email.header
class Verbose:
    """Output verbosity levels consulted by the checkers in this file."""

    # Ordered so that a numerically larger level means more output.
    SILENT = 0
    ONELINE = 1
    NORMAL = 2

    # Currently selected level; the command line parser lowers it for
    # --oneline and --silent.
    level = NORMAL
class PatchCheckConf:
    """Global switches that relax individual checks."""

    # ignore_change_id: when True, the commit message check skips the
    #   'Change-Id:' test.
    # ignore_multi_package: when True, a commit touching several packages is
    #   only reported as a warning and does not fail the run.
    # Both default to False and are enabled from the command line.
    ignore_change_id = ignore_multi_package = False
class EmailAddressCheck:
    """Checks an email address.

    The check runs in the constructor.  Afterwards ``ok`` is True when no
    problem was reported and False otherwise.
    """

    def __init__(self, email, description):
        """Validate *email* and label any report with *description*.

        email: text expected to look like ``Name <address>``, or None.
        description: short label (for example 'Author') that is quoted in
            the header line of a report, or None.
        """
        self.ok = True

        # error() builds its header line from self.description, so the
        # attribute must exist before the first error() call below.
        if description is None:
            self.description = 'unspecified'
        else:
            self.description = "'" + description + "'"

        if email is None:
            self.error('Email address is missing!')
            return

        if description is None:
            self.error('Email description is missing!')
            return

        self.check_email_address(email)

    def error(self, *err):
        """Record a failure and print *err* according to the verbosity.

        The header line is printed only for the first failure.  The first
        item of *err* is printed as a bullet, the remaining ones as
        continuation lines.
        """
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('The ' + self.description + ' email address is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for count, line in enumerate(err):
            prefix = (' *', ' ')[count > 0]
            print(prefix, line)

    # Groups: (1) display name, (2) white space between the name and '<',
    # (3) the address found between '<' and '>'.
    email_re1 = re.compile(r'(?:\s*)(.*?)(\s*)<(.+)>\s*$',
                           re.MULTILINE|re.IGNORECASE)

    def check_email_address(self, email):
        """Report every formatting problem found in *email*."""
        email = email.strip()
        mo = self.email_re1.match(email)
        if mo is None:
            self.error("Email format is invalid: " + email.strip())
            return

        name = mo.group(1).strip()
        address = mo.group(3)

        if name == '':
            self.error("Name is not provided with email address: " +
                       email)
        else:
            quoted = len(name) > 2 and name[0] == '"' and name[-1] == '"'
            if ',' in name and not quoted:
                self.error('Add quotes (") around name with a comma: ' +
                           name)

        if mo.group(2) == '':
            self.error("There should be a space between the name and " +
                       "email address: " + email)

        if ' ' in address:
            self.error("The email address cannot contain a space: " +
                       address)

        # Both tests below report an address that was rewritten by the
        # mailing list rather than supplied by the contributor.
        if address == 'devel@edk2.groups.io':
            self.error("Email rewritten by lists DMARC / DKIM / SPF: " +
                       email)

        if ' via groups.io' in name.lower() and address.endswith('@groups.io'):
            self.error("Email rewritten by lists DMARC / DKIM / SPF: " +
                       email)
class CommitMessageCheck:
    """Checks the contents of a git commit message.

    The checks run in the constructor.  Afterwards ``ok`` tells whether the
    message passed and ``ignore_multi_package`` tells whether the message
    carried the 'PatchCheck.ignore-multi-package' option.
    """

    def __init__(self, subject, message, author_email):
        """Run all commit message checks.

        subject: subject line of the commit, or None.
        message: body of the commit message, or None.
        author_email: content of the 'From' header, or None.
        """
        self.ok = True
        self.ignore_multi_package = False

        if subject is None and message is None:
            self.error('Commit message is missing!')
            return

        # Only one of the two may be missing here; use an empty string so
        # the checks below report format errors instead of raising.
        if subject is None:
            subject = ''
        if message is None:
            message = ''

        # Merge commits authored by the Mergify bot carry no Signed-off-by,
        # so that single check is skipped for them.
        is_mergify_merge = (
            author_email is not None and
            "mergify[bot]@users.noreply.github.com" in author_email and
            "Merge branch" in subject)

        self.subject = subject
        self.msg = message

        # Echo the subject only in the normal mode so that --silent and
        # --oneline keep their documented output.
        if Verbose.level >= Verbose.NORMAL:
            print (subject)

        self.check_contributed_under()
        if not is_mergify_merge:
            self.check_signed_off_by()
        self.check_misc_signatures()
        self.check_overall_format()
        if not PatchCheckConf.ignore_change_id:
            self.check_change_id_format()
        self.check_ci_options_format()
        self.report_message_result()

    url = 'https://github.com/tianocore/tianocore.github.io/wiki/Commit-Message-Format'

    def report_message_result(self):
        """Print the verdict: a success line, or the format guide URL."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            print('The commit message format passed all checks.')
        else:
            print(self.url)

    def error(self, *err):
        """Record a failure and print *err* according to the verbosity.

        The header line is printed only for the first failure.  The first
        item of *err* is printed as a bullet, the remaining ones as
        continuation lines.
        """
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('The commit message format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for count, line in enumerate(err):
            prefix = (' *', ' ')[count > 0]
            print(prefix, line)

    # Find 'contributed-under:' at the start of a line ignoring case and
    # requires ':' to be present.  Matches if there is white space before
    # the tag or between the tag and the ':'.
    contributed_under_re = \
        re.compile(r'^\s*contributed-under\s*:', re.MULTILINE|re.IGNORECASE)

    def check_contributed_under(self):
        """Report a leftover 'Contributed-under:' tag."""
        match = self.contributed_under_re.search(self.msg)
        if match is not None:
            self.error('Contributed-under! (Note: this must be ' +
                       'removed by the code contributor!)')

    @staticmethod
    def make_signature_re(sig, re_input=False):
        """Build the regex that finds '<sig>: <value>' lines.

        sig: tag text such as 'Signed-off-by'.  Unless *re_input* is true,
            every '-' also matches runs of '-' or white space so that
            misspelled tags are still found.
        re_input: when true, *sig* already is a regular expression.

        The pattern has four groups, in order: the tag as written, the
        white space before ':', the white space after ':' and the value.
        Raises whatever re.compile raises, after printing the pattern.
        """
        if re_input:
            sub_re = sig
        else:
            sub_re = sig.replace('-', r'[-\s]+')
        re_str = (r'^(?P<tag>' + sub_re +
                  r')(\s*):(\s*)(?P<value>\S.*?)(?:\s*)$')
        try:
            return re.compile(re_str, re.MULTILINE|re.IGNORECASE)
        except Exception:
            print("Tried to compile re:", re_str)
            raise

    # Matches one line of the trailing signature block: either
    # 'Tag: value' or a bracketed '[updater: note]' remark.
    sig_block_re = \
        re.compile(r'''^
                        (?: (?P<tag>[^:]+) \s* : \s*
                            (?P<value>\S.*?) )
                            |
                        (?: \[ (?P<updater>[^:]+) \s* : \s*
                               (?P<note>.+?) \s* \] )
                        \s* $''',
                   re.VERBOSE | re.MULTILINE)

    def find_signatures(self, sig):
        """Find and validate all '<sig>:' lines of the message.

        sig: tag name; '-by' is appended unless it already ends with
            '-by' or is 'Cc'.
        Returns the list of match tuples (tag, space before ':', space
        after ':', value) produced by the signature regex.
        """
        if not sig.endswith('-by') and sig != 'Cc':
            sig += '-by'

        regex = self.make_signature_re(sig)
        sigs = regex.findall(self.msg)

        # First report every tag whose spelling or case differs ...
        for s in sigs:
            if s[0] != sig:
                self.error("'" +s[0] + "' should be '" + sig + "'")

        # ... then check spacing and the email address of each signature.
        for s in sigs:
            if s[1] != '':
                self.error('There should be no spaces between ' + sig +
                           " and the ':'")
            if s[2] != ' ':
                self.error("There should be a space after '" + sig + ":'")

            self.ok &= EmailAddressCheck(s[3], sig).ok

        return sigs

    def check_signed_off_by(self):
        """Require at least one well-formed Signed-off-by line."""
        sob='Signed-off-by'
        if self.msg.find(sob) < 0:
            self.error('Missing Signed-off-by! (Note: this must be ' +
                       'added by the code contributor!)')
            return

        sobs = self.find_signatures('Signed-off')

        if len(sobs) == 0:
            self.error('Invalid Signed-off-by format!')
            return

    sig_types = (
        'Reviewed',
        'Reported',
        'Tested',
        'Suggested',
        'Acked',
        'Cc'
        )

    def check_misc_signatures(self):
        """Validate every signature kind listed in sig_types."""
        for sigtype in self.sig_types:
            self.find_signatures(sigtype)

    # The trailing [^0-9] means the identifier must be followed by a
    # non-digit character.
    cve_re = re.compile('CVE-[0-9]{4}-[0-9]{5}[^0-9]')

    # Body lines starting with one of these are exempt from the long line
    # warning.
    long_line_exempt_prefixes = (
        'git-svn-id:',
        'Reviewed-by',
        'Acked-by:',
        'Tested-by:',
        'Reported-by:',
        'Suggested-by:',
        'Signed-off-by:',
        'Cc:',
        )

    def check_overall_format(self):
        """Check subject length, body line length and the signature block."""
        # str.splitlines() drops the line terminators, so the body lines
        # carry no '\r\n'; '\n' is used for the two lines added below.
        lines = self.msg.splitlines()
        empty_line = '\n'

        # Rebuild the whole message: subject, separator line, body.  The
        # terminator appended to the subject also supplies the trailing
        # non-digit character that cve_re requires.
        lines.insert(0, empty_line)
        lines.insert(0, self.subject + empty_line)

        count = len(lines)

        #
        # If CVE-xxxx-xxxxx is present in subject line, then limit length of
        # subject line to 92 characters, otherwise to 75 characters.
        #
        if re.search(self.cve_re, lines[0]):
            limit = 93
        else:
            limit = 76
        subject_length = len(lines[0].rstrip())
        if subject_length >= limit:
            self.error(
                'First line of commit message (subject line) is too long (%d >= %d).' %
                (subject_length, limit)
                )

        if len(lines[0].strip()) == 0:
            self.error('First line of commit message (subject line) ' +
                       'is empty.')

        # lines[1] is the separator inserted above; the test is retained
        # as a guard for that construction.
        if count >= 2 and lines[1].strip() != '':
            self.error('Second line of commit message should be ' +
                       'empty.')

        for i in range(2, count):
            body_line = lines[i]
            if (len(body_line) >= 76 and
                    len(body_line.split()) > 1 and
                    not body_line.startswith(self.long_line_exempt_prefixes)):
                #
                # Print a warning if body line is longer than 75 characters.
                # Warnings do not fail the check; they are shown only in
                # the normal mode so --silent and --oneline stay quiet.
                #
                if Verbose.level >= Verbose.NORMAL:
                    print(
                        'WARNING - Line %d of commit message is too long (%d >= 76).' %
                        (i + 1, len(body_line))
                        )
                    print(body_line)

        # Walk backwards from the last line (the subject at index 0 is not
        # visited): every line must match sig_block_re until an empty line
        # ends the signature block.
        last_sig_line = None
        for i in range(count - 1, 0, -1):
            line = lines[i]
            mo = self.sig_block_re.match(line)
            if mo is None:
                if line.strip() == '':
                    break
                elif last_sig_line is not None:
                    err2 = 'Add empty line before "%s"?' % last_sig_line
                    self.error('The line before the signature block ' +
                               'should be empty', err2)
                else:
                    self.error('The signature block was not found')
                break
            last_sig_line = line.strip()

    def check_change_id_format(self):
        """Report a 'Change-Id:' tag found anywhere in the message."""
        cid='Change-Id:'
        if self.msg.find(cid) != -1:
            self.error('\"%s\" found in commit message:' % cid)
            return

    def check_ci_options_format(self):
        """Read the options given on 'Continuous-integration-options:' lines."""
        cio='Continuous-integration-options:'
        for line in self.msg.splitlines():
            if not line.startswith(cio):
                continue
            options = line.split(':', 1)[1].split()
            if 'PatchCheck.ignore-multi-package' in options:
                self.ignore_multi_package = True
# Parser states of GitDiffCheck: START expects a 'diff --git' line,
# PRE_PATCH reads the per-file header lines, PATCH reads hunk content.
START, PRE_PATCH, PATCH = 0, 1, 2
class GitDiffCheck:
    """Checks the contents of a git diff.

    The check runs in the constructor.  Afterwards ``ok`` is False when any
    problem was reported, ``format_ok`` is False when the diff itself could
    not be parsed, and ``new_bin`` lists the binary files the diff adds.
    """

    def __init__(self, diff):
        """Parse *diff* (text of a git diff) line by line and check it."""
        self.ok = True
        self.format_ok = True
        self.lines = diff.splitlines(True)
        self.count = len(self.lines)
        self.line_num = 0
        self.state = START
        self.new_bin = []
        # Per-file state; replaced when a 'diff --git' line is processed.
        self.filename = None
        self.binary = False
        self.is_newfile = False
        self.force_crlf = True
        self.force_notabs = True
        while self.line_num < self.count and self.format_ok:
            line_num = self.line_num
            self.run()
            # Every call of run() has to consume at least one line.
            assert(self.line_num > line_num)
        self.report_message_result()

    def report_message_result(self):
        """Print the verdict and the list of newly added binary files."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            print('The code passed all checks.')
        if self.new_bin:
            print('\nWARNING - The following binary files will be added ' +
                  'into the repository:')
            for binary in self.new_bin:
                print(' ' + binary)

    def _begin_file(self, line):
        """Start a new file section from the 'diff --git' line *line*."""
        self.state = PRE_PATCH
        # Skip the 13 characters of 'diff --git a/' and keep the text up
        # to the next space.
        self.filename = line[13:].split(' ', 1)[0]
        self.is_newfile = False
        self.force_crlf = True
        self.force_notabs = True
        if self.filename.endswith('.rtf'):
            self.force_crlf = False
            self.force_notabs = False
        if self.filename.endswith('.sh') or \
                self.filename.startswith('BaseTools/BinWrappers/PosixLike/') or \
                self.filename == 'BaseTools/BuildEnv':
            #
            # Do not enforce CR/LF line endings for linux shell scripts.
            # Some linux shell scripts don't end with the ".sh" extension,
            # they are identified by their path.
            #
            self.force_crlf = False
        if self.filename == '.gitmodules' or \
                self.filename == 'BaseTools/Conf/diff.order':
            #
            # .gitmodules and diff orderfiles are used internally by git
            # use tabs and LF line endings.  Do not enforce no tabs and
            # do not enforce CR/LF line endings.
            #
            self.force_crlf = False
            self.force_notabs = False
        basename = os.path.basename(self.filename)
        if basename == 'GNUmakefile' or \
                basename.lower() == 'makefile' or \
                os.path.splitext(self.filename)[1] == '.makefile' or \
                self.filename.startswith(
                    'BaseTools/Source/C/VfrCompile/Pccts/'):
            self.force_notabs = False

    def run(self):
        """Process the line at self.line_num and advance self.line_num."""
        line = self.lines[self.line_num]

        # A 'diff --git' line always starts the next file section.
        if self.state in (PRE_PATCH, PATCH):
            if line.startswith('diff --git'):
                self.state = START
        if self.state == PATCH:
            if line.startswith('@@ '):
                self.state = PRE_PATCH
            elif len(line) >= 1 and line[0] not in ' -+' and \
                    not line.startswith('\r\n') and \
                    not line.startswith(r'\ No newline ') and not self.binary:
                # The line cannot belong to a hunk, so the patch ends
                # here.  Another diff after this point is an error.
                for later in self.lines[self.line_num + 1:]:
                    if later.startswith('diff --git'):
                        self.format_error('diff found after end of patch')
                        break
                self.line_num = self.count
                return

        if self.state == START:
            if line.startswith('diff --git'):
                self._begin_file(line)
            elif len(line.rstrip()) != 0:
                self.format_error("didn't find diff command")
            self.line_num += 1
        elif self.state == PRE_PATCH:
            if line.startswith('@@ '):
                self.state = PATCH
                self.binary = False
            elif line.startswith('GIT binary patch') or \
                    line.startswith('Binary files'):
                self.state = PATCH
                self.binary = True
                if self.is_newfile:
                    self.new_bin.append(self.filename)
            elif line.startswith('new file mode 160000'):
                #
                # New submodule.  Do not enforce CR/LF line endings
                #
                self.force_crlf = False
            else:
                # Re-evaluated for every header line, so the value that
                # counts is the one from the line just before the binary
                # marker.
                self.is_newfile = self.newfile_prefix_re.match(line)
                if not line.startswith(self.pre_patch_prefixes):
                    self.format_error("didn't find diff hunk marker (@@)")
            self.line_num += 1
        elif self.state == PATCH:
            if self.binary or self.filename.endswith(".rtf"):
                pass
            elif line.startswith('-'):
                pass
            elif line.startswith('+'):
                self.check_added_line(line[1:])
            elif line.startswith('\r\n'):
                pass
            elif line.startswith(r'\ No newline '):
                pass
            elif not line.startswith(' '):
                self.format_error("unexpected patch line")
            self.line_num += 1

    pre_patch_prefixes = (
        '--- ',
        '+++ ',
        'index ',
        'new file ',
        'deleted file ',
        'old mode ',
        'new mode ',
        'similarity index ',
        'copy from ',
        'copy to ',
        'rename ',
        )

    line_endings = ('\r\n', '\n\r', '\n', '\r')

    # An 'index' line whose old blob id consists of zeros only.
    newfile_prefix_re = \
        re.compile(r'''^
                       index\ 0+\.\.
                   ''',
                   re.VERBOSE)

    def added_line_error(self, msg, line):
        """Report *msg* together with the file name and the added line."""
        lines = [ msg ]
        if self.filename is not None:
            lines.append('File: ' + self.filename)
        lines.append('Line: ' + line)

        self.error(*lines)

    old_debug_re = \
        re.compile(r'''
                        DEBUG \s* \( \s* \( \s*
                        (?: DEBUG_[A-Z_]+ \s* \| \s*)*
                        EFI_D_ ([A-Z_]+)
                   ''',
                   re.VERBOSE)

    def check_added_line(self, line):
        """Check one added line (without the leading '+') of a text file."""
        eol = ''
        for an_eol in self.line_endings:
            if line.endswith(an_eol):
                eol = an_eol
                line = line[:-len(eol)]
                # Strip exactly one terminator; continuing would strip a
                # second one and report the wrong line ending.
                break

        stripped = line.rstrip()

        if self.force_crlf and eol != '\r\n' and (line.find('Subproject commit') == -1):
            self.added_line_error('Line ending (%s) is not CRLF' % repr(eol),
                                  line)
        if self.force_notabs and '\t' in line:
            self.added_line_error('Tab character used', line)
        if len(stripped) < len(line):
            self.added_line_error('Trailing whitespace found', line)

        mo = self.old_debug_re.search(line)
        if mo is not None:
            self.added_line_error('EFI_D_' + mo.group(1) + ' was used, '
                                  'but DEBUG_' + mo.group(1) +
                                  ' is now recommended', line)

        # This script mentions the old macro name itself, so a patch to
        # this very file is exempt from the check below.
        rp_file = os.path.realpath(self.filename)
        rp_script = os.path.realpath(__file__)
        if line.find('__FUNCTION__') != -1 and rp_file != rp_script:
            self.added_line_error('__FUNCTION__ was used, but __func__ '
                                  'is now recommended', line)

    # Not referenced by the methods of this class.
    split_diff_re = re.compile(r'''
                                   (?P<cmd>
                                       ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                                   )
                                   (?P<index>
                                       ^ index \s+ .+ $
                                   )
                               ''',
                               re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    def format_error(self, err):
        """Report a diff that cannot be parsed and stop the parse loop."""
        self.format_ok = False
        err = 'Patch format error: ' + err
        err2 = 'Line: ' + self.lines[self.line_num].rstrip()
        self.error(err, err2)

    def error(self, *err):
        """Record a failure and print *err* according to the verbosity.

        The header line is printed only for the first failure.  The first
        item of *err* is printed as a bullet, the remaining ones as
        continuation lines.
        """
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('Code format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for count, line in enumerate(err):
            prefix = (' *', ' ')[count > 0]
            print(prefix, line)
class CheckOnePatch:
    """Checks the contents of a git email formatted patch.

    Various checks are performed on both the commit message and the
    patch content.
    """

    def __init__(self, name, patch):
        """Check *patch* (text); *name* labels the one-line result."""
        self.patch = patch
        self.find_patch_pieces()

        email_check = EmailAddressCheck(self.author_email, 'Author')
        email_ok = email_check.ok

        msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg, self.author_email)
        msg_ok = msg_check.ok
        self.ignore_multi_package = msg_check.ignore_multi_package

        diff_ok = True
        if self.diff is not None:
            diff_check = GitDiffCheck(self.diff)
            diff_ok = diff_check.ok

        self.ok = email_ok and msg_ok and diff_ok

        if Verbose.level == Verbose.ONELINE:
            if self.ok:
                result = 'ok'
            else:
                # Name every failed part, so the summary is never a bare
                # 'bad ' when only the author email failed.
                result = list()
                if not email_ok:
                    result.append('author email')
                if not msg_ok:
                    result.append('commit message')
                if not diff_ok:
                    result.append('diff content')
                result = 'bad ' + ' and '.join(result)
            print(name, result)

    git_diff_re = re.compile(r'''
                                 ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                             ''',
                             re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    # Splits the text in front of the diff into the commit message and the
    # diffstat section that starts at a '---' line.
    stat_re = \
        re.compile(r'''
                       (?P<commit_message> [\s\S\r\n]* )
                       (?P<stat>
                           ^ --- $ [\r\n]+
                           (?: ^ \s+ .+ \s+ \| \s+ \d+ \s+ \+* \-*
                               $ [\r\n]+ )+
                           [\s\S\r\n]+
                       )
                   ''',
                   re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    subject_prefix_re = \
        re.compile(r'''^
                       \s* (\[
                        [^\[\]]* # Allow all non-brackets
                       \])* \s*
                   ''',
                   re.VERBOSE)

    def find_patch_pieces(self):
        """Split self.patch into its parts.

        Sets commit_msg, stat, commit_subject, commit_prefix, diff and
        author_email.  A part that is not present is left as None.
        """
        if sys.version_info < (3, 0):
            patch = self.patch.encode('ascii', 'ignore')
        else:
            patch = self.patch

        self.commit_msg = None
        self.stat = None
        self.commit_subject = None
        self.commit_prefix = None
        self.diff = None
        # Initialised here because the constructor reads it even when the
        # input is a bare diff without any email header.
        self.author_email = None

        if patch.startswith('diff --git'):
            self.diff = patch
            return

        pmail = email.message_from_string(patch)
        parts = list(pmail.walk())
        assert(len(parts) == 1)
        assert(parts[0].get_content_type() == 'text/plain')
        content = parts[0].get_payload(decode=True).decode('utf-8', 'ignore')

        # Everything from the first 'diff --git' line on is the diff.
        mo = self.git_diff_re.search(content)
        if mo is not None:
            self.diff = content[mo.start():]
            content = content[:mo.start()]

        mo = self.stat_re.search(content)
        if mo is None:
            self.commit_msg = content
        else:
            self.stat = mo.group('stat')
            self.commit_msg = mo.group('commit_message')

        #
        # Parse subject line from email header.  The subject line may be
        # composed of multiple parts with different encodings.  Decode and
        # combine all the parts to produce a single string with the contents of
        # the decoded subject line.  A missing header yields an empty subject.
        #
        raw_subject = pmail.get('subject')
        if raw_subject is None:
            raw_subject = ''
        subject = ''
        for (part, encoding) in email.header.decode_header(raw_subject):
            if isinstance(part, bytes):
                try:
                    if encoding:
                        part = part.decode(encoding)
                    else:
                        part = part.decode()
                except (LookupError, UnicodeDecodeError):
                    # Unknown charset or undecodable bytes: keep what can
                    # be decoded so the parts can still be concatenated.
                    part = part.decode('utf-8', 'ignore')
            subject = subject + part

        self.commit_subject = subject.replace('\r\n', '')
        self.commit_subject = self.commit_subject.replace('\n', '')
        # Drop leading bracketed prefixes such as '[PATCH 1/2]'.
        self.commit_subject = self.subject_prefix_re.sub('', self.commit_subject, 1)

        self.author_email = pmail['from']
class CheckGitCommits:
    """Reads patches from git based on the specified git revision range.

    The patches are read from git, and then checked.
    """

    def __init__(self, rev_spec, max_count):
        """Check every commit selected by *rev_spec*.

        rev_spec: revision or revision range handed to 'git rev-list'.
        max_count: upper bound for the number of commits, or None.
        Afterwards ``ok`` is False when any commit failed a check.
        """
        dec_files = self.read_dec_files_from_git()
        commits = self.read_commit_list_from_git(rev_spec, max_count)
        self.ok = True
        blank_line = False
        for commit in commits:
            if Verbose.level > Verbose.ONELINE:
                # Separate the reports of consecutive commits.
                if blank_line:
                    print()
                else:
                    blank_line = True
                print('Checking git commit:', commit)
            committer = self.read_committer_email_address_from_git(commit)
            self.ok &= EmailAddressCheck(committer, 'Committer').ok
            patch = self.read_patch_from_git(commit)
            check_patch = CheckOnePatch(commit, patch)
            self.ok &= check_patch.ok
            # The multi package check is downgraded to a warning when
            # either the commit message or the command line asks for it.
            ignore_multi_package = check_patch.ignore_multi_package
            if PatchCheckConf.ignore_multi_package:
                ignore_multi_package = True
            prefix = 'WARNING: ' if ignore_multi_package else ''
            check_parent = self.check_parent_packages (dec_files, commit, prefix)
            if not ignore_multi_package:
                self.ok &= check_parent

        if not commits:
            print("Couldn't find commit matching: '{}'".format(rev_spec))

    def check_parent_packages(self, dec_files, commit, prefix):
        """Return False when *commit* touches more than one package.

        Added/modified files and deleted files are examined separately;
        each report line is prefixed with *prefix*.
        """
        ok = True
        modified = self.get_parent_packages (dec_files, commit, 'AM')
        if len (modified) > 1:
            print("{}The commit adds/modifies files in multiple packages:".format(prefix))
            print(" *", '\n * '.join(modified))
            ok = False
        deleted = self.get_parent_packages (dec_files, commit, 'D')
        if len (deleted) > 1:
            print("{}The commit deletes files from multiple packages:".format(prefix))
            print(" *", '\n * '.join(deleted))
            ok = False
        return ok

    @staticmethod
    def _package_owns(dec_file, path):
        """Tell whether *path* lies below the directory of *dec_file*.

        Both are '/' separated paths relative to the repository root, as
        printed by git.  A .dec file located in the root owns nothing.
        """
        package_dir = os.path.dirname(dec_file)
        if not package_dir:
            return False
        return path.startswith(package_dir + '/')

    def get_parent_packages(self, dec_files, commit, filter):
        """List the packages owning the files *commit* changed.

        dec_files: paths of all .dec files in the repository.
        filter: value for the --diff-filter option of 'git diff-tree'.
        A package is named by the path of its .dec file; files outside
        every package are represented by their top level directory.
        """
        filelist = self.read_files_modified_from_git (commit, filter)
        parents = set()
        for file in filelist:
            dec_found = False
            for dec_file in dec_files:
                # Sharing only a leading path component with a .dec file
                # is not enough; the file has to be inside its directory.
                if self._package_owns(dec_file, file):
                    dec_found = True
                    parents.add(dec_file)
            if not dec_found and os.path.dirname (file):
                # No DEC file found and file is in a subdir
                # Covers BaseTools, .github, .azurepipelines, .pytool
                parents.add(file.split('/')[0])
        return list(parents)

    def read_dec_files_from_git(self):
        """Return the .dec files listed by 'git ls-files *.dec'."""
        out = self.run_git('ls-files', '*.dec')
        # run_git() returns None when git printed nothing or failed.
        return out.split() if out else []

    def read_files_modified_from_git(self, commit, filter):
        """Return the files of *commit* selected by --diff-filter=*filter*."""
        # run git diff-tree --no-commit-id --name-only -r
        out = self.run_git('diff-tree', '--no-commit-id', '--name-only',
                           '--diff-filter=' + filter, '-r', commit)
        return out.split() if out else []

    def read_commit_list_from_git(self, rev_spec, max_count):
        """Return the abbreviated commit ids selected by *rev_spec*."""
        cmd = [ 'rev-list', '--abbrev-commit', '--no-walk' ]
        if max_count is not None:
            cmd.append('--max-count=' + str(max_count))
        cmd.append(rev_spec)
        out = self.run_git(*cmd)
        return out.split() if out else []

    def read_patch_from_git(self, commit):
        """Return *commit* as an email formatted patch, or None."""
        return self.run_git('show', '--pretty=email', '--no-textconv',
                            '--no-use-mailmap', commit)

    def read_committer_email_address_from_git(self, commit):
        """Return 'Name <address>' of the committer of *commit*, or None."""
        return self.run_git('show', '--pretty=%cn <%ce>', '--no-patch',
                            '--no-use-mailmap', commit)

    def run_git(self, *args):
        """Run git with *args* and return its decoded output.

        stderr is merged into stdout.  Returns None when there is no
        output or when the output starts with 'fatal'.
        """
        cmd = [ 'git' ]
        cmd += args
        p = subprocess.Popen(cmd,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.STDOUT)
        output = p.communicate()[0]
        if not output or output.startswith(b"fatal"):
            return None
        return output.decode('utf-8', 'ignore')
class CheckOnePatchFile:
    """Performs a patch check for a single file.

    stdin is used when the filename is '-'.
    """

    def __init__(self, patch_filename):
        """Read the patch from *patch_filename* and check it.

        Afterwards ``ok`` holds the result of the check.
        """
        if patch_filename == '-':
            patch = sys.stdin.read()
            patch_filename = 'stdin'
        else:
            # The context manager closes the file even when reading or
            # decoding raises.
            with open(patch_filename, 'rb') as f:
                patch = f.read().decode('utf-8', 'ignore')
        if Verbose.level > Verbose.ONELINE:
            print('Checking patch file:', patch_filename)
        self.ok = CheckOnePatch(patch_filename, patch).ok
class CheckOneArg:
    """Performs a patch check for a single command line argument.

    The argument will be handed off to a file or git-commit based
    checker.
    """

    def __init__(self, param, max_count=None):
        """Check *param*; ``ok`` holds the result of the chosen checker.

        param: '-' for stdin, the path of an existing patch file, or
            otherwise a git revision specification.
        max_count: commit limit, used only by the git based checker.
        """
        self.ok = True

        names_patch_file = param == '-' or os.path.exists(param)
        if not names_patch_file:
            checker = CheckGitCommits(param, max_count)
        else:
            checker = CheckOnePatchFile(param)

        self.ok = checker.ok
class PatchCheckApp:
    """Checks patches based on the command line arguments."""

    def __init__(self):
        """Parse the command line, check every argument and set retval.

        ``retval`` is 0 when all checks passed and -1 otherwise.
        """
        self.parse_options()
        patches = self.args.patches

        if len(patches) == 0:
            patches = [ 'HEAD' ]

        self.ok = True
        self.count = None
        for patch in patches:
            self.process_one_arg(patch)

        # A trailing '-N' with no revision after it applies to HEAD.
        if self.count is not None:
            self.process_one_arg('HEAD')

        if self.ok:
            self.retval = 0
        else:
            self.retval = -1

    def process_one_arg(self, arg):
        """Handle one positional argument.

        An argument of the form '-N' (N an integer) is stored as the commit
        limit for the next argument.  Anything else is checked, and the
        limit is cleared afterwards.
        """
        if len(arg) >= 2 and arg[0] == '-':
            try:
                self.count = int(arg[1:])
                return
            except ValueError:
                # Not a number: treat it as a regular argument.
                pass
        self.ok &= CheckOneArg(arg, self.count).ok
        self.count = None

    def parse_options(self):
        """Parse sys.argv into self.args and apply the global settings."""
        parser = argparse.ArgumentParser(description=__copyright__)
        parser.add_argument('--version', action='version',
                            version='%(prog)s ' + VersionNumber)
        parser.add_argument('patches', nargs='*',
                            help='[patch file | git rev list]')
        # Only the two verbosity options exclude each other.
        group = parser.add_mutually_exclusive_group()
        group.add_argument("--oneline",
                           action="store_true",
                           help="Print one result per line")
        group.add_argument("--silent",
                           action="store_true",
                           help="Print nothing")
        # The ignore options are independent switches, so they may be
        # combined with each other and with a verbosity option.
        parser.add_argument("--ignore-change-id",
                            action="store_true",
                            help="Ignore the presence of 'Change-Id:' tags in commit message")
        parser.add_argument("--ignore-multi-package",
                            action="store_true",
                            help="Ignore if commit modifies files in multiple packages")
        self.args = parser.parse_args()
        if self.args.oneline:
            Verbose.level = Verbose.ONELINE
        if self.args.silent:
            Verbose.level = Verbose.SILENT
        if self.args.ignore_change_id:
            PatchCheckConf.ignore_change_id = True
        if self.args.ignore_multi_package:
            PatchCheckConf.ignore_multi_package = True
# Script entry point: run the checks and hand the application's result
# value to the interpreter as the exit status.
if __name__ == "__main__":
    app = PatchCheckApp()
    sys.exit(app.retval)