summaryrefslogtreecommitdiffstats
path: root/BaseTools/Plugin/CodeQL/analyze/analyze_filter.py
diff options
context:
space:
mode:
Diffstat (limited to 'BaseTools/Plugin/CodeQL/analyze/analyze_filter.py')
-rw-r--r--BaseTools/Plugin/CodeQL/analyze/analyze_filter.py184
1 files changed, 184 insertions, 0 deletions
diff --git a/BaseTools/Plugin/CodeQL/analyze/analyze_filter.py b/BaseTools/Plugin/CodeQL/analyze/analyze_filter.py
new file mode 100644
index 0000000000..f363dd378f
--- /dev/null
+++ b/BaseTools/Plugin/CodeQL/analyze/analyze_filter.py
@@ -0,0 +1,184 @@
+# @file analyze_filter.py
+#
+# Filters results in a SARIF file.
+#
+# Apache License
+# Version 2.0, January 2004
+# http://www.apache.org/licenses/
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This file has been altered from its original form. Based on code in:
+# https://github.com/advanced-security/filter-sarif
+#
+# It primarily contains modifications made to integrate with the CodeQL plugin.
+#
+# Specifically:
+# https://github.com/advanced-security/filter-sarif/blob/main/filter_sarif.py
+#
+# View the full and complete license as provided by that repository here:
+# https://github.com/advanced-security/filter-sarif/blob/main/LICENSE
+#
+# SPDX-License-Identifier: Apache-2.0
+##
+
+import json
+import logging
+import re
+from os import PathLike
+from typing import Iterable, List, Tuple
+
+from analyze.globber import match
+
+
def _match_path_and_rule(
        path: str,
        rule: str,
        patterns: Iterable[Tuple[bool, str, str]]) -> bool:
    """Decides whether a (path, rule) pair is included by the patterns.

    Every pattern is tried in order. Each pattern whose rule pattern matches
    `rule` and whose file pattern matches `path` overrides the current
    decision with its own sign, so the last matching pattern wins.

    Args:
        path (str): A file path string (e.g. the URI of a result location).
        rule (str): A rule identifier string.
        patterns (Iterable[Tuple[bool, str, str]]): Parsed patterns as
            (sign, file pattern, rule pattern) tuples.

    Returns:
        bool: The sign of the last pattern that matched both the path and the
              rule. True if no pattern matched (including when `patterns` is
              empty).
    """
    # Default to "included" so that an empty pattern set filters nothing.
    result = True
    for sign, file_pattern, rule_pattern in patterns:
        # match() is called as match(pattern, value); the rule is checked
        # first and the path is only checked when the rule matched.
        if match(rule_pattern, rule) and match(file_pattern, path):
            # Keep iterating: a later matching pattern overrides this one.
            result = sign
    return result
+
+
def _parse_pattern(line: str) -> Tuple[bool, str, str]:
    """Parses a single filter pattern line.

    A line has the form `[+|-]<file pattern>[:<rule pattern>]`. A leading '-'
    makes the pattern negative; a leading '+' (or no prefix) makes it
    positive. The first unescaped ':' separates the file pattern from the
    rule pattern. A backslash escapes a following '+', '-', '\\' or ':' so
    that the character is taken literally; a backslash before any other
    character (or at the end of the line) is kept as-is.

    Args:
        line (str): The line string that contains the pattern.

    Returns:
        Tuple[bool, str, str]: The parsed sign (True for positive, False for
                               negative), file pattern, and rule pattern. The
                               rule pattern is '**' when the line does not
                               specify one.

    Raises:
        ValueError: If the line contains more than one unescaped separator.
    """
    sep_char = ':'
    esc_char = '\\'
    escapable = ('+', '-', esc_char, sep_char)

    # Inclusion or exclusion pattern? Only the very first character is
    # interpreted as a sign; an escaped sign ("\\-") is handled below.
    sign = True
    body = line
    if line.startswith('-'):
        sign = False
        body = line[1:]
    elif line.startswith('+'):
        body = line[1:]

    # Collect characters in lists and join once at the end instead of
    # repeatedly concatenating strings.
    file_chars = []
    rule_chars = []
    seen_separator = False

    i = 0
    while i < len(body):
        c = body[i]
        i += 1
        if c == sep_char:
            if seen_separator:
                raise ValueError(
                    'Invalid pattern: "' + line + '" Contains more than one '
                    'separator!')
            seen_separator = True
            continue
        if c == esc_char and i < len(body) and body[i] in escapable:
            # Consume the escaped character and emit it literally.
            c = body[i]
            i += 1
        if seen_separator:
            rule_chars.append(c)
        else:
            file_chars.append(c)

    file_pattern = ''.join(file_chars)
    # An omitted (or empty) rule pattern applies to every rule.
    rule_pattern = ''.join(rule_chars) or '**'

    return sign, file_pattern, rule_pattern
+
+
def filter_sarif(input_sarif: PathLike,
                 output_sarif: PathLike,
                 patterns: List[str],
                 split_lines: bool) -> None:
    """Filters a SARIF file with a given set of filter patterns.

    Each result location whose URI and rule id are excluded by the patterns
    is dropped. A result that had locations but loses all of them is dropped
    as well. Anything that cannot be matched is kept: results without
    locations, locations without a URI, and results for which no rule id can
    be determined.

    Args:
        input_sarif (PathLike): Input SARIF file path.
        output_sarif (PathLike): Output SARIF file path.
        patterns (List[str]): List of filter pattern strings.
        split_lines (bool): Whether each entry in `patterns` may contain
            several newline-separated patterns that need to be split.

    Raises:
        ValueError: If a pattern contains more than one unescaped separator
            (raised while parsing the patterns).
    """
    if split_lines:
        patterns = [
            line for p in patterns for line in re.split('\r?\n', p)]

    # Empty strings (e.g. blank lines after splitting) are skipped.
    parsed_patterns = [_parse_pattern(p) for p in patterns if p]

    logging.debug('Given patterns:')
    for sign, file_pattern, rule_pattern in parsed_patterns:
        logging.debug(
            'files: {file_pattern} rules: {rule_pattern} ({sign})'.format(
                file_pattern=file_pattern,
                rule_pattern=rule_pattern,
                sign='positive' if sign else 'negative'))

    # JSON interchange text is UTF-8; do not depend on the platform default
    # encoding when reading or writing the SARIF file.
    with open(input_sarif, 'r', encoding='utf-8') as f:
        sarif = json.load(f)

    for run in sarif.get('runs', []):
        if not run.get('results', []):
            continue

        new_results = []
        for result in run['results']:
            if not result.get('locations', []):
                # locations array doesn't exist or is empty, so we can't
                # match on anything. Therefore, we include the result in
                # the output.
                new_results.append(result)
                continue

            # The ruleId field is optional. Fall back to the id of the
            # "rule" reference object when it is absent.
            # TODO: The rule id may also have to be fetched from the rule
            #       metadata via the ruleIndex field.
            #       (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#rule-metadata)
            rule_id = result.get('ruleId')
            if rule_id is None:
                rule_id = result.get('rule', {}).get('id')

            new_locations = []
            for location in result['locations']:
                # TODO: The uri field is optional. We might have to
                #       fetch the actual uri from "artifacts" via
                #       "index"
                #       (see https://github.com/microsoft/sarif-tutorials/blob/main/docs/2-Basics.md#-linking-results-to-artifacts)
                uri = location.get(
                    'physicalLocation', {}).get(
                        'artifactLocation', {}).get(
                            'uri', None)

                # Without both a uri and a rule id there is nothing to
                # match against, so the location is kept.
                if (uri is None or rule_id is None or
                        _match_path_and_rule(
                            uri, rule_id, parsed_patterns)):
                    new_locations.append(location)

            result['locations'] = new_locations
            if new_locations:
                new_results.append(result)

        run['results'] = new_results

    with open(output_sarif, 'w', encoding='utf-8') as f:
        json.dump(sarif, f, indent=2)