summaryrefslogtreecommitdiffstats
path: root/BaseTools/Plugin/CodeQL/analyze/globber.py
blob: 5d45abaa1f3addf0cf8a97f50c0969a196165636 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# @file globber.py
#
# Provides glob pattern matching functionality for use by the CodeQL plugin.
#
# Copyright 2019 Jaakko Kangasharju
#
#            Apache License
#      Version 2.0, January 2004
#   http://www.apache.org/licenses/
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# This file has been altered from its original form. Based on code in:
#   https://github.com/advanced-security/filter-sarif
#
# Specifically:
#   https://github.com/advanced-security/filter-sarif/blob/main/filter_sarif.py
#
# It primarily contains modifications made to integrate with the CodeQL plugin.
#
# SPDX-License-Identifier: Apache-2.0
##

import re

# Validation patterns used to detect a '**' that is not a complete path
# component on its own.

# A '**' directly preceded by a character other than '/' or a backslash.
_double_star_after_invalid_regex = re.compile(
    r'[^/\\]\*\*'
)

# A '**' at the very start of the pattern followed by a character other
# than '/'.
_double_star_first_before_invalid_regex = re.compile(
    r'^\*\*[^/]'
)

# A '**' preceded by a character other than a backslash and followed by a
# character other than '/'.
_double_star_middle_before_invalid_regex = re.compile(
    r'[^\\]\*\*[^/]'
)


def _match_component(pattern_component, file_name_component):
    """Match a single pattern component against a single file name component.

    Neither argument is expected to contain a path separator. Within the
    pattern, '*' matches any (possibly empty) run of characters, '?' matches
    exactly one character, and a backslash makes the following character
    match literally. Any other character matches only itself.

    The match is performed iteratively, backtracking only to the most recent
    '*'. This keeps the work bounded by roughly
    len(pattern_component) * len(file_name_component) steps and avoids both
    the exponential blow-up and the per-character recursion of a naive
    recursive matcher when a pattern contains several '*' characters.

    Args:
        pattern_component (str): One '/'-delimited piece of a glob pattern.
        file_name_component (str): One piece of a file name.

    Returns:
        bool: True if the whole file name component is matched by the whole
              pattern component, False otherwise.
    """
    pattern_length = len(pattern_component)
    name_length = len(file_name_component)
    pattern_index = 0
    name_index = 0

    # Index of the most recently seen '*' in the pattern (-1 if none yet) and
    # the file name index up to which that '*' is currently assumed to match.
    star_index = -1
    star_name_index = 0

    while name_index < name_length:
        if pattern_index < pattern_length:
            token = pattern_component[pattern_index]
            if token == '*':
                # Start by letting the '*' match nothing; it is widened one
                # character at a time only when a later mismatch occurs.
                star_index = pattern_index
                star_name_index = name_index
                pattern_index += 1
                continue
            if token == '?':
                pattern_index += 1
                name_index += 1
                continue
            if token == '\\':
                # An escape consumes two pattern characters. A trailing
                # backslash with nothing after it can never match.
                if (pattern_index + 1 < pattern_length and
                        pattern_component[pattern_index + 1] ==
                        file_name_component[name_index]):
                    pattern_index += 2
                    name_index += 1
                    continue
            elif token == file_name_component[name_index]:
                pattern_index += 1
                name_index += 1
                continue

        # Mismatch (or pattern exhausted): widen the last '*' by one
        # character and retry from just after it, if there is one.
        if star_index < 0:
            return False
        star_name_index += 1
        name_index = star_name_index
        pattern_index = star_index + 1

    # The file name is fully consumed. What is left of the pattern must be
    # able to match the empty string: nothing at all, or a single '*'.
    return pattern_component[pattern_index:] in ('', '*')


def _match_components(pattern_components, file_name_components):
    """Match a sequence of pattern components against file name components.

    A pattern component that is exactly '**' matches zero or more whole file
    name components. Every other pattern component must match exactly one
    file name component, as decided by _match_component().

    The match is performed iteratively, backtracking only to the most recent
    '**', so patterns containing several '**' components do not cause
    exponential work and no list copies are made per step.

    Args:
        pattern_components (list): The glob pattern split on '/'.
        file_name_components (list): The file name split on its separators.

    Returns:
        bool: True if all file name components are matched by all pattern
              components, False otherwise.
    """
    pattern_count = len(pattern_components)
    file_count = len(file_name_components)
    pattern_index = 0
    file_index = 0

    # Index of the most recently seen '**' (-1 if none yet) and the file
    # name index up to which that '**' is currently assumed to match.
    star_index = -1
    star_file_index = 0

    while file_index < file_count:
        if pattern_index < pattern_count:
            if pattern_components[pattern_index] == '**':
                # Let '**' match no components at first; it is widened one
                # component at a time only when a later mismatch occurs.
                star_index = pattern_index
                star_file_index = file_index
                pattern_index += 1
                continue
            if _match_component(pattern_components[pattern_index],
                                file_name_components[file_index]):
                pattern_index += 1
                file_index += 1
                continue

        # Mismatch (or pattern exhausted): widen the last '**' by one
        # component and retry from just after it, if there is one.
        if star_index < 0:
            return False
        star_file_index += 1
        file_index = star_file_index
        pattern_index = star_index + 1

    # All file name components are consumed. The remaining pattern must be
    # empty or a single '**', which matches zero components.
    remaining = pattern_count - pattern_index
    if remaining == 0:
        return True
    return remaining == 1 and pattern_components[pattern_index] == '**'


def match(pattern: str, file_name: str) -> bool:
    """Match a glob pattern against a file name.

    Glob pattern matching is for file names, which do not need to exist as
    files on the file system.

    A file name is a sequence of directory names, possibly followed by the name
    of a file, with the components separated by a path separator. A glob
    pattern is similar, except it may contain special characters: A '?' matches
    any character in a name. A '*' matches any sequence of characters (possibly
    empty) in a name. Both of these match only within a single component, i.e.,
    they will not match a path separator. A component in a pattern may also be
    a literal '**', which matches zero or more components in the complete file
    name. A backslash '\\' in a pattern acts as an escape character, and
    indicates that the following character is to be matched literally, even if
    it is a special character.

    Trailing path separators are ignored on both the pattern and the file
    name, and consecutive '**' components in the pattern are treated as a
    single '**'.

    Args:
        pattern (str): The pattern to match. The path separator in patterns is
                       always '/'.
        file_name (str): The file name to match against. Both '/' and the
                         backslash are accepted as path separators in file
                         names.

    Returns:
        bool: True if the pattern matches, False otherwise.

    Raises:
        ValueError: If the pattern contains a '**' that is not alone between
                    path separators.
    """
    validators = (
        _double_star_after_invalid_regex,
        _double_star_first_before_invalid_regex,
        _double_star_middle_before_invalid_regex,
    )
    if any(regex.search(pattern) is not None for regex in validators):
        raise ValueError(
            '** in {} not alone between path separators'.format(pattern))

    pattern = pattern.rstrip('/')
    # File names are split on both '/' and the backslash below, so strip
    # either kind of trailing separator here to treat them consistently.
    file_name = file_name.rstrip('/\\')

    # Collapse runs of '**' components into one. This is done on the split
    # components rather than on the raw string so that a component which
    # merely ends in an escaped star followed by '*' is not mistaken for a
    # '**' component and merged with a following '**'.
    pattern_components = []
    for component in pattern.split('/'):
        if (component == '**' and pattern_components and
                pattern_components[-1] == '**'):
            continue
        pattern_components.append(component)

    # We split on '\' as well as '/' to support unix and windows-style paths
    file_name_components = re.split(r'[\\/]', file_name)

    return _match_components(pattern_components, file_name_components)