Skip to content

Commit e1ad354

Browse files
authored
↪️ Merge pull request #229 from Yelp/cleanup_keyword_detector
[Keyword Plugin] Various accuracy improvements
2 parents cba0446 + 5de43e2 commit e1ad354

File tree

3 files changed

+243
-128
lines changed

3 files changed

+243
-128
lines changed
Lines changed: 35 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,38 @@
1+
import os
12
from enum import Enum
23

34

45
class FileType(Enum):
56
CLS = 0
6-
GO = 1
7-
JAVA = 2
8-
JAVASCRIPT = 3
9-
PHP = 4
10-
PYTHON = 5
11-
YAML = 6
12-
OTHER = 7
7+
EXAMPLE = 1
8+
GO = 2
9+
JAVA = 3
10+
JAVASCRIPT = 4
11+
PHP = 5
12+
OBJECTIVE_C = 6
13+
PYTHON = 7
14+
SWIFT = 8
15+
TERRAFORM = 9
16+
YAML = 10
17+
OTHER = 11
18+
19+
20+
EXTENSION_TO_FILETYPE = {
21+
'.cls': FileType.CLS,
22+
'.example': FileType.EXAMPLE,
23+
'.eyaml': FileType.YAML,
24+
'.go': FileType.GO,
25+
'.java': FileType.JAVA,
26+
'.js': FileType.JAVASCRIPT,
27+
'.m': FileType.OBJECTIVE_C,
28+
'.php': FileType.PHP,
29+
'.py': FileType.PYTHON,
30+
'.pyi': FileType.PYTHON,
31+
'.swift': FileType.SWIFT,
32+
'.tf': FileType.TERRAFORM,
33+
'.yaml': FileType.YAML,
34+
'.yml': FileType.YAML,
35+
}
1336

1437

1538
def determine_file_type(filename):
@@ -18,22 +41,8 @@ def determine_file_type(filename):
1841
1942
:rtype: FileType
2043
"""
21-
if filename.endswith('.cls'):
22-
return FileType.CLS
23-
elif filename.endswith('.go'):
24-
return FileType.GO
25-
elif filename.endswith('.java'):
26-
return FileType.JAVA
27-
elif filename.endswith('.js'):
28-
return FileType.JAVASCRIPT
29-
elif filename.endswith('.php'):
30-
return FileType.PHP
31-
elif filename.endswith('.py'):
32-
return FileType.PYTHON
33-
elif (
34-
filename.endswith(
35-
('.eyaml', '.yaml', '.yml'),
36-
)
37-
):
38-
return FileType.YAML
39-
return FileType.OTHER
44+
_, file_extension = os.path.splitext(filename)
45+
return EXTENSION_TO_FILETYPE.get(
46+
file_extension,
47+
FileType.OTHER,
48+
)

detect_secrets/plugins/keyword.py

Lines changed: 75 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -66,8 +66,9 @@
6666
"'this",
6767
'(nsstring',
6868
'-default}',
69-
'/etc/passwd:ro',
7069
'::',
70+
'<%=',
71+
'<?php',
7172
'<a',
7273
'<aws_secret_access_key>',
7374
'<input',
@@ -80,53 +81,81 @@
8081
"\\k.*'",
8182
'`cat',
8283
'`grep',
84+
'`sudo',
8385
'account_password',
86+
'api_key',
87+
'disable',
8488
'dummy_secret',
8589
'dummy_value',
8690
'false',
8791
'false):',
8892
'false,',
8993
'false;',
94+
'login_password',
9095
'none',
9196
'none,',
9297
'none}',
9398
'not',
99+
'not_real_key',
94100
'null',
95101
'null,',
96102
'null.*"',
97103
"null.*'",
98104
'null;',
105+
'pass',
106+
'pass)',
99107
'password',
100108
'password)',
109+
'password))',
101110
'password,',
102111
'password},',
103112
'prompt',
104113
'redacted',
114+
'secret',
105115
'some_key',
116+
'str',
106117
'str_to_sign',
118+
'string',
119+
'string)',
107120
'string,',
108121
'string;',
109122
'string?',
123+
'string?)',
110124
'string}',
111125
'string}}',
126+
'test',
112127
'test-access-key',
113128
'thisisnottherealsecret',
114129
'todo',
115130
'true',
116131
'true):',
117132
'true,',
118133
'true;',
134+
'undef',
135+
'undef,',
119136
'{',
137+
'{{',
120138
}
121-
QUOTE = r'[\'"]'
122-
# includes ], ', " as closing
139+
# Matches up to two closing characters, each one of ], ' or "
123140
CLOSING = r'[]\'"]{0,2}'
124-
# non-greedy match
141+
DENYLIST_REGEX = r'|'.join(DENYLIST)
142+
# Non-greedy match
125143
OPTIONAL_WHITESPACE = r'\s*?'
126144
OPTIONAL_NON_WHITESPACE = r'[^\s]*?'
145+
QUOTE = r'[\'"]'
127146
SECRET = r'[^\s]+'
128-
DENYLIST_REGEX = r'|'.join(DENYLIST)
147+
SQUARE_BRACKETS = r'(\[\])'
129148

149+
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX = re.compile(
150+
# e.g. my_password := "bar" or my_password := bar (the "=" is optional, so a bare ":" also matches)
151+
r'({denylist})({closing})?{whitespace}:=?{whitespace}({quote}?)({secret})(\3)'.format(
152+
denylist=DENYLIST_REGEX,
153+
closing=CLOSING,
154+
quote=QUOTE,
155+
whitespace=OPTIONAL_WHITESPACE,
156+
secret=SECRET,
157+
),
158+
)
130159
FOLLOWED_BY_COLON_REGEX = re.compile(
131160
# e.g. api_key: foo
132161
r'({denylist})({closing})?:{whitespace}({quote}?)({secret})(\3)'.format(
@@ -147,6 +176,17 @@
147176
secret=SECRET,
148177
),
149178
)
179+
FOLLOWED_BY_EQUAL_SIGNS_OPTIONAL_BRACKETS_OPTIONAL_AT_SIGN_QUOTES_REQUIRED_REGEX = re.compile(
180+
# e.g. my_password = "bar"
181+
# e.g. my_password = @"bar"
182+
# e.g. my_password[] = "bar";
183+
r'({denylist})({square_brackets})?{optional_whitespace}={optional_whitespace}(@)?(")({secret})(\5)'.format( # noqa: E501
184+
denylist=DENYLIST_REGEX,
185+
square_brackets=SQUARE_BRACKETS,
186+
optional_whitespace=OPTIONAL_WHITESPACE,
187+
secret=SECRET,
188+
),
189+
)
150190
FOLLOWED_BY_EQUAL_SIGNS_REGEX = re.compile(
151191
# e.g. my_password = bar
152192
r'({denylist})({closing})?{whitespace}={whitespace}({quote}?)({secret})(\3)'.format(
@@ -178,35 +218,31 @@
178218
secret=SECRET,
179219
),
180220
)
181-
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX = re.compile(
182-
# e.g. my_password := "bar" or my_password := bar
183-
r'({denylist})({closing})?{whitespace}:=?{whitespace}({quote}?)({secret})(\3)'.format(
184-
denylist=DENYLIST_REGEX,
185-
closing=CLOSING,
186-
quote=QUOTE,
187-
whitespace=OPTIONAL_WHITESPACE,
188-
secret=SECRET,
189-
),
190-
)
191221
DENYLIST_REGEX_TO_GROUP = {
192222
FOLLOWED_BY_COLON_REGEX: 4,
193223
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
194224
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
195225
}
226+
GOLANG_DENYLIST_REGEX_TO_GROUP = {
227+
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX: 4,
228+
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
229+
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
230+
}
231+
OBJECTIVE_C_DENYLIST_REGEX_TO_GROUP = {
232+
FOLLOWED_BY_EQUAL_SIGNS_OPTIONAL_BRACKETS_OPTIONAL_AT_SIGN_QUOTES_REQUIRED_REGEX: 6,
233+
}
196234
QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP = {
197235
FOLLOWED_BY_COLON_QUOTES_REQUIRED_REGEX: 5,
198236
FOLLOWED_BY_EQUAL_SIGNS_QUOTES_REQUIRED_REGEX: 4,
199237
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
200238
}
201-
GOLANG_DENYLIST_REGEX_TO_GROUP = {
202-
FOLLOWED_BY_EQUAL_SIGNS_REGEX: 4,
203-
FOLLOWED_BY_QUOTES_AND_SEMICOLON_REGEX: 3,
204-
FOLLOWED_BY_COLON_EQUAL_SIGNS_REGEX: 4,
205-
}
206239
QUOTES_REQUIRED_FILETYPES = {
207240
FileType.CLS,
208241
FileType.JAVA,
242+
FileType.JAVASCRIPT,
209243
FileType.PYTHON,
244+
FileType.SWIFT,
245+
FileType.TERRAFORM,
210246
}
211247

212248

@@ -257,6 +293,8 @@ def secret_generator(self, string, filetype):
257293
denylist_regex_to_group = QUOTES_REQUIRED_DENYLIST_REGEX_TO_GROUP
258294
elif filetype == FileType.GO:
259295
denylist_regex_to_group = GOLANG_DENYLIST_REGEX_TO_GROUP
296+
elif filetype == FileType.OBJECTIVE_C:
297+
denylist_regex_to_group = OBJECTIVE_C_DENYLIST_REGEX_TO_GROUP
260298
else:
261299
denylist_regex_to_group = DENYLIST_REGEX_TO_GROUP
262300

@@ -275,24 +313,27 @@ def secret_generator(self, string, filetype):
275313

276314
def probably_false_positive(lowered_secret, filetype):
277315
if (
278-
'fake' in lowered_secret
279-
or 'forgot' in lowered_secret
280-
or lowered_secret in FALSE_POSITIVES
281-
or (
282-
filetype == FileType.JAVASCRIPT
283-
and (
284-
lowered_secret.startswith('this.')
285-
or lowered_secret.startswith('fs.read')
286-
or lowered_secret.startswith('options.')
287-
or lowered_secret == 'new'
316+
any(
317+
false_positive in lowered_secret
318+
for false_positive in (
319+
'/etc/',
320+
'fake',
321+
'forgot',
288322
)
323+
) or lowered_secret in FALSE_POSITIVES
324+
# e.g. private_key "some/dir/that/is/not/a/secret"; (three or more slashes: treated as a path, not a secret)
325+
or lowered_secret.count('/') >= 3
326+
# e.g. "secret": "{secret}" (a value wrapped in braces is a template placeholder)
327+
or (
328+
lowered_secret[0] == '{'
329+
and lowered_secret[-1] == '}'
289330
) or (
290-
filetype == FileType.PHP
331+
filetype not in QUOTES_REQUIRED_FILETYPES
291332
and lowered_secret[0] == '$'
292333
) or (
293-
filetype == FileType.YAML
294-
and lowered_secret.startswith('{{')
295-
and lowered_secret.endswith('}}')
334+
filetype == FileType.EXAMPLE
335+
and lowered_secret[0] == '<'
336+
and lowered_secret[-1] == '>'
296337
)
297338
):
298339
return True

0 commit comments

Comments
 (0)