Skip to content

Commit 58d035a

Browse files
committed
🎉 [pre-commit-hook] Automatically update the baseline
When no secrets are present, that is.

(pre_commit_hook.py)
- Do not raise an exception when the baseline is out of date.
- Replace the baseline's `version`/`plugins` with the current ones.
- Replaced `raise_exception_if_baseline_version_is_outdated` with an if statement.
- Renamed `raise_exception_if_baseline_file_is_not_up_to_date` to `raise_exception_if_baseline_file_is_unstaged`.
- Improved a comment.

(core/secrets_collection.py)
- Made `version` no longer required for baselines.

(tests/pre_commit_hook_test.py)
- Replaced `test_fails_if_baseline_version_is_outdated` with `test_that_baseline_gets_updated`.

Clean up:
- (core/usage.py) Turned a static list into a tuple.
- (core/baseline.py) Made the `initialize()` docstring accurate.
- Renamed `update_baseline_with_removed_secrets` to `trim_baseline_of_removed_secrets`.
- Renamed `git_files` to `files_to_scan`.
- Fixed a typo (propogate -> propagate).
1 parent 7e8d909 commit 58d035a

File tree

6 files changed

+103
-117
lines changed

6 files changed

+103
-117
lines changed

detect_secrets/core/baseline.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -8,39 +8,47 @@
88
from detect_secrets.core.secrets_collection import SecretsCollection
99

1010

11-
def initialize(plugins, exclude_regex=None, rootdir='.', scan_all_files=False):
12-
"""Scans the entire codebase for high entropy strings, and returns a
11+
def initialize(
12+
plugins,
13+
exclude_regex=None,
14+
path='.',
15+
scan_all_files=False,
16+
):
17+
"""Scans the entire codebase for secrets, and returns a
1318
SecretsCollection object.
1419
1520
:type plugins: tuple of detect_secrets.plugins.base.BasePlugin
1621
:param plugins: rules to initialize the SecretsCollection with.
1722
1823
:type exclude_regex: str|None
19-
:type rootdir: str
24+
:type path: str
25+
:type scan_all_files: bool
2026
2127
:rtype: SecretsCollection
2228
"""
2329
output = SecretsCollection(plugins, exclude_regex)
2430

25-
if os.path.isfile(rootdir):
31+
if os.path.isfile(path):
2632
# This option allows for much easier adhoc usage.
27-
git_files = [rootdir]
33+
files_to_scan = [path]
2834
elif scan_all_files:
29-
git_files = _get_files_recursively(rootdir)
35+
files_to_scan = _get_files_recursively(path)
3036
else:
31-
git_files = _get_git_tracked_files(rootdir)
37+
files_to_scan = _get_git_tracked_files(path)
3238

33-
if not git_files:
39+
if not files_to_scan:
3440
return output
3541

3642
if exclude_regex:
3743
regex = re.compile(exclude_regex, re.IGNORECASE)
38-
git_files = filter(
39-
lambda x: not regex.search(x),
40-
git_files,
44+
files_to_scan = filter(
45+
lambda file: (
46+
not regex.search(file)
47+
),
48+
files_to_scan,
4149
)
4250

43-
for file in git_files:
51+
for file in files_to_scan:
4452
output.scan_file(file)
4553

4654
return output
@@ -86,7 +94,7 @@ def get_secrets_not_in_baseline(results, baseline):
8694
return new_secrets
8795

8896

89-
def update_baseline_with_removed_secrets(results, baseline, filelist):
97+
def trim_baseline_of_removed_secrets(results, baseline, filelist):
9098
"""
9199
NOTE: filelist is not a comprehensive list of all files in the repo
92100
(because we can't be sure whether --all-files is passed in as a
@@ -200,7 +208,7 @@ def merge_results(old_results, new_results):
200208
continue
201209

202210
old_secret = old_secrets_mapping[new_secret['hashed_secret']]
203-
# Only propogate 'is_secret' if it's not already there
211+
# Only propagate 'is_secret' if it's not already there
204212
if 'is_secret' in old_secret and 'is_secret' not in new_secret:
205213
new_secret['is_secret'] = old_secret['is_secret']
206214

detect_secrets/core/secrets_collection.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ def _load_baseline_from_dict(cls, data):
6666
'exclude_regex',
6767
'plugins_used',
6868
'results',
69-
'version',
7069
)):
7170
raise IOError
7271

@@ -95,7 +94,11 @@ def _load_baseline_from_dict(cls, data):
9594
secret.secret_hash = item['hashed_secret']
9695
result.data[filename][secret] = secret
9796

98-
result.version = data['version']
97+
result.version = (
98+
data['version']
99+
if 'version' in data
100+
else '0.0.0'
101+
)
99102

100103
return result
101104

detect_secrets/core/usage.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def add_console_use_arguments(self):
3232
dest='action',
3333
)
3434

35-
for action_parser in [ScanOptions, AuditOptions]:
35+
for action_parser in (ScanOptions, AuditOptions):
3636
action_parser(subparser).add_arguments()
3737

3838
return self
@@ -62,7 +62,11 @@ def _add_verbosity_argument(self):
6262
return self
6363

6464
def _add_filenames_argument(self):
65-
self.parser.add_argument('filenames', nargs='*', help='Filenames to check')
65+
self.parser.add_argument(
66+
'filenames',
67+
nargs='*',
68+
help='Filenames to check',
69+
)
6670
return self
6771

6872
def _add_set_baseline_argument(self):

detect_secrets/pre_commit_hook.py

Lines changed: 19 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
from __future__ import absolute_import
22

3-
import json
43
import subprocess
54
import sys
65
import textwrap
76

87
from detect_secrets import VERSION
98
from detect_secrets.core.baseline import format_baseline_for_output
109
from detect_secrets.core.baseline import get_secrets_not_in_baseline
11-
from detect_secrets.core.baseline import update_baseline_with_removed_secrets
10+
from detect_secrets.core.baseline import trim_baseline_of_removed_secrets
1211
from detect_secrets.core.log import get_logger
1312
from detect_secrets.core.secrets_collection import SecretsCollection
1413
from detect_secrets.core.usage import ParserBuilder
@@ -36,7 +35,8 @@ def main(argv=None):
3635
# Error logs handled within logic.
3736
return 1
3837

39-
results = find_secrets_in_files(args)
38+
plugins = initialize.from_parser_builder(args.plugins)
39+
results = find_secrets_in_files(args, plugins)
4040
if baseline_collection:
4141
original_results = results
4242
results = get_secrets_not_in_baseline(
@@ -52,12 +52,18 @@ def main(argv=None):
5252
return 0
5353

5454
# Only attempt baseline modifications if we don't find any new secrets
55-
successful_update = update_baseline_with_removed_secrets(
55+
baseline_modified = trim_baseline_of_removed_secrets(
5656
original_results,
5757
baseline_collection,
5858
args.filenames,
5959
)
60-
if successful_update:
60+
61+
if VERSION != baseline_collection.version:
62+
baseline_collection.plugins = plugins
63+
baseline_collection.version = VERSION
64+
baseline_modified = True
65+
66+
if baseline_modified:
6167
_write_to_baseline_file(
6268
args.baseline[0],
6369
baseline_collection.format_for_baseline_output(),
@@ -87,31 +93,13 @@ def get_baseline(baseline_filename):
8793
if not baseline_filename:
8894
return
8995

90-
raise_exception_if_baseline_file_is_not_up_to_date(baseline_filename)
96+
raise_exception_if_baseline_file_is_unstaged(baseline_filename)
9197

92-
baseline_string = _get_baseline_string_from_file(baseline_filename)
93-
baseline_version = json.loads(baseline_string).get('version')
94-
95-
try:
96-
raise_exception_if_baseline_version_is_outdated(
97-
baseline_version,
98-
)
99-
except ValueError:
100-
log.error(
101-
'The supplied baseline may be incompatible with the current\n'
102-
'version of detect-secrets. Please recreate your baseline to\n'
103-
'avoid potential mis-configurations.\n\n'
104-
'$ detect-secrets scan --update %s\n\n'
105-
'Current Version: %s\n'
106-
'Baseline Version: %s',
98+
return SecretsCollection.load_baseline_from_string(
99+
_get_baseline_string_from_file(
107100
baseline_filename,
108-
VERSION,
109-
baseline_version if baseline_version else '0.0.0',
110-
)
111-
112-
raise
113-
114-
return SecretsCollection.load_baseline_from_string(baseline_string)
101+
),
102+
)
115103

116104

117105
def _get_baseline_string_from_file(filename): # pragma: no cover
@@ -130,7 +118,7 @@ def _get_baseline_string_from_file(filename): # pragma: no cover
130118
raise
131119

132120

133-
def raise_exception_if_baseline_file_is_not_up_to_date(filename):
121+
def raise_exception_if_baseline_file_is_unstaged(filename):
134122
"""We want to make sure that if there are changes to the baseline
135123
file, they will be included in the commit. This way, we can keep
136124
our baselines up-to-date.
@@ -161,44 +149,12 @@ def raise_exception_if_baseline_file_is_not_up_to_date(filename):
161149
raise ValueError
162150

163151

164-
def raise_exception_if_baseline_version_is_outdated(version):
165-
"""
166-
Version changes may cause breaking changes with past baselines.
167-
Due to this, we want to make sure that the version that the
168-
baseline was created with is compatible with the current version
169-
of the scanner.
170-
171-
We use semantic versioning, and check for bumps in the MINOR
172-
version (a good compromise, so we can release patches for other
173-
non-baseline-related issues, without having all our users
174-
recreate their baselines again).
175-
176-
:type version: str|None
177-
:param version: version of baseline
178-
:raises: ValueError
179-
"""
180-
if not version:
181-
# Baselines created before this change, so by definition,
182-
# would be outdated.
183-
raise ValueError
184-
185-
baseline_version = version.split('.')
186-
current_version = VERSION.split('.')
187-
188-
if int(current_version[0]) > int(baseline_version[0]):
189-
raise ValueError
190-
elif current_version[0] == baseline_version[0] and \
191-
int(current_version[1]) > int(baseline_version[1]):
192-
raise ValueError
193-
194-
195-
def find_secrets_in_files(args):
196-
plugins = initialize.from_parser_builder(args.plugins)
152+
def find_secrets_in_files(args, plugins):
197153
collection = SecretsCollection(plugins)
198154

199155
for filename in args.filenames:
156+
# Don't scan the baseline file
200157
if filename == args.baseline[0]:
201-
# Obviously, don't detect the baseline file
202158
continue
203159

204160
collection.scan_file(filename)

tests/core/baseline_test.py

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from detect_secrets.core.baseline import get_secrets_not_in_baseline
1212
from detect_secrets.core.baseline import merge_baseline
1313
from detect_secrets.core.baseline import merge_results
14-
from detect_secrets.core.baseline import update_baseline_with_removed_secrets
14+
from detect_secrets.core.baseline import trim_baseline_of_removed_secrets
1515
from detect_secrets.core.potential_secret import PotentialSecret
1616
from detect_secrets.plugins.high_entropy_strings import Base64HighEntropyString
1717
from detect_secrets.plugins.high_entropy_strings import HexHighEntropyString
@@ -31,28 +31,28 @@ def setup(self):
3131

3232
def get_results(
3333
self,
34-
rootdir='./test_data/files',
34+
path='./test_data/files',
3535
exclude_regex=None,
3636
scan_all_files=False,
3737
):
3838
return baseline.initialize(
3939
self.plugins,
40-
rootdir=rootdir,
40+
path=path,
4141
exclude_regex=exclude_regex,
4242
scan_all_files=scan_all_files,
4343
).json()
4444

4545
@pytest.mark.parametrize(
46-
'rootdir',
46+
'path',
4747
[
4848
'./test_data/files',
4949
5050
# Test relative paths
5151
'test_data/../test_data/files/tmp/..',
5252
],
5353
)
54-
def test_basic_usage(self, rootdir):
55-
results = self.get_results(rootdir=rootdir)
54+
def test_basic_usage(self, path):
55+
results = self.get_results(path=path)
5656

5757
assert len(results.keys()) == 2
5858
assert len(results['test_data/files/file_with_secrets.py']) == 1
@@ -82,7 +82,7 @@ def test_no_files_in_git_repo(self):
8282
),
8383
),
8484
):
85-
results = self.get_results(rootdir='will_be_mocked')
85+
results = self.get_results(path='will_be_mocked')
8686

8787
assert not results
8888

@@ -99,7 +99,7 @@ def test_single_non_tracked_git_file_should_work(self):
9999
assert len(results['will_be_mocked']) == 1
100100

101101
def test_scan_all_files(self):
102-
results = self.get_results(rootdir='test_data/files', scan_all_files=True)
102+
results = self.get_results(path='test_data/files', scan_all_files=True)
103103
assert len(results.keys()) == 2
104104

105105

@@ -229,7 +229,7 @@ def test_deleted_secret(self):
229229
},
230230
])
231231

232-
is_successful = update_baseline_with_removed_secrets(
232+
is_successful = trim_baseline_of_removed_secrets(
233233
new_findings,
234234
baseline,
235235
['filename'],
@@ -247,7 +247,7 @@ def test_deleted_secret_file(self):
247247
},
248248
])
249249

250-
is_successful = update_baseline_with_removed_secrets(
250+
is_successful = trim_baseline_of_removed_secrets(
251251
new_findings,
252252
baseline,
253253
[
@@ -272,7 +272,7 @@ def test_same_secret_new_location(self):
272272
},
273273
])
274274

275-
is_successful = update_baseline_with_removed_secrets(
275+
is_successful = trim_baseline_of_removed_secrets(
276276
new_findings,
277277
baseline,
278278
['filename'],
@@ -303,7 +303,7 @@ def test_no_baseline_modifications(self, results_dict, baseline_dict):
303303
new_findings = secrets_collection_factory([results_dict])
304304
baseline = secrets_collection_factory([baseline_dict])
305305

306-
assert not update_baseline_with_removed_secrets(
306+
assert not trim_baseline_of_removed_secrets(
307307
new_findings,
308308
baseline,
309309
['filename'],

0 commit comments

Comments
 (0)