Skip to content

Commit 1506621

Browse files
Do not return empty strings in license data
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 6e14d8a commit 1506621

40 files changed

+682
-884
lines changed

docs/source/cli-reference/output-format.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ following options.
183183
"text_url": "http://fedoraproject.org/wiki/Licensing:MIT#Old_Style",
184184
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:mit-old-style",
185185
"spdx_license_key": null,
186-
"spdx_url": "",
186+
"spdx_url": null,
187187
"start_line": 9,
188188
"end_line": 15,
189189
"matched_rule": {

docs/source/cli-reference/synopsis.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ A sample JSON output for an individual file will look like::
234234
"text_url": "http://fedoraproject.org/wiki/Licensing:MIT#Old_Style",
235235
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:mit-old-style",
236236
"spdx_license_key": null,
237-
"spdx_url": "",
237+
"spdx_url": null,
238238
"start_line": 9,
239239
"end_line": 15,
240240
"matched_rule": {

src/licensedcode/plugin_license.py

Lines changed: 40 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
# See https://aboutcode.org for more information about nexB OSS projects.
88
#
99

10+
import logging
11+
import os
1012
import posixpath
1113
from functools import partial
1214

@@ -20,28 +22,22 @@
2022

2123
from scancode.api import SCANCODE_LICENSEDB_URL
2224

23-
TRACE = False
25+
TRACE = os.environ.get('SCANCODE_DEBUG_LICENSE_PLUGIN', False)
2426

2527

26-
def logger_debug(*args): pass
28+
def logger_debug(*args):
29+
pass
2730

2831

32+
logger = logging.getLogger(__name__)
33+
2934
if TRACE:
30-
use_print = True
31-
if use_print:
32-
prn = print
33-
else:
34-
import logging
35-
import sys
36-
logger = logging.getLogger(__name__)
37-
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
38-
logging.basicConfig(stream=sys.stdout)
39-
logger.setLevel(logging.DEBUG)
40-
prn = logger.debug
35+
import sys
36+
logging.basicConfig(stream=sys.stdout)
37+
logger.setLevel(logging.DEBUG)
4138

4239
def logger_debug(*args):
43-
return prn(' '.join(isinstance(a, str) and a or repr(a) for a in args))
44-
40+
return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))
4541

4642
@scan_impl
4743
class LicenseScanner(ScanPlugin):
@@ -156,28 +152,37 @@ def process_codebase(self, codebase, unknown_licenses, **kwargs):
156152
cle.extra_data['additional_license_plugins'] = cche.additional_license_plugins
157153
has_additional_licenses = True
158154

159-
if unknown_licenses:
160-
if codebase.has_single_resource:
161-
return
155+
if TRACE and has_additional_licenses:
156+
logger_debug(
157+
f'add_referenced_filenames_license_matches: additional_licenses',
158+
f'has_additional_licenses: {has_additional_licenses}\n',
159+
f'additional_license_directory: {cche.additional_license_directory}\n',
160+
f'additional_license_plugins : {cche.additional_license_plugins}'
161+
)
162162

163-
for resource in codebase.walk(topdown=False):
164-
# follow license references to other files
165-
if TRACE:
166-
license_expressions_before = list(resource.license_expressions)
163+
if codebase.has_single_resource and not codebase.root.is_file:
164+
return
167165

166+
modified = False
167+
for resource in codebase.walk(topdown=False):
168+
# follow license references to other files
169+
if TRACE:
170+
license_expressions_before = list(resource.license_expressions)
171+
172+
if unknown_licenses:
168173
modified = add_referenced_filenames_license_matches(resource, codebase)
169174

170-
if has_additional_licenses and resource.is_file and resource.license_detections:
171-
add_builtin_license_flag(resource, licenses)
175+
if has_additional_licenses and resource.is_file and resource.licenses:
176+
add_builtin_license_flag(resource, licenses)
172177

173-
if TRACE and modified:
174-
license_expressions_after = list(resource.license_expressions)
175-
logger_debug(
176-
f'add_referenced_filenames_license_matches: Modfied:',
177-
f'{resource.path} with license_expressions:\n'
178-
f'before: {license_expressions_before}\n'
179-
f'after : {license_expressions_after}'
180-
)
178+
if TRACE and modified:
179+
license_expressions_after = list(resource.license_expressions)
180+
logger_debug(
181+
f'add_referenced_filenames_license_matches: Modfied:',
182+
f'{resource.path} with license_expressions:\n'
183+
f'before: {license_expressions_before}\n'
184+
f'after : {license_expressions_after}'
185+
)
181186

182187

183188
def add_builtin_license_flag(resource, licenses):
@@ -186,22 +191,17 @@ def add_builtin_license_flag(resource, licenses):
186191
additional licenses present in the cache, either through an additional
187192
license directory or additional license plugins.
188193
"""
189-
for detection in resource.license_detections:
190-
matches = detection['matches']
191-
for match in matches:
192-
add_builtin_value(license_match=match, licenses=licenses)
193-
194-
for match in resource.license_clues:
194+
for match in resource.licenses:
195195
add_builtin_value(license_match=match, licenses=licenses)
196196

197197

198198
def add_builtin_value(license_match, licenses):
199199
license_key = license_match['key']
200200
lic = licenses.get(license_key)
201201
if lic.is_builtin:
202-
license_match['is_builtin'] = True
202+
license_match['matched_rule']['is_builtin'] = True
203203
else:
204-
license_match['is_builtin'] = False
204+
license_match['matched_rule']['is_builtin'] = False
205205

206206

207207
def add_referenced_filenames_license_matches(resource, codebase):

src/scancode/api.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -249,19 +249,17 @@ def _licenses_data_from_match(
249249
result['is_exception'] = lic.is_exception
250250
result['is_unknown'] = lic.is_unknown
251251
result['owner'] = lic.owner
252+
result['homepage_url'] = lic.homepage_url
253+
result['text_url'] = lic.text_urls[0] if lic.text_urls else None
252254
# if the license is not builtin these should all be None (null in JSON output)
253255
if lic.is_builtin:
254-
result['homepage_url'] = lic.homepage_url
255-
result['text_url'] = lic.text_urls[0] if lic.text_urls else ''
256256
result['reference_url'] = license_url_template.format(lic.key)
257257
result['scancode_text_url'] = SCANCODE_LICENSE_TEXT_URL.format(lic.key)
258258
result['scancode_data_url'] = SCANCODE_LICENSE_DATA_URL.format(lic.key)
259259
else:
260-
result['homepage_url'] = ''
261-
result['text_url'] = ''
262-
result['reference_url'] = ''
263-
result['scancode_text_url'] = ''
264-
result['scancode_data_url'] = ''
260+
result['reference_url'] = None
261+
result['scancode_text_url'] = None
262+
result['scancode_data_url'] = None
265263
spdx_key = lic.spdx_license_key
266264
result['spdx_license_key'] = spdx_key
267265

@@ -273,7 +271,7 @@ def _licenses_data_from_match(
273271
spdx_key = lic.spdx_license_key.rstrip('+')
274272
spdx_url = SPDX_LICENSE_URL.format(spdx_key)
275273
else:
276-
spdx_url = ''
274+
spdx_url = None
277275
result['spdx_url'] = spdx_url
278276
result['start_line'] = match.start_line
279277
result['end_line'] = match.end_line
@@ -293,7 +291,6 @@ def _licenses_data_from_match(
293291
matched_rule['matched_length'] = match.len()
294292
matched_rule['match_coverage'] = match.coverage()
295293
matched_rule['rule_relevance'] = match.rule.relevance
296-
# FIXME: for sanity this should always be included?????
297294
if include_text:
298295
result['matched_text'] = matched_text
299296
return detected_licenses

tests/formattedcode/data/csv/flatten_scan/full.json

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -258,11 +258,11 @@
258258
"short_name": "JBoss EULA",
259259
"category": "Proprietary Free",
260260
"owner": "JBoss Community",
261-
"homepage_url": "",
261+
"homepage_url": null,
262262
"text_url": "http://repository.jboss.org/licenses/jbossorg-eula.txt",
263263
"reference_url": "https://scancode-licensedb.aboutcode.org/jboss-eula",
264264
"spdx_license_key": "",
265-
"spdx_url": "",
265+
"spdx_url": null,
266266
"start_line": 3,
267267
"end_line": 108,
268268
"matched_rule": {
@@ -1296,10 +1296,10 @@
12961296
"category": "Public Domain",
12971297
"owner": "Unspecified",
12981298
"homepage_url": "http://www.linfo.org/publicdomain.html",
1299-
"text_url": "",
1299+
"text_url": null,
13001300
"reference_url": "https://scancode-licensedb.aboutcode.org/public-domain",
13011301
"spdx_license_key": "",
1302-
"spdx_url": "",
1302+
"spdx_url": null,
13031303
"start_line": 1649,
13041304
"end_line": 1649,
13051305
"matched_rule": {
@@ -1322,10 +1322,10 @@
13221322
"category": "Public Domain",
13231323
"owner": "Unspecified",
13241324
"homepage_url": "http://www.linfo.org/publicdomain.html",
1325-
"text_url": "",
1325+
"text_url": null,
13261326
"reference_url": "https://scancode-licensedb.aboutcode.org/public-domain",
13271327
"spdx_license_key": "",
1328-
"spdx_url": "",
1328+
"spdx_url": null,
13291329
"start_line": 1692,
13301330
"end_line": 1692,
13311331
"matched_rule": {
@@ -1478,10 +1478,10 @@
14781478
"category": "Permissive",
14791479
"owner": "OpenSSL",
14801480
"homepage_url": "http://openssl.org/source/license.html",
1481-
"text_url": "",
1481+
"text_url": null,
14821482
"reference_url": "https://scancode-licensedb.aboutcode.org/openssl",
14831483
"spdx_license_key": "",
1484-
"spdx_url": "",
1484+
"spdx_url": null,
14851485
"start_line": 4,
14861486
"end_line": 7,
14871487
"matched_rule": {
@@ -1555,10 +1555,10 @@
15551555
"category": "Permissive",
15561556
"owner": "OpenSSL",
15571557
"homepage_url": "http://openssl.org/source/license.html",
1558-
"text_url": "",
1558+
"text_url": null,
15591559
"reference_url": "https://scancode-licensedb.aboutcode.org/openssl",
15601560
"spdx_license_key": "",
1561-
"spdx_url": "",
1561+
"spdx_url": null,
15621562
"start_line": 4,
15631563
"end_line": 7,
15641564
"matched_rule": {
@@ -2562,11 +2562,11 @@
25622562
"short_name": "Ada linking exception to GPL 2.0 or later",
25632563
"category": "Copyleft Limited",
25642564
"owner": "Dmitriy Anisimkov",
2565-
"homepage_url": "",
2566-
"text_url": "",
2565+
"homepage_url": null,
2566+
"text_url": null,
25672567
"reference_url": "https://scancode-licensedb.aboutcode.org/ada-linking-exception",
25682568
"spdx_license_key": "",
2569-
"spdx_url": "",
2569+
"spdx_url": null,
25702570
"start_line": 20,
25712571
"end_line": 25,
25722572
"matched_rule": {
@@ -3268,11 +3268,11 @@
32683268
"short_name": "CMR License",
32693269
"category": "Permissive",
32703270
"owner": "CMR - Christian Michelsen Research AS",
3271-
"homepage_url": "",
3272-
"text_url": "",
3271+
"homepage_url": null,
3272+
"text_url": null,
32733273
"reference_url": "https://scancode-licensedb.aboutcode.org/cmr-no",
32743274
"spdx_license_key": "",
3275-
"spdx_url": "",
3275+
"spdx_url": null,
32763276
"start_line": 9,
32773277
"end_line": 15,
32783278
"matched_rule": {

tests/formattedcode/data/csv/flatten_scan/minimal.json

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333
"category": "Permissive",
3434
"owner": "OpenSSL",
3535
"homepage_url": "http://openssl.org/source/license.html",
36-
"text_url": "",
36+
"text_url": null,
3737
"reference_url": "https://scancode-licensedb.aboutcode.org/openssl",
3838
"spdx_license_key": "",
39-
"spdx_url": "",
39+
"spdx_url": null,
4040
"start_line": 4,
4141
"end_line": 7,
4242
"matched_rule": {
@@ -78,10 +78,10 @@
7878
"category": "Permissive",
7979
"owner": "OpenSSL",
8080
"homepage_url": "http://openssl.org/source/license.html",
81-
"text_url": "",
81+
"text_url": null,
8282
"reference_url": "https://scancode-licensedb.aboutcode.org/openssl",
8383
"spdx_license_key": "",
84-
"spdx_url": "",
84+
"spdx_url": null,
8585
"start_line": 4,
8686
"end_line": 7,
8787
"matched_rule": {

tests/formattedcode/data/csv/non-standard/identified.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@
6969
"license_choices_expression": null,
7070
"license_choices": [],
7171
"reference_notes": "",
72-
"homepage_url": "",
72+
"homepage_url": null,
7373
"notice_text": "",
7474
"components": [
7575
{

tests/formattedcode/data/reuse/vb.json

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -541,11 +541,11 @@
541541
"is_exception": false,
542542
"is_unknown": false,
543543
"owner": "JBoss Community",
544-
"homepage_url": "",
544+
"homepage_url": null,
545545
"text_url": "http://repository.jboss.org/licenses/jbossorg-eula.txt",
546546
"reference_url": "https://scancode-licensedb.aboutcode.org/jboss-eula",
547547
"spdx_license_key": "",
548-
"spdx_url": "",
548+
"spdx_url": null,
549549
"start_line": 3,
550550
"end_line": 108,
551551
"matched_rule": {
@@ -1557,10 +1557,10 @@
15571557
"is_unknown": false,
15581558
"owner": "Unspecified",
15591559
"homepage_url": "http://www.linfo.org/publicdomain.html",
1560-
"text_url": "",
1560+
"text_url": null,
15611561
"reference_url": "https://scancode-licensedb.aboutcode.org/public-domain",
15621562
"spdx_license_key": "",
1563-
"spdx_url": "",
1563+
"spdx_url": null,
15641564
"start_line": 1649,
15651565
"end_line": 1649,
15661566
"matched_rule": {
@@ -1585,10 +1585,10 @@
15851585
"is_unknown": false,
15861586
"owner": "Unspecified",
15871587
"homepage_url": "http://www.linfo.org/publicdomain.html",
1588-
"text_url": "",
1588+
"text_url": null,
15891589
"reference_url": "https://scancode-licensedb.aboutcode.org/public-domain",
15901590
"spdx_license_key": "",
1591-
"spdx_url": "",
1591+
"spdx_url": null,
15921592
"start_line": 1692,
15931593
"end_line": 1692,
15941594
"matched_rule": {
@@ -2270,11 +2270,11 @@
22702270
"is_exception": true,
22712271
"is_unknown": false,
22722272
"owner": "Dmitriy Anisimkov",
2273-
"homepage_url": "",
2274-
"text_url": "",
2273+
"homepage_url": null,
2274+
"text_url": null,
22752275
"reference_url": "https://scancode-licensedb.aboutcode.org/ada-linking-exception",
22762276
"spdx_license_key": "",
2277-
"spdx_url": "",
2277+
"spdx_url": null,
22782278
"start_line": 6,
22792279
"end_line": 25,
22802280
"matched_rule": {
@@ -2986,11 +2986,11 @@
29862986
"is_exception": false,
29872987
"is_unknown": false,
29882988
"owner": "CMR - Christian Michelsen Research AS",
2989-
"homepage_url": "",
2990-
"text_url": "",
2989+
"homepage_url": null,
2990+
"text_url": null,
29912991
"reference_url": "https://scancode-licensedb.aboutcode.org/cmr-no",
29922992
"spdx_license_key": "",
2993-
"spdx_url": "",
2993+
"spdx_url": null,
29942994
"start_line": 9,
29952995
"end_line": 15,
29962996
"matched_rule": {

0 commit comments

Comments
 (0)