This adds support for generating SPDX 2.2 tag-value documents via the
new `west spdx` command. The CMake file-based API is leveraged to
create relationships from source files to the corresponding
generated build files. `SPDX-License-Identifier` comments in source
files are scanned and filled into the SPDX documents.
Before `west build` is run, a specific file must be created in the
build directory so that CMake generates its file-based API reply
during the build. This can be done by running:
west spdx --init -d BUILD_DIR
After `west build` is run, SPDX generation is then activated by
calling `west spdx`; currently this requires passing the build
directory as a parameter again:
west spdx -d BUILD_DIR
This will generate three SPDX documents in `BUILD_DIR/spdx/`:
1) `app.spdx`: This contains the bill-of-materials for the
application source files used for the build.
2) `zephyr.spdx`: This contains the bill-of-materials for the
specific Zephyr source code files that are used for the build.
3) `build.spdx`: This contains the bill-of-materials for the built
output files.
Each file in the bill-of-materials is scanned, so that its hashes
(SHA256 and SHA1) can be recorded, along with any detected licenses
if an `SPDX-License-Identifier` appears in the file.
SPDX Relationships are created to indicate dependencies between
CMake build targets; build targets that are linked together; and
source files that are compiled to generate the built library files.
`west spdx` can be called with optional parameters for further
configuration:
* `-n PREFIX`: specifies a prefix for the Document Namespaces that
will be included in the generated SPDX documents. See SPDX spec 2.2
section 2.5 at
https://spdx.github.io/spdx-spec/2-document-creation-information/.
If -n is omitted, a default namespace will be generated according
to the default format described in section 2.5 using a random UUID.
* `-s SPDX_DIR`: specifies an alternate directory where the SPDX
documents should be written. If not specified, they will be saved
in `BUILD_DIR/spdx/`.
* `--analyze-includes`: in addition to recording the compiled
source code files (e.g. `.c`, `.S`) in the bills-of-materials, if
this flag is specified, `west spdx` will attempt to determine the
specific header files that are included for each `.c` file. This
will take longer, as it performs a dry run using the C compiler
for each `.c` file (using the same arguments that were passed to it
for the actual build).
* `--include-sdk`: if `--analyze-includes` is used, then adding
`--include-sdk` will create a fourth SPDX document, `sdk.spdx`,
which will list any header files included from the SDK.
Signed-off-by: Steve Winslow <steve@swinslow.net>
219 lines
6.7 KiB
Python
219 lines
6.7 KiB
Python
# Copyright (c) 2020, 2021 The Linux Foundation
|
|
#
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
import hashlib
|
|
import os
|
|
import re
|
|
|
|
from west import log
|
|
|
|
from zspdx.licenses import LICENSES
|
|
from zspdx.util import getHashes
|
|
|
|
# ScannerConfig contains settings used to configure how the SPDX
|
|
# Document scanning should occur.
|
|
class ScannerConfig:
    """
    Settings that control how SPDX Document scanning is performed.

    Every option is a plain instance attribute assigned a default in
    the constructor; callers may overwrite them before scanning.
    """

    def __init__(self):
        super().__init__()

        # --- hashing options ---

        # SHA1 hashes are mandatory per SPDX 2.2, so there is no switch
        # for them; SHA256 is optional and calculated by default
        self.doSHA256 = True

        # MD5 is optional and not calculated by default
        self.doMD5 = False

        # --- license detection options ---

        # how many lines of each file are searched for an
        # SPDX-License-Identifier tag (0 = search the whole file)
        self.numLinesScanned = 20

        # auto-conclude each File's license from the license(s)
        # detected in it?
        self.shouldConcludeFileLicenses = True

        # auto-conclude each Package's license from the licenses of
        # its Files?
        self.shouldConcludePackageLicense = True
|
|
|
|
def parseLineForExpression(line):
    """
    Return parsed SPDX expression if tag found in line, or None otherwise.

    Arguments:
        - line: one line of text from the file being scanned.
    Returns: the text following the first "SPDX-License-Identifier:" tag,
             with surrounding whitespace and trailing "/" and "*"
             characters (e.g. a closing "*/") removed; None if the tag is
             absent or nothing remains after that clean-up.
    """
    _, _, tail = line.partition("SPDX-License-Identifier:")

    # strip whitespace, then trailing comment marks, then any whitespace
    # that sat between the expression and those comment marks
    expression = tail.strip().rstrip("/*").strip()

    # a missing tag and a tag with no expression after it both leave an
    # empty string here; report both as "not found" so that callers testing
    # "is not None" never receive an empty expression
    return expression or None
|
|
|
|
def getExpressionData(filePath, numLines):
    """
    Scans the specified file for the first SPDX-License-Identifier:
    tag in the file.

    Arguments:
        - filePath: path to file to scan.
        - numLines: number of lines to scan for an expression before
                    giving up. If 0, will scan the entire file.
    Returns: parsed expression if found; None if not found, or if the
             file content cannot be decoded as UTF-8.
    """
    log.dbg(f" - getting licenses for {filePath}")

    # decode explicitly as UTF-8 so the result does not depend on the
    # locale's preferred encoding
    with open(filePath, "r", encoding="utf-8") as f:
        try:
            for lineno, line in enumerate(f, start=1):
                # only enforce the limit when numLines is positive
                if lineno > numLines > 0:
                    break
                expression = parseLineForExpression(line)
                if expression is not None:
                    return expression
        except UnicodeDecodeError:
            # invalid UTF-8 content (decoding happens lazily while
            # iterating, so the error surfaces inside the loop)
            return None

    # if we get here, we didn't find an expression
    return None
|
|
|
|
def splitExpression(expression):
    """
    Parse a license expression into its constituent identifiers.

    Parentheses and "+" signs are dropped, and the AND / OR / WITH
    operators (matched case-insensitively, and only when surrounded by
    whitespace) are treated as separators. Note that the identifier
    following WITH is returned alongside the license identifiers.

    Arguments:
        - expression: SPDX license expression
    Returns: sorted array of split identifiers; never contains empty
             strings, even if the expression has repeated whitespace.
    """
    # remove parens and plus sign
    stripped = re.sub(r'[()+]', "", expression)

    # replace word operators, ignoring case, with a blank space; the
    # lookahead leaves the trailing whitespace unconsumed so that it can
    # serve as the leading whitespace of an immediately following operator
    separated = re.sub(r'\s+(?:AND|OR|WITH)(?=\s)', " ", stripped,
                       flags=re.IGNORECASE)

    # split on any run of whitespace, which discards empty tokens
    return sorted(separated.split())
|
|
|
|
def calculateVerificationCode(pkg):
    """
    Calculate the SPDX Package Verification Code for all files in the package.

    The code is the SHA1 hex digest of the package's per-file SHA1 strings,
    sorted and concatenated with no separator.

    Arguments:
        - pkg: Package
    Returns: verification code as string
    """
    # sorting makes the result independent of the order files were added
    orderedHashes = sorted(fileEntry.sha1 for fileEntry in pkg.files.values())
    combined = "".join(orderedHashes).encode('utf-8')
    return hashlib.sha1(combined).hexdigest()
|
|
|
|
def checkLicenseValid(lic, doc):
    """
    Record a license ID as custom for this Document unless it is a
    known SPDX license ID.

    Arguments:
        - lic: detected license ID
        - doc: Document; its customLicenseIDs set is updated in place
    """
    # known IDs need no bookkeeping
    if lic in LICENSES:
        return

    doc.customLicenseIDs.add(lic)
|
|
|
|
def getPackageLicenses(pkg):
    """
    Collect every distinct concluded license and infoInFile license
    seen across the package's files.

    Arguments:
        - pkg: Package
    Returns: sorted list of concluded license exprs,
             sorted list of infoInFile ID's
    """
    pkgFiles = list(pkg.files.values())

    # sets collapse duplicates across files
    concluded = {entry.concludedLicense for entry in pkgFiles}
    fromFiles = {licID for entry in pkgFiles for licID in entry.licenseInfoInFile}

    return sorted(concluded), sorted(fromFiles)
|
|
|
|
def normalizeExpression(licsConcluded):
    """
    Combine array of license expressions into one AND'd expression,
    adding parens where needed.

    Arguments:
        - licsConcluded: array of license expressions
    Returns: string with single AND'd expression. "NOASSERTION" is
             returned when the array is empty, or when it has several
             entries that are all "NONE" / "NOASSERTION".
    """
    # return appropriate for simple cases
    if len(licsConcluded) == 0:
        return "NOASSERTION"
    if len(licsConcluded) == 1:
        return licsConcluded[0]

    # more than one, so we'll need to combine them;
    # "NONE" / "NOASSERTION" entries contribute nothing to the result,
    # and any expression containing spaces is wrapped in parens
    revised = [
        f"({lic})" if " " in lic else lic
        for lic in licsConcluded
        if lic not in ("NONE", "NOASSERTION")
    ]

    # nothing meaningful left: don't emit an empty expression
    if not revised:
        return "NOASSERTION"

    return " AND ".join(revised)
|
|
|
|
def scanDocument(cfg, doc):
    """
    Scan for licenses and calculate hashes for all Files and Packages
    in this Document.

    Each File gets its relpath, hashes and detected license data filled
    in; each Package then gets its license summary and verification code.
    Files for which no hashes are obtained are skipped with a warning.

    Arguments:
        - cfg: ScannerConfig
        - doc: Document
    """
    for pkg in doc.pkgs.values():
        log.inf(f"scanning files in package {pkg.cfg.name} in document {doc.cfg.name}")

        # first, gather File data for this package
        for f in pkg.files.values():
            # set relpath based on package's relativeBaseDir
            f.relpath = os.path.relpath(f.abspath, pkg.cfg.relativeBaseDir)

            # get hashes for file
            hashes = getHashes(f.abspath)
            if not hashes:
                # f-string so that the actual path appears in the warning
                log.wrn(f"unable to get hashes for file {f.abspath}; skipping")
                continue
            hSHA1, hSHA256, hMD5 = hashes
            # SHA1 is always recorded; the other hashes are opt-in via cfg
            f.sha1 = hSHA1
            if cfg.doSHA256:
                f.sha256 = hSHA256
            if cfg.doMD5:
                f.md5 = hMD5

            # get licenses for file
            expression = getExpressionData(f.abspath, cfg.numLinesScanned)
            if expression:
                if cfg.shouldConcludeFileLicenses:
                    f.concludedLicense = expression
                f.licenseInfoInFile = splitExpression(expression)

            # check if any custom license IDs should be flagged for document
            for lic in f.licenseInfoInFile:
                checkLicenseValid(lic, doc)

        # now, assemble the Package data
        licsConcluded, licsFromFiles = getPackageLicenses(pkg)
        if cfg.shouldConcludePackageLicense:
            pkg.concludedLicense = normalizeExpression(licsConcluded)
        pkg.licenseInfoFromFiles = licsFromFiles
        pkg.verificationCode = calculateVerificationCode(pkg)
|