west: spdx: Generate SPDX 2.2 tag-value documents

This adds support to generate SPDX 2.2 tag-value documents via the
new west spdx command. The CMake file-based APIs are leveraged to
create relationships from source files to the corresponding
generated build files. SPDX-License-Identifier comments in source
files are scanned and filled into the SPDX documents.

Before `west build` is run, a specific file must be created in the
build directory so that CMake will generate its file-based API
reply. This can be done by running:

    west spdx --init -d BUILD_DIR

After `west build` is run, SPDX generation is then activated by
calling `west spdx`; currently this requires passing the build
directory as a parameter again:

    west spdx -d BUILD_DIR

This will generate three SPDX documents in `BUILD_DIR/spdx/`:

1) `app.spdx`: This contains the bill-of-materials for the
application source files used for the build.

2) `zephyr.spdx`: This contains the bill-of-materials for the
specific Zephyr source code files that are used for the build.

3) `build.spdx`: This contains the bill-of-materials for the built
output files.

Each file in the bill-of-materials is scanned, so that its hashes
(SHA256 and SHA1) can be recorded, along with any detected licenses
if an `SPDX-License-Identifier` appears in the file.

SPDX Relationships are created to indicate dependencies between
CMake build targets; build targets that are linked together; and
source files that are compiled to generate the built library files.

`west spdx` can be called with optional parameters for further
configuration:

* `-n PREFIX`: specifies a prefix for the Document Namespaces that
will be included in the generated SPDX documents. See SPDX spec 2.2
section 2.5 at
https://spdx.github.io/spdx-spec/2-document-creation-information/.
If -n is omitted, a default namespace will be generated according
to the default format described in section 2.5 using a random UUID.

* `-s SPDX_DIR`: specifies an alternate directory where the SPDX
documents should be written. If not specified, they will be saved
in `BUILD_DIR/spdx/`.

* `--analyze-includes`: in addition to recording the compiled
source code files (e.g. `.c`, `.S`) in the bills-of-materials, if
this flag is specified, `west spdx` will attempt to determine the
specific header files that are included for each `.c` file. This
will take longer, as it performs a dry run using the C compiler
for each `.c` file (using the same arguments that were passed to it
for the actual build).

* `--include-sdk`: if `--analyze-includes` is used, then adding
`--include-sdk` will create a fourth SPDX document, `sdk.spdx`,
which will list any header files included from the SDK.

Signed-off-by: Steve Winslow <steve@swinslow.net>
This commit is contained in:
Steve Winslow 2021-04-20 23:04:52 -04:00 committed by Anas Nashif
commit fd31b9b4ac
16 changed files with 2946 additions and 0 deletions

View file

@ -1506,6 +1506,7 @@ West:
- mbolivar-nordic
collaborators:
- carlescufi
- swinslow
files:
- scripts/west-commands.yml
- scripts/west_commands/

View file

@ -41,3 +41,8 @@ west-commands:
- name: zephyr-export
class: ZephyrExport
help: export Zephyr installation as a CMake config package
- file: scripts/west_commands/spdx.py
commands:
- name: spdx
class: ZephyrSpdx
help: create SPDX bill of materials

View file

@ -0,0 +1,112 @@
# Copyright (c) 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
import os
import uuid
from west.commands import WestCommand
from west import log
from zspdx.sbom import SBOMConfig, makeSPDX, setupCmakeQuery
SPDX_DESCRIPTION = """\
This command creates an SPDX 2.2 tag-value bill of materials
following the completion of a Zephyr build.
Prior to the build, an empty file must be created at
BUILDDIR/.cmake/api/v1/query/codemodel-v2 in order to enable
the CMake file-based API, which the SPDX command relies upon.
This can be done by calling `west spdx --init` prior to
calling `west build`."""
class ZephyrSpdx(WestCommand):
    """West command that generates SPDX 2.2 tag-value documents for a build.

    `west spdx --init` must run before `west build` so the CMake
    file-based API query file is in place; `west spdx` then runs after
    the build to produce the SPDX documents.
    """

    def __init__(self):
        super().__init__(
            'spdx',
            'create SPDX bill of materials',
            SPDX_DESCRIPTION)

    def do_add_parser(self, parser_adder):
        # Register the "spdx" subcommand and its options.
        parser = parser_adder.add_parser(self.name,
                                         help=self.help,
                                         description=self.description)

        parser.add_argument('-i', '--init', action="store_true",
                            help="initialize CMake file-based API")
        parser.add_argument('-d', '--build-dir',
                            help="build directory")
        parser.add_argument('-n', '--namespace-prefix',
                            help="namespace prefix")
        parser.add_argument('-s', '--spdx-dir',
                            help="SPDX output directory")
        parser.add_argument('--analyze-includes', action="store_true",
                            help="also analyze included header files")
        parser.add_argument('--include-sdk', action="store_true",
                            help="also generate SPDX document for SDK")

        return parser

    def do_run(self, args, unknown_args):
        # Fix: the original passed each value as an extra positional arg to
        # log.dbg alongside an f-string that had no placeholder; interpolate
        # the value into the message instead.
        log.dbg("running zephyr SPDX generator")
        log.dbg(f" --init is {args.init}")
        log.dbg(f" --build-dir is {args.build_dir}")
        log.dbg(f" --namespace-prefix is {args.namespace_prefix}")
        log.dbg(f" --spdx-dir is {args.spdx_dir}")
        log.dbg(f" --analyze-includes is {args.analyze_includes}")
        log.dbg(f" --include-sdk is {args.include_sdk}")

        if args.init:
            do_run_init(args)
        else:
            do_run_spdx(args)
def do_run_init(args):
    """Handle `west spdx --init`: enable the CMake file-based API.

    Asks setupCmakeQuery() to create the empty codemodel-v2 query file
    under the build directory so the next `west build` makes CMake write
    its API reply.  Dies if no build directory was given; logs advice if
    the query file could not be created.
    """
    log.inf("initializing Cmake file-based API prior to build")

    if not args.build_dir:
        log.die("Build directory not specified; call `west spdx --init --build-dir=BUILD_DIR`")

    # initialize CMake file-based API - empty query file
    query_ready = setupCmakeQuery(args.build_dir)
    if query_ready:
        log.inf("initialized; run `west build` then run `west spdx`")
    else:
        log.err("Couldn't create Cmake file-based API query directory")
        log.err("You can manually create an empty file at $BUILDDIR/.cmake/api/v1/query/codemodel-v2")
def do_run_spdx(args):
    """Handle `west spdx`: generate SPDX documents for a completed build.

    Builds an SBOMConfig from the parsed command-line arguments, ensures
    the SPDX output directory exists, and hands off to makeSPDX().
    Dies if no build directory was given.
    """
    if not args.build_dir:
        log.die("Build directory not specified; call `west spdx --build-dir=BUILD_DIR`")

    # create the SPDX files
    cfg = SBOMConfig()
    cfg.buildDir = args.build_dir
    if args.namespace_prefix:
        cfg.namespacePrefix = args.namespace_prefix
    else:
        # create default namespace according to SPDX spec
        # note that this is intentionally _not_ an actual URL where
        # this document will be stored
        cfg.namespacePrefix = f"http://spdx.org/spdxdocs/zephyr-{str(uuid.uuid4())}"
    if args.spdx_dir:
        cfg.spdxDir = args.spdx_dir
    else:
        cfg.spdxDir = os.path.join(args.build_dir, "spdx")
    if args.analyze_includes:
        cfg.analyzeIncludes = True
    if args.include_sdk:
        cfg.includeSDK = True

    # make sure SPDX directory exists, or create it if it doesn't
    if os.path.exists(cfg.spdxDir):
        if not os.path.isdir(cfg.spdxDir):
            log.err(f'SPDX output directory {cfg.spdxDir} exists but is not a directory')
            return
        # directory exists, we're good
    else:
        # create the directory; exist_ok=True avoids a crash if another
        # process creates it between the check above and this call
        os.makedirs(cfg.spdxDir, exist_ok=True)

    makeSPDX(cfg)

View file

@ -0,0 +1,3 @@
# Copyright (c) 2020 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0

View file

@ -0,0 +1,38 @@
# Copyright (c) 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
from west import log
# Parse a CMakeCache file and return a dict of key:value (discarding
# type hints).
def parseCMakeCacheFile(filePath):
    """Parse a CMakeCache.txt-style file into a key/value dict.

    Entries have the form KEY:TYPE=VALUE; the TYPE hint is discarded.
    Blank lines, comments ("#" or "//"), and malformed lines are skipped.
    Returns an empty dict if the file can't be read.
    """
    log.dbg(f"parsing CMake cache file at {filePath}")
    entries = {}
    try:
        with open(filePath, "r") as f:
            # file is expected to be short; read it line by line,
            # keeping only non-comment, non-empty entries
            for rawLine in f.readlines():
                entry = rawLine.strip()
                if not entry or entry.startswith("#") or entry.startswith("//"):
                    continue
                # split off the key at ":", then the value at "="
                keyAndRest = entry.split(":", maxsplit=1)
                if len(keyAndRest) != 2:
                    continue
                typeAndValue = keyAndRest[1].split("=", maxsplit=1)
                if len(typeAndValue) != 2:
                    continue
                entries[keyAndRest[0]] = typeAndValue[1]
            return entries
    except OSError as e:
        log.err(f"Error loading {filePath}: {str(e)}")
        return {}

View file

@ -0,0 +1,306 @@
# Copyright (c) 2020 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
from enum import Enum
class Codemodel:
    """Top-level CMake codemodel: source/build paths plus configurations."""

    def __init__(self):
        super().__init__()

        self.paths_source = ""
        self.paths_build = ""
        self.configurations = []

    def __repr__(self):
        return "Codemodel: source {}, build {}".format(self.paths_source, self.paths_build)
# A member of the codemodel configurations array
class Config:
    """One build configuration (e.g. Debug) from the codemodel reply."""

    def __init__(self):
        super().__init__()

        self.name = ""
        self.directories = []
        self.projects = []
        self.configTargets = []

    def __repr__(self):
        if not self.name:
            return "Config: [no name]"
        return "Config: {}".format(self.name)
# A member of the configuration.directories array
class ConfigDir:
    """A directory entry within a Config.

    The *Index/*Indexes attributes mirror the raw JSON; parent/children/
    project/targets are filled in with direct object references after the
    whole reply is loaded.
    """

    def __init__(self):
        super().__init__()

        self.source = ""
        self.build = ""
        self.parentIndex = -1
        self.childIndexes = []
        self.projectIndex = -1
        self.targetIndexes = []
        self.minimumCMakeVersion = ""
        self.hasInstallRule = False

        # actual items, calculated from indices after loading
        self.parent = None
        self.children = []
        self.project = None
        self.targets = []

    def __repr__(self):
        return "ConfigDir: source {}, build {}".format(self.source, self.build)
# A member of the configuration.projects array
class ConfigProject:
    """A project entry within a Config, with index-based cross-references."""

    def __init__(self):
        super().__init__()

        self.name = ""
        self.parentIndex = -1
        self.childIndexes = []
        self.directoryIndexes = []
        self.targetIndexes = []

        # actual items, calculated from indices after loading
        self.parent = None
        self.children = []
        self.directories = []
        self.targets = []

    def __repr__(self):
        return "ConfigProject: {}".format(self.name)
# A member of the configuration.configTargets array
class ConfigTarget:
    """A target entry within a Config; full data lives in self.target."""

    def __init__(self):
        super().__init__()

        self.name = ""
        self.id = ""
        self.directoryIndex = -1
        self.projectIndex = -1
        self.jsonFile = ""

        # actual target data, loaded from self.jsonFile
        self.target = None

        # actual items, calculated from indices after loading
        self.directory = None
        self.project = None

    def __repr__(self):
        return "ConfigTarget: {}".format(self.name)
# The available values for Target.type
class TargetType(Enum):
    """Target kinds reported by the CMake file-based API "type" field."""
    UNKNOWN = 0
    EXECUTABLE = 1
    STATIC_LIBRARY = 2
    SHARED_LIBRARY = 3
    MODULE_LIBRARY = 4
    OBJECT_LIBRARY = 5
    UTILITY = 6
# A member of the target.install_destinations array
class TargetInstallDestination:
    """One install destination path for a target, with its backtrace index."""

    def __init__(self):
        super().__init__()

        self.path = ""
        self.backtrace = -1

    def __repr__(self):
        return "TargetInstallDestination: {}".format(self.path)
# A member of the target.link_commandFragments and
# archive_commandFragments array
class TargetCommandFragment:
    """One link/archive command-line fragment and its role."""

    def __init__(self):
        super().__init__()

        self.fragment = ""
        self.role = ""

    def __repr__(self):
        return "TargetCommandFragment: {}".format(self.fragment)
# A member of the target.dependencies array
class TargetDependency:
    """One dependency edge from a target to another target's id."""

    def __init__(self):
        super().__init__()

        self.id = ""
        self.backtrace = -1

    def __repr__(self):
        return "TargetDependency: {}".format(self.id)
# A member of the target.sources array
class TargetSource:
    """One source file of a target, with group cross-references."""

    def __init__(self):
        super().__init__()

        self.path = ""
        self.compileGroupIndex = -1
        self.sourceGroupIndex = -1
        self.isGenerated = False
        self.backtrace = -1

        # actual items, calculated from indices after loading
        self.compileGroup = None
        self.sourceGroup = None

    def __repr__(self):
        return "TargetSource: {}".format(self.path)
# A member of the target.sourceGroups array
class TargetSourceGroup:
    """A named grouping of a target's sources (see source_group())."""

    def __init__(self):
        super().__init__()

        self.name = ""
        self.sourceIndexes = []

        # actual items, calculated from indices after loading
        self.sources = []

    def __repr__(self):
        return "TargetSourceGroup: {}".format(self.name)
# A member of the target.compileGroups.includes array
class TargetCompileGroupInclude:
    """One include directory for a compile group."""

    def __init__(self):
        super().__init__()

        self.path = ""
        self.isSystem = False
        self.backtrace = -1

    def __repr__(self):
        return "TargetCompileGroupInclude: {}".format(self.path)
# A member of the target.compileGroups.precompileHeaders array
class TargetCompileGroupPrecompileHeader:
    """One precompiled header for a compile group."""

    def __init__(self):
        super().__init__()

        self.header = ""
        self.backtrace = -1

    def __repr__(self):
        return "TargetCompileGroupPrecompileHeader: {}".format(self.header)
# A member of the target.compileGroups.defines array
class TargetCompileGroupDefine:
    """One preprocessor define for a compile group."""

    def __init__(self):
        super().__init__()

        self.define = ""
        self.backtrace = -1

    def __repr__(self):
        return "TargetCompileGroupDefine: {}".format(self.define)
# A member of the target.compileGroups array
class TargetCompileGroup:
    """A group of a target's sources compiled with the same settings."""

    def __init__(self):
        super().__init__()

        self.sourceIndexes = []
        self.language = ""
        self.compileCommandFragments = []
        self.includes = []
        self.precompileHeaders = []
        self.defines = []
        self.sysroot = ""

        # actual items, calculated from indices after loading
        self.sources = []

    def __repr__(self):
        return "TargetCompileGroup: {}".format(self.sources)
# A member of the target.backtraceGraph_nodes array
class TargetBacktraceGraphNode:
    """One node in a target's backtrace graph (indices into files/commands)."""

    def __init__(self):
        super().__init__()

        self.file = -1
        self.line = -1
        self.command = -1
        self.parent = -1

    def __repr__(self):
        return "TargetBacktraceGraphNode: {}".format(self.command)
# Actual data in config.target.target, loaded from
# config.target.jsonFile
class Target:
    """Full data for one CMake target, loaded from its own reply JSON file."""

    def __init__(self):
        super().__init__()

        self.name = ""
        self.id = ""
        self.type = TargetType.UNKNOWN
        self.backtrace = -1
        self.folder = ""
        self.paths_source = ""
        self.paths_build = ""
        self.nameOnDisk = ""
        self.artifacts = []
        self.isGeneratorProvided = False

        # only if install rule is present
        self.install_prefix = ""
        self.install_destinations = []

        # only for executables and shared library targets that link into
        # a runtime binary
        self.link_language = ""
        self.link_commandFragments = []
        self.link_lto = False
        self.link_sysroot = ""

        # only for static library targets
        self.archive_commandFragments = []
        self.archive_lto = False

        # only if the target depends on other targets
        self.dependencies = []

        # corresponds to target's source files
        self.sources = []

        # only if sources are grouped together by source_group() or by default
        self.sourceGroups = []

        # only if target has sources that compile
        self.compileGroups = []

        # graph of backtraces referenced from elsewhere
        self.backtraceGraph_nodes = []
        self.backtraceGraph_commands = []
        self.backtraceGraph_files = []

    def __repr__(self):
        return "Target: {}".format(self.name)

View file

@ -0,0 +1,436 @@
# Copyright (c) 2020 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
import json
import os
from west import log
import zspdx.cmakefileapi
def parseReply(replyIndexPath):
    """Parse a CMake file-based API reply index file.

    Locates the codemodel-v2 reply filename inside the index JSON and
    delegates to parseCodemodel().  Returns the loaded Codemodel, or
    None after logging any I/O, JSON, or structure error.
    """
    replyDir, _ = os.path.split(replyIndexPath)

    # first we need to find the codemodel reply file
    try:
        with open(replyIndexPath, 'r') as indexFile:
            js = json.load(indexFile)

            # get reply object
            reply_dict = js.get("reply", {})
            if reply_dict == {}:
                log.err(f"no \"reply\" field found in index file")
                return None
            # get codemodel object
            cm_dict = reply_dict.get("codemodel-v2", {})
            if cm_dict == {}:
                log.err(f"no \"codemodel-v2\" field found in \"reply\" object in index file")
                return None
            # and get codemodel filename
            jsonFile = cm_dict.get("jsonFile", "")
            if jsonFile == "":
                log.err(f"no \"jsonFile\" field found in \"codemodel-v2\" object in index file")
                return None
            return parseCodemodel(replyDir, jsonFile)
    except OSError as e:
        log.err(f"Error loading {replyIndexPath}: {str(e)}")
        return None
    except json.decoder.JSONDecodeError as e:
        log.err(f"Error parsing JSON in {replyIndexPath}: {str(e)}")
        return None
def parseCodemodel(replyDir, codemodelFile):
    """Parse the codemodel-v2 reply JSON file.

    Validates the "kind" and major version (must be 2), records the
    source/build paths, parses each configuration, then resolves all
    index-based cross-references via linkCodemodel().  Returns the
    Codemodel, or None after logging any error.
    """
    codemodelPath = os.path.join(replyDir, codemodelFile)

    try:
        with open(codemodelPath, 'r') as cmFile:
            js = json.load(cmFile)

            cm = zspdx.cmakefileapi.Codemodel()

            # for correctness, check kind and version
            kind = js.get("kind", "")
            if kind != "codemodel":
                log.err(f"Error loading CMake API reply: expected \"kind\":\"codemodel\" in {codemodelPath}, got {kind}")
                return None
            version = js.get("version", {})
            versionMajor = version.get("major", -1)
            if versionMajor != 2:
                if versionMajor == -1:
                    log.err(f"Error loading CMake API reply: expected major version 2 in {codemodelPath}, no version found")
                    return None
                log.err(f"Error loading CMake API reply: expected major version 2 in {codemodelPath}, got {versionMajor}")
                return None

            # get paths
            paths_dict = js.get("paths", {})
            cm.paths_source = paths_dict.get("source", "")
            cm.paths_build = paths_dict.get("build", "")

            # get configurations
            configs_arr = js.get("configurations", [])
            for cfg_dict in configs_arr:
                cfg = parseConfig(cfg_dict, replyDir)
                if cfg:
                    cm.configurations.append(cfg)

            # and after parsing is done, link all the indices
            linkCodemodel(cm)

            return cm
    except OSError as e:
        log.err(f"Error loading {codemodelPath}: {str(e)}")
        return None
    except json.decoder.JSONDecodeError as e:
        log.err(f"Error parsing JSON in {codemodelPath}: {str(e)}")
        return None
def parseConfig(cfg_dict, replyDir):
    """Parse one member of the codemodel "configurations" array.

    Fills in the configuration's directories, projects, and targets;
    each target's own JSON file (if named) is parsed via parseTarget().
    Returns the populated Config object.
    """
    cfg = zspdx.cmakefileapi.Config()
    cfg.name = cfg_dict.get("name", "")

    # parse and add each directory
    dirs_arr = cfg_dict.get("directories", [])
    for dir_dict in dirs_arr:
        if dir_dict != {}:
            cfgdir = zspdx.cmakefileapi.ConfigDir()
            cfgdir.source = dir_dict.get("source", "")
            cfgdir.build = dir_dict.get("build", "")
            cfgdir.parentIndex = dir_dict.get("parentIndex", -1)
            cfgdir.childIndexes = dir_dict.get("childIndexes", [])
            # fix: the file-based API key is "projectIndex"; the previous
            # "projecttIndex" typo meant this was always left at -1
            cfgdir.projectIndex = dir_dict.get("projectIndex", -1)
            cfgdir.targetIndexes = dir_dict.get("targetIndexes", [])
            minCMakeVer_dict = dir_dict.get("minimumCMakeVersion", {})
            cfgdir.minimumCMakeVersion = minCMakeVer_dict.get("string", "")
            cfgdir.hasInstallRule = dir_dict.get("hasInstallRule", False)
            cfg.directories.append(cfgdir)

    # parse and add each project
    projects_arr = cfg_dict.get("projects", [])
    for prj_dict in projects_arr:
        if prj_dict != {}:
            prj = zspdx.cmakefileapi.ConfigProject()
            prj.name = prj_dict.get("name", "")
            prj.parentIndex = prj_dict.get("parentIndex", -1)
            prj.childIndexes = prj_dict.get("childIndexes", [])
            prj.directoryIndexes = prj_dict.get("directoryIndexes", [])
            prj.targetIndexes = prj_dict.get("targetIndexes", [])
            cfg.projects.append(prj)

    # parse and add each target
    cfgTargets_arr = cfg_dict.get("targets", [])
    for cfgTarget_dict in cfgTargets_arr:
        if cfgTarget_dict != {}:
            cfgTarget = zspdx.cmakefileapi.ConfigTarget()
            cfgTarget.name = cfgTarget_dict.get("name", "")
            cfgTarget.id = cfgTarget_dict.get("id", "")
            cfgTarget.directoryIndex = cfgTarget_dict.get("directoryIndex", -1)
            cfgTarget.projectIndex = cfgTarget_dict.get("projectIndex", -1)
            cfgTarget.jsonFile = cfgTarget_dict.get("jsonFile", "")
            if cfgTarget.jsonFile != "":
                cfgTarget.target = parseTarget(os.path.join(replyDir, cfgTarget.jsonFile))
            else:
                cfgTarget.target = None
            cfg.configTargets.append(cfgTarget)

    return cfg
def parseTarget(targetPath):
    """Parse a single target's reply JSON file into a Target object.

    Top-level scalar fields are read directly; the optional subsections
    (install, link, archive, dependencies, sources, source groups,
    compile groups, backtrace graph) are each handled by a dedicated
    parser.  Returns None after logging any I/O or JSON error.
    """
    try:
        with open(targetPath, 'r') as targetFile:
            js = json.load(targetFile)
            target = zspdx.cmakefileapi.Target()

            target.name = js.get("name", "")
            target.id = js.get("id", "")
            target.type = parseTargetType(js.get("type", "UNKNOWN"))
            target.backtrace = js.get("backtrace", -1)
            target.folder = js.get("folder", "")

            # get paths
            paths_dict = js.get("paths", {})
            target.paths_source = paths_dict.get("source", "")
            target.paths_build = paths_dict.get("build", "")

            target.nameOnDisk = js.get("nameOnDisk", "")

            # parse artifacts if present
            artifacts_arr = js.get("artifacts", [])
            target.artifacts = []
            for artifact_dict in artifacts_arr:
                artifact_path = artifact_dict.get("path", "")
                if artifact_path != "":
                    target.artifacts.append(artifact_path)

            target.isGeneratorProvided = js.get("isGeneratorProvided", False)

            # call separate functions to parse subsections
            parseTargetInstall(target, js)
            parseTargetLink(target, js)
            parseTargetArchive(target, js)
            parseTargetDependencies(target, js)
            parseTargetSources(target, js)
            parseTargetSourceGroups(target, js)
            parseTargetCompileGroups(target, js)
            parseTargetBacktraceGraph(target, js)

            return target
    except OSError as e:
        log.err(f"Error loading {targetPath}: {str(e)}")
        return None
    except json.decoder.JSONDecodeError as e:
        log.err(f"Error parsing JSON in {targetPath}: {str(e)}")
        return None
def parseTargetType(targetType):
    """Map a file-based API "type" string to the TargetType enum.

    Any unrecognized string maps to TargetType.UNKNOWN.
    """
    return {
        "EXECUTABLE": zspdx.cmakefileapi.TargetType.EXECUTABLE,
        "STATIC_LIBRARY": zspdx.cmakefileapi.TargetType.STATIC_LIBRARY,
        "SHARED_LIBRARY": zspdx.cmakefileapi.TargetType.SHARED_LIBRARY,
        "MODULE_LIBRARY": zspdx.cmakefileapi.TargetType.MODULE_LIBRARY,
        "OBJECT_LIBRARY": zspdx.cmakefileapi.TargetType.OBJECT_LIBRARY,
        "UTILITY": zspdx.cmakefileapi.TargetType.UTILITY,
    }.get(targetType, zspdx.cmakefileapi.TargetType.UNKNOWN)
def parseTargetInstall(target, js):
    """Parse the optional "install" section of a target JSON document.

    No-op if the section is absent; otherwise fills target.install_prefix
    and appends a TargetInstallDestination per destination entry.
    """
    install_dict = js.get("install", {})
    if install_dict == {}:
        return
    prefix_dict = install_dict.get("prefix", {})
    target.install_prefix = prefix_dict.get("path", "")

    destinations_arr = install_dict.get("destinations", [])
    for destination_dict in destinations_arr:
        dest = zspdx.cmakefileapi.TargetInstallDestination()
        dest.path = destination_dict.get("path", "")
        dest.backtrace = destination_dict.get("backtrace", -1)
        target.install_destinations.append(dest)
def parseTargetLink(target, js):
    """Parse the optional "link" section of a target JSON document.

    No-op if the section is absent; otherwise fills the link_* fields
    and appends a TargetCommandFragment per command fragment.
    """
    link_dict = js.get("link", {})
    if link_dict == {}:
        return
    # fix: default for a missing "language" should be an empty string,
    # not an empty dict, matching link_language's string type
    target.link_language = link_dict.get("language", "")
    target.link_lto = link_dict.get("lto", False)
    sysroot_dict = link_dict.get("sysroot", {})
    target.link_sysroot = sysroot_dict.get("path", "")

    fragments_arr = link_dict.get("commandFragments", [])
    for fragment_dict in fragments_arr:
        fragment = zspdx.cmakefileapi.TargetCommandFragment()
        fragment.fragment = fragment_dict.get("fragment", "")
        fragment.role = fragment_dict.get("role", "")
        target.link_commandFragments.append(fragment)
def parseTargetArchive(target, js):
    """Parse the optional "archive" section (static libraries) of a target.

    No-op if the section is absent; otherwise fills archive_lto and
    appends a TargetCommandFragment per command fragment.
    """
    archive_dict = js.get("archive", {})
    if archive_dict == {}:
        return
    target.archive_lto = archive_dict.get("lto", False)

    fragments_arr = archive_dict.get("commandFragments", [])
    for fragment_dict in fragments_arr:
        fragment = zspdx.cmakefileapi.TargetCommandFragment()
        fragment.fragment = fragment_dict.get("fragment", "")
        fragment.role = fragment_dict.get("role", "")
        target.archive_commandFragments.append(fragment)
def parseTargetDependencies(target, js):
    """Parse the "dependencies" array into TargetDependency objects."""
    dependencies_arr = js.get("dependencies", [])
    for dependency_dict in dependencies_arr:
        dep = zspdx.cmakefileapi.TargetDependency()
        dep.id = dependency_dict.get("id", "")
        dep.backtrace = dependency_dict.get("backtrace", -1)
        target.dependencies.append(dep)
def parseTargetSources(target, js):
    """Parse the "sources" array into TargetSource objects.

    The group indices recorded here are resolved into direct references
    later by the link* helpers.
    """
    sources_arr = js.get("sources", [])
    for source_dict in sources_arr:
        src = zspdx.cmakefileapi.TargetSource()
        src.path = source_dict.get("path", "")
        src.compileGroupIndex = source_dict.get("compileGroupIndex", -1)
        src.sourceGroupIndex = source_dict.get("sourceGroupIndex", -1)
        src.isGenerated = source_dict.get("isGenerated", False)
        src.backtrace = source_dict.get("backtrace", -1)
        target.sources.append(src)
def parseTargetSourceGroups(target, js):
    """Parse the "sourceGroups" array into TargetSourceGroup objects."""
    sourceGroups_arr = js.get("sourceGroups", [])
    for sourceGroup_dict in sourceGroups_arr:
        srcgrp = zspdx.cmakefileapi.TargetSourceGroup()
        srcgrp.name = sourceGroup_dict.get("name", "")
        srcgrp.sourceIndexes = sourceGroup_dict.get("sourceIndexes", [])
        target.sourceGroups.append(srcgrp)
def parseTargetCompileGroups(target, js):
    """Parse the "compileGroups" array into TargetCompileGroup objects.

    For each group this collects its command fragments, include dirs,
    precompiled headers, and defines.
    """
    compileGroups_arr = js.get("compileGroups", [])
    for compileGroup_dict in compileGroups_arr:
        cmpgrp = zspdx.cmakefileapi.TargetCompileGroup()
        cmpgrp.sourceIndexes = compileGroup_dict.get("sourceIndexes", [])
        cmpgrp.language = compileGroup_dict.get("language", "")
        cmpgrp.sysroot = compileGroup_dict.get("sysroot", "")

        # compile command fragments: only the non-empty fragment strings
        commandFragments_arr = compileGroup_dict.get("compileCommandFragments", [])
        for commandFragment_dict in commandFragments_arr:
            fragment = commandFragment_dict.get("fragment", "")
            if fragment != "":
                cmpgrp.compileCommandFragments.append(fragment)

        includes_arr = compileGroup_dict.get("includes", [])
        for include_dict in includes_arr:
            grpInclude = zspdx.cmakefileapi.TargetCompileGroupInclude()
            grpInclude.path = include_dict.get("path", "")
            grpInclude.isSystem = include_dict.get("isSystem", False)
            grpInclude.backtrace = include_dict.get("backtrace", -1)
            cmpgrp.includes.append(grpInclude)

        precompileHeaders_arr = compileGroup_dict.get("precompileHeaders", [])
        for precompileHeader_dict in precompileHeaders_arr:
            grpHeader = zspdx.cmakefileapi.TargetCompileGroupPrecompileHeader()
            grpHeader.header = precompileHeader_dict.get("header", "")
            grpHeader.backtrace = precompileHeader_dict.get("backtrace", -1)
            cmpgrp.precompileHeaders.append(grpHeader)

        defines_arr = compileGroup_dict.get("defines", [])
        for define_dict in defines_arr:
            grpDefine = zspdx.cmakefileapi.TargetCompileGroupDefine()
            grpDefine.define = define_dict.get("define", "")
            grpDefine.backtrace = define_dict.get("backtrace", -1)
            cmpgrp.defines.append(grpDefine)

        target.compileGroups.append(cmpgrp)
def parseTargetBacktraceGraph(target, js):
    """Parse the optional "backtraceGraph" section of a target.

    No-op if the section is absent; commands and files are stored as-is,
    nodes become TargetBacktraceGraphNode objects.
    """
    backtraceGraph_dict = js.get("backtraceGraph", {})
    if backtraceGraph_dict == {}:
        return
    target.backtraceGraph_commands = backtraceGraph_dict.get("commands", [])
    target.backtraceGraph_files = backtraceGraph_dict.get("files", [])

    nodes_arr = backtraceGraph_dict.get("nodes", [])
    for node_dict in nodes_arr:
        node = zspdx.cmakefileapi.TargetBacktraceGraphNode()
        node.file = node_dict.get("file", -1)
        node.line = node_dict.get("line", -1)
        node.command = node_dict.get("command", -1)
        node.parent = node_dict.get("parent", -1)
        target.backtraceGraph_nodes.append(node)
# Create direct pointers for all Configs in Codemodel
# takes: Codemodel
def linkCodemodel(cm):
    """Resolve index-based references into direct object references for
    every configuration in the codemodel."""
    for cfg in cm.configurations:
        linkConfig(cfg)
# Create direct pointers for all contents of Config
# takes: Config
def linkConfig(cfg):
    """Resolve index references for all directories, projects, and
    targets of one configuration."""
    for cfgDir in cfg.directories:
        linkConfigDir(cfg, cfgDir)
    for cfgPrj in cfg.projects:
        linkConfigProject(cfg, cfgPrj)
    for cfgTarget in cfg.configTargets:
        linkConfigTarget(cfg, cfgTarget)
# Create direct pointers for ConfigDir indices
# takes: Config and ConfigDir
def linkConfigDir(cfg, cfgDir):
    """Resolve cfgDir's stored indices into direct object references."""
    cfgDir.parent = None if cfgDir.parentIndex == -1 else cfg.directories[cfgDir.parentIndex]
    cfgDir.project = None if cfgDir.projectIndex == -1 else cfg.projects[cfgDir.projectIndex]
    cfgDir.children = [cfg.directories[i] for i in cfgDir.childIndexes]
    cfgDir.targets = [cfg.configTargets[i] for i in cfgDir.targetIndexes]
# Create direct pointers for ConfigProject indices
# takes: Config and ConfigProject
def linkConfigProject(cfg, cfgPrj):
    """Resolve cfgPrj's stored indices into direct object references."""
    cfgPrj.parent = None if cfgPrj.parentIndex == -1 else cfg.projects[cfgPrj.parentIndex]
    cfgPrj.children = [cfg.projects[i] for i in cfgPrj.childIndexes]
    cfgPrj.directories = [cfg.directories[i] for i in cfgPrj.directoryIndexes]
    cfgPrj.targets = [cfg.configTargets[i] for i in cfgPrj.targetIndexes]
# Create direct pointers for ConfigTarget indices
# takes: Config and ConfigTarget
def linkConfigTarget(cfg, cfgTarget):
    """Resolve cfgTarget's directory/project indices, then link the
    sources, source groups, and compile groups of its loaded Target.

    NOTE(review): assumes cfgTarget.target is not None here — targets
    without a jsonFile would raise AttributeError; confirm with callers.
    """
    if cfgTarget.directoryIndex == -1:
        cfgTarget.directory = None
    else:
        cfgTarget.directory = cfg.directories[cfgTarget.directoryIndex]
    if cfgTarget.projectIndex == -1:
        cfgTarget.project = None
    else:
        cfgTarget.project = cfg.projects[cfgTarget.projectIndex]

    # and link target's sources and source groups
    for ts in cfgTarget.target.sources:
        linkTargetSource(cfgTarget.target, ts)
    for tsg in cfgTarget.target.sourceGroups:
        linkTargetSourceGroup(cfgTarget.target, tsg)
    for tcg in cfgTarget.target.compileGroups:
        linkTargetCompileGroup(cfgTarget.target, tcg)
# Create direct pointers for TargetSource indices
# takes: Target and TargetSource
def linkTargetSource(target, targetSrc):
    """Resolve targetSrc's group indices into direct object references."""
    targetSrc.compileGroup = (None if targetSrc.compileGroupIndex == -1
                              else target.compileGroups[targetSrc.compileGroupIndex])
    targetSrc.sourceGroup = (None if targetSrc.sourceGroupIndex == -1
                             else target.sourceGroups[targetSrc.sourceGroupIndex])
# Create direct pointers for TargetSourceGroup indices
# takes: Target and TargetSourceGroup
def linkTargetSourceGroup(target, targetSrcGrp):
    """Resolve the group's source indices into TargetSource references."""
    targetSrcGrp.sources = [target.sources[i] for i in targetSrcGrp.sourceIndexes]
# Create direct pointers for TargetCompileGroup indices
# takes: Target and TargetCompileGroup
def linkTargetCompileGroup(target, targetCmpGrp):
    """Resolve the group's source indices into TargetSource references."""
    targetCmpGrp.sources = [target.sources[i] for i in targetCmpGrp.sourceIndexes]

View file

@ -0,0 +1,231 @@
# Copyright (c) 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
from enum import Enum
# DocumentConfig contains settings used to configure how the SPDX Document
# should be built.
class DocumentConfig:
    """Settings controlling how one SPDX Document is built."""

    def __init__(self):
        super().__init__()
        # name of document
        self.name = ""
        # namespace for this document
        self.namespace = ""
        # standardized DocumentRef- (including that prefix) that the other
        # docs will use to refer to this one
        self.docRefID = ""
# Document contains the data assembled by the SBOM builder, to be used to
# create the actual SPDX Document.
class Document:
    """Working data for one SPDX Document being assembled."""

    # initialize with a DocumentConfig
    def __init__(self, cfg):
        super().__init__()

        # configuration - DocumentConfig
        self.cfg = cfg
        # dict of SPDX ID => Package
        self.pkgs = {}
        # relationships "owned" by this Document, _not_ those "owned" by its
        # Packages or Files; will likely be just DESCRIBES
        self.relationships = []
        # dict of filename (ignoring its directory) => number of times it has
        # been seen while adding files to this Document; used to calculate
        # useful SPDX IDs
        self.timesSeen = {}
        # dict of absolute path on disk => File
        self.fileLinks = {}
        # set of other Documents that our elements' Relationships refer to
        self.externalDocuments = set()
        # set of LicenseRef- custom licenses to be declared
        # may or may not include "LicenseRef-" license prefix
        self.customLicenseIDs = set()
        # this Document's SHA1 hash, filled in _after_ the Document has been
        # written to disk, so that others can refer to it
        self.myDocSHA1 = ""
# PackageConfig contains settings used to configure how an SPDX Package should
# be built.
class PackageConfig:
    """Settings controlling how one SPDX Package is built."""

    def __init__(self):
        super().__init__()
        # package name
        self.name = ""
        # SPDX ID, including "SPDXRef-"
        self.spdxID = ""
        # the Package's declared license
        self.declaredLicense = "NOASSERTION"
        # the Package's copyright text
        self.copyrightText = "NOASSERTION"
        # absolute path of the "root" directory on disk, to be used as the
        # base directory from which this Package's Files will calculate their
        # relative paths
        # may want to note this in a Package comment field
        self.relativeBaseDir = ""
# Package contains the data assembled by the SBOM builder, to be used to
# create the actual SPDX Package.
class Package:
    """Working data for one SPDX Package being assembled."""

    # initialize with:
    # 1) PackageConfig
    # 2) the Document that owns this Package
    def __init__(self, cfg, doc):
        super().__init__()

        # configuration - PackageConfig
        self.cfg = cfg
        # Document that owns this Package
        self.doc = doc
        # verification code, calculated per section 3.9 of SPDX spec v2.2
        self.verificationCode = ""
        # concluded license for this Package, if
        # cfg.shouldConcludePackageLicense == True; NOASSERTION otherwise
        self.concludedLicense = "NOASSERTION"
        # list of licenses found in this Package's Files
        self.licenseInfoFromFiles = []
        # Files in this Package
        # dict of SPDX ID => File
        self.files = {}
        # Relationships "owned" by this Package (e.g., this Package is left
        # side)
        self.rlns = []
        # If this Package was a target, which File was its main build product?
        self.targetBuildFile = None
# RelationshipDataElementType defines whether a RelationshipData element
# (e.g., the "owner" or the "other" element) is a File, a target Package,
# a Package's ID (as other only, and only where owner type is DOCUMENT),
# or the SPDX document itself (as owner only).
class RelationshipDataElementType(Enum):
    """Kind of element on either side of a pre-analysis relationship."""
    UNKNOWN = 0
    FILENAME = 1
    TARGETNAME = 2
    PACKAGEID = 3
    DOCUMENT = 4
# RelationshipData contains the pre-analysis data about a relationship between
# Files and/or Packages/targets. It is eventually parsed into a corresponding
# Relationship after we have organized the SPDX Package and File data.
class RelationshipData:
    """Pre-analysis data about a relationship between Files and/or
    Packages/targets.

    Parsed into a corresponding Relationship once the SPDX Package and
    File data has been organized.
    """

    def __init__(self):
        super(RelationshipData, self).__init__()
        # ----- "owner" element (left side of the Relationship) -----
        # whether the owner is a filename, a target name (a Package in
        # the build doc), or the SPDX Document itself
        self.ownerType = RelationshipDataElementType.UNKNOWN
        # absolute file path of owner (when ownerType is FILENAME)
        self.ownerFileAbspath = ""
        # target name of owner (when ownerType is TARGETNAME)
        self.ownerTargetName = ""
        # owning SPDX Document (when ownerType is DOCUMENT)
        self.ownerDocument = None
        # ----- "other" element (right side of the Relationship) -----
        # whether the other element is a filename, target name, or
        # Package ID
        self.otherType = RelationshipDataElementType.UNKNOWN
        # absolute file path of other (when otherType is FILENAME)
        self.otherFileAbspath = ""
        # target name of other (when otherType is TARGETNAME)
        self.otherTargetName = ""
        # other Package's ID (when ownerType is DOCUMENT and otherType
        # is PACKAGEID)
        self.otherPackageID = ""
        # relationship type string, per SPDX spec v2.2 section 7.1
        self.rlnType = ""
# Relationship contains the post-analysis, processed data about a relationship
# in a form suitable for creating the actual SPDX Relationship in a particular
# Document's context.
class Relationship:
    """Post-analysis, processed relationship data, ready to become an
    actual SPDX Relationship in a particular Document's context."""

    def __init__(self):
        super(Relationship, self).__init__()
        # SPDX ID of the left side, including "SPDXRef-" as well as a
        # "DocumentRef-" prefix where needed
        self.refA = ""
        # SPDX ID of the right side, same prefix rules as refA
        self.refB = ""
        # relationship type string, per SPDX spec v2.2 section 7.1
        self.rlnType = ""
# File contains the data needed to create a File element in the context of a
# particular SPDX Document and Package.
class File:
    """Data needed to create a File element in the context of a
    particular SPDX Document and Package."""

    def __init__(self, doc, pkg):
        """
        Arguments:
        - doc: Document that owns this File
        - pkg: Package that owns this File
        """
        super(File, self).__init__()
        # owning Document and Package
        self.doc = doc
        self.pkg = pkg
        # absolute path to this file on disk
        self.abspath = ""
        # path relative to the owning Package's cfg.relativeBaseDir
        self.relpath = ""
        # SPDX ID, including the "SPDXRef-" prefix
        self.spdxID = ""
        # SHA1 hash (always calculated)
        self.sha1 = ""
        # SHA256 hash; empty unless pkg.cfg.doSHA256 is enabled
        self.sha256 = ""
        # MD5 hash; empty unless pkg.cfg.doMD5 is enabled
        self.md5 = ""
        # concluded license; "NOASSERTION" unless
        # pkg.cfg.shouldConcludeFileLicenses is enabled
        self.concludedLicense = "NOASSERTION"
        # license identifiers detected in the file
        self.licenseInfoInFile = []
        # copyright text
        self.copyrightText = "NOASSERTION"
        # Relationships where this File is the left-hand side
        self.rlns = []

View file

@ -0,0 +1,65 @@
# Copyright (c) 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
from subprocess import run, PIPE
from west import log
# Given a path to the applicable C compiler, a C source file, and the
# corresponding TargetCompileGroup, determine which include files would
# be used.
# Arguments:
# 1) path to applicable C compiler
# 2) C source file being analyzed
# 3) TargetCompileGroup for the current target
# Returns: list of paths to include files, or [] on error or empty findings.
def getCIncludes(compilerPath, srcFile, tcg):
    """Determine which include files a C source file would use.

    Arguments:
    - compilerPath: path to the applicable C compiler
    - srcFile: C source file being analyzed
    - tcg: TargetCompileGroup for the current target
    Returns: list of paths to include files, or [] on error or empty
    findings.
    """
    log.dbg(f"  - getting includes for {srcFile}")
    # assemble the invocation: preprocess only (-E) and report the
    # include hierarchy (-H)
    cmd = [compilerPath, "-E", "-H"]
    cmd.extend(fr for fr in tcg.compileCommandFragments if fr.strip())
    cmd.extend("-I" + incl.path for incl in tcg.includes)
    cmd.extend("-D" + d.define for d in tcg.defines)
    cmd.append(srcFile)
    cp = run(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    if cp.returncode != 0:
        log.dbg(f"  - calling {compilerPath} failed with error code {cp.returncode}")
        return []
    # the -H report is written to stderr, not stdout
    return extractIncludes(cp.stderr)
# Parse the response from the CC -E -H call, to extract the include file paths
def extractIncludes(resp):
    """Parse the output of the `cc -E -H` call and extract include paths.

    Lines of interest start with one or more periods, then a space, then
    the include file path, e.g.:
        .... /home/steve/.../zephyr/include/kernel.h
    The number of periods indicates the depth of nesting (for
    transitively-included files); we ignore the depth and treat every
    include as tied to the corresponding source file. Everything after
    the "Multiple include guards may be useful for:" line is ignored.

    Arguments:
    - resp: stderr text produced by the compiler's -H option
    Returns: sorted list of unique include file paths.
    """
    includes = set()
    for rline in resp.splitlines():
        if rline.startswith("Multiple include guards"):
            break
        # bug fix: use startswith() instead of rline[0], which raised
        # IndexError whenever the compiler output contained a blank line
        if rline.startswith("."):
            sline = rline.split(" ", maxsplit=1)
            if len(sline) != 2:
                continue
            includes.add(sline[1])
    return sorted(includes)

View file

@ -0,0 +1,509 @@
# Copyright (c) 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
# from https://github.com/spdx/license-list-data/
LICENSE_LIST_VERSION = "3.12"
LICENSES = [
"0BSD",
"389-exception",
"AAL",
"ADSL",
"AFL-1.1",
"AFL-1.2",
"AFL-2.0",
"AFL-2.1",
"AFL-3.0",
"AGPL-1.0",
"AGPL-1.0-only",
"AGPL-1.0-or-later",
"AGPL-3.0",
"AGPL-3.0-only",
"AGPL-3.0-or-later",
"AMDPLPA",
"AML",
"AMPAS",
"ANTLR-PD",
"ANTLR-PD-fallback",
"APAFML",
"APL-1.0",
"APSL-1.0",
"APSL-1.1",
"APSL-1.2",
"APSL-2.0",
"Abstyles",
"Adobe-2006",
"Adobe-Glyph",
"Afmparse",
"Aladdin",
"Apache-1.0",
"Apache-1.1",
"Apache-2.0",
"Artistic-1.0",
"Artistic-1.0-Perl",
"Artistic-1.0-cl8",
"Artistic-2.0",
"Autoconf-exception-2.0",
"Autoconf-exception-3.0",
"BSD-1-Clause",
"BSD-2-Clause",
"BSD-2-Clause-FreeBSD",
"BSD-2-Clause-NetBSD",
"BSD-2-Clause-Patent",
"BSD-2-Clause-Views",
"BSD-3-Clause",
"BSD-3-Clause-Attribution",
"BSD-3-Clause-Clear",
"BSD-3-Clause-LBNL",
"BSD-3-Clause-Modification",
"BSD-3-Clause-No-Nuclear-License",
"BSD-3-Clause-No-Nuclear-License-2014",
"BSD-3-Clause-No-Nuclear-Warranty",
"BSD-3-Clause-Open-MPI",
"BSD-4-Clause",
"BSD-4-Clause-Shortened",
"BSD-4-Clause-UC",
"BSD-Protection",
"BSD-Source-Code",
"BSL-1.0",
"BUSL-1.1",
"Bahyph",
"Barr",
"Beerware",
"Bison-exception-2.2",
"BitTorrent-1.0",
"BitTorrent-1.1",
"BlueOak-1.0.0",
"Bootloader-exception",
"Borceux",
"C-UDA-1.0",
"CAL-1.0",
"CAL-1.0-Combined-Work-Exception",
"CATOSL-1.1",
"CC-BY-1.0",
"CC-BY-2.0",
"CC-BY-2.5",
"CC-BY-3.0",
"CC-BY-3.0-AT",
"CC-BY-3.0-US",
"CC-BY-4.0",
"CC-BY-NC-1.0",
"CC-BY-NC-2.0",
"CC-BY-NC-2.5",
"CC-BY-NC-3.0",
"CC-BY-NC-4.0",
"CC-BY-NC-ND-1.0",
"CC-BY-NC-ND-2.0",
"CC-BY-NC-ND-2.5",
"CC-BY-NC-ND-3.0",
"CC-BY-NC-ND-3.0-IGO",
"CC-BY-NC-ND-4.0",
"CC-BY-NC-SA-1.0",
"CC-BY-NC-SA-2.0",
"CC-BY-NC-SA-2.5",
"CC-BY-NC-SA-3.0",
"CC-BY-NC-SA-4.0",
"CC-BY-ND-1.0",
"CC-BY-ND-2.0",
"CC-BY-ND-2.5",
"CC-BY-ND-3.0",
"CC-BY-ND-4.0",
"CC-BY-SA-1.0",
"CC-BY-SA-2.0",
"CC-BY-SA-2.0-UK",
"CC-BY-SA-2.1-JP",
"CC-BY-SA-2.5",
"CC-BY-SA-3.0",
"CC-BY-SA-3.0-AT",
"CC-BY-SA-4.0",
"CC-PDDC",
"CC0-1.0",
"CDDL-1.0",
"CDDL-1.1",
"CDLA-Permissive-1.0",
"CDLA-Sharing-1.0",
"CECILL-1.0",
"CECILL-1.1",
"CECILL-2.0",
"CECILL-2.1",
"CECILL-B",
"CECILL-C",
"CERN-OHL-1.1",
"CERN-OHL-1.2",
"CERN-OHL-P-2.0",
"CERN-OHL-S-2.0",
"CERN-OHL-W-2.0",
"CLISP-exception-2.0",
"CNRI-Jython",
"CNRI-Python",
"CNRI-Python-GPL-Compatible",
"CPAL-1.0",
"CPL-1.0",
"CPOL-1.02",
"CUA-OPL-1.0",
"Caldera",
"ClArtistic",
"Classpath-exception-2.0",
"Condor-1.1",
"Crossword",
"CrystalStacker",
"Cube",
"D-FSL-1.0",
"DOC",
"DRL-1.0",
"DSDP",
"DigiRule-FOSS-exception",
"Dotseqn",
"ECL-1.0",
"ECL-2.0",
"EFL-1.0",
"EFL-2.0",
"EPICS",
"EPL-1.0",
"EPL-2.0",
"EUDatagrid",
"EUPL-1.0",
"EUPL-1.1",
"EUPL-1.2",
"Entessa",
"ErlPL-1.1",
"Eurosym",
"FLTK-exception",
"FSFAP",
"FSFUL",
"FSFULLR",
"FTL",
"Fair",
"Fawkes-Runtime-exception",
"Font-exception-2.0",
"Frameworx-1.0",
"FreeBSD-DOC",
"FreeImage",
"GCC-exception-2.0",
"GCC-exception-3.1",
"GD",
"GFDL-1.1",
"GFDL-1.1-invariants-only",
"GFDL-1.1-invariants-or-later",
"GFDL-1.1-no-invariants-only",
"GFDL-1.1-no-invariants-or-later",
"GFDL-1.1-only",
"GFDL-1.1-or-later",
"GFDL-1.2",
"GFDL-1.2-invariants-only",
"GFDL-1.2-invariants-or-later",
"GFDL-1.2-no-invariants-only",
"GFDL-1.2-no-invariants-or-later",
"GFDL-1.2-only",
"GFDL-1.2-or-later",
"GFDL-1.3",
"GFDL-1.3-invariants-only",
"GFDL-1.3-invariants-or-later",
"GFDL-1.3-no-invariants-only",
"GFDL-1.3-no-invariants-or-later",
"GFDL-1.3-only",
"GFDL-1.3-or-later",
"GL2PS",
"GLWTPL",
"GPL-1.0",
"GPL-1.0+",
"GPL-1.0-only",
"GPL-1.0-or-later",
"GPL-2.0",
"GPL-2.0+",
"GPL-2.0-only",
"GPL-2.0-or-later",
"GPL-2.0-with-GCC-exception",
"GPL-2.0-with-autoconf-exception",
"GPL-2.0-with-bison-exception",
"GPL-2.0-with-classpath-exception",
"GPL-2.0-with-font-exception",
"GPL-3.0",
"GPL-3.0+",
"GPL-3.0-linking-exception",
"GPL-3.0-linking-source-exception",
"GPL-3.0-only",
"GPL-3.0-or-later",
"GPL-3.0-with-GCC-exception",
"GPL-3.0-with-autoconf-exception",
"GPL-CC-1.0",
"Giftware",
"Glide",
"Glulxe",
"HPND",
"HPND-sell-variant",
"HTMLTIDY",
"HaskellReport",
"Hippocratic-2.1",
"IBM-pibs",
"ICU",
"IJG",
"IPA",
"IPL-1.0",
"ISC",
"ImageMagick",
"Imlib2",
"Info-ZIP",
"Intel",
"Intel-ACPI",
"Interbase-1.0",
"JPNIC",
"JSON",
"JasPer-2.0",
"LAL-1.2",
"LAL-1.3",
"LGPL-2.0",
"LGPL-2.0+",
"LGPL-2.0-only",
"LGPL-2.0-or-later",
"LGPL-2.1",
"LGPL-2.1+",
"LGPL-2.1-only",
"LGPL-2.1-or-later",
"LGPL-3.0",
"LGPL-3.0+",
"LGPL-3.0-linking-exception",
"LGPL-3.0-only",
"LGPL-3.0-or-later",
"LGPLLR",
"LLVM-exception",
"LPL-1.0",
"LPL-1.02",
"LPPL-1.0",
"LPPL-1.1",
"LPPL-1.2",
"LPPL-1.3a",
"LPPL-1.3c",
"LZMA-exception",
"Latex2e",
"Leptonica",
"LiLiQ-P-1.1",
"LiLiQ-R-1.1",
"LiLiQ-Rplus-1.1",
"Libpng",
"Libtool-exception",
"Linux-OpenIB",
"Linux-syscall-note",
"MIT",
"MIT-0",
"MIT-CMU",
"MIT-Modern-Variant",
"MIT-advertising",
"MIT-enna",
"MIT-feh",
"MIT-open-group",
"MITNFA",
"MPL-1.0",
"MPL-1.1",
"MPL-2.0",
"MPL-2.0-no-copyleft-exception",
"MS-PL",
"MS-RL",
"MTLL",
"MakeIndex",
"MirOS",
"Motosoto",
"MulanPSL-1.0",
"MulanPSL-2.0",
"Multics",
"Mup",
"NAIST-2003",
"NASA-1.3",
"NBPL-1.0",
"NCGL-UK-2.0",
"NCSA",
"NGPL",
"NIST-PD",
"NIST-PD-fallback",
"NLOD-1.0",
"NLPL",
"NOSL",
"NPL-1.0",
"NPL-1.1",
"NPOSL-3.0",
"NRL",
"NTP",
"NTP-0",
"Naumen",
"Net-SNMP",
"NetCDF",
"Newsletr",
"Nokia",
"Nokia-Qt-exception-1.1",
"Noweb",
"Nunit",
"O-UDA-1.0",
"OCCT-PL",
"OCCT-exception-1.0",
"OCLC-2.0",
"OCaml-LGPL-linking-exception",
"ODC-By-1.0",
"ODbL-1.0",
"OFL-1.0",
"OFL-1.0-RFN",
"OFL-1.0-no-RFN",
"OFL-1.1",
"OFL-1.1-RFN",
"OFL-1.1-no-RFN",
"OGC-1.0",
"OGDL-Taiwan-1.0",
"OGL-Canada-2.0",
"OGL-UK-1.0",
"OGL-UK-2.0",
"OGL-UK-3.0",
"OGTSL",
"OLDAP-1.1",
"OLDAP-1.2",
"OLDAP-1.3",
"OLDAP-1.4",
"OLDAP-2.0",
"OLDAP-2.0.1",
"OLDAP-2.1",
"OLDAP-2.2",
"OLDAP-2.2.1",
"OLDAP-2.2.2",
"OLDAP-2.3",
"OLDAP-2.4",
"OLDAP-2.5",
"OLDAP-2.6",
"OLDAP-2.7",
"OLDAP-2.8",
"OML",
"OPL-1.0",
"OSET-PL-2.1",
"OSL-1.0",
"OSL-1.1",
"OSL-2.0",
"OSL-2.1",
"OSL-3.0",
"OpenJDK-assembly-exception-1.0",
"OpenSSL",
"PDDL-1.0",
"PHP-3.0",
"PHP-3.01",
"PS-or-PDF-font-exception-20170817",
"PSF-2.0",
"Parity-6.0.0",
"Parity-7.0.0",
"Plexus",
"PolyForm-Noncommercial-1.0.0",
"PolyForm-Small-Business-1.0.0",
"PostgreSQL",
"Python-2.0",
"QPL-1.0",
"Qhull",
"Qt-GPL-exception-1.0",
"Qt-LGPL-exception-1.1",
"Qwt-exception-1.0",
"RHeCos-1.1",
"RPL-1.1",
"RPL-1.5",
"RPSL-1.0",
"RSA-MD",
"RSCPL",
"Rdisc",
"Ruby",
"SAX-PD",
"SCEA",
"SGI-B-1.0",
"SGI-B-1.1",
"SGI-B-2.0",
"SHL-0.5",
"SHL-0.51",
"SHL-2.0",
"SHL-2.1",
"SISSL",
"SISSL-1.2",
"SMLNJ",
"SMPPL",
"SNIA",
"SPL-1.0",
"SSH-OpenSSH",
"SSH-short",
"SSPL-1.0",
"SWL",
"Saxpath",
"Sendmail",
"Sendmail-8.23",
"SimPL-2.0",
"Sleepycat",
"Spencer-86",
"Spencer-94",
"Spencer-99",
"StandardML-NJ",
"SugarCRM-1.1.3",
"Swift-exception",
"TAPR-OHL-1.0",
"TCL",
"TCP-wrappers",
"TMate",
"TORQUE-1.1",
"TOSL",
"TU-Berlin-1.0",
"TU-Berlin-2.0",
"UCL-1.0",
"UPL-1.0",
"Unicode-DFS-2015",
"Unicode-DFS-2016",
"Unicode-TOU",
"Universal-FOSS-exception-1.0",
"Unlicense",
"VOSTROM",
"VSL-1.0",
"Vim",
"W3C",
"W3C-19980720",
"W3C-20150513",
"WTFPL",
"Watcom-1.0",
"Wsuipa",
"WxWindows-exception-3.1",
"X11",
"XFree86-1.1",
"XSkat",
"Xerox",
"Xnet",
"YPL-1.0",
"YPL-1.1",
"ZPL-1.1",
"ZPL-2.0",
"ZPL-2.1",
"Zed",
"Zend-2.0",
"Zimbra-1.3",
"Zimbra-1.4",
"Zlib",
"blessing",
"bzip2-1.0.5",
"bzip2-1.0.6",
"copyleft-next-0.3.0",
"copyleft-next-0.3.1",
"curl",
"diffmark",
"dvipdfm",
"eCos-2.0",
"eCos-exception-2.0",
"eGenix",
"etalab-2.0",
"freertos-exception-2.0",
"gSOAP-1.3b",
"gnu-javamail-exception",
"gnuplot",
"i2p-gpl-java-exception",
"iMatix",
"libpng-2.0",
"libselinux-1.0",
"libtiff",
"mif-exception",
"mpich2",
"openvpn-openssl-exception",
"psfrag",
"psutils",
"u-boot-exception-2.0",
"wxWindows",
"xinetd",
"xpp",
"zlib-acknowledgement",
]

View file

@ -0,0 +1,123 @@
# Copyright (c) 2020, 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
import os
from west import log
from zspdx.walker import WalkerConfig, Walker
from zspdx.scanner import ScannerConfig, scanDocument
from zspdx.writer import writeSPDX
# SBOMConfig contains settings that will be passed along to the various
# SBOM maker subcomponents.
class SBOMConfig:
    """Settings passed along to the various SBOM maker subcomponents."""

    def __init__(self):
        super(SBOMConfig, self).__init__()
        # prefix for Document namespaces; should not end with "/"
        self.namespacePrefix = ""
        # build directory location
        self.buildDir = ""
        # output directory for the generated SPDX documents
        self.spdxDir = ""
        # whether to also analyze for included header files
        self.analyzeIncludes = False
        # whether to also generate an SPDX document for the SDK
        self.includeSDK = False
# create Cmake file-based API directories and query file
# Arguments:
# 1) build_dir: build directory
def setupCmakeQuery(build_dir):
    """Create the CMake file-based API query directory and query file.

    Arguments:
    - build_dir: build directory
    Returns: True on success, False on failure.
    """
    # check that query dir exists as a directory, or else create it
    cmakeApiDirPath = os.path.join(build_dir, ".cmake", "api", "v1", "query")
    if os.path.exists(cmakeApiDirPath):
        if not os.path.isdir(cmakeApiDirPath):
            log.err(f'cmake api query directory {cmakeApiDirPath} exists and is not a directory')
            return False
        # directory exists, we're good
    else:
        # create the directory
        os.makedirs(cmakeApiDirPath, exist_ok=False)

    # check that codemodel-v2 exists as a file, or else create it
    queryFilePath = os.path.join(cmakeApiDirPath, "codemodel-v2")
    if os.path.exists(queryFilePath):
        if not os.path.isfile(queryFilePath):
            # bug fix: message previously said "is not a directory",
            # copy-pasted from the directory check above
            log.err(f'cmake api query file {queryFilePath} exists and is not a file')
            return False
        # file exists, we're good
        return True
    # file doesn't exist; create an empty one. open()/close() is used
    # instead of os.mknod(), which requires elevated privileges on some
    # platforms (e.g. macOS) and does not exist on Windows.
    with open(queryFilePath, "w"):
        pass
    return True
# main entry point for SBOM maker
# Arguments:
# 1) cfg: SBOMConfig
def makeSPDX(cfg):
    """Main entry point for the SBOM maker.

    Arguments:
    - cfg: SBOMConfig
    Returns: True on success, False on failure.
    """
    # report any odd configuration settings
    if cfg.analyzeIncludes and not cfg.includeSDK:
        log.wrn("config: requested to analyze includes but not to generate SDK SPDX document;")
        log.wrn("config: will proceed but will discard detected includes for SDK header files")

    # set up walker configuration
    walkerCfg = WalkerConfig()
    walkerCfg.namespacePrefix = cfg.namespacePrefix
    walkerCfg.buildDir = cfg.buildDir
    walkerCfg.analyzeIncludes = cfg.analyzeIncludes
    walkerCfg.includeSDK = cfg.includeSDK

    # make and run the walker
    w = Walker(walkerCfg)
    retval = w.makeDocuments()
    if not retval:
        log.err("SPDX walker failed; bailing")
        return False

    # set up scanner configuration and scan each document from walker
    scannerCfg = ScannerConfig()
    if cfg.includeSDK:
        scanDocument(scannerCfg, w.docSDK)
    scanDocument(scannerCfg, w.docApp)
    scanDocument(scannerCfg, w.docZephyr)
    scanDocument(scannerCfg, w.docBuild)

    # write each document, in this particular order so that the
    # hashes for external references are calculated

    # write SDK document, if we made one
    if cfg.includeSDK:
        retval = writeSPDX(os.path.join(cfg.spdxDir, "sdk.spdx"), w.docSDK)
        if not retval:
            log.err("SPDX writer failed for SDK document; bailing")
            return False

    # write app document
    retval = writeSPDX(os.path.join(cfg.spdxDir, "app.spdx"), w.docApp)
    if not retval:
        log.err("SPDX writer failed for app document; bailing")
        return False

    # write zephyr document
    # bug fix: capture the return value so the check below actually
    # tests this write (it previously re-tested the stale retval)
    retval = writeSPDX(os.path.join(cfg.spdxDir, "zephyr.spdx"), w.docZephyr)
    if not retval:
        log.err("SPDX writer failed for zephyr document; bailing")
        return False

    # write build document
    # bug fix: same stale-retval problem as the zephyr document above
    retval = writeSPDX(os.path.join(cfg.spdxDir, "build.spdx"), w.docBuild)
    if not retval:
        log.err("SPDX writer failed for build document; bailing")
        return False

    return True

View file

@ -0,0 +1,218 @@
# Copyright (c) 2020, 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
import hashlib
import os
import re
from west import log
from zspdx.licenses import LICENSES
from zspdx.util import getHashes
# ScannerConfig contains settings used to configure how the SPDX
# Document scanning should occur.
class ScannerConfig:
    """Settings controlling how SPDX Document scanning is performed."""

    def __init__(self):
        super(ScannerConfig, self).__init__()
        # auto-conclude each Package's license from its Files' licenses?
        self.shouldConcludePackageLicense = True
        # auto-conclude each File's license from its detected license(s)?
        self.shouldConcludeFileLicenses = True
        # how many lines to scan for SPDX-License-Identifier
        # (0 = scan the whole file); defaults to 20
        self.numLinesScanned = 20
        # calculate SHA256 hashes for each Package's Files?
        # (SHA1 hashes are always calculated; mandatory per SPDX 2.2)
        self.doSHA256 = True
        # calculate MD5 hashes for each Package's Files?
        self.doMD5 = False
def parseLineForExpression(line):
    """Return parsed SPDX expression if tag found in line, or None otherwise."""
    _, found, rest = line.partition("SPDX-License-Identifier:")
    # no tag, or tag with nothing after it
    if not rest:
        return None
    # strip away trailing comment marks ("*/", "//") and whitespace
    return rest.strip().rstrip("/*").strip()
def getExpressionData(filePath, numLines):
    """
    Scan the specified file for the first SPDX-License-Identifier:
    tag in the file.
    Arguments:
    - filePath: path to file to scan.
    - numLines: number of lines to scan for an expression before
                giving up. If 0, will scan the entire file.
    Returns: parsed expression if found; None if not found.
    """
    log.dbg(f"  - getting licenses for {filePath}")
    with open(filePath, "r") as f:
        try:
            for lineno, line in enumerate(f, start=1):
                # stop after the requested number of lines
                # (numLines == 0 means scan the whole file)
                if 0 < numLines < lineno:
                    break
                expression = parseLineForExpression(line)
                if expression is not None:
                    return expression
        except UnicodeDecodeError:
            # file contains invalid UTF-8 content; treat as no match
            return None
    # scanned everything requested without finding an expression
    return None
def splitExpression(expression):
    """
    Parse a license expression into its constituent identifiers.
    Arguments:
    - expression: SPDX license expression
    Returns: array of split identifiers
    """
    # drop parens and plus signs, then replace the word operators
    # (AND / OR / WITH, any case) with plain spaces
    noParens = re.sub(r'\(|\)|\+', "", expression, flags=re.IGNORECASE)
    noOperators = re.sub(r' AND | OR | WITH ', " ", noParens, flags=re.IGNORECASE)
    # whatever remains, space-separated, are the identifiers
    return sorted(noOperators.split(" "))
def calculateVerificationCode(pkg):
    """
    Calculate the SPDX Package Verification Code for all files in the
    package, per SPDX spec v2.2 section 3.9: the SHA1 digest of the
    concatenation of the files' sorted SHA1 hashes.
    Arguments:
    - pkg: Package
    Returns: verification code as string
    """
    sortedHashes = sorted(f.sha1 for f in pkg.files.values())
    digest = hashlib.sha1("".join(sortedHashes).encode('utf-8'))
    return digest.hexdigest()
def checkLicenseValid(lic, doc):
    """
    Check whether this license ID is a valid SPDX license ID, and add it
    to the custom license IDs set for this Document if it isn't.
    Arguments:
    - lic: detected license ID
    - doc: Document
    """
    if lic in LICENSES:
        return
    doc.customLicenseIDs.add(lic)
def getPackageLicenses(pkg):
    """
    Extract lists of all concluded and infoInFile licenses seen.
    Arguments:
    - pkg: Package
    Returns: sorted list of concluded license exprs,
             sorted list of infoInFile ID's
    """
    licsConcluded = set()
    licsFromFiles = set()
    for f in pkg.files.values():
        licsConcluded.add(f.concludedLicense)
        licsFromFiles.update(f.licenseInfoInFile)
    return sorted(licsConcluded), sorted(licsFromFiles)
def normalizeExpression(licsConcluded):
    """
    Combine an array of license expressions into one AND'd expression,
    adding parens where needed.
    Arguments:
    - licsConcluded: array of license expressions
    Returns: string with single AND'd expression.
    """
    # return appropriate for simple cases
    if len(licsConcluded) == 0:
        return "NOASSERTION"
    if len(licsConcluded) == 1:
        return licsConcluded[0]
    # more than one, so we'll need to combine them
    # iff an expression has spaces, it needs parens
    revised = []
    for lic in licsConcluded:
        # NONE / NOASSERTION add nothing to an AND'd expression
        if lic in ["NONE", "NOASSERTION"]:
            continue
        if " " in lic:
            revised.append(f"({lic})")
        else:
            revised.append(lic)
    # bug fix: if everything was NONE / NOASSERTION, the join used to
    # produce an empty string; return NOASSERTION instead
    if not revised:
        return "NOASSERTION"
    return " AND ".join(revised)
def scanDocument(cfg, doc):
    """
    Scan for licenses and calculate hashes for all Files and Packages
    in this Document.
    Arguments:
    - cfg: ScannerConfig
    - doc: Document
    """
    for pkg in doc.pkgs.values():
        log.inf(f"scanning files in package {pkg.cfg.name} in document {doc.cfg.name}")
        # first, gather File data for this package
        for f in pkg.files.values():
            # set relpath based on package's relativeBaseDir
            f.relpath = os.path.relpath(f.abspath, pkg.cfg.relativeBaseDir)
            # get hashes for file
            hashes = getHashes(f.abspath)
            if not hashes:
                # bug fix: this warning was missing its f-string prefix,
                # so "{f.abspath}" was printed literally
                log.wrn(f"unable to get hashes for file {f.abspath}; skipping")
                continue
            hSHA1, hSHA256, hMD5 = hashes
            f.sha1 = hSHA1
            if cfg.doSHA256:
                f.sha256 = hSHA256
            if cfg.doMD5:
                f.md5 = hMD5
            # get licenses for file
            expression = getExpressionData(f.abspath, cfg.numLinesScanned)
            if expression:
                if cfg.shouldConcludeFileLicenses:
                    f.concludedLicense = expression
                f.licenseInfoInFile = splitExpression(expression)
                # flag any custom (non-SPDX-list) license IDs on the doc
                for lic in f.licenseInfoInFile:
                    checkLicenseValid(lic, doc)
        # now, assemble the Package data
        licsConcluded, licsFromFiles = getPackageLicenses(pkg)
        if cfg.shouldConcludePackageLicense:
            pkg.concludedLicense = normalizeExpression(licsConcluded)
        pkg.licenseInfoFromFiles = licsFromFiles
        pkg.verificationCode = calculateVerificationCode(pkg)

View file

@ -0,0 +1,61 @@
# Copyright (c) 2020, 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
import re
def getSPDXIDSafeCharacter(c):
    """
    Convert a character to an SPDX-ID-safe character.
    Arguments:
    - c: character to test
    Returns: c if it is SPDX-ID-safe (letter, number, '-' or '.');
             '-' otherwise
    """
    isSafe = c.isalpha() or c.isdigit() or c in ("-", ".")
    return c if isSafe else "-"
def convertToSPDXIDSafe(s):
    """
    Convert a filename or other string to only SPDX-ID-safe characters.
    Note that a separate check (such as in getUniqueFileID, below) will
    need to be used to confirm that this is still a unique identifier,
    after conversion.
    Arguments:
    - s: string to be converted.
    Returns: string with all non-safe characters replaced with dashes.
    """
    # letters, digits, '-' and '.' are safe; anything else becomes '-'
    return "".join(
        c if (c.isalpha() or c.isdigit() or c in "-.") else "-"
        for c in s
    )
def getUniqueFileID(filenameOnly, timesSeen):
    """
    Find an SPDX ID that is unique among others seen so far.
    Arguments:
    - filenameOnly: filename only (directories omitted) seeking ID.
    - timesSeen: dict of all filename-only to number of times seen.
    Returns: unique SPDX ID; updates timesSeen to include it.
    """
    converted = convertToSPDXIDSafe(filenameOnly)
    spdxID = f"SPDXRef-File-{converted}"
    seen = timesSeen.get(converted, 0) + 1
    if seen > 1:
        # duplicate filename: disambiguate by appending the count
        spdxID += f"-{seen}"
    elif re.search(r"-\d+$", converted):
        # first sighting, but the name itself already ends in
        # "-<number>"; append "-1" so it cannot collide with a counter
        # appended later to a similarly-named file
        spdxID += "-1"
    timesSeen[converted] = seen
    return spdxID

View file

@ -0,0 +1,33 @@
# Copyright (c) 2020, 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
import hashlib
from west import log
def getHashes(filePath):
    """
    Scan for and return hashes.
    Arguments:
    - filePath: path to file to scan.
    Returns: tuple of (SHA1, SHA256, MD5) hashes for filePath, or
             None if file is not found / not readable.
    """
    hSHA1 = hashlib.sha1()
    hSHA256 = hashlib.sha256()
    hMD5 = hashlib.md5()
    log.dbg(f"  - getting hashes for {filePath}")
    try:
        with open(filePath, 'rb') as f:
            # read in fixed-size chunks rather than slurping the whole
            # file, so large build artifacts don't spike memory usage
            for buf in iter(lambda: f.read(65536), b""):
                hSHA1.update(buf)
                hSHA256.update(buf)
                hMD5.update(buf)
    except OSError:
        return None
    return (hSHA1.hexdigest(), hSHA256.hexdigest(), hMD5.hexdigest())

View file

@ -0,0 +1,652 @@
# Copyright (c) 2020-2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
import os
from west import log
from west.util import west_topdir, WestNotFound
from zspdx.cmakecache import parseCMakeCacheFile
from zspdx.cmakefileapijson import parseReply
from zspdx.datatypes import DocumentConfig, Document, File, PackageConfig, Package, RelationshipDataElementType, RelationshipData, Relationship
from zspdx.getincludes import getCIncludes
import zspdx.spdxids
# WalkerConfig contains configuration data for the Walker.
class WalkerConfig:
    """Configuration data for the Walker."""

    def __init__(self):
        super(WalkerConfig, self).__init__()
        # prefix for Document namespaces; should not end with "/"
        self.namespacePrefix = ""
        # location of build directory
        self.buildDir = ""
        # whether to also analyze for included header files
        self.analyzeIncludes = False
        # whether to also generate an SPDX document for the SDK
        self.includeSDK = False
# Walker is the main analysis class: it walks through the CMake codemodel,
# build files, and corresponding source and SDK files, and gathers the
# information needed to build the SPDX data classes.
class Walker:
# initialize with WalkerConfig
def __init__(self, cfg):
    """Initialize with a WalkerConfig."""
    super(Walker, self).__init__()
    # configuration (WalkerConfig)
    self.cfg = cfg
    # the SPDX Documents we will be building
    self.docBuild = None
    self.docZephyr = None
    self.docApp = None
    self.docSDK = None
    # maps absolute file path => the Document that owns that file
    self.allFileLinks = {}
    # pending queues, drained after all targets have been walked:
    # source Files to create, process and assign
    self.pendingSources = []
    # relationships to create, process and assign
    self.pendingRelationships = []
    # parsed CMake codemodel
    self.cm = None
    # parsed CMake cache dict (filled once we have the build path)
    self.cmakeCache = {}
    # C compiler path, pulled from the parsed CMake cache
    self.compilerPath = ""
    # SDK install path, pulled from the parsed CMake cache
    self.sdkPath = ""
# primary entry point
def makeDocuments(self):
    """Primary entry point: parse the CMake data and assemble all SPDX
    document structures. Returns True on success, False otherwise."""
    # parse CMake cache file and get compiler path
    log.inf("parsing CMake Cache file")
    self.getCacheFile()

    # parse codemodel from Walker cfg's build dir
    log.inf("parsing CMake Codemodel files")
    self.cm = self.getCodemodel()
    if not self.cm:
        log.err("could not parse codemodel from CMake API reply; bailing")
        return False

    # set up Documents
    log.inf("setting up SPDX documents")
    if not self.setupDocuments():
        return False

    # walk through targets in codemodel to gather information
    log.inf("walking through targets")
    self.walkTargets()

    # walk through pending sources and create corresponding files
    log.inf("walking through pending sources files")
    self.walkPendingSources()

    # walk through pending relationship data and create relationships
    log.inf("walking through pending relationships")
    self.walkRelationships()

    return True
# parse cache file and pull out relevant data
def getCacheFile(self):
    """Parse the CMake cache file and pull out the compiler / SDK paths."""
    cacheFilePath = os.path.join(self.cfg.buildDir, "CMakeCache.txt")
    self.cmakeCache = parseCMakeCacheFile(cacheFilePath)
    if not self.cmakeCache:
        # leave compilerPath / sdkPath at their defaults
        return
    self.compilerPath = self.cmakeCache.get("CMAKE_C_COMPILER", "")
    self.sdkPath = self.cmakeCache.get("ZEPHYR_SDK_INSTALL_DIR", "")
# determine path from build dir to CMake file-based API index file, then
# parse it and return the Codemodel
def getCodemodel(self):
    """Locate the CMake file-based API index reply file and parse it.
    Returns: parsed Codemodel on success, None on failure."""
    log.dbg("getting codemodel from CMake API reply files")

    # the reply directory must exist and actually be a directory
    cmakeReplyDirPath = os.path.join(self.cfg.buildDir, ".cmake", "api", "v1", "reply")
    if not os.path.exists(cmakeReplyDirPath):
        log.err(f'cmake api reply directory {cmakeReplyDirPath} does not exist')
        log.err('was query directory created before cmake build ran?')
        return None
    if not os.path.isdir(cmakeReplyDirPath):
        log.err(f'cmake api reply directory {cmakeReplyDirPath} exists but is not a directory')
        return None

    # find the file with an "index" prefix; there should only be one
    indexFilePath = next(
        (os.path.join(cmakeReplyDirPath, f)
         for f in os.listdir(cmakeReplyDirPath)
         if f.startswith("index")),
        "",
    )
    if not indexFilePath:
        log.err(f'cmake api reply index file not found in {cmakeReplyDirPath}')
        return None

    # parse it into the codemodel
    return parseReply(indexFilePath)
# set up Documents before beginning
def setupDocuments(self):
    """Set up the SPDX Documents, their initial source Packages, and the
    pending DESCRIBES relationships, before walking the codemodel.
    Returns: True on success, False on failure.
    """
    log.dbg("setting up placeholder documents")

    # set up build document
    cfgBuild = DocumentConfig()
    cfgBuild.name = "build"
    cfgBuild.namespace = self.cfg.namespacePrefix + "/build"
    cfgBuild.docRefID = "DocumentRef-build"
    self.docBuild = Document(cfgBuild)
    # we'll create the build packages in walkTargets()
    # the DESCRIBES relationship for the build document will be
    # with the zephyr_final package
    rd = RelationshipData()
    rd.ownerType = RelationshipDataElementType.DOCUMENT
    rd.ownerDocument = self.docBuild
    rd.otherType = RelationshipDataElementType.TARGETNAME
    rd.otherTargetName = "zephyr_final"
    rd.rlnType = "DESCRIBES"
    self.pendingRelationships.append(rd)

    # set up zephyr document and its sources package
    cfgZephyr = DocumentConfig()
    cfgZephyr.name = "zephyr-sources"
    cfgZephyr.namespace = self.cfg.namespacePrefix + "/zephyr"
    cfgZephyr.docRefID = "DocumentRef-zephyr"
    self.docZephyr = Document(cfgZephyr)
    cfgPackageZephyr = PackageConfig()
    cfgPackageZephyr.name = "zephyr-sources"
    cfgPackageZephyr.spdxID = "SPDXRef-zephyr-sources"
    # relativeBaseDir is Zephyr sources topdir
    try:
        cfgPackageZephyr.relativeBaseDir = west_topdir(self.cm.paths_source)
    except WestNotFound:
        log.err(f"cannot find west_topdir for CMake Codemodel sources path {self.cm.paths_source}; bailing")
        return False
    pkgZephyr = Package(cfgPackageZephyr, self.docZephyr)
    self.docZephyr.pkgs[pkgZephyr.cfg.spdxID] = pkgZephyr
    self._queueDescribesPackage(self.docZephyr, cfgPackageZephyr.spdxID)

    # set up app document and its sources package
    cfgApp = DocumentConfig()
    cfgApp.name = "app-sources"
    cfgApp.namespace = self.cfg.namespacePrefix + "/app"
    cfgApp.docRefID = "DocumentRef-app"
    self.docApp = Document(cfgApp)
    cfgPackageApp = PackageConfig()
    cfgPackageApp.name = "app-sources"
    cfgPackageApp.spdxID = "SPDXRef-app-sources"
    # relativeBaseDir is app sources dir
    cfgPackageApp.relativeBaseDir = self.cm.paths_source
    pkgApp = Package(cfgPackageApp, self.docApp)
    self.docApp.pkgs[pkgApp.cfg.spdxID] = pkgApp
    self._queueDescribesPackage(self.docApp, cfgPackageApp.spdxID)

    if self.cfg.includeSDK:
        # set up SDK document and its package
        cfgSDK = DocumentConfig()
        cfgSDK.name = "sdk"
        cfgSDK.namespace = self.cfg.namespacePrefix + "/sdk"
        cfgSDK.docRefID = "DocumentRef-sdk"
        self.docSDK = Document(cfgSDK)
        cfgPackageSDK = PackageConfig()
        cfgPackageSDK.name = "sdk"
        cfgPackageSDK.spdxID = "SPDXRef-sdk"
        # relativeBaseDir is SDK dir
        cfgPackageSDK.relativeBaseDir = self.sdkPath
        pkgSDK = Package(cfgPackageSDK, self.docSDK)
        self.docSDK.pkgs[pkgSDK.cfg.spdxID] = pkgSDK
        self._queueDescribesPackage(self.docSDK, cfgPackageSDK.spdxID)

    return True

# helper for setupDocuments(): queue a pending DESCRIBES relationship
# from a Document to one of its Packages (this wiring was previously
# duplicated for the zephyr, app and SDK documents)
def _queueDescribesPackage(self, doc, pkgSpdxID):
    rd = RelationshipData()
    rd.ownerType = RelationshipDataElementType.DOCUMENT
    rd.ownerDocument = doc
    rd.otherType = RelationshipDataElementType.PACKAGEID
    rd.otherPackageID = pkgSpdxID
    rd.rlnType = "DESCRIBES"
    self.pendingRelationships.append(rd)
# walk through targets and gather information
def walkTargets(self):
    """Build a Package for each CMake target and queue its files and relationships."""
    log.dbg("walking targets from codemodel")
    # assuming just one configuration; consider whether this is incorrect
    targets = self.cm.configurations[0].configTargets
    for tgt in targets:
        # each target gets its own Package in the build Document
        targetPkg = self.initConfigTargetPackage(tgt)
        if not tgt.target.artifacts:
            # e.g. UTILITY targets produce no build products to record
            log.dbg(f" - target {tgt.name} has no build artifacts")
        else:
            # record the build product and queue its source files
            buildFile = self.addBuildFile(tgt, targetPkg)
            self.collectPendingSourceFiles(tgt, targetPkg, buildFile)
        # dependency relationships apply whether or not artifacts exist
        self.collectTargetDependencies(targets, tgt, targetPkg)
# build a Package in the Build doc for the given ConfigTarget
def initConfigTargetPackage(self, cfgTarget):
    """Create a Package for this target, register it in the build Document, and return it."""
    log.dbg(f" - initializing Package for target: {cfgTarget.name}")
    # configure the target Package
    pkgCfg = PackageConfig()
    pkgCfg.name = cfgTarget.name
    pkgCfg.spdxID = "SPDXRef-" + zspdx.spdxids.convertToSPDXIDSafe(cfgTarget.name)
    # all build products are relative to the CMake build directory
    pkgCfg.relativeBaseDir = self.cm.paths_build
    # create the Package and register it in the build Document
    newPkg = Package(pkgCfg, self.docBuild)
    self.docBuild.pkgs[pkgCfg.spdxID] = newPkg
    return newPkg
# create a target's build product File and add it to its Package
# call with:
# 1) ConfigTarget
# 2) Package for that target
# returns: File
def addBuildFile(self, cfgTarget, pkg):
    """Create the File record for this target's build artifact and attach it to pkg."""
    # assumes only one artifact in each target
    artifactRelpath = cfgTarget.target.artifacts[0]
    artifactAbspath = os.path.join(pkg.cfg.relativeBaseDir, artifactRelpath)
    log.dbg(f" - adding File {artifactAbspath}")
    log.dbg(f" - relativeBaseDir: {pkg.cfg.relativeBaseDir}")
    log.dbg(f" - artifacts[0]: {artifactRelpath}")
    # create build File record
    buildFile = File(self.docBuild, pkg)
    buildFile.abspath = artifactAbspath
    buildFile.relpath = artifactRelpath
    # can use nameOnDisk b/c it is just the filename w/out directory paths
    buildFile.spdxID = zspdx.spdxids.getUniqueFileID(cfgTarget.target.nameOnDisk, self.docBuild.timesSeen)
    # hashes / licenses / relationships are filled in after walking
    pkg.files[buildFile.spdxID] = buildFile
    # register the path so later relationship resolution can find this File
    self.docBuild.fileLinks[buildFile.abspath] = buildFile
    self.allFileLinks[buildFile.abspath] = self.docBuild
    # remember this as the Package's own build product
    pkg.targetBuildFile = buildFile
    return buildFile
# collect a target's source files, add to pending sources queue, and
# create pending relationship data entry
# call with:
# 1) ConfigTarget
# 2) Package for that target
# 3) build File for that target
def collectPendingSourceFiles(self, cfgTarget, pkg, bf):
    """Queue the target's sources (and optionally their includes) with GENERATED_FROM relationships."""
    log.dbg(f" - collecting source files and adding to pending queue")

    def queueSource(fileAbspath):
        # queue the file and a "build product GENERATED_FROM this file" relationship
        self.pendingSources.append(fileAbspath)
        rd = RelationshipData()
        rd.ownerType = RelationshipDataElementType.FILENAME
        rd.ownerFileAbspath = bf.abspath
        rd.otherType = RelationshipDataElementType.FILENAME
        rd.otherFileAbspath = fileAbspath
        rd.rlnType = "GENERATED_FROM"
        self.pendingRelationships.append(rd)

    # accumulate includes across all sources to avoid duplicates when
    # multiple source files include the same headers
    includesSeen = set()
    # walk through target's sources
    for src in cfgTarget.target.sources:
        log.dbg(f" - add pending source file and relationship for {src.path}")
        # resolve relative source paths against the source tree
        srcAbspath = src.path
        if not os.path.isabs(src.path):
            srcAbspath = os.path.join(self.cm.paths_source, src.path)
        # skip files that are referenced but not actually present
        if not (os.path.exists(srcAbspath) and os.path.isfile(srcAbspath)):
            log.dbg(f" - {srcAbspath} does not exist but is referenced in sources for target {pkg.cfg.name}; skipping")
            continue
        queueSource(srcAbspath)
        # optionally gather this source file's C includes as well
        if self.cfg.analyzeIncludes and self.compilerPath:
            includesSeen.update(self.collectIncludes(cfgTarget, pkg, bf, src))
    # queue the de-duplicated include files in stable sorted order
    for inc in sorted(includesSeen):
        queueSource(inc)
# collect the include files corresponding to this source file
# call with:
# 1) ConfigTarget
# 2) Package for this target
# 3) build File for this target
# 4) TargetSource entry for this source file
# returns: sorted list of include files for this source file
def collectIncludes(self, cfgTarget, pkg, bf, src):
    # get the right compile group for this source file; bail out if the
    # index from the codemodel is out of range
    if len(cfgTarget.target.compileGroups) < (src.compileGroupIndex + 1):
        log.dbg(f" - {cfgTarget.target.name} has compileGroupIndex {src.compileGroupIndex} but only {len(cfgTarget.target.compileGroups)} found; skipping included files search")
        return []
    cg = cfgTarget.target.compileGroups[src.compileGroupIndex]
    # currently only doing C includes
    if cg.language != "C":
        log.dbg(f" - {cfgTarget.target.name} has compile group language {cg.language} but currently only searching includes for C files; skipping included files search")
        return []
    # resolve relative source paths against the source tree; use
    # os.path.isabs (portable) rather than checking for a leading "/",
    # matching collectPendingSourceFiles
    srcAbspath = src.path
    if not os.path.isabs(src.path):
        srcAbspath = os.path.join(self.cm.paths_source, src.path)
    return getCIncludes(self.compilerPath, srcAbspath, cg)
# collect relationships for dependencies of this target Package
# call with:
# 1) all ConfigTargets from CodeModel
# 2) this particular ConfigTarget
# 3) Package for this Target
def collectTargetDependencies(self, cfgTargets, cfgTarget, pkg):
    """Queue HAS_PREREQUISITE (and, for built targets, STATIC_LINK) relationships."""
    log.dbg(f" - collecting target dependencies for {pkg.cfg.name}")
    # only targets with build artifacts (e.g. non-UTILITY) get STATIC_LINK
    # relationships between build files
    hasArtifacts = len(cfgTarget.target.artifacts) > 0
    for dep in cfgTarget.target.dependencies:
        # the dependency name is the first ":"-separated field of its id
        depName = dep.id.split(":")[0]
        log.dbg(f" - adding pending relationship for {depName}")
        # queue a relationship between the two target Packages
        rd = RelationshipData()
        rd.ownerType = RelationshipDataElementType.TARGETNAME
        rd.ownerTargetName = pkg.cfg.name
        rd.otherType = RelationshipDataElementType.TARGETNAME
        rd.otherTargetName = depName
        rd.rlnType = "HAS_PREREQUISITE"
        self.pendingRelationships.append(rd)
        if not hasArtifacts:
            continue
        # find the filename for the dependency's build product, using the
        # codemodel (since we might not have created this dependency's
        # Package or File yet); skip utility deps with no artifacts.
        # all targets use the same relativeBaseDir, so joining against the
        # owner package's base dir works here.
        depAbspath = next(
            (os.path.join(pkg.cfg.relativeBaseDir, ct.target.artifacts[0])
             for ct in cfgTargets
             if ct.name == depName and len(ct.target.artifacts) > 0),
            "")
        if depAbspath == "":
            continue
        # queue a STATIC_LINK relationship between the two build files
        rd = RelationshipData()
        rd.ownerType = RelationshipDataElementType.FILENAME
        rd.ownerFileAbspath = pkg.targetBuildFile.abspath
        rd.otherType = RelationshipDataElementType.FILENAME
        rd.otherFileAbspath = depAbspath
        rd.rlnType = "STATIC_LINK"
        self.pendingRelationships.append(rd)
# walk through pending sources and create corresponding files,
# assigning them to the appropriate Document and Package
def walkPendingSources(self):
    """Assign each pending source file to the build, SDK, app, or zephyr Document."""
    log.dbg(f"walking pending sources")
    # each of these documents holds exactly one package; grab it
    pkgZephyr = next(iter(self.docZephyr.pkgs.values()))
    pkgApp = next(iter(self.docApp.pkgs.values()))
    if self.cfg.includeSDK:
        pkgSDK = next(iter(self.docSDK.pkgs.values()))
    for srcAbspath in self.pendingSources:
        # skip anything already assigned to a document
        srcDoc = self.allFileLinks.get(srcAbspath, None)
        if srcDoc:
            log.dbg(f" - {srcAbspath}: already seen, assigned to {srcDoc.cfg.name}")
            continue

        def under(baseDir):
            # true when srcAbspath lives inside baseDir
            return os.path.commonpath([srcAbspath, baseDir]) == baseDir

        # priority order: build target dirs, then SDK, then app, then zephyr
        pkgBuild = self.findBuildPackage(srcAbspath)
        if pkgBuild:
            log.dbg(f" - {srcAbspath}: assigning to build document, package {pkgBuild.cfg.name}")
            srcDoc, srcPkg = self.docBuild, pkgBuild
        elif self.cfg.includeSDK and under(pkgSDK.cfg.relativeBaseDir):
            log.dbg(f" - {srcAbspath}: assigning to sdk document")
            srcDoc, srcPkg = self.docSDK, pkgSDK
        elif under(pkgApp.cfg.relativeBaseDir):
            log.dbg(f" - {srcAbspath}: assigning to app document")
            srcDoc, srcPkg = self.docApp, pkgApp
        elif under(pkgZephyr.cfg.relativeBaseDir):
            log.dbg(f" - {srcAbspath}: assigning to zephyr document")
            srcDoc, srcPkg = self.docZephyr, pkgZephyr
        else:
            log.dbg(f" - {srcAbspath}: can't determine which document should own; skipping")
            continue
        # create the File and wire it into the chosen Package / Document
        newFile = File(srcDoc, srcPkg)
        newFile.abspath = srcAbspath
        newFile.relpath = os.path.relpath(srcAbspath, srcPkg.cfg.relativeBaseDir)
        newFile.spdxID = zspdx.spdxids.getUniqueFileID(os.path.split(srcAbspath)[1], srcDoc.timesSeen)
        # hashes / licenses / relationships are filled in later
        srcPkg.files[newFile.spdxID] = newFile
        srcDoc.fileLinks[newFile.abspath] = newFile
        self.allFileLinks[newFile.abspath] = srcDoc
# figure out which build Package contains the given file, if any
# call with:
# 1) absolute path for source filename being searched
def findBuildPackage(self, srcAbspath):
    """Return the most deeply-nested build Package containing srcAbspath, or None."""
    # Multiple nested target Packages may "contain" the file path. The one
    # with the longest base directory is the most deeply-nested target
    # directory, so that's the one which should get the file.
    containing = [
        pkg for pkg in self.docBuild.pkgs.values()
        if os.path.commonpath([srcAbspath, pkg.cfg.relativeBaseDir]) == pkg.cfg.relativeBaseDir
    ]
    if not containing:
        return None
    return max(containing, key=lambda p: len(p.cfg.relativeBaseDir))
# walk through pending RelationshipData entries, create corresponding
# Relationships, and assign them to the applicable Files / Packages
def walkRelationships(self):
    """Resolve queued RelationshipData entries into concrete Relationships."""
    for rlnData in self.pendingRelationships:
        # resolve the owner (left) side first; skip if it can't be found
        docA, spdxIDA, rlnsA = self.getRelationshipLeft(rlnData)
        if not (docA and spdxIDA):
            continue
        # then resolve the other (right) side, which may live in a
        # different document than the owner
        spdxIDB = self.getRelationshipRight(rlnData, docA)
        if not spdxIDB:
            continue
        # both sides resolved; build the Relationship and attach it to
        # the owner's relationship list
        rln = Relationship()
        rln.refA = spdxIDA
        rln.refB = spdxIDB
        rln.rlnType = rlnData.rlnType
        rlnsA.append(rln)
        log.dbg(f" - adding relationship to {docA.cfg.name}: {rln.refA} {rln.rlnType} {rln.refB}")
# get owner (left side) document and SPDX ID of Relationship for given RelationshipData
# returns: doc, spdxID, rlnsArray (for either Document, Package, or File, as applicable)
def getRelationshipLeft(self, rlnData):
    """Resolve the left side of a relationship to (document, spdxID, relationship list)."""
    notFound = (None, None, None)
    if rlnData.ownerType == RelationshipDataElementType.FILENAME:
        # map the file path to its owning document, then to the File itself
        ownerDoc = self.allFileLinks.get(rlnData.ownerFileAbspath, None)
        if not ownerDoc:
            log.dbg(f" - searching for relationship, can't find document with file {rlnData.ownerFileAbspath}; skipping")
            return notFound
        sf = ownerDoc.fileLinks.get(rlnData.ownerFileAbspath, None)
        if not sf:
            log.dbg(f" - searching for relationship for file {rlnData.ownerFileAbspath} points to document {ownerDoc.cfg.name} but file not found; skipping")
            return notFound
        # found it, but it must have a usable SPDX ID
        if not sf.spdxID:
            log.dbg(f" - searching for relationship for file {rlnData.ownerFileAbspath} found file, but empty ID; skipping")
            return notFound
        return ownerDoc, sf.spdxID, sf.rlns
    if rlnData.ownerType == RelationshipDataElementType.TARGETNAME:
        # target names always refer to Packages in the build document
        ownerDoc = self.docBuild
        for pkg in ownerDoc.pkgs.values():
            if pkg.cfg.name != rlnData.ownerTargetName:
                continue
            if not pkg.cfg.spdxID:
                log.dbg(f" - searching for relationship for target {rlnData.ownerTargetName} found package, but empty ID; skipping")
                return notFound
            return ownerDoc, pkg.cfg.spdxID, pkg.rlns
        log.dbg(f" - searching for relationship for target {rlnData.ownerTargetName}, target not found in build document; skipping")
        return notFound
    if rlnData.ownerType == RelationshipDataElementType.DOCUMENT:
        # a document owner is always identified as SPDXRef-DOCUMENT
        return rlnData.ownerDocument, "SPDXRef-DOCUMENT", rlnData.ownerDocument.relationships
    log.dbg(f" - unknown relationship type {rlnData.ownerType}; skipping")
    return notFound
# get other (right side) SPDX ID of Relationship for given RelationshipData
def getRelationshipRight(self, rlnData, docA):
    """Resolve the right side of a relationship to an SPDX ID (possibly DocumentRef-qualified)."""

    def crossRef(spdxID, otherDoc):
        # qualify the ID with a DocumentRef (and record the external
        # document link) when the other element lives in a different
        # document than the owner docA
        if otherDoc != docA:
            docA.externalDocuments.add(otherDoc)
            return otherDoc.cfg.docRefID + ":" + spdxID
        return spdxID

    if rlnData.otherType == RelationshipDataElementType.FILENAME:
        # map the file path to its owning document, then to the File itself
        otherDoc = self.allFileLinks.get(rlnData.otherFileAbspath, None)
        if not otherDoc:
            log.dbg(f" - searching for relationship, can't find document with file {rlnData.otherFileAbspath}; skipping")
            return None
        bf = otherDoc.fileLinks.get(rlnData.otherFileAbspath, None)
        if not bf:
            log.dbg(f" - searching for relationship for file {rlnData.otherFileAbspath} points to document {otherDoc.cfg.name} but file not found; skipping")
            return None
        # found it, but it must have a usable SPDX ID
        if not bf.spdxID:
            log.dbg(f" - searching for relationship for file {rlnData.otherFileAbspath} found file, but empty ID; skipping")
            return None
        return crossRef(bf.spdxID, otherDoc)
    if rlnData.otherType == RelationshipDataElementType.TARGETNAME:
        # target names always refer to Packages in the build document
        otherDoc = self.docBuild
        for pkg in otherDoc.pkgs.values():
            if pkg.cfg.name != rlnData.otherTargetName:
                continue
            if not pkg.cfg.spdxID:
                log.dbg(f" - searching for relationship for target {rlnData.otherTargetName} found package, but empty ID; skipping")
                return None
            return crossRef(pkg.cfg.spdxID, otherDoc)
        log.dbg(f" - searching for relationship for target {rlnData.otherTargetName}, target not found in build document; skipping")
        return None
    if rlnData.otherType == RelationshipDataElementType.PACKAGEID:
        # a package ID is used exactly as it was queued
        return rlnData.otherPackageID
    log.dbg(f" - unknown relationship type {rlnData.otherType}; skipping")
    return None

# --- new file: zspdx/writer.py (153 lines) ---
# Copyright (c) 2020, 2021 The Linux Foundation
#
# SPDX-License-Identifier: Apache-2.0
from datetime import datetime, timezone

from west import log

from zspdx.util import getHashes
# Output tag-value SPDX 2.2 content for the given Relationship object.
# Arguments:
#   1) f: file handle for SPDX document
#   2) rln: Relationship object being described
def writeRelationshipSPDX(f, rln):
    """Write one SPDX 2.2 tag-value Relationship line to f."""
    # format is "Relationship: <refA> <type> <refB>"
    f.write(f"Relationship: {rln.refA} {rln.rlnType} {rln.refB}\n")
# Output tag-value SPDX 2.2 content for the given File object.
# Arguments:
#   1) f: file handle for SPDX document
#   2) bf: File object being described
def writeFileSPDX(f, bf):
    """Write the SPDX 2.2 tag-value section for one File object to f."""
    lines = [
        f"FileName: ./{bf.relpath}",
        f"SPDXID: {bf.spdxID}",
        f"FileChecksum: SHA1: {bf.sha1}",
    ]
    # SHA256 / MD5 checksums are optional; emit only when calculated
    if bf.sha256 != "":
        lines.append(f"FileChecksum: SHA256: {bf.sha256}")
    if bf.md5 != "":
        lines.append(f"FileChecksum: MD5: {bf.md5}")
    lines.append(f"LicenseConcluded: {bf.concludedLicense}")
    # list each detected license, or NONE if nothing was found
    if len(bf.licenseInfoInFile) == 0:
        lines.append("LicenseInfoInFile: NONE")
    else:
        lines.extend(f"LicenseInfoInFile: {licInfoInFile}" for licInfoInFile in bf.licenseInfoInFile)
    lines.append(f"FileCopyrightText: {bf.copyrightText}")
    f.write("\n".join(lines) + "\n\n")
    # write this file's relationships, if any
    if bf.rlns:
        for rln in bf.rlns:
            writeRelationshipSPDX(f, rln)
        f.write("\n")
# Output tag-value SPDX 2.2 content for the given Package object.
# Arguments:
#   1) f: file handle for SPDX document
#   2) pkg: Package object being described
def writePackageSPDX(f, pkg):
    """Write the SPDX 2.2 tag-value section for one Package (and its Files) to f."""
    header = [
        f"##### Package: {pkg.cfg.name}",
        f"PackageName: {pkg.cfg.name}",
        f"SPDXID: {pkg.cfg.spdxID}",
        "PackageDownloadLocation: NOASSERTION",
        f"PackageLicenseConcluded: {pkg.concludedLicense}",
    ]
    f.write("\n".join(header) + "\n")
    # one tag per license detected in this package's files
    for licFromFiles in pkg.licenseInfoFromFiles:
        f.write(f"PackageLicenseInfoFromFiles: {licFromFiles}\n")
    f.write(f"PackageLicenseDeclared: {pkg.cfg.declaredLicense}\n")
    f.write(f"PackageCopyrightText: {pkg.cfg.copyrightText}\n")
    # flag whether files analyzed / any files present; the verification
    # code is only meaningful when the package actually contains files
    if pkg.files:
        f.write(f"FilesAnalyzed: true\nPackageVerificationCode: {pkg.verificationCode}\n\n")
    else:
        f.write("FilesAnalyzed: false\nPackageComment: Utility target; no files\n\n")
    # write package relationships, if any
    if pkg.rlns:
        for rln in pkg.rlns:
            writeRelationshipSPDX(f, rln)
        f.write("\n")
    # write package files, sorted by relative path
    for bf in sorted(pkg.files.values(), key=lambda x: x.relpath):
        writeFileSPDX(f, bf)
# Output tag-value SPDX 2.2 content for a custom license.
# Arguments:
#   1) f: file handle for SPDX document
#   2) lic: custom license ID being described
def writeOtherLicenseSPDX(f, lic):
    """Write an OtherLicensingInfo section for a custom (non-SPDX-listed) license ID to f."""
    # the custom ID serves as its own ID, text, and name
    for tag in ("LicenseID", "ExtractedText", "LicenseName"):
        f.write(f"{tag}: {lic}\n")
    # record where the custom ID came from
    f.write(f"LicenseComment: Corresponds to the license ID `{lic}` detected in an SPDX-License-Identifier: tag.\n")
# Output tag-value SPDX 2.2 content for the given Document object.
# Arguments:
#   1) f: file handle for SPDX document
#   2) doc: Document object being described
def writeDocumentSPDX(f, doc):
    # document creation info header; Created uses an aware UTC timestamp
    # (datetime.utcnow() is deprecated since Python 3.12)
    f.write(f"""SPDXVersion: SPDX-2.2
DataLicense: CC0-1.0
SPDXID: SPDXRef-DOCUMENT
DocumentName: {doc.cfg.name}
DocumentNamespace: {doc.cfg.namespace}
Creator: Tool: Zephyr SPDX builder
Created: {datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")}
""")
    # write any external document references, sorted for reproducible output
    if len(doc.externalDocuments) > 0:
        extDocs = sorted(doc.externalDocuments, key=lambda x: x.cfg.docRefID)
        for extDoc in extDocs:
            f.write(f"ExternalDocumentRef: {extDoc.cfg.docRefID} {extDoc.cfg.namespace} SHA1: {extDoc.myDocSHA1}\n")
        f.write("\n")
    # write relationships owned by this Document (not by its Packages, etc.), if any
    if len(doc.relationships) > 0:
        for rln in doc.relationships:
            writeRelationshipSPDX(f, rln)
        f.write("\n")
    # write packages
    for pkg in doc.pkgs.values():
        writePackageSPDX(f, pkg)
    # write other license info, if any
    if len(doc.customLicenseIDs) > 0:
        # FIX: previously iterated over list(...).sort(), which sorts in
        # place and returns None, raising TypeError whenever custom license
        # IDs were present; sorted() returns the sorted list
        for lic in sorted(doc.customLicenseIDs):
            writeOtherLicenseSPDX(f, lic)
# Open SPDX document file for writing, write the document, and calculate
# its hash for other referring documents to use.
# Arguments:
#   1) spdxPath: path to write SPDX document
#   2) doc: SPDX Document object to write
# returns: True on success, False on I/O or hashing failure
def writeSPDX(spdxPath, doc):
    """Write doc to spdxPath and record its SHA1 on doc.myDocSHA1."""
    try:
        log.inf(f"Writing SPDX document {doc.cfg.name} to {spdxPath}")
        with open(spdxPath, "w") as f:
            writeDocumentSPDX(f, doc)
    except OSError as e:
        log.err(f"Error: Unable to write to {spdxPath}: {str(e)}")
        return False
    # hash the document we just wrote so referring documents can cite it
    # in their ExternalDocumentRef entries
    hashes = getHashes(spdxPath)
    if not hashes:
        log.err(f"Error: created document but unable to calculate hash values")
        return False
    doc.myDocSHA1 = hashes[0]
    return True