sanitycheck: validate YAML files w/ schema files

This introduces a schema-based YAML validation process when loading any YAML file, before doing any operations on it. An exception will be raised at SanityConfigParser() if the file fails to verify against the given schema. Schemas are defined for the platform files in board/*/*/*.yaml and for the (sample|testcase).yaml files. The verification is done using the pykwalify Python library. If it is not installed, a warning is printed and the schema verification is skipped. At some point, we might want to force it to be installed. The verification library is made a separate module (scl.py) so it can be easily imported by others. Signed-off-by: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
2017-07-24 10:24:35 -07:00 · 2017-07-24 10:24:35 -07:00 · 662dde6675
commit 662dde6675
parent d3409c5e67
5 changed files with 268 additions and 24 deletions
--- a/doc/getting_started/installation_linux.rst
+++ b/doc/getting_started/installation_linux.rst
@ -55,7 +55,7 @@ Install the required packages in a Ubuntu host system with:
 .. code-block:: console

   $ sudo apt-get install git make gcc g++ ncurses-dev \
-	 doxygen dfu-util device-tree-compiler python3-pip
+	 doxygen dfu-util device-tree-compiler python3-ply python3-pip

 Install the required packages in a Fedora host system with:

@ -64,8 +64,8 @@ Install the required packages in a Fedora host system with:
   $ sudo dnf group install "Development Tools"
   $ sudo dnf install git make gcc glibc-static \
 	 libstdc++-static ncurses-devel \
-	 doxygen dfu-util dtc python3-pip
-
+	 doxygen dfu-util dtc python3-pip \
+	 python3-ply python3-yaml python3-pykwalify

 Install additional packages required for development with Zephyr::

--- a/scripts/sanitycheck
+++ b/scripts/sanitycheck
@ -176,11 +176,17 @@ from collections import OrderedDict
 from itertools import islice
 import yaml

+import logging
+log_format = "%(levelname)s %(name)s::%(module)s.%(funcName)s():%(lineno)d: %(message)s"
+logging.basicConfig(format = log_format, level = 30)
+
 if "ZEPHYR_BASE" not in os.environ:
    sys.stderr.write("$ZEPHYR_BASE environment variable undefined.\n")
    exit(1)
 ZEPHYR_BASE = os.environ["ZEPHYR_BASE"]

+import scl
+
 sys.path.insert(0, os.path.join(ZEPHYR_BASE, "scripts/"))

 import expr_parser
@ -936,13 +942,12 @@ testcase_valid_keys = {"tags" : {"type" : "set", "required" : True},
 class SanityConfigParser:
    """Class to read test case files with semantic checking
    """
-    def __init__(self, filename):
+    def __init__(self, filename, schema):
        """Instantiate a new SanityConfigParser object

        @param filename Source .yaml file to read
+        @param schema Loaded YAML schema that the file is validated
+            against; it is handed as-is to scl.yaml_load_verify(),
+            whose exceptions propagate to the caller
        """
-        with open(filename, 'r') as stream:
-            cp = yaml.load(stream)
+        # Load the file and validate it against the schema in one step
+        cp = scl.yaml_load_verify(filename, schema)
        self.filename = filename
        self.cp = cp

@ -1055,6 +1060,11 @@ class Platform:
    """Class representing metadata for a particular platform

    Maps directly to BOARD when building"""
+
+    yaml_platform_schema = scl.yaml_load(
+        os.path.join(os.environ['ZEPHYR_BASE'],
+                     "scripts", "sanitycheck-platform-schema.yaml"))
+
    def __init__(self, cfile):
        """Constructor.

@ -1064,7 +1074,7 @@ class Platform:
            in the architecture configuration file which has lots of metadata.
            See the Architecture class.
        """
-        scp = SanityConfigParser(cfile)
+        scp = SanityConfigParser(cfile, self.yaml_platform_schema)
        cp = scp.cp

        self.name = cp['identifier']
@ -1218,6 +1228,10 @@ def defconfig_cb(context, goals, goal):
 class TestSuite:
    config_re = re.compile('(CONFIG_[A-Za-z0-9_]+)[=]\"?([^\"]*)\"?$')

+    yaml_tc_schema = scl.yaml_load(
+        os.path.join(os.environ['ZEPHYR_BASE'],
+                     "scripts", "sanitycheck-tc-schema.yaml"))
+
    def __init__(self, arch_root, testcase_roots, outdir, coverage):
        # Keep track of which test cases we've filtered out and why
        discards = {}
@ -1240,22 +1254,28 @@ class TestSuite:
            for dirpath, dirnames, filenames in os.walk(testcase_root,
                                                        topdown=True):
                verbose("scanning %s" % dirpath)
-                if "sample.yaml" in filenames or "testcase.yaml" in filenames:
-                    verbose("Found possible test case in " + dirpath)
-                    dirnames[:] = []
-                    if "sample.yaml" in filenames:
-                        yaml_path = os.path.join(dirpath, "sample.yaml")
-                    else:
-                        yaml_path = os.path.join(dirpath, "testcase.yaml")
-                    cp = SanityConfigParser(yaml_path)
-                    workdir = os.path.relpath(dirpath, testcase_root)
+                if 'sample.yaml' in filenames:
+                    filename = 'sample.yaml'
+                elif 'testcase.yaml' in filenames:
+                    filename = 'testcase.yaml'
+                else:
+                    continue
+                verbose("Found possible test case in " + dirpath)
+                dirnames[:] = []
+                yaml_path = os.path.join(dirpath, filename)
+                try:
+                    cp = SanityConfigParser(yaml_path, self.yaml_tc_schema)
+                except RuntimeError as e:
+                    error("E: %s: can't load: %s" % (yaml_path, e))
+                    # Nothing was loaded for this directory: skip it,
+                    # otherwise the code below would use an unbound 'cp'
+                    # or the one left over from the previous directory
+                    continue

-                    for section in cp.sections():
-                        name = list(section.keys())[0]
-                        tc_dict = cp.get_section(name, testcase_valid_keys)
-                        tc = TestCase(testcase_root, workdir, name, tc_dict,
-                                      yaml_path)
-                        self.testcases[tc.name] = tc
+                workdir = os.path.relpath(dirpath, testcase_root)
+
+                for section in cp.sections():
+                    name = list(section.keys())[0]
+                    tc_dict = cp.get_section(name, testcase_valid_keys)
+                    tc = TestCase(testcase_root, workdir, name, tc_dict,
+                                  yaml_path)
+                    self.testcases[tc.name] = tc

        debug("Reading platform configuration files under %s..." % arch_root)
        for dirpath, dirnames, filenames in os.walk(arch_root):
@ -1263,8 +1283,11 @@ class TestSuite:
                if filename.endswith(".yaml"):
                    fn = os.path.join(dirpath, filename)
                    verbose("Found plaform configuration " + fn)
-                    platform = Platform(fn)
-                    self.platforms.append(platform)
+                    try:
+                        platform = Platform(fn)
+                        self.platforms.append(platform)
+                    except RuntimeError as e:
+                        error("E: %s: can't load: %s" % (fn, e))

        arches = []
        for p in self.platforms:
--- a/scripts/sanitycheck-platform-schema.yaml
+++ b/scripts/sanitycheck-platform-schema.yaml
@ -0,0 +1,47 @@
+#
+# Schema to validate a YAML file describing a Zephyr test platform
+#
+# We load this with pykwalify
+# (http://pykwalify.readthedocs.io/en/unstable/validation-rules.html),
+# a YAML structure validator, to validate the YAML files that describe
+# Zephyr test platforms
+#
+# The original spec comes from Zephyr's sanitycheck script
+#
+
+type: map
+mapping:
+  "identifier":
+    type: str
+  "name":
+    type: str
+  "type":
+    type: str
+    enum: [ 'mcu', 'qemu' ]
+  "arch":
+    type: str
+  "toolchain":
+    type: seq
+    seq:
+      -
+        type: str
+        enum: [ 'gccarmemb', 'issm', 'xcc', 'zephyr' ]
+  "ram":
+    type: int
+  "flash":
+    type: int
+  "supported":
+    type: seq
+    seq:
+      -
+        type: str
+  "testing":
+    type: map
+    mapping:
+      "default":
+        type: bool
+      "ignore_tags":
+        type: seq
+        seq:
+          -
+            type: str
--- a/scripts/sanitycheck-tc-schema.yaml
+++ b/scripts/sanitycheck-tc-schema.yaml
@ -0,0 +1,102 @@
+#
+# Schema to validate a YAML file describing Zephyr testcases or samples
+#
+# We load this with pykwalify
+# (http://pykwalify.readthedocs.io/en/unstable/validation-rules.html),
+# a YAML structure validator, to validate the YAML files that describe
+# Zephyr testcases and samples (testcase.yaml / sample.yaml)
+#
+# The original spec comes from Zephyr's sanitycheck script
+#
+type: map
+mapping:
+  # The sample descriptor, if present
+  "sample":
+    type: map
+    required: no
+    mapping:
+      "name":
+        type: str
+        required: yes
+      "description":
+        type: str
+        required: no
+      "platforms":
+        type: str
+        required: no
+  # The list of testcases -- IDK why this is a sequence of
+  # maps of maps; it should just be a sequence of maps.
+  # Maybe it is just an artifact?
+  "tests":
+    type: seq
+    required: yes
+    sequence:
+      - type: map
+        matching-rule: "any"
+        mapping:
+          # The testname key can be any name, so we match it with
+          # regex;(([a-zA-Z0-9_]+)); for this to work, note that below
+          # we make it required: no
+          regex;(([a-zA-Z0-9_]+)):
+            type: map
+            # has to be not-required, otherwise the parser gets
+            # confused and thinks it never found it
+            required: no
+            mapping:
+             "arch_exclude":
+               type: str
+               required: no
+             "arch_whitelist":
+               type: str
+               required: no
+             "build_only":
+               type: bool
+               required: no
+             "build_on_all":
+               type: bool
+               required: no
+             "depends_on":
+               type: str
+               required: no
+             "extra_args":
+               type: str
+               required: no
+             "extra_sections":
+               type: str
+               required: no
+             "filter":
+               type: str
+               required: no
+             "min_ram":
+               type: int
+               required: no
+             "min_flash":
+               type: int
+               required: no
+             "platform_exclude":
+               type: str
+               required: no
+             "platform_whitelist":
+               type: str
+               required: no
+             "tags":
+               type: str
+               required: yes
+             "timeout":
+               type: int
+               required: no
+             "toolchain_exclude":
+               type: str
+               required: no
+             "toolchain_whitelist":
+               type: str
+               required: no
+             "type":
+               type: str
+               enum: [ 'unit' ]
+             "skip":
+               type: bool
+               required: no
+             "slow":
+               type: bool
+               required: no
--- a/scripts/scl.py
+++ b/scripts/scl.py
@ -0,0 +1,72 @@
+#! /usr/bin/python
+#
+# Zephyr's Sanity Check library
+#
+# Set of code that other projects can also import to do things on
+# Zephyr's sanity check testcases.
+
+import logging
+import os
+import yaml
+
+log = logging.getLogger("scl")
+
+#
+#
+def yaml_load(filename):
+    """
+    Safely load a YAML document
+
+    Follows recommendations from
+    https://security.openstack.org/guidelines/dg_avoid-dangerous-input-parsing-libraries.html.
+
+    :param str filename: filename to load
+    :raises yaml.scanner.ScannerError: on YAML scan issues (the error
+      is logged with its location and then re-raised)
+    :raises: any other exception on file access errors
+    :return: the data produced by yaml.safe_load() for the document
+      (a dictionary for the mapping-style files used here)
+    """
+    try:
+        with open(filename, 'r') as f:
+            return yaml.safe_load(f)
+    except yaml.scanner.ScannerError as e:  # For errors parsing schema.yaml
+        mark = e.problem_mark
+        cmark = e.context_mark
+        if cmark is None:
+            # PyYAML raises many scanner errors without a context
+            # mark; dereferencing it would raise AttributeError here
+            # and hide the real parse error from the caller
+            log.error("%s:%d:%d: error: %s (note %s)",
+                      mark.name, mark.line, mark.column, e.problem,
+                      e.note)
+        else:
+            log.error("%s:%d:%d: error: %s (note %s context @%s:%d:%d %s)",
+                      mark.name, mark.line, mark.column, e.problem,
+                      e.note, cmark.name, cmark.line, cmark.column, e.context)
+        raise
+
+# Validation is only available when pykwalify can be imported; when it
+# cannot, _yaml_validate() becomes a no-op stub and a warning is logged.
+try:
+    import pykwalify.core
+    # Keep pykwalify from printing error messages itself; failures are
+    # reported by whoever catches the raised exception
+    logging.getLogger("pykwalify.core").setLevel(logging.CRITICAL)
+
+    def _yaml_validate(data, schema):
+        """Validate data against schema; do nothing for an empty schema"""
+        if schema:
+            validator = pykwalify.core.Core(source_data=data,
+                                            schema_data=schema)
+            validator.validate(raise_exception=True)
+
+except ImportError as import_error:
+    log.warning("can't import pykwalify; won't validate YAML (%s)",
+                import_error)
+
+    def _yaml_validate(data, schema):
+        """Stub used when pykwalify is missing: accepts anything"""
+        return None
+
+def yaml_load_verify(filename, schema):
+    """
+    Load a testcase/sample YAML document safely, check it against a
+    YAML schema and hand back the loaded data when the check succeeds.
+
+    :param str filename: name of the file to load and process
+    :param dict schema: loaded YAML schema (can load with :func:`yaml_load`)
+
+    :raises yaml.scanner.ScannerError: on YAML parsing error
+    :raises pykwalify.errors.SchemaError: on Schema violation error
+    :return: the data loaded from the file
+    """
+    # The file is expected to contain a single YAML document
+    document = yaml_load(filename)
+    _yaml_validate(document, schema)
+    return document