dtlib: fix issue which allowed invalid node names

Node names are subject to the rules in table 2.1 of the devicetree specification v0.3, while properties are subject to rules in table 2.2. These rules mean that some property names are invalid node names.

However, the same regular expression is being used to validate the names of nodes and properties in dtlib. This leads to invalid node names being allowed to pass.

Fix this issue by moving the node name handling code to the Node constructor and checking against the characters in table 2.1.

The test cases claim that the existing behavior matches dtc. I can't reproduce that. I get errors when I use invalid characters (like "?") in a node name. For example:

    foo.dts:3.8-11: ERROR (node_name_chars): /node?: Bad character '?' in node name

Try to make the dtlib error message reminiscent of that.

Signed-off-by: Martí Bolívar <marti.bolivar@nordicsemi.no>
2021-08-30 20:55:55 -07:00 · 2021-08-30 20:55:55 -07:00 · ffa1515978
commit ffa1515978
parent fff818bbe6
2 changed files with 27 additions and 10 deletions
--- a/scripts/dts/python-devicetree/src/devicetree/dtlib.py
+++ b/scripts/dts/python-devicetree/src/devicetree/dtlib.py
@ -17,6 +17,7 @@ import enum
 import errno
 import os
 import re
+import string
 import sys
 import textwrap
 from typing import Any, Dict, Iterable, List, \
@ -92,6 +93,14 @@ class Node:
        self.parent = parent
        self.dt = dt

+        if name.count("@") > 1:
+            dt._parse_error("multiple '@' in node name")
+        if not name == "/":
+            for char in name:
+                if char not in _nodename_chars:
+                    dt._parse_error(f"{self.path}: bad character '{char}' "
+                                    "in node name")
+
        self.props: Dict[str, 'Property'] = collections.OrderedDict()
        self.nodes: Dict[str, 'Node'] = collections.OrderedDict()
        self.labels: List[str] = []
@ -950,9 +959,6 @@ class DT:
                if self._peek_token().val == "{":
                    # '<tok> { ...', expect node

-                    if tok.val.count("@") > 1:
-                        self._parse_error("multiple '@' in node name")
-
                    # Fetch the existing node if it already exists. This
                    # happens when overriding nodes.
                    child = node.nodes.get(tok.val) or \
@ -1925,6 +1931,9 @@ _num_re = re.compile(r"(0[xX][0-9a-fA-F]+|[0-9]+)(?:ULL|UL|LL|U|L)?")
 # names that would clash with other stuff
 _propnodename_re = re.compile(r"\\?([a-zA-Z0-9,._+*#?@-]+)")

+# Node names are more restrictive than property names.
+_nodename_chars = set(string.ascii_letters + string.digits + ',._+-@')
+
 # Misc. tokens that are tried after a property/node name. This is important, as
 # there's overlap with the allowed characters in names.
 _misc_re = re.compile(
--- a/scripts/dts/python-devicetree/tests/test_dtlib.py
+++ b/scripts/dts/python-devicetree/tests/test_dtlib.py
@ -100,6 +100,16 @@ def temporary_chdir(dirname):
    finally:
        os.chdir(here)

+def test_invalid_nodenames():
+    # Regression test that verifies node names are not matched against
+    # the more permissive set of rules used for property names.
+
+    verify_error_endswith("""
+/dts-v1/;
+/ { node? {}; };
+""",
+                          "/node?: bad character '?' in node name")
+
 def test_cell_parsing():
    '''Miscellaneous properties containing zero or more cells'''

@ -2107,15 +2117,12 @@ def test_reprs():
 def test_names():
    '''Tests for node/property names.'''

-    # The C tools disallow '@' in property names, but otherwise accept the same
-    # characters in node and property names. Emulate that instead of the DT spec
-    # (v0.2), which gives different characters for nodes and properties.
    verify_parse(r"""
 /dts-v1/;

 / {
 	// A leading \ is accepted but ignored in node/propert names
-	\aA0,._+*#?- = &_, &{/aA0,._+*#?@-};
+	\aA0,._+*#?- = &_, &{/aA0,._+@-};

 	// Names that overlap with operators and integer literals

@ -2126,7 +2133,8 @@ def test_names():
 	0 = [ 04 ];
 	0x123 = [ 05 ];

-	_: \aA0,._+*#?@- {
+	// Node names are more restrictive than property names.
+	_: \aA0,._+@- {
 	};

 	0 {
@ -2137,14 +2145,14 @@ def test_names():
 /dts-v1/;

 / {
-	aA0,._+*#?- = &_, &{/aA0,._+*#?@-};
+	aA0,._+*#?- = &_, &{/aA0,._+@-};
 	+ = [ 00 ];
 	* = [ 02 ];
 	- = [ 01 ];
 	? = [ 03 ];
 	0 = [ 04 ];
 	0x123 = [ 05 ];
-	_: aA0,._+*#?@- {
+	_: aA0,._+@- {
 	};
 	0 {
 	};