dtlib: fix issue which allowed invalid node names

Node names are subject to the rules in table 2.1 of the devicetree
specification v0.3, while property names are subject to the rules in
table 2.2. These rules mean that some valid property names are invalid
node names.

However, dtlib uses the same regular expression to validate the names
of both nodes and properties, so invalid node names are accepted
instead of being rejected. Fix this issue by moving the node name
handling code to the Node constructor and checking against the
characters in table 2.1.

The test cases claim that the existing behavior matches dtc. I can't
reproduce that: dtc reports an error when I use invalid characters
(like "?") in a node name. For example:

foo.dts:3.8-11: ERROR (node_name_chars): /node?: Bad character '?' in
node name

Try to make the dtlib error message reminiscent of that.

Signed-off-by: Martí Bolívar <marti.bolivar@nordicsemi.no>
This commit is contained in:
Martí Bolívar 2021-08-30 20:55:55 -07:00 committed by Christopher Friedt
commit ffa1515978
2 changed files with 27 additions and 10 deletions

View file

@ -17,6 +17,7 @@ import enum
import errno
import os
import re
import string
import sys
import textwrap
from typing import Any, Dict, Iterable, List, \
@ -92,6 +93,14 @@ class Node:
self.parent = parent
self.dt = dt
if name.count("@") > 1:
dt._parse_error("multiple '@' in node name")
if not name == "/":
for char in name:
if char not in _nodename_chars:
dt._parse_error(f"{self.path}: bad character '{char}' "
"in node name")
self.props: Dict[str, 'Property'] = collections.OrderedDict()
self.nodes: Dict[str, 'Node'] = collections.OrderedDict()
self.labels: List[str] = []
@ -950,9 +959,6 @@ class DT:
if self._peek_token().val == "{":
# '<tok> { ...', expect node
if tok.val.count("@") > 1:
self._parse_error("multiple '@' in node name")
# Fetch the existing node if it already exists. This
# happens when overriding nodes.
child = node.nodes.get(tok.val) or \
@ -1925,6 +1931,9 @@ _num_re = re.compile(r"(0[xX][0-9a-fA-F]+|[0-9]+)(?:ULL|UL|LL|U|L)?")
# names that would clash with other stuff
_propnodename_re = re.compile(r"\\?([a-zA-Z0-9,._+*#?@-]+)")
# Node names are more restrictive than property names.
_nodename_chars = set(string.ascii_letters + string.digits + ',._+-@')
# Misc. tokens that are tried after a property/node name. This is important, as
# there's overlap with the allowed characters in names.
_misc_re = re.compile(

View file

@ -100,6 +100,16 @@ def temporary_chdir(dirname):
finally:
os.chdir(here)
def test_invalid_nodenames():
    '''Regression test: node names must not be matched against the more
    permissive set of rules used for property names.'''
    # The node below is named 'node?'. The '?' character is not a valid
    # node name character.
    dts_source = """
/dts-v1/;
/ { node? {}; };
"""
    # NOTE(review): verify_error_endswith is defined elsewhere in this
    # file; judging by its name it checks the tail of the reported error.
    expected_suffix = "/node?: bad character '?' in node name"

    verify_error_endswith(dts_source, expected_suffix)
def test_cell_parsing():
'''Miscellaneous properties containing zero or more cells'''
@ -2107,15 +2117,12 @@ def test_reprs():
def test_names():
'''Tests for node/property names.'''
# The C tools disallow '@' in property names, but otherwise accept the same
# characters in node and property names. Emulate that instead of the DT spec
# (v0.2), which gives different characters for nodes and properties.
verify_parse(r"""
/dts-v1/;
/ {
// A leading \ is accepted but ignored in node/propert names
\aA0,._+*#?- = &_, &{/aA0,._+*#?@-};
\aA0,._+*#?- = &_, &{/aA0,._+@-};
// Names that overlap with operators and integer literals
@ -2126,7 +2133,8 @@ def test_names():
0 = [ 04 ];
0x123 = [ 05 ];
_: \aA0,._+*#?@- {
// Node names are more restrictive than property names.
_: \aA0,._+@- {
};
0 {
@ -2137,14 +2145,14 @@ def test_names():
/dts-v1/;
/ {
aA0,._+*#?- = &_, &{/aA0,._+*#?@-};
aA0,._+*#?- = &_, &{/aA0,._+@-};
+ = [ 00 ];
* = [ 02 ];
- = [ 01 ];
? = [ 03 ];
0 = [ 04 ];
0x123 = [ 05 ];
_: aA0,._+*#?@- {
_: aA0,._+@- {
};
0 {
};