dts: dtlib: Fix parsing of character literals

The following was allowed due to a misunderstanding:

    foo = 'x';

In reality, 'x' works like an integer literal, and is used like this:

    foo = < 'x' >;

Fix character literal parsing to match the C tools.

Also fix backslash escape parsing to match the C tools exactly (get_escape_char() in util.c): \<char> should be turned into <char> if <char> isn't recognized as a special escape character, instead of being left alone. This fixes parsing of e.g. '\'' (a character literal with a single quote in it).

Piggyback some more tests for weird property/node names.

Signed-off-by: Ulf Magnusson <Ulf.Magnusson@nordicsemi.no>
2019-08-13 08:23:19 +02:00 · 2019-08-13 08:23:19 +02:00 · c2d702b961
commit c2d702b961
parent 7168118290
2 changed files with 92 additions and 27 deletions
--- a/scripts/dts/dtlib.py
+++ b/scripts/dts/dtlib.py
@ -443,12 +443,6 @@ class DT:
            elif tok.val == "[":
                self._parse_bytes(prop)
            elif tok.id is _T_CHAR_LITERAL:
                val = self._unescape(tok.val.encode("utf-8"))
                if len(val) != 1:
                    self._parse_error("character literals must be length 1")
                prop.value += val
            elif tok.id is _T_STRING:
                prop._add_marker(_TYPE_STRING)
                prop.value += self._unescape(tok.val.encode("utf-8")) + b"\0"
@ -767,6 +761,12 @@ class DT:
            match = _token_re.match(self._file_contents, self._tok_end_i)
            if match:
                tok_id = match.lastindex
                if tok_id is _T_CHAR_LITERAL:
                    val = self._unescape(match.group(tok_id).encode("utf-8"))
                    if len(val) != 1:
                        self._parse_error("character literals must be length 1")
                    tok_val = ord(val)
                else:
                    tok_val = match.group(tok_id)
            elif self._lexer_state is _DEFAULT:
@ -1146,8 +1146,6 @@ class DT:
        def sub(match):
            esc = match.group(1)
            if esc == b"\\": return b"\\"
            if esc == b'"':  return b'"'
            if esc == b"a": return b"\a"
            if esc == b"b": return b"\b"
            if esc == b"t": return b"\t"
@ -1156,15 +1154,20 @@ class DT:
            if esc == b"f": return b"\f"
            if esc == b"r": return b"\r"
-            if esc.startswith(b"x"):
+            if esc[0] in b"01234567":
-                return int(esc[1:], 16).to_bytes(1, "big")
                # Octal escape
                try:
                    return int(esc, 8).to_bytes(1, "big")
                except OverflowError:
                    self._parse_error("octal escape out of range (> 255)")
            if esc[0] == ord("x") and len(esc) > 1:
                # Hex escape
                return int(esc[1:], 16).to_bytes(1, "big")
            # Return <char> as-is for other \<char>
            return esc[0].to_bytes(1, "big")
        return _unescape_re.sub(sub, b)
    def _open(self, filename, mode="r", **kwargs):
@ -1846,10 +1849,9 @@ _misc_re = re.compile(
 _byte_re = re.compile(r"[0-9a-fA-F]{2}")
-# Matches a backslash escape within a 'bytes' array. Captures the 'x' part of
+# Matches a backslash escape within a 'bytes' array. Captures the 'c' part of
-# '\x'.
+# '\c', where c might be a single character or an octal/hex escape.
-_unescape_re = \
+_unescape_re = re.compile(br'\\([0-7]{1,3}|x[0-9A-Fa-f]{1,2}|.)')
-    re.compile(br'\\(\\|"|a|b|t|n|v|f|r|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})')
 # #line directive (this is the regex the C tools use)
 _line_re = re.compile(
--- a/scripts/dts/testdtlib.py
+++ b/scripts/dts/testdtlib.py
@ -88,6 +88,7 @@ def run():
 	i = /bits/ 16 < 0x10 0x20 (-1) >;
 	j = /bits/ 32 < 0x10 0x20 (-1) >;
 	k = /bits/ 64 < 0x10 0x20 (-1) >;
 	l = < 'a' 'b' 'c' >;
 };
 """,
 """
@ -105,6 +106,7 @@ def run():
 	i = /bits/ 16 < 0x10 0x20 0xffff >;
 	j = < 0x10 0x20 0xffffffff >;
 	k = /bits/ 64 < 0x10 0x20 0xffffffffffffffff >;
 	l = < 0x61 0x62 0x63 >;
 };
 """)
@ -199,6 +201,55 @@ r"""
 """,
 ".tmp.dts:4 (column 6): parse error: octal escape out of range (> 255)")
    #
    # Test character literal parsing
    #
    verify_parse(r"""
 /dts-v1/;
 / {
 	a = < '\'' >;
 	b = < '\x12' >;
 };
 """,
 """
 /dts-v1/;
 / {
 	a = < 0x27 >;
 	b = < 0x12 >;
 };
 """)
    verify_error("""
 /dts-v1/;
 / {
 	// Character literals are not allowed at the top level
 	a = 'x';
 };
 """,
 ".tmp.dts:5 (column 6): parse error: malformed value")
    verify_error("""
 /dts-v1/;
 / {
 	a = < '' >;
 };
 """,
 ".tmp.dts:4 (column 7): parse error: character literals must be length 1")
    verify_error("""
 /dts-v1/;
 / {
 	a = < '12' >;
 };
 """,
 ".tmp.dts:4 (column 7): parse error: character literals must be length 1")
    #
    # Test /incbin/
    #
@ -1115,6 +1166,7 @@ y /include/ "via-include-path-1"
 	not2        = < (!1) >;
 	not3        = < (!2) >;
 	nest        = < (((--3) + (-2)) * (--(-2))) >;
 	char_lits   = < ('a' + 'b') >;
 };
 """,
 """
@ -1166,6 +1218,7 @@ y /include/ "via-include-path-1"
 	not2 = < 0x0 >;
 	not3 = < 0x0 >;
 	nest = < 0xfffffffe >;
 	char_lits = < 0xc3 >;
 };
 """)
@ -1980,14 +2033,20 @@ l1: l2: /memreserve/ 0x0000000000000002 0x0000000000000004;
 	// A leading \ is accepted but ignored in node/propert names
 	\aA0,._+*#?- = &_, &{/aA0,._+*#?@-};
-	// Names that overlap with operators
+	// Names that overlap with operators and integer literals
 	+ = [ 00 ];
 	* = [ 02 ];
 	- = [ 01 ];
 	? = [ 03 ];
 	0 = [ 04 ];
 	0x123 = [ 05 ];
 	_: \aA0,._+*#?@- {
 	};
 	0 {
 	};
 };
 """,
 """
@ -1999,8 +2058,12 @@ l1: l2: /memreserve/ 0x0000000000000002 0x0000000000000004;
 	* = [ 02 ];
 	- = [ 01 ];
 	? = [ 03 ];
 	0 = [ 04 ];
 	0x123 = [ 05 ];
 	_: aA0,._+*#?@- {
 	};
 	0 {
 	};
 };
 """)