dts: dtlib: Fix parsing of character literals

This was allowed due to a misunderstanding:

    foo = 'x';

In reality, 'x' works like an integer literal, and is used like this:

    foo = < 'x' >;

Fix character literal parsing to match the C tools.

Also fix backslash escape parsing to match the C tools exactly (get_escape_char() in util.c): \<char> should be turned into <char> if <char> isn't recognized as a special escape character, instead of being left alone. This fixes parsing of e.g. '\'' (a character literal with a single quote in it).

Piggyback some more tests for weird property/node names.

Signed-off-by: Ulf Magnusson <Ulf.Magnusson@nordicsemi.no>
2019-08-13 08:23:19 +02:00 · 2019-08-13 08:23:19 +02:00 · c2d702b961
commit c2d702b961
parent 7168118290
2 changed files with 92 additions and 27 deletions
--- a/scripts/dts/dtlib.py
+++ b/scripts/dts/dtlib.py
@ -443,12 +443,6 @@ class DT:
            elif tok.val == "[":
                self._parse_bytes(prop)

-            elif tok.id is _T_CHAR_LITERAL:
-                val = self._unescape(tok.val.encode("utf-8"))
-                if len(val) != 1:
-                    self._parse_error("character literals must be length 1")
-                prop.value += val
-
            elif tok.id is _T_STRING:
                prop._add_marker(_TYPE_STRING)
                prop.value += self._unescape(tok.val.encode("utf-8")) + b"\0"
@ -767,7 +761,13 @@ class DT:
            match = _token_re.match(self._file_contents, self._tok_end_i)
            if match:
                tok_id = match.lastindex
-                tok_val = match.group(tok_id)
+                if tok_id is _T_CHAR_LITERAL:
+                    val = self._unescape(match.group(tok_id).encode("utf-8"))
+                    if len(val) != 1:
+                        self._parse_error("character literals must be length 1")
+                    tok_val = ord(val)
+                else:
+                    tok_val = match.group(tok_id)

            elif self._lexer_state is _DEFAULT:
                match = _num_re.match(self._file_contents, self._tok_end_i)
@ -1146,24 +1146,27 @@ class DT:

        def sub(match):
            esc = match.group(1)
-            if esc == b"\\": return b"\\"
-            if esc == b'"':  return b'"'
-            if esc == b"a":  return b"\a"
-            if esc == b"b":  return b"\b"
-            if esc == b"t":  return b"\t"
-            if esc == b"n":  return b"\n"
-            if esc == b"v":  return b"\v"
-            if esc == b"f":  return b"\f"
-            if esc == b"r":  return b"\r"
+            if esc == b"a": return b"\a"
+            if esc == b"b": return b"\b"
+            if esc == b"t": return b"\t"
+            if esc == b"n": return b"\n"
+            if esc == b"v": return b"\v"
+            if esc == b"f": return b"\f"
+            if esc == b"r": return b"\r"

-            if esc.startswith(b"x"):
+            if esc[0] in b"01234567":
+                # Octal escape
+                try:
+                    return int(esc, 8).to_bytes(1, "big")
+                except OverflowError:
+                    self._parse_error("octal escape out of range (> 255)")
+
+            if esc[0] == ord("x") and len(esc) > 1:
+                # Hex escape
                return int(esc[1:], 16).to_bytes(1, "big")

-            # Octal escape
-            try:
-                return int(esc, 8).to_bytes(1, "big")
-            except OverflowError:
-                self._parse_error("octal escape out of range (> 255)")
+            # Return <char> as-is for other \<char>
+            return esc[0].to_bytes(1, "big")

        return _unescape_re.sub(sub, b)

@ -1846,10 +1849,9 @@ _misc_re = re.compile(

 _byte_re = re.compile(r"[0-9a-fA-F]{2}")

-# Matches a backslash escape within a 'bytes' array. Captures the 'x' part of
-# '\x'.
-_unescape_re = \
-    re.compile(br'\\(\\|"|a|b|t|n|v|f|r|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})')
+# Matches a backslash escape within a 'bytes' array. Captures the 'c' part of
+# '\c', where c might be a single character or an octal/hex escape.
+_unescape_re = re.compile(br'\\([0-7]{1,3}|x[0-9A-Fa-f]{1,2}|.)')

 # #line directive (this is the regex the C tools use)
 _line_re = re.compile(
--- a/scripts/dts/testdtlib.py
+++ b/scripts/dts/testdtlib.py
@ -88,6 +88,7 @@ def run():
 	i = /bits/ 16 < 0x10 0x20 (-1) >;
 	j = /bits/ 32 < 0x10 0x20 (-1) >;
 	k = /bits/ 64 < 0x10 0x20 (-1) >;
+	l = < 'a' 'b' 'c' >;
 };
 """,
 """
@ -105,6 +106,7 @@ def run():
 	i = /bits/ 16 < 0x10 0x20 0xffff >;
 	j = < 0x10 0x20 0xffffffff >;
 	k = /bits/ 64 < 0x10 0x20 0xffffffffffffffff >;
+	l = < 0x61 0x62 0x63 >;
 };
 """)

@ -199,6 +201,55 @@ r"""
 """,
 ".tmp.dts:4 (column 6): parse error: octal escape out of range (> 255)")

+    #
+    # Test character literal parsing
+    #
+
+    verify_parse(r"""
+/dts-v1/;
+
+/ {
+	a = < '\'' >;
+	b = < '\x12' >;
+};
+""",
+"""
+/dts-v1/;
+
+/ {
+	a = < 0x27 >;
+	b = < 0x12 >;
+};
+""")
+
+    verify_error("""
+/dts-v1/;
+
+/ {
+	// Character literals are not allowed at the top level
+	a = 'x';
+};
+""",
+".tmp.dts:5 (column 6): parse error: malformed value")
+
+    verify_error("""
+/dts-v1/;
+
+/ {
+	a = < '' >;
+};
+""",
+".tmp.dts:4 (column 7): parse error: character literals must be length 1")
+
+    verify_error("""
+/dts-v1/;
+
+/ {
+	a = < '12' >;
+};
+""",
+".tmp.dts:4 (column 7): parse error: character literals must be length 1")
+
    #
    # Test /incbin/
    #
@ -1115,6 +1166,7 @@ y /include/ "via-include-path-1"
 	not2        = < (!1) >;
 	not3        = < (!2) >;
 	nest        = < (((--3) + (-2)) * (--(-2))) >;
+	char_lits   = < ('a' + 'b') >;
 };
 """,
 """
@ -1166,6 +1218,7 @@ y /include/ "via-include-path-1"
 	not2 = < 0x0 >;
 	not3 = < 0x0 >;
 	nest = < 0xfffffffe >;
+	char_lits = < 0xc3 >;
 };
 """)

@ -1980,14 +2033,20 @@ l1: l2: /memreserve/ 0x0000000000000002 0x0000000000000004;
 	// A leading \ is accepted but ignored in node/propert names
 	\aA0,._+*#?- = &_, &{/aA0,._+*#?@-};

-	// Names that overlap with operators
+	// Names that overlap with operators and integer literals
+
 	+ = [ 00 ];
 	* = [ 02 ];
 	- = [ 01 ];
 	? = [ 03 ];
+	0 = [ 04 ];
+	0x123 = [ 05 ];

 	_: \aA0,._+*#?@- {
 	};
+
+	0 {
+	};
 };
 """,
 """
@ -1999,8 +2058,12 @@ l1: l2: /memreserve/ 0x0000000000000002 0x0000000000000004;
 	* = [ 02 ];
 	- = [ 01 ];
 	? = [ 03 ];
+	0 = [ 04 ];
+	0x123 = [ 05 ];
 	_: aA0,._+*#?@- {
 	};
+	0 {
+	};
 };
 """)