dts: dtlib: Fix parsing of character literals

The following syntax was accepted due to a misunderstanding:

    foo = 'x';

In reality, 'x' works like an integer literal, and is used like this:

    foo = < 'x' >;

Fix character literal parsing to match the C tools.

Also fix backslash escape parsing to match the C tools exactly
(get_escape_char() in util.c): when <char> isn't recognized as a special
escape character, \<char> should be turned into <char> instead of being
left as the literal two-character sequence. This fixes parsing of e.g.
'\'' (a character literal with a single quote in it).

Piggyback some more tests for weird property/node names (names that
overlap with operators and integer literals).

Signed-off-by: Ulf Magnusson <Ulf.Magnusson@nordicsemi.no>
This commit is contained in:
Ulf Magnusson 2019-08-13 08:23:19 +02:00 committed by Kumar Gala
commit c2d702b961
2 changed files with 92 additions and 27 deletions

View file

@ -443,12 +443,6 @@ class DT:
elif tok.val == "[":
self._parse_bytes(prop)
elif tok.id is _T_CHAR_LITERAL:
val = self._unescape(tok.val.encode("utf-8"))
if len(val) != 1:
self._parse_error("character literals must be length 1")
prop.value += val
elif tok.id is _T_STRING:
prop._add_marker(_TYPE_STRING)
prop.value += self._unescape(tok.val.encode("utf-8")) + b"\0"
@ -767,7 +761,13 @@ class DT:
match = _token_re.match(self._file_contents, self._tok_end_i)
if match:
tok_id = match.lastindex
tok_val = match.group(tok_id)
if tok_id is _T_CHAR_LITERAL:
val = self._unescape(match.group(tok_id).encode("utf-8"))
if len(val) != 1:
self._parse_error("character literals must be length 1")
tok_val = ord(val)
else:
tok_val = match.group(tok_id)
elif self._lexer_state is _DEFAULT:
match = _num_re.match(self._file_contents, self._tok_end_i)
@ -1146,24 +1146,27 @@ class DT:
def sub(match):
esc = match.group(1)
if esc == b"\\": return b"\\"
if esc == b'"': return b'"'
if esc == b"a": return b"\a"
if esc == b"b": return b"\b"
if esc == b"t": return b"\t"
if esc == b"n": return b"\n"
if esc == b"v": return b"\v"
if esc == b"f": return b"\f"
if esc == b"r": return b"\r"
if esc == b"a": return b"\a"
if esc == b"b": return b"\b"
if esc == b"t": return b"\t"
if esc == b"n": return b"\n"
if esc == b"v": return b"\v"
if esc == b"f": return b"\f"
if esc == b"r": return b"\r"
if esc.startswith(b"x"):
if esc[0] in b"01234567":
# Octal escape
try:
return int(esc, 8).to_bytes(1, "big")
except OverflowError:
self._parse_error("octal escape out of range (> 255)")
if esc[0] == ord("x") and len(esc) > 1:
# Hex escape
return int(esc[1:], 16).to_bytes(1, "big")
# Octal escape
try:
return int(esc, 8).to_bytes(1, "big")
except OverflowError:
self._parse_error("octal escape out of range (> 255)")
# Return <char> as-is for other \<char>
return esc[0].to_bytes(1, "big")
return _unescape_re.sub(sub, b)
@ -1846,10 +1849,9 @@ _misc_re = re.compile(
_byte_re = re.compile(r"[0-9a-fA-F]{2}")
# Matches a backslash escape within a 'bytes' array. Captures the 'x' part of
# '\x'.
_unescape_re = \
re.compile(br'\\(\\|"|a|b|t|n|v|f|r|[0-7]{1,3}|x[0-9A-Fa-f]{1,2})')
# Matches a backslash escape within a 'bytes' array. Captures the 'c' part of
# '\c', where c might be a single character or an octal/hex escape.
_unescape_re = re.compile(br'\\([0-7]{1,3}|x[0-9A-Fa-f]{1,2}|.)')
# #line directive (this is the regex the C tools use)
_line_re = re.compile(

View file

@ -88,6 +88,7 @@ def run():
i = /bits/ 16 < 0x10 0x20 (-1) >;
j = /bits/ 32 < 0x10 0x20 (-1) >;
k = /bits/ 64 < 0x10 0x20 (-1) >;
l = < 'a' 'b' 'c' >;
};
""",
"""
@ -105,6 +106,7 @@ def run():
i = /bits/ 16 < 0x10 0x20 0xffff >;
j = < 0x10 0x20 0xffffffff >;
k = /bits/ 64 < 0x10 0x20 0xffffffffffffffff >;
l = < 0x61 0x62 0x63 >;
};
""")
@ -199,6 +201,55 @@ r"""
""",
".tmp.dts:4 (column 6): parse error: octal escape out of range (> 255)")
#
# Test character literal parsing
#
verify_parse(r"""
/dts-v1/;
/ {
a = < '\'' >;
b = < '\x12' >;
};
""",
"""
/dts-v1/;
/ {
a = < 0x27 >;
b = < 0x12 >;
};
""")
verify_error("""
/dts-v1/;
/ {
// Character literals are not allowed at the top level
a = 'x';
};
""",
".tmp.dts:5 (column 6): parse error: malformed value")
verify_error("""
/dts-v1/;
/ {
a = < '' >;
};
""",
".tmp.dts:4 (column 7): parse error: character literals must be length 1")
verify_error("""
/dts-v1/;
/ {
a = < '12' >;
};
""",
".tmp.dts:4 (column 7): parse error: character literals must be length 1")
#
# Test /incbin/
#
@ -1115,6 +1166,7 @@ y /include/ "via-include-path-1"
not2 = < (!1) >;
not3 = < (!2) >;
nest = < (((--3) + (-2)) * (--(-2))) >;
char_lits = < ('a' + 'b') >;
};
""",
"""
@ -1166,6 +1218,7 @@ y /include/ "via-include-path-1"
not2 = < 0x0 >;
not3 = < 0x0 >;
nest = < 0xfffffffe >;
char_lits = < 0xc3 >;
};
""")
@ -1980,14 +2033,20 @@ l1: l2: /memreserve/ 0x0000000000000002 0x0000000000000004;
// A leading \ is accepted but ignored in node/propert names
\aA0,._+*#?- = &_, &{/aA0,._+*#?@-};
// Names that overlap with operators
// Names that overlap with operators and integer literals
+ = [ 00 ];
* = [ 02 ];
- = [ 01 ];
? = [ 03 ];
0 = [ 04 ];
0x123 = [ 05 ];
_: \aA0,._+*#?@- {
};
0 {
};
};
""",
"""
@ -1999,8 +2058,12 @@ l1: l2: /memreserve/ 0x0000000000000002 0x0000000000000004;
* = [ 02 ];
- = [ 01 ];
? = [ 03 ];
0 = [ 04 ];
0x123 = [ 05 ];
_: aA0,._+*#?@- {
};
0 {
};
};
""")