scripts: logging: dictionary: Fix database generation

I noticed that not all strings were put into the database. That is because the algorithm was searching for any null-terminated byte arrays in the section (e.g. rodata) and then attempting to decode each one as a string. However, a section may contain other static const variables, and if a string is preceded by a variable that has non-printable bytes and no zero at the end, then the algorithm was picking a candidate that started with non-printable characters followed by the string. Such a candidate was discarded because it was not a valid string. The algorithm is changed to treat as a string candidate any sequence of printable bytes followed by 0. Signed-off-by: Krzysztof Chruściński <krzysztof.chruscinski@nordicsemi.no>
2023-06-14 09:59:52 +02:00 · 2023-06-14 09:59:52 +02:00 · 9c2d1c8e99
commit 9c2d1c8e99
parent 12e8de71b1
1 changed files with 44 additions and 71 deletions
--- a/scripts/logging/dictionary/database_gen.py
+++ b/scripts/logging/dictionary/database_gen.py
@ -378,97 +378,70 @@ def extract_string_variables(elf):

    return strings

-
 def try_decode_string(str_maybe):
    """Check if it is a printable string"""
    for encoding in STR_ENCODINGS:
        try:
-            decoded_str = str_maybe.decode(encoding)
-
-            # Check if string is printable according to Python
-            # since the parser (written in Python) will need to
-            # print the string.
-            #
-            # Note that '\r' and '\n' are not included in
-            # string.printable so they need to be checked separately.
-            printable = True
-            for one_char in decoded_str:
-                if (one_char not in string.printable
-                    and one_char not in ACCEPTABLE_ESCAPE_CHARS):
-                    printable = False
-                    break
-
-            if printable:
-                return decoded_str
+            return str_maybe.decode(encoding)
        except UnicodeDecodeError:
            pass

    return None

+def is_printable(b):
+    # Check if string is printable according to Python
+    # since the parser (written in Python) will need to
+    # print the string.
+    #
+    # Note that '\r' and '\n' are not included in
+    # string.printable so they need to be checked separately.
+    return (b in string.printable) or (b in ACCEPTABLE_ESCAPE_CHARS)

 def extract_strings_in_one_section(section, str_mappings):
    """Extract NULL-terminated strings in one ELF section"""
-    bindata = section['data']
-
-    if len(bindata) < 2:
-        # Can't have a NULL-terminated string with fewer than 2 bytes.
-        return str_mappings
-
+    data = section['data']
    idx = 0
-
-    # If first byte is not NULL, it may be a string.
-    if bindata[0] == 0:
-        start = None
-    else:
-        start = 0
-
-    while idx < len(bindata):
-        if start is None:
-            if bindata[idx] == 0:
-                # Skip NULL bytes to find next string
-                idx += 1
-            else:
-                # Beginning of possible string
+    start = None
+    for x in data:
+        if is_printable(chr(x)):
+            # Printable character, potential part of string
+            if start is None:
+                # Beginning of potential string
                start = idx
-                idx += 1
-        else:
-            if bindata[idx] != 0:
-                # Skipping till next NULL byte for possible string
-                idx += 1
-            else:
-                # End of possible string
-                end = idx
+        elif x == 0:
+            # End of possible string
+            if start is not None:
+                # Found potential string
+                str_maybe = data[start : idx]
+                decoded_str = try_decode_string(str_maybe)

-                if start != end:
-                    str_maybe = bindata[start:end]
-                    decoded_str = try_decode_string(str_maybe)
+                if decoded_str is not None:
+                    addr = section['start'] + start

-                    # Only store readable string
-                    if decoded_str is not None:
-                        addr = section['start'] + start
+                    if addr not in str_mappings:
+                        str_mappings[addr] = decoded_str

-                        if addr not in str_mappings:
-                            str_mappings[addr] = decoded_str
+                        # Decoded string may contain un-printable characters
+                        # (e.g. extended ASC-II characters) or control
+                        # characters (e.g. '\r' or '\n'), so simply print
+                        # the byte string instead.
+                        logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
+                                     addr, str_maybe)

-                            # Decoded string may contain un-printable characters
-                            # (e.g. extended ASC-II characters) or control
-                            # characters (e.g. '\r' or '\n'), so simply print
-                            # the byte string instead.
-                            logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
-                                         addr, str_maybe)
-
-                            # GCC-based toolchain will reuse the NULL character
-                            # for empty strings. There is no way to know which
-                            # one is being reused, so just treat all NULL character
-                            # at the end of legitimate strings as empty strings.
-                            null_addr = section['start'] + end
-                            str_mappings[null_addr] = ''
-
-                            logger.debug('Found null string via extraction at ' + PTR_FMT,
-                                         null_addr)
+                        # GCC-based toolchain will reuse the NULL character
+                        # for empty strings. There is no way to know which
+                        # one is being reused, so just treat all NULL character
+                        # at the end of legitimate strings as empty strings.
+                        null_addr = section['start'] + idx
+                        str_mappings[null_addr] = ''

+                        logger.debug('Found null string via extraction at ' + PTR_FMT,
+                                     null_addr)
                start = None
-                idx += 1
+        else:
+            # Non-printable byte, remove start location
+            start = None
+        idx += 1

    return str_mappings