scripts: logging: dictionary: Fix database generation

I noticed that not all strings were put into the database. That is because
the algorithm searched for any null-terminated byte array in the
section (e.g. rodata) and then attempted to decode it as a string.
However, a section may contain other static const variables, and if a
string is preceded by a variable that has non-printable bytes and no
zero at the end, then the algorithm picked a candidate that started
with non-printable characters followed by the string. Such a candidate
was discarded because it was not a valid string.

The algorithm is changed to treat as a string candidate any sequence of
printable bytes followed by a 0 byte.

Signed-off-by: Krzysztof Chruściński <krzysztof.chruscinski@nordicsemi.no>
This commit is contained in:
Krzysztof Chruściński 2023-06-14 09:59:52 +02:00 committed by Carles Cufí
commit 9c2d1c8e99

View file

@ -378,97 +378,70 @@ def extract_string_variables(elf):
return strings
def try_decode_string(str_maybe):
    """Try to decode a byte string with each known encoding.

    The encodings listed in STR_ENCODINGS are tried in order and the
    result of the first successful decode is returned.

    No printability check is done here; this function only decodes.

    Returns the decoded string, or None if every encoding raised
    UnicodeDecodeError.
    """
    for encoding in STR_ENCODINGS:
        try:
            return str_maybe.decode(encoding)
        except UnicodeDecodeError:
            # Not valid in this encoding, try the next one.
            pass

    return None
def is_printable(b):
    """Tell whether character b may be part of an extracted string.

    The parser is written in Python and will need to print the string,
    so a character is accepted when Python considers it printable
    (it is found in string.printable) or when it is one of
    ACCEPTABLE_ESCAPE_CHARS.
    """
    # NOTE(review): string.printable already contains the ASCII
    # whitespace characters (including '\r' and '\n'), so the second
    # check presumably only matters for characters outside that set;
    # confirm against the definition of ACCEPTABLE_ESCAPE_CHARS.
    if b in string.printable:
        return True

    return b in ACCEPTABLE_ESCAPE_CHARS
def extract_strings_in_one_section(section, str_mappings):
    """Extract NULL-terminated strings in one ELF section.

    A string candidate is a run of printable bytes (as judged by
    is_printable()) that is immediately followed by a NULL byte.
    Any other non-printable byte discards the run collected so far,
    so a string that directly follows binary data is still found
    starting at its first printable byte.

    section is indexed with 'data' (iterated byte by byte and sliced)
    and 'start' (added to an offset within the data to form an address).
    str_mappings maps address to string; it is updated in place and
    also returned.
    """
    data = section['data']

    # Offset where the current run of printable bytes began,
    # or None when not inside a run.
    start = None

    for idx, one_byte in enumerate(data):
        if is_printable(chr(one_byte)):
            # Printable character, potential part of string
            if start is None:
                # Beginning of potential string
                start = idx
            continue

        # Every non-printable byte ends the current run, but only
        # a NULL byte turns that run into a string candidate.
        run_start, start = start, None
        if one_byte != 0 or run_start is None:
            continue

        str_maybe = data[run_start:idx]
        decoded_str = try_decode_string(str_maybe)
        if decoded_str is None:
            continue

        addr = section['start'] + run_start
        if addr not in str_mappings:
            str_mappings[addr] = decoded_str

            # Decoded string may contain control characters
            # (e.g. '\r' or '\n'), so simply print the byte
            # string instead.
            logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
                         addr, str_maybe)

        # GCC-based toolchain will reuse the NULL character
        # for empty strings. There is no way to know which
        # one is being reused, so just treat all NULL character
        # at the end of legitimate strings as empty strings.
        null_addr = section['start'] + idx
        str_mappings[null_addr] = ''
        logger.debug('Found null string via extraction at ' + PTR_FMT,
                     null_addr)

    return str_mappings