scripts: logging/dictionary: extract actual strings for db
This changes the database generation to actually extracting individual strings instead of stuffing the whole binary sections into the database. This allows the generation script to be extended to accommodate more output formats. Note that if CONFIG_LOG2_FMT_SECTION is enabled, the format strings are in log_strings_sections, and also have associated debug symbols in DWARF. So there is no need to manually extract them. Signed-off-by: Daniel Leung <daniel.leung@intel.com>
This commit is contained in:
parent
dc6b51a52d
commit
caca548cf9
4 changed files with 381 additions and 54 deletions
|
@ -16,16 +16,25 @@ import argparse
|
|||
import logging
|
||||
import os
|
||||
import re
|
||||
import string
|
||||
import struct
|
||||
import sys
|
||||
|
||||
import dictionary_parser.log_database
|
||||
from dictionary_parser.log_database import LogDatabase
|
||||
from dictionary_parser.utils import extract_one_string_in_section
|
||||
|
||||
import elftools
|
||||
from elftools.elf.constants import SH_FLAGS
|
||||
from elftools.elf.elffile import ELFFile
|
||||
from elftools.elf.descriptions import describe_ei_data
|
||||
from elftools.elf.sections import SymbolTableSection
|
||||
from elftools.dwarf.descriptions import (
|
||||
describe_DWARF_expr
|
||||
)
|
||||
from elftools.dwarf.locationlists import (
|
||||
LocationExpr, LocationParser
|
||||
)
|
||||
|
||||
|
||||
LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s"
|
||||
|
@ -37,7 +46,28 @@ STATIC_STRING_SECTIONS = [
|
|||
'rodata',
|
||||
'.rodata',
|
||||
'pinned.rodata',
|
||||
'log_strings_sections'
|
||||
]
|
||||
|
||||
|
||||
# Regulation expression to match DWARF location
|
||||
DT_LOCATION_REGEX = re.compile(r"\(DW_OP_addr: ([0-9a-f]+)")
|
||||
|
||||
|
||||
# Format string for pointers (default for 32-bit pointers)
|
||||
PTR_FMT = '0x%08x'
|
||||
|
||||
|
||||
# Potential string encodings. Add as needed.
|
||||
STR_ENCODINGS = [
|
||||
'ascii',
|
||||
'iso-8859-1',
|
||||
]
|
||||
|
||||
|
||||
# List of acceptable escape character
|
||||
ACCEPTABLE_ESCAPE_CHARS = [
|
||||
b'\r',
|
||||
b'\n',
|
||||
]
|
||||
|
||||
|
||||
|
@ -58,6 +88,30 @@ def parse_args():
|
|||
return argparser.parse_args()
|
||||
|
||||
|
||||
def extract_elf_code_data_sections(elf):
|
||||
"""Find all sections in ELF file"""
|
||||
sections = {}
|
||||
|
||||
for sect in elf.iter_sections():
|
||||
# Only Allocated sections with PROGBITS are included
|
||||
# since they actually have code/data.
|
||||
#
|
||||
# On contrary, BSS is allocated but NOBITS.
|
||||
if (
|
||||
(sect['sh_flags'] & SH_FLAGS.SHF_ALLOC) == SH_FLAGS.SHF_ALLOC
|
||||
and sect['sh_type'] == 'SHT_PROGBITS'
|
||||
):
|
||||
sections[sect.name] = {
|
||||
'name' : sect.name,
|
||||
'size' : sect['sh_size'],
|
||||
'start' : sect['sh_addr'],
|
||||
'end' : sect['sh_addr'] + sect['sh_size'] - 1,
|
||||
'data' : sect.data(),
|
||||
}
|
||||
|
||||
return sections
|
||||
|
||||
|
||||
def find_elf_sections(elf, sh_name):
|
||||
"""Find all sections in ELF file"""
|
||||
for section in elf.iter_sections():
|
||||
|
@ -196,30 +250,6 @@ def process_kconfigs(elf, database):
|
|||
kconfigs['CONFIG_LOG_TIMESTAMP_64BIT'])
|
||||
|
||||
|
||||
def extract_static_string_sections(elf, database):
|
||||
"""Extract sections containing static strings"""
|
||||
string_sections = STATIC_STRING_SECTIONS
|
||||
|
||||
# Some architectures may put static strings into additional sections.
|
||||
# So need to extract them too.
|
||||
arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()]
|
||||
if "extra_string_section" in arch_data:
|
||||
string_sections.extend(arch_data['extra_string_section'])
|
||||
|
||||
for name in string_sections:
|
||||
content = find_elf_sections(elf, name)
|
||||
if content is None:
|
||||
continue
|
||||
|
||||
logger.info("Found section: %s, 0x%x - 0x%x",
|
||||
name, content['start'], content['end'])
|
||||
database.add_string_section(name, content)
|
||||
|
||||
if not database.has_string_sections():
|
||||
logger.error("Cannot find any static string sections in ELF, exiting...")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def extract_logging_subsys_information(elf, database):
|
||||
"""
|
||||
Extract logging subsys related information and store in database.
|
||||
|
@ -242,18 +272,248 @@ def extract_logging_subsys_information(elf, database):
|
|||
parse_log_const_symbols(database, section_log_const, log_const_symbols)
|
||||
|
||||
|
||||
def is_die_attr_ref(attr):
|
||||
"""
|
||||
Returns True if the DIE attribute is a reference.
|
||||
"""
|
||||
return bool(attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2',
|
||||
'DW_FORM_ref4', 'DW_FORM_ref8',
|
||||
'DW_FORM_ref'))
|
||||
|
||||
|
||||
def find_die_var_base_type(compile_unit, die, is_const):
|
||||
"""
|
||||
Finds the base type of a DIE and returns the name.
|
||||
If DW_AT_type is a reference, it will recursively go through
|
||||
the references to find the base type. Returns None is no
|
||||
base type is found.
|
||||
"""
|
||||
# DIE is of base type. So extract the name.
|
||||
if die.tag == 'DW_TAG_base_type':
|
||||
return die.attributes['DW_AT_name'].value.decode('ascii'), is_const
|
||||
|
||||
# Not a type, cannot continue
|
||||
if not 'DW_AT_type' in die.attributes:
|
||||
return None, None
|
||||
|
||||
if die.tag == 'DW_TAG_const_type':
|
||||
is_const = True
|
||||
|
||||
# DIE is probably a reference to another.
|
||||
# If so, check if the reference is a base type.
|
||||
type_attr = die.attributes['DW_AT_type']
|
||||
if is_die_attr_ref(type_attr):
|
||||
ref_addr = compile_unit.cu_offset + type_attr.raw_value
|
||||
ref_die = compile_unit.get_DIE_from_refaddr(ref_addr)
|
||||
|
||||
return find_die_var_base_type(compile_unit, ref_die, is_const)
|
||||
|
||||
# Not a base type, and not reference
|
||||
return None, None
|
||||
|
||||
|
||||
def is_die_var_const_char(compile_unit, die):
|
||||
"""
|
||||
Returns True if DIE of type variable is const char.
|
||||
"""
|
||||
var_type, is_const = find_die_var_base_type(compile_unit, die, False)
|
||||
|
||||
if var_type is not None and var_type.endswith('char') and is_const:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def extract_string_variables(elf):
|
||||
"""
|
||||
Find all string variables (char) in all Compilation Units and
|
||||
Debug information Entry (DIE) in ELF file.
|
||||
"""
|
||||
dwarf_info = elf.get_dwarf_info()
|
||||
loc_lists = dwarf_info.location_lists()
|
||||
loc_parser = LocationParser(loc_lists)
|
||||
|
||||
strings = []
|
||||
|
||||
# Loop through all Compilation Units and
|
||||
# Debug information Entry (DIE) to extract all string variables
|
||||
for compile_unit in dwarf_info.iter_CUs():
|
||||
for die in compile_unit.iter_DIEs():
|
||||
# Only care about variables with location information
|
||||
# and of type "char"
|
||||
if die.tag == 'DW_TAG_variable':
|
||||
if ('DW_AT_type' in die.attributes
|
||||
and 'DW_AT_location' in die.attributes
|
||||
and is_die_var_const_char(compile_unit, die)
|
||||
):
|
||||
# Extract location information, which is
|
||||
# its address in memory.
|
||||
loc_attr = die.attributes['DW_AT_location']
|
||||
if loc_parser.attribute_has_location(loc_attr, die.cu['version']):
|
||||
loc = loc_parser.parse_from_attribute(loc_attr, die.cu['version'])
|
||||
if isinstance(loc, LocationExpr):
|
||||
try:
|
||||
addr = describe_DWARF_expr(loc.loc_expr,
|
||||
dwarf_info.structs)
|
||||
|
||||
matcher = DT_LOCATION_REGEX.match(addr)
|
||||
if matcher:
|
||||
addr = int(matcher.group(1), 16)
|
||||
if addr > 0:
|
||||
strings.append({
|
||||
'name': die.attributes['DW_AT_name'].value,
|
||||
'addr': addr,
|
||||
'die': die
|
||||
})
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
return strings
|
||||
|
||||
|
||||
def try_decode_string(str_maybe):
|
||||
"""Check if it is a printable string"""
|
||||
for encoding in STR_ENCODINGS:
|
||||
try:
|
||||
decoded_str = str_maybe.decode(encoding)
|
||||
|
||||
# Check if string is printable according to Python
|
||||
# since the parser (written in Python) will need to
|
||||
# print the string.
|
||||
#
|
||||
# Note that '\r' and '\n' are not included in
|
||||
# string.printable so they need to be checked separately.
|
||||
printable = True
|
||||
for one_char in decoded_str:
|
||||
if (one_char not in string.printable
|
||||
and one_char not in ACCEPTABLE_ESCAPE_CHARS):
|
||||
printable = False
|
||||
break
|
||||
|
||||
if printable:
|
||||
return decoded_str
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def extract_strings_in_one_section(section, str_mappings):
|
||||
"""Extract NULL-terminated strings in one ELF section"""
|
||||
bindata = section['data']
|
||||
|
||||
if len(bindata) < 2:
|
||||
# Can't have a NULL-terminated string with fewer than 2 bytes.
|
||||
return str_mappings
|
||||
|
||||
idx = 0
|
||||
|
||||
# If first byte is not NULL, it may be a string.
|
||||
if bindata[0] == 0:
|
||||
start = None
|
||||
else:
|
||||
start = 0
|
||||
|
||||
while idx < len(bindata):
|
||||
if start is None:
|
||||
if bindata[idx] == 0:
|
||||
# Skip NULL bytes to find next string
|
||||
idx += 1
|
||||
else:
|
||||
# Beginning of possible string
|
||||
start = idx
|
||||
idx += 1
|
||||
else:
|
||||
if bindata[idx] != 0:
|
||||
# Skipping till next NULL byte for possible string
|
||||
idx += 1
|
||||
else:
|
||||
# End of possible string
|
||||
end = idx
|
||||
|
||||
if start != end:
|
||||
str_maybe = bindata[start:end]
|
||||
decoded_str = try_decode_string(str_maybe)
|
||||
|
||||
# Only store readable string
|
||||
if decoded_str is not None:
|
||||
addr = section['start'] + start
|
||||
|
||||
if addr not in str_mappings:
|
||||
str_mappings[addr] = decoded_str
|
||||
|
||||
# Decoded string may contain un-printable characters
|
||||
# (e.g. extended ASC-II characters) or control
|
||||
# characters (e.g. '\r' or '\n'), so simply print
|
||||
# the byte string instead.
|
||||
logger.debug('Found string via extraction at ' + PTR_FMT + ': %s',
|
||||
addr, str_maybe)
|
||||
|
||||
# GCC-based toolchain will reuse the NULL character
|
||||
# for empty strings. There is no way to know which
|
||||
# one is being reused, so just treat all NULL character
|
||||
# at the end of legitimate strings as empty strings.
|
||||
null_addr = section['start'] + end
|
||||
str_mappings[null_addr] = ''
|
||||
|
||||
logger.debug('Found null string via extraction at ' + PTR_FMT,
|
||||
null_addr)
|
||||
|
||||
start = None
|
||||
idx += 1
|
||||
|
||||
return str_mappings
|
||||
|
||||
|
||||
def extract_static_strings(elf, database):
|
||||
"""
|
||||
Extract static strings from ELF file using DWARF,
|
||||
and also extraction from binary data.
|
||||
"""
|
||||
string_mappings = {}
|
||||
|
||||
elf_sections = extract_elf_code_data_sections(elf)
|
||||
|
||||
# Extract strings using ELF DWARF information
|
||||
str_vars = extract_string_variables(elf)
|
||||
for str_var in str_vars:
|
||||
for _, sect in elf_sections.items():
|
||||
one_str = extract_one_string_in_section(sect, str_var['addr'])
|
||||
if one_str is not None:
|
||||
string_mappings[str_var['addr']] = one_str
|
||||
logger.debug('Found string variable at ' + PTR_FMT + ': %s',
|
||||
str_var['addr'], one_str)
|
||||
break
|
||||
|
||||
# Extract strings from ELF sections
|
||||
string_sections = STATIC_STRING_SECTIONS
|
||||
|
||||
# Some architectures may put static strings into additional sections.
|
||||
# So need to extract them too.
|
||||
arch_data = dictionary_parser.log_database.ARCHS[database.get_arch()]
|
||||
if "extra_string_section" in arch_data:
|
||||
string_sections.extend(arch_data['extra_string_section'])
|
||||
|
||||
for sect_name in string_sections:
|
||||
if sect_name in elf_sections:
|
||||
string_mappings = extract_strings_in_one_section(elf_sections[sect_name],
|
||||
string_mappings)
|
||||
|
||||
if len(string_mappings) > 0:
|
||||
database.set_string_mappings(string_mappings)
|
||||
logger.info("Found %d strings", len(string_mappings))
|
||||
|
||||
|
||||
def main():
|
||||
"""Main function of database generator"""
|
||||
args = parse_args()
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(format=LOGGER_FORMAT)
|
||||
if args.verbose:
|
||||
logger.setLevel(logging.INFO)
|
||||
elif args.debug:
|
||||
logging.basicConfig(format=LOGGER_FORMAT, level=logging.WARNING)
|
||||
if args.debug:
|
||||
logger.setLevel(logging.DEBUG)
|
||||
else:
|
||||
logger.setLevel(logging.WARNING)
|
||||
elif args.verbose:
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
elffile = open(args.elffile, "rb")
|
||||
if not elffile:
|
||||
|
@ -289,8 +549,12 @@ def main():
|
|||
else:
|
||||
logger.info("Endianness: Big")
|
||||
|
||||
# Extract sections from ELF files that contain strings
|
||||
extract_static_string_sections(elf, database)
|
||||
if database.is_tgt_64bit():
|
||||
global PTR_FMT
|
||||
PTR_FMT = '0x%016x'
|
||||
|
||||
# Extract strings from ELF files
|
||||
extract_static_strings(elf, database)
|
||||
|
||||
# Extract information related to logging subsystem
|
||||
extract_logging_subsys_information(elf, database)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue