From 80e78208e6cda603c9c909e934fbdf49d23c181e Mon Sep 17 00:00:00 2001 From: Daniel Leung Date: Thu, 25 May 2023 11:41:48 -0700 Subject: [PATCH] kernel: syscalls: no need to include all syscalls in binary The syscall generation phase parses all header files to look for potential syscalls, and emits all the relevant files to enable syscalls. However, this results in all the syscall marshalling functions being included in the final binary. This is due to these functions being referred to inside the dispatch list, resulting in ineffective garbage collection during linking. Previous commits allow each driver and subsystem to specify which header files containing syscalls are relevant. So this commit changes the syscall generation to only include the syscalls needed for the build in the syscall dispatch list and removes various bits related to that. This allows the linker to garbage collect unused syscall-related functions, thus reducing the final binary size. Signed-off-by: Daniel Leung --- CMakeLists.txt | 22 ++++++-- Kconfig.zephyr | 7 +++ cmake/modules/extensions.cmake | 27 +++++++++- scripts/build/gen_syscalls.py | 35 ++++++++---- scripts/build/parse_syscalls.py | 95 +++++++++++++++++++++++++-------- 5 files changed, 151 insertions(+), 35 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 90c60baabaf..f6b06937f87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,6 +92,11 @@ set(PARSE_SYSCALLS_TARGET parse_syscalls_target) define_property(GLOBAL PROPERTY PROPERTY_OUTPUT_FORMAT BRIEF_DOCS " " FULL_DOCS " ") set_property( GLOBAL PROPERTY PROPERTY_OUTPUT_FORMAT elf32-little${ARCH}) # BFD format +# Contains the list of files with syscall function prototypes. +add_library(syscalls_interface INTERFACE) +set(syscalls_file_list_output + ${CMAKE_CURRENT_BINARY_DIR}/misc/generated/syscalls_file_list.txt) + # "zephyr_interface" is a source-less library that encapsulates all the global # compiler options needed by all source files. 
All zephyr libraries, # including the library named "zephyr" link with this library to @@ -728,13 +733,16 @@ add_custom_command( COMMAND ${PYTHON_EXECUTABLE} ${ZEPHYR_BASE}/scripts/build/parse_syscalls.py - --include ${ZEPHYR_BASE}/include # Read files from this dir - --include ${ZEPHYR_BASE}/drivers # For net sockets - --include ${ZEPHYR_BASE}/subsys/net # More net sockets + --scan ${ZEPHYR_BASE}/include # Read files from this dir + --scan ${ZEPHYR_BASE}/drivers # For net sockets + --scan ${ZEPHYR_BASE}/subsys/net # More net sockets ${parse_syscalls_include_args} # Read files from these dirs also --json-file ${syscalls_json} # Write this file --tag-struct-file ${struct_tags_json} # Write subsystem list to this file + --file-list ${syscalls_file_list_output} + $<$<BOOL:${CONFIG_EMIT_ALL_SYSCALLS}>:--emit-all-syscalls> DEPENDS ${syscalls_subdirs_trigger} ${PARSE_SYSCALLS_HEADER_DEPENDS} + ${syscalls_file_list_output} ${syscalls_interface} ) # Make sure Picolibc is built before the rest of the system; there's no explicit @@ -850,6 +858,14 @@ zephyr_get_include_directories_for_lang(C ZEPHYR_INCLUDES) add_subdirectory(kernel) +get_property( + syscalls_file_list + TARGET syscalls_interface + PROPERTY INTERFACE_INCLUDE_DIRECTORIES +) +file(CONFIGURE OUTPUT ${syscalls_file_list_output} + CONTENT "@syscalls_file_list@" @ONLY) + # Read list content get_property(ZEPHYR_LIBS_PROPERTY GLOBAL PROPERTY ZEPHYR_LIBS) diff --git a/Kconfig.zephyr b/Kconfig.zephyr index 05670647bfd..38d86271311 100644 --- a/Kconfig.zephyr +++ b/Kconfig.zephyr @@ -738,6 +738,13 @@ config CHECK_INIT_PRIORITIES_FAIL_ON_WARNING devices depending on each other but initialized with the same priority. +config EMIT_ALL_SYSCALLS + bool "Emit all possible syscalls in the tree" + help + This tells the build system to emit all possible syscalls found + in the tree, instead of only those syscalls associated with enabled + drivers and subsystems. 
+ endmenu config DEPRECATED diff --git a/cmake/modules/extensions.cmake b/cmake/modules/extensions.cmake index c6dc67790d2..69d802e32ca 100644 --- a/cmake/modules/extensions.cmake +++ b/cmake/modules/extensions.cmake @@ -1488,14 +1488,37 @@ endfunction() # Function to add header file(s) to the list to be passed to syscall generator. function(zephyr_syscall_header) - # Empty function for now. Will implement later. + foreach(one_file ${ARGV}) + if(EXISTS ${one_file}) + set(header_file ${one_file}) + elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${one_file}) + set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/${one_file}) + else() + message(FATAL_ERROR "Syscall header file not found: ${one_file}") + endif() + + target_sources( + syscalls_interface INTERFACE + ${header_file} + ) + target_include_directories( + syscalls_interface INTERFACE + ${header_file} + ) + add_dependencies( + syscalls_interface + ${header_file} + ) + + unset(header_file) + endforeach() endfunction() # Function to add header file(s) to the list to be passed to syscall generator # if condition is true. function(zephyr_syscall_header_ifdef feature_toggle) if(${${feature_toggle}}) - # Empty function for now. Will implement later. 
+ zephyr_syscall_header(${ARGN}) endif() endfunction() diff --git a/scripts/build/gen_syscalls.py b/scripts/build/gen_syscalls.py index 301b963e870..3a95207a414 100755 --- a/scripts/build/gen_syscalls.py +++ b/scripts/build/gen_syscalls.py @@ -419,22 +419,29 @@ def main(): invocations = {} mrsh_defs = {} mrsh_includes = {} - ids = [] + ids_emit = [] + ids_not_emit = [] table_entries = [] handlers = [] + emit_list = [] - for match_group, fn in syscalls: + for match_group, fn, to_emit in syscalls: handler, inv, mrsh, sys_id, entry = analyze_fn(match_group, fn) if fn not in invocations: invocations[fn] = [] invocations[fn].append(inv) - ids.append(sys_id) - table_entries.append(entry) handlers.append(handler) - if mrsh: + if to_emit: + ids_emit.append(sys_id) + table_entries.append(entry) + emit_list.append(handler) + else: + ids_not_emit.append(sys_id) + + if mrsh and to_emit: syscall = typename_split(match_group[0])[1] mrsh_defs[syscall] = mrsh mrsh_includes[syscall] = "#include <syscalls/%s>" % fn @@ -444,7 +451,7 @@ def main(): weak_defines = "".join([weak_template % name for name in handlers - if not name in noweak]) + if not name in noweak and name in emit_list]) # The "noweak" ones just get a regular declaration weak_defines += "\n".join(["extern uintptr_t %s(uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6, void *ssf);" @@ -454,13 +461,23 @@ def main(): ",\n\t".join(table_entries))) # Listing header emitted to stdout - ids.sort() - ids.extend(["K_SYSCALL_BAD", "K_SYSCALL_LIMIT"]) + ids_emit.sort() + ids_emit.extend(["K_SYSCALL_BAD", "K_SYSCALL_LIMIT"]) ids_as_defines = "" - for i, item in enumerate(ids): + for i, item in enumerate(ids_emit): ids_as_defines += "#define {} {}\n".format(item, i) + if ids_not_emit: + # There are syscalls that are not used in the image but + # their IDs are used in the generated stubs. So need to + # make them usable but outside the syscall ID range. 
+ ids_as_defines += "\n\n/* Following syscalls are not used in image */\n" + ids_not_emit.sort() + num_emitted_ids = len(ids_emit) + for i, item in enumerate(ids_not_emit): + ids_as_defines += "#define {} {}\n".format(item, i + num_emitted_ids) + with open(args.syscall_list, "w") as fp: fp.write(list_template % ids_as_defines) diff --git a/scripts/build/parse_syscalls.py b/scripts/build/parse_syscalls.py index 3536d52a9c4..c9c25998011 100644 --- a/scripts/build/parse_syscalls.py +++ b/scripts/build/parse_syscalls.py @@ -55,13 +55,38 @@ def tagged_struct_update(target_list, tag, contents): target_list.extend(items) -def analyze_headers(multiple_directories): +def analyze_headers(include_dir, scan_dir, file_list): syscall_ret = [] tagged_ret = {} for tag in struct_tags: tagged_ret[tag] = [] + syscall_files = dict() + + # Get the list of header files which contains syscalls to be emitted. + # If file_list does not exist, we emit all syscalls. + if file_list: + with open(file_list, "r", encoding="utf-8") as fp: + contents = fp.read() + + for one_file in contents.split(";"): + if os.path.isfile(one_file): + syscall_files[one_file] = {"emit": True} + else: + sys.stderr.write(f"{one_file} does not exists!\n") + sys.exit(1) + + multiple_directories = set() + if include_dir: + multiple_directories |= set(include_dir) + if scan_dir: + multiple_directories |= set(scan_dir) + + # Look for source files under various directories. + # Due to "syscalls/*.h" being included unconditionally in various + # other header files. We must generate the associated syscall + # header files (e.g. for function stubs). 
for base_path in multiple_directories: for root, dirs, files in os.walk(base_path, topdown=True): dirs.sort() @@ -76,23 +101,35 @@ def analyze_headers(multiple_directories): 'common.h'))): continue - with open(path, "r", encoding="utf-8") as fp: - try: - contents = fp.read() - except Exception: - sys.stderr.write("Error decoding %s\n" % path) - raise + if path not in syscall_files: + if include_dir and base_path in include_dir: + syscall_files[path] = {"emit" : True} + else: + syscall_files[path] = {"emit" : False} - try: - syscall_result = [(mo.groups(), fn) - for mo in syscall_regex.finditer(contents)] - for tag in struct_tags: - tagged_struct_update(tagged_ret[tag], tag, contents) - except Exception: - sys.stderr.write("While parsing %s\n" % fn) - raise + # Parse files to extract syscall functions + for one_file in syscall_files: + with open(one_file, "r", encoding="utf-8") as fp: + try: + contents = fp.read() + except Exception: + sys.stderr.write("Error decoding %s\n" % one_file) + raise - syscall_ret.extend(syscall_result) + fn = os.path.basename(one_file) + + try: + to_emit = syscall_files[one_file]["emit"] | args.emit_all_syscalls + + syscall_result = [(mo.groups(), fn, to_emit) + for mo in syscall_regex.finditer(contents)] + for tag in struct_tags: + tagged_struct_update(tagged_ret[tag], tag, contents) + except Exception: + sys.stderr.write("While parsing %s\n" % fn) + raise + + syscall_ret.extend(syscall_result) return syscall_ret, tagged_ret @@ -116,16 +153,31 @@ def parse_args(): description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False) - parser.add_argument("-i", "--include", required=True, action='append', - help='''include directories recursively scanned - for .h files. 
Can be specified multiple times: - -i topdir1 -i topdir2 ...''') + parser.add_argument( + "-i", "--include", required=False, action="append", + help="Include directories recursively scanned for .h files " + "containing syscalls that must be present in final binary. " + "Can be specified multiple times: -i topdir1 -i topdir2 ...") + parser.add_argument( + "--scan", required=False, action="append", + help="Scan directories recursively for .h files containing " + "syscalls that need stubs generated but may not need to " + "be present in final binary. Can be specified multiple " + "times.") parser.add_argument( "-j", "--json-file", required=True, help="Write system call prototype information as json to file") parser.add_argument( "-t", "--tag-struct-file", required=True, help="Write tagged struct name information as json to file") + parser.add_argument( + "--file-list", required=False, + help="Text file containing semi-colon separated list of " + "header file where only syscalls in these files " + "are emitted.") + parser.add_argument( + "--emit-all-syscalls", required=False, action="store_true", + help="Emit all potential syscalls in the tree") args = parser.parse_args() @@ -133,7 +185,8 @@ def parse_args(): def main(): parse_args() - syscalls, tagged = analyze_headers(args.include) + syscalls, tagged = analyze_headers(args.include, args.scan, + args.file_list) # Only write json files if they don't exist or have changes since # they will force an incremental rebuild.