diff --git a/CODEOWNERS b/CODEOWNERS index 75c4c64d83a..d846917e182 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -596,6 +596,7 @@ /subsys/shell/ @jakub-uC @nordic-krch /subsys/stats/ @nvlsianpu /subsys/storage/ @nvlsianpu +/subsys/task_wdt/ @martinjaeger /subsys/testsuite/ @nashif /subsys/timing/ @nashif @dcpleung /subsys/usb/ @jfischer-no diff --git a/doc/reference/index.rst b/doc/reference/index.rst index 399db3916fb..7cc58838f0b 100644 --- a/doc/reference/index.rst +++ b/doc/reference/index.rst @@ -30,6 +30,7 @@ API Reference resource_management/index.rst shell/index.rst storage/index.rst + task_wdt/index.rst misc/timeutil.rst usb/index.rst usermode/index.rst diff --git a/doc/reference/overview.rst b/doc/reference/overview.rst index 8e8e8d9e3cc..3895cda636b 100644 --- a/doc/reference/overview.rst +++ b/doc/reference/overview.rst @@ -282,6 +282,11 @@ current :ref:`stability level `. - 2.3 - 2.3 + * - :ref:`task_wdt_api` + - Experimental + - 2.5 + - 2.5 + * - :ref:`uart_api` - Stable - 1.0 diff --git a/doc/reference/task_wdt/index.rst b/doc/reference/task_wdt/index.rst new file mode 100644 index 00000000000..ddd4593e33c --- /dev/null +++ b/doc/reference/task_wdt/index.rst @@ -0,0 +1,55 @@ +.. _task_wdt_api: + +Task Watchdog +############# + +Overview +******** + +Many microcontrollers feature a hardware watchdog timer peripheral. Its purpose +is to trigger an action (usually a system reset) in case of severe software +malfunctions. Once initialized, the watchdog timer has to be restarted ("fed") +in regular intervals to prevent it from timing out. If the software got stuck +and does not manage to feed the watchdog anymore, the corrective action is +triggered to bring the system back to normal operation. + +In real-time operating systems with multiple tasks running in parallel, a +single watchdog instance may not be sufficient anymore, as it can be used for +only one task. 
This software watchdog based on kernel timers provides a method +to supervise multiple threads or tasks (called watchdog channels). + +An existing hardware watchdog can be used as an optional fallback if the task +watchdog itself or the scheduler has a malfunction. + +The task watchdog uses a kernel timer as its backend. If configured properly, +the timer ISR is never actually called during normal operation, as the timer is +continuously updated in the feed calls. + +It's currently not possible to have multiple instances of task watchdogs. +Instead, the task watchdog API can be accessed globally to add or delete new +channels without passing around a context or device pointer in the firmware. + +The maximum number of channels is predefined via Kconfig and should be adjusted +to match exactly the number of channels required by the application. + +Configuration Options +********************* + +Related configuration options can be found under +:zephyr_file:`subsys/task_wdt/Kconfig`. + +* :option:`CONFIG_TASK_WDT` + +* :option:`CONFIG_TASK_WDT_CHANNELS` + +* :option:`CONFIG_TASK_WDT_HW_FALLBACK` + +* :option:`CONFIG_TASK_WDT_MIN_TIMEOUT` + +* :option:`CONFIG_TASK_WDT_HW_FALLBACK_DELAY` + +API Reference +************* + +.. doxygengroup:: task_wdt_api + :project: Zephyr diff --git a/include/task_wdt/task_wdt.h b/include/task_wdt/task_wdt.h new file mode 100644 index 00000000000..cd3ba078315 --- /dev/null +++ b/include/task_wdt/task_wdt.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2020 Libre Solar Technologies GmbH + * + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * @file + * @brief Task watchdog header file + * + * This header file declares prototypes for the task watchdog APIs. + * + * The task watchdog can be used to monitor correct operation of individual + * threads. It can be used together with a hardware watchdog as a fallback. 
+ */
+
+#ifndef TASK_WDT_H_
+#define TASK_WDT_H_
+
+#include <zephyr/types.h>
+#include <kernel.h>
+#include <device.h>
+
+/**
+ * @brief Task Watchdog APIs
+ * @defgroup task_wdt_api Task Watchdog APIs
+ * @ingroup subsystem
+ * @{
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Task watchdog callback (gets ID and user data of the expired channel). */
+typedef void (*task_wdt_callback_t)(int channel_id, void *user_data);
+
+/**
+ * @brief Initialize task watchdog.
+ *
+ * This function sets up necessary kernel timers and the hardware watchdog (if
+ * desired as fallback). It has to be called before task_wdt_add() and
+ * task_wdt_feed().
+ *
+ * @param hw_wdt Pointer to the hardware watchdog device used as fallback.
+ *               Pass NULL if no hardware watchdog fallback is desired.
+ *
+ * @retval 0 If successful.
+ * @retval -ENOTSUP If assigning a hardware watchdog is not supported.
+ */
+int task_wdt_init(const struct device *hw_wdt);
+
+/**
+ * @brief Install new timeout.
+ *
+ * Adds a new timeout to the list of task watchdog channels.
+ *
+ * @param reload_period Period in milliseconds used to reset the timeout.
+ * @param callback Function to be called when the watchdog timer expired. Pass
+ *                 NULL to reboot the system instead.
+ * @param user_data User data to associate with the watchdog channel.
+ *
+ * @retval channel_id If successful, a non-negative value indicating the index
+ *                    of the channel to which the timeout was assigned. This
+ *                    ID is supposed to be used as the parameter in calls to
+ *                    task_wdt_feed().
+ * @retval -EINVAL If the reload_period is invalid (zero).
+ * @retval -ENOMEM If no more timeouts can be installed.
+ */
+int task_wdt_add(uint32_t reload_period, task_wdt_callback_t callback,
+		 void *user_data);
+
+/**
+ * @brief Delete task watchdog channel.
+ *
+ * Deletes the specified channel from the list of task watchdog channels. The
+ * channel is then available again for other tasks via task_wdt_add().
+ *
+ * @param channel_id Index of the channel as returned by task_wdt_add().
+ *
+ * @retval 0 If successful.
+ * @retval -EINVAL If there is no installed timeout for supplied channel. + */ +int task_wdt_delete(int channel_id); + +/** + * @brief Feed specified watchdog channel. + * + * This function loops through all installed task watchdogs and updates the + * internal kernel timer used for the software watchdog with the next due + * timeout. + * + * @param channel_id Index of the fed channel as returned by task_wdt_add(). + * + * @retval 0 If successful. + * @retval -EINVAL If there is no installed timeout for supplied channel. + */ +int task_wdt_feed(int channel_id); + +#ifdef __cplusplus +} +#endif + +/** + * @} + */ + +#endif /* TASK_WDT_H_ */ diff --git a/subsys/CMakeLists.txt b/subsys/CMakeLists.txt index 9608ddec21c..fc18f2c1006 100644 --- a/subsys/CMakeLists.txt +++ b/subsys/CMakeLists.txt @@ -21,6 +21,7 @@ add_subdirectory_ifdef(CONFIG_SETTINGS settings) add_subdirectory(fb) add_subdirectory(power) add_subdirectory(stats) +add_subdirectory(task_wdt) add_subdirectory(testsuite) add_subdirectory(tracing) add_subdirectory_ifdef(CONFIG_JWT jwt) diff --git a/subsys/Kconfig b/subsys/Kconfig index e6b60b9c67a..b3a9661811a 100644 --- a/subsys/Kconfig +++ b/subsys/Kconfig @@ -52,6 +52,8 @@ source "subsys/storage/Kconfig" source "subsys/settings/Kconfig" +source "subsys/task_wdt/Kconfig" + source "subsys/testsuite/Kconfig" source "subsys/timing/Kconfig" diff --git a/subsys/task_wdt/CMakeLists.txt b/subsys/task_wdt/CMakeLists.txt new file mode 100644 index 00000000000..586eec92dd3 --- /dev/null +++ b/subsys/task_wdt/CMakeLists.txt @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: Apache-2.0 + +zephyr_sources_ifdef(CONFIG_TASK_WDT task_wdt.c) diff --git a/subsys/task_wdt/Kconfig b/subsys/task_wdt/Kconfig new file mode 100644 index 00000000000..c9a05973cc9 --- /dev/null +++ b/subsys/task_wdt/Kconfig @@ -0,0 +1,60 @@ +# Software watchdog configuration + +# Copyright (c) 2020 Libre Solar Technologies GmbH +# SPDX-License-Identifier: Apache-2.0 + +menuconfig TASK_WDT + bool
"Task-level software watchdog" + select REBOOT + help + Enable task watchdog + + The task watchdog allows to have individual watchdog channels + per thread, even if the hardware supports only a single watchdog. + +config TASK_WDT_CHANNELS + int "Maximum number of task watchdog channels" + depends on TASK_WDT + default 5 + range 2 100 + help + The timeouts for each channel are stored in an array. Allocate only + the required amount of channels to reduce memory footprint. + +config TASK_WDT_HW_FALLBACK + bool "Use hardware watchdog as a fallback" + depends on TASK_WDT + default y + help + This option allows to specify a hardware watchdog device in the + application that is used as an additional safety layer if the task + watchdog itself gets stuck. + +config TASK_WDT_MIN_TIMEOUT + int "Minimum timeout for task watchdog (ms)" + depends on TASK_WDT_HW_FALLBACK + default 100 + range 1 10000 + help + The task watchdog uses a continuously restarted k_timer as its + backend. This value specifies the minimum timeout in milliseconds + among all task watchdogs used in the application. + + If a hardware watchdog is configured as a fallback for the task + watchdog, its timeout is set to this value plus + TASK_WDT_HW_FALLBACK_DELAY. + +config TASK_WDT_HW_FALLBACK_DELAY + int "Additional delay for hardware watchdog (ms)" + depends on TASK_WDT_HW_FALLBACK + default 20 + range 1 1000 + help + The timeout of the hardware watchdog fallback will be increased by + this value to provide sufficient time for corrective actions in the + callback function. + + In addition to that, the delay allows to compensate deviations + between different clock sources for the hardware watchdog and the + kernel timer. This is especially important if the hardware watchdog + is clocked by an inaccurate low-speed RC oscillator. 
diff --git a/subsys/task_wdt/task_wdt.c b/subsys/task_wdt/task_wdt.c
new file mode 100644
index 00000000000..a68e6f3779d
--- /dev/null
+++ b/subsys/task_wdt/task_wdt.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2020 Libre Solar Technologies GmbH
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+#include "task_wdt/task_wdt.h"
+
+#include <drivers/watchdog.h>
+#include <sys/reboot.h>
+#include <device.h>
+#include <errno.h>
+
+#define LOG_LEVEL CONFIG_WDT_LOG_LEVEL
+#include <logging/log.h>
+LOG_MODULE_REGISTER(task_wdt);
+
+/*
+ * This dummy channel ID is used to continue feeding the hardware watchdog if
+ * the task watchdog timeouts are too long for regular updates
+ */
+#define TASK_WDT_BACKGROUND_CHANNEL (-1)
+
+/*
+ * Task watchdog channel data (one entry per channel ID)
+ */
+struct task_wdt_channel {
+	/* period in milliseconds used to reset the timeout, set to 0 to
+	 * indicate that the channel is available
+	 */
+	uint32_t reload_period;
+	/* abs. ticks when this channel expires (updated by task_wdt_feed) */
+	int64_t timeout_abs_ticks;
+	/* user data passed to the callback function */
+	void *user_data;
+	/* function to be called when watchdog timer expired (may be NULL) */
+	task_wdt_callback_t callback;
+};
+
+/* array of all task watchdog channels, indexed by channel ID */
+static struct task_wdt_channel channels[CONFIG_TASK_WDT_CHANNELS];
+
+/* timer used for watchdog handling; its user data holds a channel ID */
+static struct k_timer timer;
+
+#ifdef CONFIG_TASK_WDT_HW_FALLBACK
+/* hardware watchdog used as a fallback (NULL if none was assigned) */
+static const struct device *hw_wdt_dev;
+static int hw_wdt_channel;	/* value returned by wdt_install_timeout() */
+static bool hw_wdt_started;	/* set once wdt_setup() has been called */
+#endif
+
+/**
+ * @brief Task watchdog timer callback.
+ *
+ * If the device operates as intended, this function will never be called,
+ * as the timer is continuously restarted with the next due timeout in the
+ * task_wdt_feed() function.
+ *
+ * If all task watchdogs have longer timeouts than the hardware watchdog,
+ * this function is called regularly (via the background channel).
+ * This should be avoided by setting CONFIG_TASK_WDT_MIN_TIMEOUT to the
+ * minimum task watchdog timeout used in the application.
+ *
+ * For an expired channel the registered callback is invoked with the channel
+ * ID and the channel's user data; without a callback the system is rebooted
+ * via sys_reboot(SYS_REBOOT_COLD). Expiry of a channel that has been deleted
+ * in the meantime (reload_period == 0) is ignored.
+ *
+ * @param timer_id Pointer to the timer which called the function
+ */
+static void task_wdt_trigger(struct k_timer *timer_id)
+{
+	/*
+	 * The channel ID was stored as an integer in the timer's pointer-sized
+	 * user data; go through intptr_t so the conversion is well-defined on
+	 * targets where pointers are wider than int.
+	 */
+	int channel_id = (int)(intptr_t)k_timer_user_data_get(timer_id);
+
+#ifdef CONFIG_TASK_WDT_HW_FALLBACK
+	if (channel_id == TASK_WDT_BACKGROUND_CHANNEL) {
+		/*
+		 * NOTE(review): the timer is not re-armed against the channel
+		 * deadlines here; presumably the periodic restart requested
+		 * in task_wdt_feed() is relied upon -- confirm.
+		 */
+		if (hw_wdt_started) {
+			/* feed the channel that was actually installed */
+			wdt_feed(hw_wdt_dev, hw_wdt_channel);
+		}
+		return;
+	}
+#endif
+
+	/* never index the channels array with an unexpected ID */
+	if (channel_id < 0 || channel_id >= ARRAY_SIZE(channels)) {
+		return;
+	}
+
+	if (channels[channel_id].reload_period == 0) {
+		/* channel was deleted */
+		return;
+	}
+
+	if (channels[channel_id].callback) {
+		channels[channel_id].callback(channel_id,
+			channels[channel_id].user_data);
+	} else {
+		sys_reboot(SYS_REBOOT_COLD);
+	}
+}
+
+/*
+ * Initialize the task watchdog and, if hw_wdt is not NULL, install the
+ * hardware watchdog timeout used as fallback.
+ *
+ * Returns 0 on success, -ENOTSUP if a hardware watchdog was passed although
+ * CONFIG_TASK_WDT_HW_FALLBACK is disabled, or the negative error code
+ * returned by wdt_install_timeout().
+ */
+int task_wdt_init(const struct device *hw_wdt)
+{
+	if (hw_wdt) {
+#ifdef CONFIG_TASK_WDT_HW_FALLBACK
+		/* members not listed here are zero-initialized */
+		struct wdt_timeout_cfg wdt_config = {
+			.flags = WDT_FLAG_RESET_SOC,
+			.window = {
+				.min = 0U,
+				.max = CONFIG_TASK_WDT_MIN_TIMEOUT +
+				       CONFIG_TASK_WDT_HW_FALLBACK_DELAY,
+			},
+			.callback = NULL,
+		};
+		int ret = wdt_install_timeout(hw_wdt, &wdt_config);
+
+		if (ret < 0) {
+			LOG_ERR("hw_wdt install timeout failed: %d", ret);
+			return ret;
+		}
+
+		/* only remember the device once a channel is available */
+		hw_wdt_channel = ret;
+		hw_wdt_dev = hw_wdt;
+#else
+		return -ENOTSUP;
+#endif
+	}
+
+	k_timer_init(&timer, task_wdt_trigger, NULL);
+
+	return 0;
+}
+
+/*
+ * Claim the first unused channel, start the hardware watchdog fallback on
+ * first use and feed the new channel once so that its timeout is scheduled.
+ *
+ * Returns the channel ID, -EINVAL for a zero reload_period or -ENOMEM if all
+ * channels are in use.
+ */
+int task_wdt_add(uint32_t reload_period, task_wdt_callback_t callback,
+		 void *user_data)
+{
+	if (reload_period == 0) {
+		return -EINVAL;
+	}
+
+	/* look for unused channel (reload_period set to 0) */
+	for (int id = 0; id < ARRAY_SIZE(channels); id++) {
+		if (channels[id].reload_period != 0) {
+			continue;
+		}
+
+		channels[id].reload_period = reload_period;
+		channels[id].user_data = user_data;
+		channels[id].timeout_abs_ticks = K_TICKS_FOREVER;
+		channels[id].callback = callback;
+
+#ifdef CONFIG_TASK_WDT_HW_FALLBACK
+		if (!hw_wdt_started && hw_wdt_dev) {
+			/* also start fallback hw wdt */
+			int ret = wdt_setup(hw_wdt_dev, 0);
+
+			if (ret < 0) {
+				/* stays "not started": retried on next add */
+				LOG_ERR("hw_wdt setup failed: %d", ret);
+			} else {
+				hw_wdt_started = true;
+			}
+		}
+#endif
+
+		/*
+		 * Feed only after the hw wdt has been started, as feeding
+		 * also feeds the hardware watchdog.
+		 */
+		task_wdt_feed(id);
+
+		return id;
+	}
+
+	return -ENOMEM;
+}
+
+/*
+ * Mark a channel as unused again by clearing its reload period.
+ *
+ * Returns 0 on success or -EINVAL if channel_id is out of range or no timeout
+ * is installed for that channel.
+ */
+int task_wdt_delete(int channel_id)
+{
+	if (channel_id < 0 || channel_id >= ARRAY_SIZE(channels)) {
+		return -EINVAL;
+	}
+
+	if (channels[channel_id].reload_period == 0) {
+		/* no timeout installed for this channel */
+		return -EINVAL;
+	}
+
+	channels[channel_id].reload_period = 0;
+
+	return 0;
+}
+
+/*
+ * Push the deadline of the given channel reload_period milliseconds into the
+ * future, restart the kernel timer for whichever deadline is due next and
+ * feed the hardware watchdog fallback if it has been started.
+ *
+ * Returns 0 on success or -EINVAL if channel_id is out of range or no timeout
+ * is installed for that channel.
+ */
+int task_wdt_feed(int channel_id)
+{
+	int64_t current_ticks;
+	int next_channel_id;	/* channel which will time out next */
+	int64_t next_timeout;	/* timeout in absolute ticks of this channel */
+
+	if (channel_id < 0 || channel_id >= ARRAY_SIZE(channels)) {
+		return -EINVAL;
+	}
+
+	/*
+	 * We need a critical section instead of a mutex while updating the
+	 * channels array in order to prevent priority inversion. Otherwise,
+	 * a low priority thread could be preempted before releasing the mutex
+	 * and block a high priority thread that wants to feed its task wdt.
+	 */
+	k_sched_lock();
+
+	if (channels[channel_id].reload_period == 0) {
+		/* channel was never added or has been deleted */
+		k_sched_unlock();
+		return -EINVAL;
+	}
+
+	current_ticks = z_tick_get();
+
+	/* feed the specified channel */
+	channels[channel_id].timeout_abs_ticks = current_ticks +
+		k_ms_to_ticks_ceil64(channels[channel_id].reload_period);
+
+#ifdef CONFIG_TASK_WDT_HW_FALLBACK
+	/* the background channel limits how long the hw wdt goes unfed */
+	next_channel_id = TASK_WDT_BACKGROUND_CHANNEL;
+	next_timeout = current_ticks +
+		k_ms_to_ticks_ceil64(CONFIG_TASK_WDT_MIN_TIMEOUT);
+#else
+	next_channel_id = 0;
+	next_timeout = INT64_MAX;
+#endif
+
+	/* find minimum timeout of all channels */
+	for (int id = 0; id < ARRAY_SIZE(channels); id++) {
+		if (channels[id].reload_period != 0 &&
+		    channels[id].timeout_abs_ticks < next_timeout) {
+			next_channel_id = id;
+			next_timeout = channels[id].timeout_abs_ticks;
+		}
+	}
+
+	/*
+	 * update task wdt kernel timer; the channel ID travels as an integer
+	 * in the pointer-sized user data, hence the intptr_t conversion
+	 */
+	k_timer_user_data_set(&timer, (void *)(intptr_t)next_channel_id);
+	k_timer_start(&timer, K_TIMEOUT_ABS_TICKS(next_timeout),
+		      K_TIMEOUT_ABS_TICKS(next_timeout));
+
+#ifdef CONFIG_TASK_WDT_HW_FALLBACK
+	if (hw_wdt_started) {
+		/* feed the installed channel, and only once wdt_setup() ran */
+		wdt_feed(hw_wdt_dev, hw_wdt_channel);
+	}
+#endif
+
+	k_sched_unlock();
+
+	return 0;
+}