task_wdt: add task-level watchdog subsystem
This new subsystem can be used to supervise individual threads. It is based on a regularly updated kernel timer, whose ISR is never actually called in regular system operation. An existing hardware watchdog can be used as an optional fallback if the task watchdog itself gets stuck. Signed-off-by: Martin Jäger <martin@libre.solar>
This commit is contained in:
parent
5b87cca98b
commit
1aaf508bde
10 changed files with 448 additions and 0 deletions
3
subsys/task_wdt/CMakeLists.txt
Normal file
3
subsys/task_wdt/CMakeLists.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
zephyr_sources_ifdef(CONFIG_TASK_WDT task_wdt.c)
|
60
subsys/task_wdt/Kconfig
Normal file
60
subsys/task_wdt/Kconfig
Normal file
|
@ -0,0 +1,60 @@
|
|||
# Software watchdog configuration
|
||||
|
||||
# Copyright (c) 2020 Libre Solar Technologies GmbH
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
menuconfig TASK_WDT
|
||||
bool "Task-level software watchdog"
|
||||
select REBOOT
|
||||
help
|
||||
Enable task watchdog
|
||||
|
||||
The task watchdog makes it possible to have individual watchdog channels
|
||||
per thread, even if the hardware supports only a single watchdog.
|
||||
|
||||
config TASK_WDT_CHANNELS
|
||||
int "Maximum number of task watchdog channels"
|
||||
depends on TASK_WDT
|
||||
default 5
|
||||
range 2 100
|
||||
help
|
||||
The timeouts for each channel are stored in an array. Allocate only
|
||||
the required number of channels to reduce memory footprint.
|
||||
|
||||
config TASK_WDT_HW_FALLBACK
|
||||
bool "Use hardware watchdog as a fallback"
|
||||
depends on TASK_WDT
|
||||
default y
|
||||
help
|
||||
This option allows specifying a hardware watchdog device in the
|
||||
application that is used as an additional safety layer if the task
|
||||
watchdog itself gets stuck.
|
||||
|
||||
config TASK_WDT_MIN_TIMEOUT
|
||||
int "Minimum timeout for task watchdog (ms)"
|
||||
depends on TASK_WDT_HW_FALLBACK
|
||||
default 100
|
||||
range 1 10000
|
||||
help
|
||||
The task watchdog uses a continuously restarted k_timer as its
|
||||
backend. This value specifies the minimum timeout in milliseconds
|
||||
among all task watchdogs used in the application.
|
||||
|
||||
If a hardware watchdog is configured as a fallback for the task
|
||||
watchdog, its timeout is set to this value plus
|
||||
TASK_WDT_HW_FALLBACK_DELAY.
|
||||
|
||||
config TASK_WDT_HW_FALLBACK_DELAY
|
||||
int "Additional delay for hardware watchdog (ms)"
|
||||
depends on TASK_WDT_HW_FALLBACK
|
||||
default 20
|
||||
range 1 1000
|
||||
help
|
||||
The timeout of the hardware watchdog fallback will be increased by
|
||||
this value to provide sufficient time for corrective actions in the
|
||||
callback function.
|
||||
|
||||
In addition to that, the delay allows compensating for deviations
|
||||
between different clock sources for the hardware watchdog and the
|
||||
kernel timer. This is especially important if the hardware watchdog
|
||||
is clocked by an inaccurate low-speed RC oscillator.
|
212
subsys/task_wdt/task_wdt.c
Normal file
212
subsys/task_wdt/task_wdt.c
Normal file
|
@ -0,0 +1,212 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Libre Solar Technologies GmbH
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
#include "task_wdt/task_wdt.h"
|
||||
|
||||
#include <drivers/watchdog.h>
|
||||
#include <power/reboot.h>
|
||||
#include <device.h>
|
||||
#include <errno.h>
|
||||
|
||||
#define LOG_LEVEL CONFIG_WDT_LOG_LEVEL
|
||||
#include <logging/log.h>
|
||||
LOG_MODULE_REGISTER(task_wdt);
|
||||
|
||||
/*
 * This dummy channel is used to continue feeding the hardware watchdog if the
 * task watchdog timeouts are too long for regular updates.
 *
 * It is not an index into the channels array: task_wdt_feed() selects it as
 * the default "next channel" when the hardware fallback is enabled, and
 * task_wdt_trigger() checks for it before touching the array.
 */
#define TASK_WDT_BACKGROUND_CHANNEL (-1)
|
||||
|
||||
/*
 * Task watchdog channel data
 *
 * One instance exists per configurable channel. A channel is considered
 * unused (and may be handed out by task_wdt_add()) while reload_period is 0.
 */
struct task_wdt_channel {
	/* period in milliseconds used to reset the timeout, set to 0 to
	 * indicate that the channel is available
	 */
	uint32_t reload_period;
	/* abs. ticks when this channel expires (updated by task_wdt_feed) */
	int64_t timeout_abs_ticks;
	/* user data passed to the callback function */
	void *user_data;
	/* function to be called when watchdog timer expired; if NULL, the
	 * timer callback reboots the system instead
	 */
	task_wdt_callback_t callback;
};
|
||||
|
||||
/* array of all task watchdog channels; a slot with reload_period == 0 is
 * free
 */
static struct task_wdt_channel channels[CONFIG_TASK_WDT_CHANNELS];

/* timer used for watchdog handling; a single timer is shared by all channels
 * and is restarted by task_wdt_feed() with the earliest pending timeout
 */
static struct k_timer timer;

#ifdef CONFIG_TASK_WDT_HW_FALLBACK
/* pointer to the hardware watchdog used as a fallback (stays NULL when
 * task_wdt_init() was called without a device)
 */
static const struct device *hw_wdt_dev;
/* value returned by wdt_install_timeout() for the fallback watchdog */
static int hw_wdt_channel;
/* set by task_wdt_add() when it starts the fallback watchdog via
 * wdt_setup()
 */
static bool hw_wdt_started;
#endif
|
||||
|
||||
/**
|
||||
* @brief Task watchdog timer callback.
|
||||
*
|
||||
* If the device operates as intended, this function will never be called,
|
||||
* as the timer is continuously restarted with the next due timeout in the
|
||||
* task_wdt_feed() function.
|
||||
*
|
||||
* If all task watchdogs have longer timeouts than the hardware watchdog,
|
||||
* this function is called regularly (via the background channel). This
|
||||
* should be avoided by setting CONFIG_TASK_WDT_MIN_TIMEOUT to the minimum
|
||||
* task watchdog timeout used in the application.
|
||||
*
|
||||
* @param timer_id Pointer to the timer which called the function
|
||||
*/
|
||||
static void task_wdt_trigger(struct k_timer *timer_id)
|
||||
{
|
||||
int channel_id = (int)k_timer_user_data_get(timer_id);
|
||||
|
||||
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
|
||||
if (channel_id == TASK_WDT_BACKGROUND_CHANNEL) {
|
||||
if (hw_wdt_dev) {
|
||||
wdt_feed(hw_wdt_dev, 0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (channels[channel_id].reload_period == 0) {
|
||||
/* channel was deleted */
|
||||
return;
|
||||
} else if (channels[channel_id].callback) {
|
||||
channels[channel_id].callback(channel_id,
|
||||
channels[channel_id].user_data);
|
||||
} else {
|
||||
sys_reboot(SYS_REBOOT_COLD);
|
||||
}
|
||||
}
|
||||
|
||||
int task_wdt_init(const struct device *hw_wdt)
|
||||
{
|
||||
if (hw_wdt) {
|
||||
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
|
||||
struct wdt_timeout_cfg wdt_config;
|
||||
|
||||
wdt_config.flags = WDT_FLAG_RESET_SOC;
|
||||
wdt_config.window.min = 0U;
|
||||
wdt_config.window.max = CONFIG_TASK_WDT_MIN_TIMEOUT +
|
||||
CONFIG_TASK_WDT_HW_FALLBACK_DELAY;
|
||||
wdt_config.callback = NULL;
|
||||
|
||||
hw_wdt_dev = hw_wdt;
|
||||
hw_wdt_channel = wdt_install_timeout(hw_wdt_dev, &wdt_config);
|
||||
#else
|
||||
return -ENOTSUP;
|
||||
#endif
|
||||
}
|
||||
|
||||
k_timer_init(&timer, task_wdt_trigger, NULL);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Register a new task watchdog channel.
 *
 * reload_period is the timeout in milliseconds and must not be 0 (0 marks an
 * unused slot). callback is invoked with the channel ID and user_data when
 * the channel expires; if it is NULL the system is rebooted instead.
 *
 * The first successful call also starts the fallback hardware watchdog if
 * one was configured in task_wdt_init().
 *
 * Returns the channel ID (>= 0) on success, -EINVAL for a zero
 * reload_period, or -ENOMEM if all CONFIG_TASK_WDT_CHANNELS slots are taken.
 */
int task_wdt_add(uint32_t reload_period, task_wdt_callback_t callback,
		 void *user_data)
{
	int ret = -ENOMEM;

	if (reload_period == 0) {
		return -EINVAL;
	}

	/*
	 * Searching for and claiming a free slot must not be interleaved with
	 * another thread doing the same. The scheduler lock is used for the
	 * same reason as in task_wdt_feed(); it nests, so calling
	 * task_wdt_feed() below is fine.
	 */
	k_sched_lock();

	/* look for unused channel (reload_period set to 0) */
	for (int id = 0; id < (int)ARRAY_SIZE(channels); id++) {
		if (channels[id].reload_period != 0) {
			continue;
		}

		channels[id].reload_period = reload_period;
		channels[id].user_data = user_data;
		channels[id].timeout_abs_ticks = K_TICKS_FOREVER;
		channels[id].callback = callback;

#ifdef CONFIG_TASK_WDT_HW_FALLBACK
		if (!hw_wdt_started && hw_wdt_dev) {
			/* also start fallback hw wdt */
			int err = wdt_setup(hw_wdt_dev, 0);

			if (err == 0) {
				hw_wdt_started = true;
			} else {
				/* leave the flag clear so a later call retries */
				LOG_ERR("hw_wdt setup failed: %d", err);
			}
		}
#endif

		/* Feed only after the hw wdt has been started, because
		 * task_wdt_feed() also feeds the hardware watchdog.
		 */
		task_wdt_feed(id);

		ret = id;
		break;
	}

	k_sched_unlock();

	return ret;
}
|
||||
|
||||
int task_wdt_delete(int channel_id)
|
||||
{
|
||||
if (channel_id < 0 || channel_id >= ARRAY_SIZE(channels)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
channels[channel_id].reload_period = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int task_wdt_feed(int channel_id)
|
||||
{
|
||||
int64_t current_ticks;
|
||||
int next_channel_id; /* channel which will time out next */
|
||||
int64_t next_timeout; /* timeout in absolute ticks of this channel */
|
||||
|
||||
if (channel_id < 0 || channel_id >= ARRAY_SIZE(channels)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* We need a critical section instead of a mutex while updating the
|
||||
* channels array in order to prevent priority inversion. Otherwise,
|
||||
* a low priority thread could be preempted before releasing the mutex
|
||||
* and block a high priority thread that wants to feed its task wdt.
|
||||
*/
|
||||
k_sched_lock();
|
||||
|
||||
current_ticks = z_tick_get();
|
||||
|
||||
/* feed the specified channel */
|
||||
channels[channel_id].timeout_abs_ticks = current_ticks +
|
||||
k_ms_to_ticks_ceil64(channels[channel_id].reload_period);
|
||||
|
||||
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
|
||||
next_channel_id = TASK_WDT_BACKGROUND_CHANNEL;
|
||||
next_timeout = current_ticks +
|
||||
k_ms_to_ticks_ceil64(CONFIG_TASK_WDT_MIN_TIMEOUT);
|
||||
#else
|
||||
next_channel_id = 0;
|
||||
next_timeout = INT64_MAX;
|
||||
#endif
|
||||
|
||||
/* find minimum timeout of all channels */
|
||||
for (int id = 0; id < ARRAY_SIZE(channels); id++) {
|
||||
if (channels[id].reload_period != 0 &&
|
||||
channels[id].timeout_abs_ticks < next_timeout) {
|
||||
next_channel_id = id;
|
||||
next_timeout = channels[id].timeout_abs_ticks;
|
||||
}
|
||||
}
|
||||
|
||||
/* update task wdt kernel timer */
|
||||
k_timer_user_data_set(&timer, (void *)next_channel_id);
|
||||
k_timer_start(&timer, K_TIMEOUT_ABS_TICKS(next_timeout),
|
||||
K_TIMEOUT_ABS_TICKS(next_timeout));
|
||||
|
||||
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
|
||||
if (hw_wdt_dev) {
|
||||
wdt_feed(hw_wdt_dev, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
k_sched_unlock();
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue