kernel: optimize ms-to-ticks for certain tick frequencies
Some tick frequencies lend themselves to optimized conversions from ms to ticks and vice-versa: - 1000Hz which does not need any conversion - 500Hz, 250Hz, 125Hz where the division/multiplication are a straight shift since they are power-of-two factors of 1000. In addition, some more generally used values are made to use optimized conversion equations rather than the generic one that uses 64-bit math, and often results in calling compiler intrinsics. These values are: 100Hz, 50Hz, 25Hz, 20Hz, 10Hz, 1Hz (the last one used in some testing). Avoiding the 64-bit math intrinsics has the benefit, in addition to increased performance, of using a significantly lower amount of stack space: 52 bytes on ARM Cortex-M and 80 bytes on x86. Change-Id: I080eb338a2637d6b1c6838c119af1a9fa37fe869 Signed-off-by: Benjamin Walsh <benjamin.walsh@windriver.com>
This commit is contained in:
parent
95061b6561
commit
6209218f40
6 changed files with 73 additions and 10 deletions
|
@ -32,6 +32,7 @@
|
|||
#include <misc/__assert.h>
|
||||
#include <misc/dlist.h>
|
||||
#include <misc/slist.h>
|
||||
#include <misc/util.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
|
@ -634,13 +635,47 @@ extern void *k_thread_custom_data_get(void);
|
|||
* @cond INTERNAL_HIDDEN
|
||||
*/
|
||||
|
||||
/* kernel clocks */
|
||||
|
||||
/*
 * Select the ms<->ticks conversion strategy at compile time.
 *
 * For the tick rates listed below, _ms_per_tick is defined as
 * MSEC_PER_SEC / sys_clock_ticks_per_sec. Assuming MSEC_PER_SEC is 1000
 * (TODO confirm), every listed rate divides it evenly, so the conversions
 * reduce to a multiply or divide by a small compile-time constant.
 *
 * For any other rate, _NON_OPTIMIZED_TICKS_PER_SEC is defined instead and
 * _ms_per_tick is left undefined.
 */
#if (sys_clock_ticks_per_sec == 1000) || \
|
||||
(sys_clock_ticks_per_sec == 500) || \
|
||||
(sys_clock_ticks_per_sec == 250) || \
|
||||
(sys_clock_ticks_per_sec == 125) || \
|
||||
(sys_clock_ticks_per_sec == 100) || \
|
||||
(sys_clock_ticks_per_sec == 50) || \
|
||||
(sys_clock_ticks_per_sec == 25) || \
|
||||
(sys_clock_ticks_per_sec == 20) || \
|
||||
(sys_clock_ticks_per_sec == 10) || \
|
||||
(sys_clock_ticks_per_sec == 1)
|
||||
|
||||
#define _ms_per_tick (MSEC_PER_SEC / sys_clock_ticks_per_sec)
|
||||
#else
|
||||
/* yields horrible 64-bit math on many architectures: try to avoid */
|
||||
#define _NON_OPTIMIZED_TICKS_PER_SEC
|
||||
#endif
|
||||
|
||||
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
|
||||
extern int32_t _ms_to_ticks(int32_t ms);
|
||||
#else
|
||||
/*
 * Inline ms-to-ticks conversion, compiled only when
 * _NON_OPTIMIZED_TICKS_PER_SEC is not defined (i.e. _ms_per_tick exists).
 *
 * ms is reinterpreted as uint32_t and divided by _ms_per_tick through
 * ceiling_fraction(); presumably this rounds up so that a partial tick
 * counts as a whole one -- confirm against misc/util.h.
 *
 * NOTE(review): a negative ms becomes a large unsigned value after the
 * cast; callers are presumably expected to filter such values first.
 */
static ALWAYS_INLINE int32_t _ms_to_ticks(int32_t ms)
|
||||
{
|
||||
return (int32_t)ceiling_fraction((uint32_t)ms, _ms_per_tick);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* added tick needed to account for tick in progress */
|
||||
#define _TICK_ALIGN 1
|
||||
|
||||
static int64_t __ticks_to_ms(int64_t ticks)
|
||||
static inline int64_t __ticks_to_ms(int64_t ticks)
|
||||
{
|
||||
#if CONFIG_SYS_CLOCK_EXISTS
|
||||
#ifdef CONFIG_SYS_CLOCK_EXISTS
|
||||
|
||||
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
|
||||
return (MSEC_PER_SEC * (uint64_t)ticks) / sys_clock_ticks_per_sec;
|
||||
#else
|
||||
return (uint64_t)ticks * _ms_per_tick;
|
||||
#endif
|
||||
|
||||
#else
|
||||
__ASSERT(ticks == 0, "");
|
||||
return 0;
|
||||
|
|
|
@ -2892,10 +2892,6 @@ nano_stack_pop(struct nano_stack *stack, uint32_t *data,
|
|||
*/
|
||||
#define nano_task_stack_pop nano_stack_pop
|
||||
|
||||
/* kernel clocks */
|
||||
|
||||
extern int32_t _ms_to_ticks(int32_t ms);
|
||||
|
||||
/**
|
||||
* @brief Return the current system tick count.
|
||||
*
|
||||
|
|
|
@ -399,6 +399,30 @@ config SYS_CLOCK_TICKS_PER_SEC
|
|||
help
|
||||
This option specifies the frequency of the system clock in Hz.
|
||||
|
||||
Depending on the choice made, an amount of possibly expensive math must
|
||||
occur when converting ticks to milliseconds and vice-versa. Some values
|
||||
are optimized, and yield significantly less math.
|
||||
|
||||
The optimal values from a computational point-of-view are 1000, 500,
|
||||
250 and 125, since in these cases there is either no computation
|
||||
required, or it is all done via bit-shifting. These also give a
|
||||
granularity from 1ms to 8ms.
|
||||
|
||||
Other good values are 100, 50, 25, 20 and 10. In this case, some math
|
||||
is required but is minimized. These are also values that necessitate a
|
||||
reduced number of clock interrupts per second, at the cost of
|
||||
granularity (10ms to 100ms).
|
||||
|
||||
All other values require some extensive 64-bit math, and in some
|
||||
configurations even require calls to compiler built-in functions, and
|
||||
can require a non-trivial extra amount of stack space (e.g. around 80
|
||||
bytes on x86).
|
||||
|
||||
Using the legacy API also incurs an extra penalty, since when asking
|
||||
for a timeout, a translation is made from ticks to milliseconds to call
|
||||
the native kernel APIs, and then another translation is made back to
|
||||
ticks, since the kernel is tick-based.
|
||||
|
||||
config SYS_CLOCK_HW_CYCLES_PER_SEC
|
||||
int "System clock's h/w timer frequency"
|
||||
help
|
||||
|
|
|
@ -31,7 +31,9 @@ extern void _pend_thread(struct k_thread *thread,
|
|||
extern void _pend_current_thread(_wait_q_t *wait_q, int32_t timeout);
|
||||
extern void _move_thread_to_end_of_prio_q(struct k_thread *thread);
|
||||
extern int __must_switch_threads(void);
|
||||
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
|
||||
extern int32_t _ms_to_ticks(int32_t ms);
|
||||
#endif
|
||||
extern void idle(void *, void *, void *);
|
||||
|
||||
/* find which one is the next thread to run */
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#include <atomic.h>
|
||||
#include <ksched.h>
|
||||
#include <wait_q.h>
|
||||
#include <misc/util.h>
|
||||
|
||||
/* the only struct _kernel instance */
|
||||
struct _kernel _kernel = {0};
|
||||
|
@ -191,15 +192,14 @@ static int _is_wait_q_insert_point(sys_dnode_t *node, void *insert_prio)
|
|||
|
||||
/* convert milliseconds to ticks */
|
||||
|
||||
#define ceiling(numerator, divider) \
|
||||
(((numerator) + ((divider) - 1)) / (divider))
|
||||
|
||||
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
|
||||
/*
 * Generic (non-optimized) ms-to-ticks conversion.
 *
 * The product ms * sys_clock_ticks_per_sec is computed in 64 bits, then
 * divided by MSEC_PER_SEC and truncated to int32_t.
 *
 * NOTE(review): two return statements are visible here; presumably this
 * is the rendered diff showing the old ceiling() line next to its
 * ceiling_fraction() replacement, and only one exists in the real file --
 * confirm.
 */
int32_t _ms_to_ticks(int32_t ms)
|
||||
{
|
||||
/* widen before multiplying so the intermediate product is 64-bit */
int64_t ms_ticks_per_sec = (int64_t)ms * sys_clock_ticks_per_sec;
|
||||
|
||||
return (int32_t)ceiling(ms_ticks_per_sec, MSEC_PER_SEC);
|
||||
return (int32_t)ceiling_fraction(ms_ticks_per_sec, MSEC_PER_SEC);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* pend the specified thread: it must *not* be in the ready queue */
|
||||
/* must be called with interrupts locked */
|
||||
|
|
|
@ -23,6 +23,12 @@
|
|||
#include <wait_q.h>
|
||||
#include <drivers/system_timer.h>
|
||||
|
||||
#ifdef CONFIG_SYS_CLOCK_EXISTS
|
||||
#ifdef _NON_OPTIMIZED_TICKS_PER_SEC
|
||||
#warning "non-optimized system clock frequency chosen: performance may suffer"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_SYS_CLOCK_EXISTS
|
||||
int sys_clock_us_per_tick = 1000000 / sys_clock_ticks_per_sec;
|
||||
int sys_clock_hw_cycles_per_tick =
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue