drivers/timer/apic_tsc: move to common code pattern

Let's replicate a common code pattern for this to be abstracted more
easily in the future. In addition to duplicating the correctness fixes
implemented in the ARM and RISC-V drivers, this eliminates a couple large
runtime divisions.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Author:       Nicolas Pitre
Date:         2024-05-22 16:43:51 -04:00
Committed by: Henrik Brix Andersen
Commit:       e34369ce31

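For context: the "common code pattern" the message refers to is the tick-accounting scheme already used by the ARM and RISC-V timer drivers and visible in the diff below. It keeps last_cycle/last_tick/last_elapsed and converts cycle deltas to ticks through a native-register-width cycle_diff_t, so 32-bit builds divide at native register width rather than calling a 64-by-64-bit division routine. Below is a minimal, self-contained sketch of that bookkeeping; the helper name announce_pending_ticks() and the CYC_PER_TICK value are illustrative only, not part of the driver.

#include <stdint.h>

#define CYC_PER_TICK 100000u          /* illustrative; the driver derives this from Kconfig */
#define cycle_diff_t unsigned long    /* native register width, as in the diff below */

static uint64_t last_cycle;   /* cycle count at the last announced tick boundary */
static uint64_t last_tick;    /* tick count corresponding to last_cycle */
static uint32_t last_elapsed; /* ticks observed since last_tick but not yet announced */

/* Hypothetical helper: advance the bookkeeping and return the ticks to announce. */
static uint32_t announce_pending_ticks(uint64_t now)
{
	uint64_t delta_cycles = now - last_cycle;
	/* the cast keeps the division at native register width */
	uint32_t delta_ticks = (cycle_diff_t)delta_cycles / CYC_PER_TICK;

	last_cycle += (cycle_diff_t)delta_ticks * CYC_PER_TICK;
	last_tick += delta_ticks;
	last_elapsed = 0;
	return delta_ticks;
}

The diff below applies exactly this bookkeeping in isr(), sys_clock_elapsed() and sys_clock_set_timeout().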

@@ -16,8 +16,37 @@
 #define IA32_TSC_DEADLINE_MSR 0x6e0
 #define IA32_TSC_ADJUST_MSR 0x03b
 
-#define CYC_PER_TICK (CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC \
-		      / (uint64_t) CONFIG_SYS_CLOCK_TICKS_PER_SEC)
+#define CYC_PER_TICK (uint32_t)(CONFIG_SYS_CLOCK_HW_CYCLES_PER_SEC \
+				/ CONFIG_SYS_CLOCK_TICKS_PER_SEC)
+
+/* the unsigned long cast limits divisors to native CPU register width */
+#define cycle_diff_t unsigned long
+#define CYCLE_DIFF_MAX (~(cycle_diff_t)0)
+
+/*
+ * We have two constraints on the maximum number of cycles we can wait for.
+ *
+ * 1) sys_clock_announce() accepts at most INT32_MAX ticks.
+ *
+ * 2) The number of cycles between two reports must fit in a cycle_diff_t
+ *    variable before converting it to ticks.
+ *
+ * Then:
+ *
+ * 3) Pick the smallest between (1) and (2).
+ *
+ * 4) Take into account some room for the unavoidable IRQ servicing latency.
+ *    Let's use 3/4 of the max range.
+ *
+ * Finally let's add the LSB value to the result so to clear out a bunch of
+ * consecutive set bits coming from the original max values to produce a
+ * nicer literal for assembly generation.
+ */
+#define CYCLES_MAX_1	((uint64_t)INT32_MAX * (uint64_t)CYC_PER_TICK)
+#define CYCLES_MAX_2	((uint64_t)CYCLE_DIFF_MAX)
+#define CYCLES_MAX_3	MIN(CYCLES_MAX_1, CYCLES_MAX_2)
+#define CYCLES_MAX_4	(CYCLES_MAX_3 / 2 + CYCLES_MAX_3 / 4)
+#define CYCLES_MAX	(CYCLES_MAX_4 + LSB_GET(CYCLES_MAX_4))
 
 struct apic_timer_lvt {
 	uint8_t vector : 8;
@@ -28,7 +57,9 @@ struct apic_timer_lvt {
 };
 
 static struct k_spinlock lock;
-static uint64_t last_announce;
+static uint64_t last_cycle;
+static uint64_t last_tick;
+static uint32_t last_elapsed;
 static union { uint32_t val; struct apic_timer_lvt lvt; } lvt_reg;
 
 static ALWAYS_INLINE uint64_t rdtsc(void)
@@ -39,21 +70,6 @@ static ALWAYS_INLINE uint64_t rdtsc(void)
 	return lo + (((uint64_t)hi) << 32);
 }
 
-static void isr(const void *arg)
-{
-	ARG_UNUSED(arg);
-
-	k_spinlock_key_t key = k_spin_lock(&lock);
-	uint32_t ticks = (rdtsc() - last_announce) / CYC_PER_TICK;
-
-	last_announce += ticks * CYC_PER_TICK;
-	k_spin_unlock(&lock, key);
-	sys_clock_announce(ticks);
-
-	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
-		sys_clock_set_timeout(1, false);
-	}
-}
-
 static inline void wrmsr(int32_t msr, uint64_t val)
 {
 	uint32_t hi = (uint32_t) (val >> 32);
@@ -62,18 +78,50 @@ static inline void wrmsr(int32_t msr, uint64_t val)
 	__asm__ volatile("wrmsr" :: "d"(hi), "a"(lo), "c"(msr));
 }
 
+static void isr(const void *arg)
+{
+	ARG_UNUSED(arg);
+
+	k_spinlock_key_t key = k_spin_lock(&lock);
+	uint64_t curr_cycle = rdtsc();
+	uint64_t delta_cycles = curr_cycle - last_cycle;
+	uint32_t delta_ticks = (cycle_diff_t)delta_cycles / CYC_PER_TICK;
+
+	last_cycle += (cycle_diff_t)delta_ticks * CYC_PER_TICK;
+	last_tick += delta_ticks;
+	last_elapsed = 0;
+
+	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
+		uint64_t next_cycle = last_cycle + CYC_PER_TICK;
+
+		wrmsr(IA32_TSC_DEADLINE_MSR, next_cycle);
+	}
+
+	k_spin_unlock(&lock, key);
+	sys_clock_announce(delta_ticks);
+}
+
 void sys_clock_set_timeout(int32_t ticks, bool idle)
 {
 	ARG_UNUSED(idle);
 
-	uint64_t now = rdtsc();
+	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
+		return;
+	}
+
 	k_spinlock_key_t key = k_spin_lock(&lock);
-	uint64_t expires = now + (MAX(ticks - 1, 0) * CYC_PER_TICK);
+	uint64_t next_cycle;
 
-	expires = last_announce + (((expires - last_announce + CYC_PER_TICK - 1)
-				    / CYC_PER_TICK) * CYC_PER_TICK);
+	if (ticks == K_TICKS_FOREVER) {
+		next_cycle = last_cycle + CYCLES_MAX;
+	} else {
+		next_cycle = (last_tick + last_elapsed + ticks) * CYC_PER_TICK;
+		if ((next_cycle - last_cycle) > CYCLES_MAX) {
+			next_cycle = last_cycle + CYCLES_MAX;
+		}
+	}
 
-	/* The second condition is to catch the wraparound.
+	/*
 	 * Interpreted strictly, the IA SDM description of the
 	 * TSC_DEADLINE MSR implies that it will trigger an immediate
 	 * interrupt if we try to set an expiration across the 64 bit
@@ -81,21 +129,28 @@ void sys_clock_set_timeout(int32_t ticks, bool idle)
 	 * real hardware it requires more than a century of uptime,
 	 * but this is cheap and safe.
 	 */
-	if ((ticks == K_TICKS_FOREVER) || (expires < last_announce)) {
-		expires = UINT64_MAX;
+	if (next_cycle < last_cycle) {
+		next_cycle = UINT64_MAX;
 	}
+	wrmsr(IA32_TSC_DEADLINE_MSR, next_cycle);
 
-	wrmsr(IA32_TSC_DEADLINE_MSR, expires);
 	k_spin_unlock(&lock, key);
 }
 
 uint32_t sys_clock_elapsed(void)
 {
-	k_spinlock_key_t key = k_spin_lock(&lock);
-	uint32_t ret = (rdtsc() - last_announce) / CYC_PER_TICK;
+	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
+		return 0;
+	}
 
+	k_spinlock_key_t key = k_spin_lock(&lock);
+	uint64_t curr_cycle = rdtsc();
+	uint64_t delta_cycles = curr_cycle - last_cycle;
+	uint32_t delta_ticks = (cycle_diff_t)delta_cycles / CYC_PER_TICK;
+
+	last_elapsed = delta_ticks;
 	k_spin_unlock(&lock, key);
-	return ret;
+	return delta_ticks;
 }
 
 uint32_t sys_clock_cycle_get_32(void)
@@ -190,12 +245,12 @@ static int sys_clock_driver_init(void)
 	 */
 	__asm__ volatile("mfence" ::: "memory");
 
-	last_announce = rdtsc();
-	irq_enable(timer_irq());
+	last_tick = rdtsc() / CYC_PER_TICK;
+	last_cycle = last_tick * CYC_PER_TICK;
 
 	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
-		sys_clock_set_timeout(1, false);
+		wrmsr(IA32_TSC_DEADLINE_MSR, last_cycle + CYC_PER_TICK);
 	}
+	irq_enable(timer_irq());
 
 	return 0;
 }
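For illustration: a worked instance of the CYCLES_MAX derivation added above, assuming a 32-bit unsigned long and a purely illustrative CYC_PER_TICK of 100000 (e.g. a 1 GHz TSC with CONFIG_SYS_CLOCK_TICKS_PER_SEC=10000). Zephyr's LSB_GET() is approximated here with x & -x; all values are for this example only.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cyc_per_tick   = 100000;                   /* illustrative value */
	uint64_t cycle_diff_max = UINT32_MAX;               /* ~(cycle_diff_t)0 with a 32-bit unsigned long */
	uint64_t max1 = (uint64_t)INT32_MAX * cyc_per_tick; /* 214748364700000 */
	uint64_t max2 = cycle_diff_max;                     /*      4294967295 */
	uint64_t max3 = (max1 < max2) ? max1 : max2;        /*      4294967295 */
	uint64_t max4 = max3 / 2 + max3 / 4;                /* 3221225470 = 0xbffffffe */
	uint64_t cycles_max = max4 + (max4 & -max4);        /* 3221225472 = 0xc0000000 */

	/* adding the LSB turns the run of set bits into a single clean literal */
	printf("CYCLES_MAX = %llu (0x%llx)\n",
	       (unsigned long long)cycles_max,
	       (unsigned long long)cycles_max);
	return 0;
}

On 64-bit builds CYCLE_DIFF_MAX is UINT64_MAX instead, so the INT32_MAX-ticks bound (CYCLES_MAX_1) becomes the limiting constraint.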