soc/intel_adsp: Robustify logging code
The existing implementation of the adsplog.py script worked fine for individual runs (e.g. when running specific code) but had no support for detecting system reset events and thus could not be used for monitoring applications like test automation. It also could not handle the case where a rapid log burst would overflow the buffer before being noticed at the client. The protocol here was also rife with opportunities for race conditions. Fix all that up via what is mostly a rewrite of the script. The protocol itself hasn't changed, just the handling. Also includes some changes to the trace_out.c code on the device side. These are required to get the ordering correct so that race conditions are tractable to handle on the reader side. Some of the specific cases that are managed: * There is a 0.4s backoff when a reset is detected. Continuing to poll the buffer has been observed to hang the device (I'm fairly sure this is actually a hardware bug; reads aren't visible to the DSP software). * The "no magic number" case needs to be reserved for detecting system reset. * Slot data must be read BETWEEN two reads of the ID value to detect the case where the slot gets clobbered while being read. * The "currently being filled" slot needs to always have an ID value that does not appear in sequence from the prior slot. * We need to check the full history in the buffer at each poll to detect resets, which opens up a race between the read of the "next slot" (which is absent) and the full history retrieval (when it can now be present!). Detect that. * A null termination bug in the current output slot got fixed. Broadly: this was a huge bear to make work. It sounds like this should be a simple protocol, but it's not in practice. Also: clean up the error reporting in the script so it can handle new PCI IDs being added, and report permissions failures on the required sysfs file as a human-readable error. Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
This commit is contained in:
parent
08253db46b
commit
a5110b52ca
2 changed files with 232 additions and 76 deletions
|
@ -56,7 +56,7 @@ static __aligned(64) union {
|
|||
uint32_t cache_pad[16];
|
||||
} data_rec;
|
||||
|
||||
#define data ((struct metadata *)UNCACHED_PTR(&data_rec.meta))
|
||||
#define data ((volatile struct metadata *)UNCACHED_PTR(&data_rec.meta))
|
||||
|
||||
static inline struct slot *slot(int i)
|
||||
{
|
||||
|
@ -65,9 +65,18 @@ static inline struct slot *slot(int i)
|
|||
return &slots[i];
|
||||
}
|
||||
|
||||
/* Return the index of the slot after s, computed as (s + 1) modulo NSLOTS,
 * so the index wraps back to 0 once it reaches NSLOTS.
 */
static int slot_incr(int s)
|
||||
{
|
||||
return (s + 1) % NSLOTS;
|
||||
}
|
||||
|
||||
void intel_adsp_trace_out(int8_t *str, size_t len)
|
||||
{
|
||||
k_spinlock_key_t key = k_spin_lock(&data->lock);
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
k_spinlock_key_t key = k_spin_lock((void *)&data->lock);
|
||||
|
||||
if (!data->initialized) {
|
||||
slot(0)->hdr.magic = 0;
|
||||
|
@ -77,8 +86,8 @@ void intel_adsp_trace_out(int8_t *str, size_t len)
|
|||
}
|
||||
|
||||
/* We work with a local copy of the global data for
|
||||
* performance reasons (*data is uncached!) and put it back at
|
||||
* the end.
|
||||
* performance reasons (The memory behind the "data" pointer
|
||||
* is uncached and volatile!) and put it back at the end.
|
||||
*/
|
||||
uint32_t curr_slot = data->curr_slot;
|
||||
uint32_t n_bytes = data->n_bytes;
|
||||
|
@ -89,22 +98,40 @@ void intel_adsp_trace_out(int8_t *str, size_t len)
|
|||
|
||||
s->msg[n_bytes++] = c;
|
||||
|
||||
/* Are we done with this slot? Terminate it and flag
|
||||
* it for consumption on the other side
|
||||
*/
|
||||
if (c == '\n' || n_bytes >= MSGSZ) {
|
||||
curr_slot = (curr_slot + 1) % NSLOTS;
|
||||
n_bytes = 0;
|
||||
slot(curr_slot)->hdr.magic = 0;
|
||||
slot(curr_slot)->hdr.id = s->hdr.id + 1;
|
||||
s->hdr.magic = SLOT_MAGIC;
|
||||
}
|
||||
}
|
||||
if (n_bytes < MSGSZ) {
|
||||
s->msg[n_bytes] = 0;
|
||||
}
|
||||
|
||||
if (n_bytes < MSGSZ) {
|
||||
slot(curr_slot)->msg[n_bytes] = 0;
|
||||
/* Make sure the next slot has a magic number
|
||||
* (so the reader can distinguish between
|
||||
* no-new-data and system-reset), but does NOT
|
||||
* have the correct successor ID (so can never
|
||||
* be picked up as valid data). We'll
|
||||
* increment it later when we terminate that
|
||||
* slot.
|
||||
*/
|
||||
int next_slot = slot_incr(curr_slot);
|
||||
uint16_t new_id = s->hdr.id + 1;
|
||||
|
||||
slot(next_slot)->hdr.id = new_id;
|
||||
slot(next_slot)->hdr.magic = SLOT_MAGIC;
|
||||
slot(next_slot)->msg[0] = 0;
|
||||
|
||||
s->hdr.id = new_id;
|
||||
s->hdr.magic = SLOT_MAGIC;
|
||||
|
||||
curr_slot = next_slot;
|
||||
n_bytes = 0;
|
||||
}
|
||||
}
|
||||
|
||||
data->curr_slot = curr_slot;
|
||||
data->n_bytes = n_bytes;
|
||||
k_spin_unlock(&data->lock, key);
|
||||
k_spin_unlock((void *)&data->lock, key);
|
||||
}
|
||||
|
||||
int arch_printk_char_out(int c)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue