soc/intel_adsp: Robustify logging code
The existing implementation of the adsplog.py script worked fine for individual runs (e.g. when running specific code) but had no support for detecting system reset events and thus could not be used for monitoring applications like test automation. It also could not handle the case where a rapid log burst would overflow the buffer before being noticed at the client. Also, the protocol here was also rife with opportunities for race conditions. Fix all that up via what is mostly a rewrite of the script. The protocol itself hasn't changed, just the handling. Also includes some changes to the trace_out.c code on the device side. These are required to get ordering correct to make race conditions tractably handleable on the reader side. Some of the specific cases that are managed: * There is a 0.4s backoff when a reset is detected. Continuing to poll the buffer has been observed to hang the device (I'm fairly sure this is actually a hardware bug, reads aren't visible to the DSP software). * The "no magic number" case needs to be reserved for detecting system reset. * Slot data must be read BETWEEN two reads of the ID value to detect the case where the slot gets clobbered while being read. * The "currently being filled" slot needs to always have an ID value that does not appear in sequence from the prior slot. * We need to check the full history in the buffer at each poll to detect resets, which opens up a race between the read of the "next slot" (which is absent) and the full history retrieval (when it can now be present!). Detect that. * A null termination bug in the current output slot got fixed. Broadly: this was a huge bear to make work. It sounds like this should be a simple protocol, but it's not in practice. Also: clean up the error reporting in the script so it can handle new PCI IDs being added, and reports permissions failures on the required sysfs file as a human-readable error. Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
This commit is contained in:
parent
08253db46b
commit
a5110b52ca
2 changed files with 232 additions and 76 deletions
|
@ -3,83 +3,212 @@
|
|||
# Copyright (c) 2020 Intel Corporation
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import struct
|
||||
import subprocess
|
||||
import mmap
|
||||
|
||||
# Log reader for the trace output buffer on a ADSP device.
|
||||
#
|
||||
# The trace buffer is inside a shared memory region exposed by the
|
||||
# audio PCI device as a BAR at index 4. The hardware provides 4 128k
|
||||
# "windows" starting at 512kb in the BAR which the DSP firmware can
|
||||
# map to 4k-aligned locations within its own address space. By
|
||||
# protocol convention log output is an 8k region at window index 3.
|
||||
#
|
||||
# The 8k window is treated as an array of 64-byte "slots", each of
|
||||
# which is prefixed by a magic number, which should be 0x55aa for log
|
||||
# data, followed a 16 bit "ID" number, followed by a null-terminated
|
||||
# string in the final 60 bytes (or 60 non-null bytes of log data).
|
||||
# The DSP firmware will write sequential IDs into the buffer starting
|
||||
# from an ID of zero in the first slot, and wrapping at the end.
|
||||
|
||||
MAP_SIZE = 8192
|
||||
SLOT_SIZE = 64
|
||||
|
||||
# Location of the log output window within the DSP BAR on the PCI
|
||||
# device. The hardware provides 4x 128k "windows" starting at 512kb
|
||||
# in the BAR which the DSP software can map to 4k-aligned locations
|
||||
# within its own address space. By convention log output is an 8k
|
||||
# region at window index 3.
|
||||
NUM_SLOTS = int(MAP_SIZE / SLOT_SIZE)
|
||||
SLOT_MAGIC = 0x55aa
|
||||
WIN_OFFSET = 0x80000
|
||||
WIN_ID = 3
|
||||
WIN_IDX = 3
|
||||
WIN_SIZE = 0x20000
|
||||
LOG_OFFSET = WIN_OFFSET + WIN_ID * WIN_SIZE
|
||||
LOG_OFFSET = WIN_OFFSET + WIN_IDX * WIN_SIZE
|
||||
|
||||
# Find me a way to do this detection as cleanly in python as shell, I
|
||||
# dare you.
|
||||
barfile = subprocess.Popen(["sh", "-c",
|
||||
"echo -n "
|
||||
"$(dirname "
|
||||
" $(fgrep PCI_ID=8086:5A98 "
|
||||
" /sys/bus/pci/devices/*/uevent))"
|
||||
"/resource4"],
|
||||
stdout=subprocess.PIPE).stdout.read()
|
||||
fd = open(barfile)
|
||||
mem = mmap.mmap(fd.fileno(), MAP_SIZE, offset=LOG_OFFSET,
|
||||
prot=mmap.PROT_READ)
|
||||
# List of known ADSP devices by their PCI IDs
|
||||
DEVICES = ["8086:5a98"]
|
||||
|
||||
# The mapping is an array of 64-byte "slots", each of which is
|
||||
# prefixed by a magic number, which should be 0x55aa for log data,
|
||||
# followed a 16 bit "ID" number, followed by a null-terminated string
|
||||
# in the final 60 bytes. The DSP firmware will write sequential IDs
|
||||
# into the buffer starting from an ID of zero in the first slot, and
|
||||
# wrapping at the end. So the algorithm here is to find the smallest
|
||||
# valid slot, print its data, and then enter a polling loop waiting
|
||||
# for the next slot to be valid and have the correct next ID before
|
||||
# printing that too.
|
||||
mem = None
|
||||
|
||||
# NOTE: unfortunately there's no easy way to detect a warm reset of
|
||||
# the device, it will just jump back to the beginning and start
|
||||
# writing there, where we aren't looking. Really that level of
|
||||
# robustness needs to be handled in the kernel.
|
||||
for dev in DEVICES:
|
||||
# Find me a way to do this detection as cleanly in python as shell, I
|
||||
# dare you.
|
||||
barfile = subprocess.Popen(["sh", "-c",
|
||||
"echo -n "
|
||||
"$(dirname "
|
||||
f" $(fgrep PCI_ID={dev.upper()} "
|
||||
" /sys/bus/pci/devices/*/uevent))"
|
||||
"/resource4"],
|
||||
stdout=subprocess.PIPE).stdout.read()
|
||||
if not os.path.exists(barfile):
|
||||
continue
|
||||
|
||||
next_slot = 0
|
||||
next_id = 0xffff
|
||||
if not os.access(barfile, os.R_OK):
|
||||
sys.stderr.write(f"ERROR: Cannot open {barfile} for reading.")
|
||||
sys.exit(1)
|
||||
|
||||
for slot in range(int(MAP_SIZE / SLOT_SIZE)):
|
||||
fd = open(barfile)
|
||||
mem = mmap.mmap(fd.fileno(), MAP_SIZE, offset=LOG_OFFSET,
|
||||
prot=mmap.PROT_READ)
|
||||
|
||||
if mem is None:
|
||||
sys.stderr.write("ERROR: No ADSP device found.")
|
||||
sys.exit(1)
|
||||
|
||||
# Returns a tuple of (id, msg) if the slot is valid, or (-1, "") if
|
||||
# the slot does not contain firmware trace data
|
||||
def read_slot(slot, mem):
|
||||
off = slot * SLOT_SIZE
|
||||
(magic, sid) = struct.unpack("HH", mem[off:off+4])
|
||||
if magic == 0x55aa:
|
||||
if sid < next_id:
|
||||
next_slot = slot
|
||||
next_id = sid
|
||||
|
||||
while True:
|
||||
off = next_slot * SLOT_SIZE
|
||||
(magic, sid) = struct.unpack("HH", mem[off:off+4])
|
||||
if magic == 0x55aa and sid == next_id:
|
||||
# This dance because indexing large variable-length slices of
|
||||
# the mmap() array seems to produce garbage....
|
||||
msgbytes = []
|
||||
for i in range(4, SLOT_SIZE):
|
||||
b = mem[off+i]
|
||||
if b == 0:
|
||||
break
|
||||
msgbytes.append(b)
|
||||
msg = bytearray(len(msgbytes))
|
||||
for i, elem in enumerate(msgbytes):
|
||||
msg[i] = elem
|
||||
magic = (mem[off + 1] << 8) | mem[off]
|
||||
sid = (mem[off + 3] << 8) | mem[off + 2]
|
||||
|
||||
sys.stdout.write(msg.decode(encoding="utf-8", errors="ignore"))
|
||||
next_slot = int((next_slot + 1) % (MAP_SIZE / SLOT_SIZE))
|
||||
next_id += 1
|
||||
else:
|
||||
if magic != SLOT_MAGIC:
|
||||
return (-1, "")
|
||||
|
||||
# This dance because indexing large variable-length slices of
|
||||
# the mmap() array seems to produce garbage....
|
||||
msgbytes = []
|
||||
for i in range(4, SLOT_SIZE):
|
||||
b = mem[off+i]
|
||||
if b == 0:
|
||||
break
|
||||
msgbytes.append(b)
|
||||
msg = bytearray(len(msgbytes))
|
||||
for i, elem in enumerate(msgbytes):
|
||||
msg[i] = elem
|
||||
|
||||
return (sid, msg.decode(encoding="utf-8", errors="ignore"))
|
||||
|
||||
def read_hist(start_slot):
|
||||
id0, msg = read_slot(start_slot, mem)
|
||||
|
||||
# An invalid slot zero means no data has ever been placed in the
|
||||
# trace buffer, which is likely a system reset condition. Back
|
||||
# off for one second, because continuing to read the buffer has
|
||||
# been observed to hang the flash process (which I think can only
|
||||
# be a hardware bug).
|
||||
if start_slot == 0 and id0 < 0:
|
||||
sys.stdout.write("===\n=== [ADSP Device Reset]\n===\n")
|
||||
sys.stdout.flush()
|
||||
time.sleep(0.25)
|
||||
time.sleep(1)
|
||||
return (0, 0, "")
|
||||
|
||||
# Start at zero and read forward to get the last data in the
|
||||
# buffer. We are always guaranteed that slot zero will contain
|
||||
# valid data if any slot contains valid data.
|
||||
last_id = id0
|
||||
final_slot = start_slot
|
||||
for i in range(start_slot + 1, NUM_SLOTS):
|
||||
id, s = read_slot(i, mem)
|
||||
if id != ((last_id + 1) & 0xffff):
|
||||
break
|
||||
msg += s
|
||||
final_slot = i
|
||||
last_id = id
|
||||
|
||||
final_id = last_id
|
||||
|
||||
# Now read backwards from the end to get the prefix blocks from
|
||||
# the last wraparound
|
||||
last_id = id0
|
||||
for i in range(NUM_SLOTS - 1, final_slot, -1):
|
||||
id, s = read_slot(i, mem)
|
||||
if id < 0:
|
||||
break
|
||||
|
||||
# Race protection: the other side might have clobbered the
|
||||
# data after we read the ID, make sure it hasn't changed.
|
||||
id_check = read_slot(i, mem)[0]
|
||||
if id_check != id:
|
||||
break
|
||||
|
||||
if ((id + 1) & 0xffff) == last_id:
|
||||
msg = s + msg
|
||||
last_id = id
|
||||
|
||||
# If we were unable to read forward from slot zero, but could read
|
||||
# backward, then this is a wrapped buffer being currently updated
|
||||
# into slot zero. See comment below.
|
||||
if final_slot == start_slot and last_id != id0:
|
||||
return None
|
||||
|
||||
return ((final_slot + 1) % NUM_SLOTS, (final_id + 1) & 0xffff, msg)
|
||||
|
||||
# Returns a tuple containing the next slot to expect data in, the ID
|
||||
# that slot should hold, and the full string history of trace data
|
||||
# from the buffer. Start with slot zero (which is always part of the
|
||||
# current string if there is any data at all) and scan forward and
|
||||
# back to find the maximum extent.
|
||||
def trace_history():
|
||||
# This loop is a race protection for the situation where the
|
||||
# buffer has wrapped and new data is currently being placed into
|
||||
# slot zero. In those circumstances, slot zero will have a valid
|
||||
# magic number but its sequence ID will not correlate with the
|
||||
# previous and next slots.
|
||||
ret = None
|
||||
while ret is None:
|
||||
ret = read_hist(0)
|
||||
if ret is None:
|
||||
ret = read_hist(1)
|
||||
return ret
|
||||
|
||||
|
||||
# Loop, reading the next slot if it has new data. Otherwise check the
|
||||
# full buffer and see if history is discontiguous (i.e. what is in the
|
||||
# buffer should be a proper suffix of what we have stored). If it
|
||||
# doesn't match, then just print it (it's a reboot or a ring buffer
|
||||
# overrun). If nothing has changed, then sleep a bit and keep
|
||||
# polling.
|
||||
def main():
|
||||
next_slot, next_id, last_hist = trace_history()
|
||||
sys.stdout.write(last_hist)
|
||||
while True:
|
||||
id, smsg = read_slot(next_slot, mem)
|
||||
|
||||
if id == next_id:
|
||||
next_slot = int((next_slot + 1) % NUM_SLOTS)
|
||||
next_id = (id + 1) & 0xffff
|
||||
last_hist += smsg
|
||||
sys.stdout.write(smsg)
|
||||
else:
|
||||
slot2, id2, msg2 = trace_history()
|
||||
|
||||
# Device reset:
|
||||
if slot2 == 0 and id2 == 0 and msg2 == "":
|
||||
next_id = 1
|
||||
next_slot = slot2
|
||||
last_hist = ""
|
||||
|
||||
if not last_hist.endswith(msg2):
|
||||
# On a mismatch, go back and check one last time to
|
||||
# address the race where a new slot wasn't present
|
||||
# just JUST THEN but is NOW.
|
||||
id3, s3 = read_slot(next_slot, mem)
|
||||
if id3 == next_id:
|
||||
next_slot = int((next_slot + 1) % NUM_SLOTS)
|
||||
next_id = (next_id + 1) & 0xffff
|
||||
last_hist += s3
|
||||
sys.stdout.write(s3)
|
||||
continue
|
||||
|
||||
# Otherwise it represents discontiguous data, either a
|
||||
# reset of an overrun, just dump what we have and
|
||||
# start over.
|
||||
next_slot = slot2
|
||||
last_hist = msg2
|
||||
sys.stdout.write(msg2)
|
||||
else:
|
||||
sys.stdout.flush()
|
||||
time.sleep(0.10)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
@ -56,7 +56,7 @@ static __aligned(64) union {
|
|||
uint32_t cache_pad[16];
|
||||
} data_rec;
|
||||
|
||||
#define data ((struct metadata *)UNCACHED_PTR(&data_rec.meta))
|
||||
#define data ((volatile struct metadata *)UNCACHED_PTR(&data_rec.meta))
|
||||
|
||||
static inline struct slot *slot(int i)
|
||||
{
|
||||
|
@ -65,9 +65,18 @@ static inline struct slot *slot(int i)
|
|||
return &slots[i];
|
||||
}
|
||||
|
||||
static int slot_incr(int s)
|
||||
{
|
||||
return (s + 1) % NSLOTS;
|
||||
}
|
||||
|
||||
void intel_adsp_trace_out(int8_t *str, size_t len)
|
||||
{
|
||||
k_spinlock_key_t key = k_spin_lock(&data->lock);
|
||||
if (len == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
k_spinlock_key_t key = k_spin_lock((void *)&data->lock);
|
||||
|
||||
if (!data->initialized) {
|
||||
slot(0)->hdr.magic = 0;
|
||||
|
@ -77,8 +86,8 @@ void intel_adsp_trace_out(int8_t *str, size_t len)
|
|||
}
|
||||
|
||||
/* We work with a local copy of the global data for
|
||||
* performance reasons (*data is uncached!) and put it back at
|
||||
* the end.
|
||||
* performance reasons (The memory behind the "data" pointer
|
||||
* is uncached and volatile!) and put it back at the end.
|
||||
*/
|
||||
uint32_t curr_slot = data->curr_slot;
|
||||
uint32_t n_bytes = data->n_bytes;
|
||||
|
@ -89,22 +98,40 @@ void intel_adsp_trace_out(int8_t *str, size_t len)
|
|||
|
||||
s->msg[n_bytes++] = c;
|
||||
|
||||
/* Are we done with this slot? Terminate it and flag
|
||||
* it for consumption on the other side
|
||||
*/
|
||||
if (c == '\n' || n_bytes >= MSGSZ) {
|
||||
curr_slot = (curr_slot + 1) % NSLOTS;
|
||||
n_bytes = 0;
|
||||
slot(curr_slot)->hdr.magic = 0;
|
||||
slot(curr_slot)->hdr.id = s->hdr.id + 1;
|
||||
s->hdr.magic = SLOT_MAGIC;
|
||||
}
|
||||
}
|
||||
if (n_bytes < MSGSZ) {
|
||||
s->msg[n_bytes] = 0;
|
||||
}
|
||||
|
||||
if (n_bytes < MSGSZ) {
|
||||
slot(curr_slot)->msg[n_bytes] = 0;
|
||||
/* Make sure the next slot has a magic number
|
||||
* (so the reader can distinguish between
|
||||
* no-new-data and system-reset), but does NOT
|
||||
* have the correct successor ID (so can never
|
||||
* be picked up as valid data). We'll
|
||||
* increment it later when we terminate that
|
||||
* slot.
|
||||
*/
|
||||
int next_slot = slot_incr(curr_slot);
|
||||
uint16_t new_id = s->hdr.id + 1;
|
||||
|
||||
slot(next_slot)->hdr.id = new_id;
|
||||
slot(next_slot)->hdr.magic = SLOT_MAGIC;
|
||||
slot(next_slot)->msg[0] = 0;
|
||||
|
||||
s->hdr.id = new_id;
|
||||
s->hdr.magic = SLOT_MAGIC;
|
||||
|
||||
curr_slot = next_slot;
|
||||
n_bytes = 0;
|
||||
}
|
||||
}
|
||||
|
||||
data->curr_slot = curr_slot;
|
||||
data->n_bytes = n_bytes;
|
||||
k_spin_unlock(&data->lock, key);
|
||||
k_spin_unlock((void *)&data->lock, key);
|
||||
}
|
||||
|
||||
int arch_printk_char_out(int c)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue