diff --git a/boards/xtensa/intel_adsp_cavs15/tools/adsplog.py b/boards/xtensa/intel_adsp_cavs15/tools/adsplog.py index 8df555cf8e5..d099aa0dc48 100755 --- a/boards/xtensa/intel_adsp_cavs15/tools/adsplog.py +++ b/boards/xtensa/intel_adsp_cavs15/tools/adsplog.py @@ -3,83 +3,212 @@ # Copyright (c) 2020 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import os import sys import time -import struct import subprocess import mmap +# Log reader for the trace output buffer on a ADSP device. +# +# The trace buffer is inside a shared memory region exposed by the +# audio PCI device as a BAR at index 4. The hardware provides 4 128k +# "windows" starting at 512kb in the BAR which the DSP firmware can +# map to 4k-aligned locations within its own address space. By +# protocol convention log output is an 8k region at window index 3. +# +# The 8k window is treated as an array of 64-byte "slots", each of +# which is prefixed by a magic number, which should be 0x55aa for log +# data, followed a 16 bit "ID" number, followed by a null-terminated +# string in the final 60 bytes (or 60 non-null bytes of log data). +# The DSP firmware will write sequential IDs into the buffer starting +# from an ID of zero in the first slot, and wrapping at the end. + MAP_SIZE = 8192 SLOT_SIZE = 64 - -# Location of the log output window within the DSP BAR on the PCI -# device. The hardware provides 4x 128k "windows" starting at 512kb -# in the BAR which the DSP software can map to 4k-aligned locations -# within its own address space. By convention log output is an 8k -# region at window index 3. +NUM_SLOTS = int(MAP_SIZE / SLOT_SIZE) +SLOT_MAGIC = 0x55aa WIN_OFFSET = 0x80000 -WIN_ID = 3 +WIN_IDX = 3 WIN_SIZE = 0x20000 -LOG_OFFSET = WIN_OFFSET + WIN_ID * WIN_SIZE +LOG_OFFSET = WIN_OFFSET + WIN_IDX * WIN_SIZE -# Find me a way to do this detection as cleanly in python as shell, I -# dare you. -barfile = subprocess.Popen(["sh", "-c", - "echo -n " - "$(dirname " - " $(fgrep PCI_ID=8086:5A98 " - " /sys/bus/pci/devices/*/uevent))" - "/resource4"], - stdout=subprocess.PIPE).stdout.read() -fd = open(barfile) -mem = mmap.mmap(fd.fileno(), MAP_SIZE, offset=LOG_OFFSET, - prot=mmap.PROT_READ) +# List of known ADSP devices by their PCI IDs +DEVICES = ["8086:5a98"] -# The mapping is an array of 64-byte "slots", each of which is -# prefixed by a magic number, which should be 0x55aa for log data, -# followed a 16 bit "ID" number, followed by a null-terminated string -# in the final 60 bytes. The DSP firmware will write sequential IDs -# into the buffer starting from an ID of zero in the first slot, and -# wrapping at the end. So the algorithm here is to find the smallest -# valid slot, print its data, and then enter a polling loop waiting -# for the next slot to be valid and have the correct next ID before -# printing that too. +mem = None -# NOTE: unfortunately there's no easy way to detect a warm reset of -# the device, it will just jump back to the beginning and start -# writing there, where we aren't looking. Really that level of -# robustness needs to be handled in the kernel. +for dev in DEVICES: + # Find me a way to do this detection as cleanly in python as shell, I + # dare you. + barfile = subprocess.Popen(["sh", "-c", + "echo -n " + "$(dirname " + f" $(fgrep PCI_ID={dev.upper()} " + " /sys/bus/pci/devices/*/uevent))" + "/resource4"], + stdout=subprocess.PIPE).stdout.read() + if not os.path.exists(barfile): + continue -next_slot = 0 -next_id = 0xffff + if not os.access(barfile, os.R_OK): + sys.stderr.write(f"ERROR: Cannot open {barfile} for reading.") + sys.exit(1) -for slot in range(int(MAP_SIZE / SLOT_SIZE)): + fd = open(barfile) + mem = mmap.mmap(fd.fileno(), MAP_SIZE, offset=LOG_OFFSET, + prot=mmap.PROT_READ) + +if mem is None: + sys.stderr.write("ERROR: No ADSP device found.") + sys.exit(1) + +# Returns a tuple of (id, msg) if the slot is valid, or (-1, "") if +# the slot does not contain firmware trace data +def read_slot(slot, mem): off = slot * SLOT_SIZE - (magic, sid) = struct.unpack("HH", mem[off:off+4]) - if magic == 0x55aa: - if sid < next_id: - next_slot = slot - next_id = sid -while True: - off = next_slot * SLOT_SIZE - (magic, sid) = struct.unpack("HH", mem[off:off+4]) - if magic == 0x55aa and sid == next_id: - # This dance because indexing large variable-length slices of - # the mmap() array seems to produce garbage.... - msgbytes = [] - for i in range(4, SLOT_SIZE): - b = mem[off+i] - if b == 0: - break - msgbytes.append(b) - msg = bytearray(len(msgbytes)) - for i, elem in enumerate(msgbytes): - msg[i] = elem + magic = (mem[off + 1] << 8) | mem[off] + sid = (mem[off + 3] << 8) | mem[off + 2] - sys.stdout.write(msg.decode(encoding="utf-8", errors="ignore")) - next_slot = int((next_slot + 1) % (MAP_SIZE / SLOT_SIZE)) - next_id += 1 - else: + if magic != SLOT_MAGIC: + return (-1, "") + + # This dance because indexing large variable-length slices of + # the mmap() array seems to produce garbage.... + msgbytes = [] + for i in range(4, SLOT_SIZE): + b = mem[off+i] + if b == 0: + break + msgbytes.append(b) + msg = bytearray(len(msgbytes)) + for i, elem in enumerate(msgbytes): + msg[i] = elem + + return (sid, msg.decode(encoding="utf-8", errors="ignore")) + +def read_hist(start_slot): + id0, msg = read_slot(start_slot, mem) + + # An invalid slot zero means no data has ever been placed in the + # trace buffer, which is likely a system reset condition. Back + # off for one second, because continuing to read the buffer has + # been observed to hang the flash process (which I think can only + # be a hardware bug). + if start_slot == 0 and id0 < 0: + sys.stdout.write("===\n=== [ADSP Device Reset]\n===\n") sys.stdout.flush() - time.sleep(0.25) + time.sleep(1) + return (0, 0, "") + + # Start at zero and read forward to get the last data in the + # buffer. We are always guaranteed that slot zero will contain + # valid data if any slot contains valid data. + last_id = id0 + final_slot = start_slot + for i in range(start_slot + 1, NUM_SLOTS): + id, s = read_slot(i, mem) + if id != ((last_id + 1) & 0xffff): + break + msg += s + final_slot = i + last_id = id + + final_id = last_id + + # Now read backwards from the end to get the prefix blocks from + # the last wraparound + last_id = id0 + for i in range(NUM_SLOTS - 1, final_slot, -1): + id, s = read_slot(i, mem) + if id < 0: + break + + # Race protection: the other side might have clobbered the + # data after we read the ID, make sure it hasn't changed. + id_check = read_slot(i, mem)[0] + if id_check != id: + break + + if ((id + 1) & 0xffff) == last_id: + msg = s + msg + last_id = id + + # If we were unable to read forward from slot zero, but could read + # backward, then this is a wrapped buffer being currently updated + # into slot zero. See comment below. + if final_slot == start_slot and last_id != id0: + return None + + return ((final_slot + 1) % NUM_SLOTS, (final_id + 1) & 0xffff, msg) + +# Returns a tuple containing the next slot to expect data in, the ID +# that slot should hold, and the full string history of trace data +# from the buffer. Start with slot zero (which is always part of the +# current string if there is any data at all) and scan forward and +# back to find the maximum extent. +def trace_history(): + # This loop is a race protection for the situation where the + # buffer has wrapped and new data is currently being placed into + # slot zero. In those circumstances, slot zero will have a valid + # magic number but its sequence ID will not correlate with the + # previous and next slots. + ret = None + while ret is None: + ret = read_hist(0) + if ret is None: + ret = read_hist(1) + return ret + + +# Loop, reading the next slot if it has new data. Otherwise check the +# full buffer and see if history is discontiguous (i.e. what is in the +# buffer should be a proper suffix of what we have stored). If it +# doesn't match, then just print it (it's a reboot or a ring buffer +# overrun). If nothing has changed, then sleep a bit and keep +# polling. +def main(): + next_slot, next_id, last_hist = trace_history() + sys.stdout.write(last_hist) + while True: + id, smsg = read_slot(next_slot, mem) + + if id == next_id: + next_slot = int((next_slot + 1) % NUM_SLOTS) + next_id = (id + 1) & 0xffff + last_hist += smsg + sys.stdout.write(smsg) + else: + slot2, id2, msg2 = trace_history() + + # Device reset: + if slot2 == 0 and id2 == 0 and msg2 == "": + next_id = 1 + next_slot = slot2 + last_hist = "" + + if not last_hist.endswith(msg2): + # On a mismatch, go back and check one last time to + # address the race where a new slot wasn't present + # just JUST THEN but is NOW. + id3, s3 = read_slot(next_slot, mem) + if id3 == next_id: + next_slot = int((next_slot + 1) % NUM_SLOTS) + next_id = (next_id + 1) & 0xffff + last_hist += s3 + sys.stdout.write(s3) + continue + + # Otherwise it represents discontiguous data, either a + # reset of an overrun, just dump what we have and + # start over. + next_slot = slot2 + last_hist = msg2 + sys.stdout.write(msg2) + else: + sys.stdout.flush() + time.sleep(0.10) + +if __name__ == "__main__": + main() diff --git a/soc/xtensa/intel_adsp/common/trace_out.c b/soc/xtensa/intel_adsp/common/trace_out.c index 3760268ce0b..4edc18cacb6 100644 --- a/soc/xtensa/intel_adsp/common/trace_out.c +++ b/soc/xtensa/intel_adsp/common/trace_out.c @@ -56,7 +56,7 @@ static __aligned(64) union { uint32_t cache_pad[16]; } data_rec; -#define data ((struct metadata *)UNCACHED_PTR(&data_rec.meta)) +#define data ((volatile struct metadata *)UNCACHED_PTR(&data_rec.meta)) static inline struct slot *slot(int i) { @@ -65,9 +65,18 @@ static inline struct slot *slot(int i) return &slots[i]; } +static int slot_incr(int s) +{ + return (s + 1) % NSLOTS; +} + void intel_adsp_trace_out(int8_t *str, size_t len) { - k_spinlock_key_t key = k_spin_lock(&data->lock); + if (len == 0) { + return; + } + + k_spinlock_key_t key = k_spin_lock((void *)&data->lock); if (!data->initialized) { slot(0)->hdr.magic = 0; @@ -77,8 +86,8 @@ void intel_adsp_trace_out(int8_t *str, size_t len) } /* We work with a local copy of the global data for - * performance reasons (*data is uncached!) and put it back at - * the end. + * performance reasons (The memory behind the "data" pointer + * is uncached and volatile!) and put it back at the end. */ uint32_t curr_slot = data->curr_slot; uint32_t n_bytes = data->n_bytes; @@ -89,22 +98,40 @@ void intel_adsp_trace_out(int8_t *str, size_t len) s->msg[n_bytes++] = c; + /* Are we done with this slot? Terminate it and flag + * it for consumption on the other side + */ if (c == '\n' || n_bytes >= MSGSZ) { - curr_slot = (curr_slot + 1) % NSLOTS; - n_bytes = 0; - slot(curr_slot)->hdr.magic = 0; - slot(curr_slot)->hdr.id = s->hdr.id + 1; - s->hdr.magic = SLOT_MAGIC; - } - } + if (n_bytes < MSGSZ) { + s->msg[n_bytes] = 0; + } - if (n_bytes < MSGSZ) { - slot(curr_slot)->msg[n_bytes] = 0; + /* Make sure the next slot has a magic number + * (so the reader can distinguish between + * no-new-data and system-reset), but does NOT + * have the correct successor ID (so can never + * be picked up as valid data). We'll + * increment it later when we terminate that + * slot. + */ + int next_slot = slot_incr(curr_slot); + uint16_t new_id = s->hdr.id + 1; + + slot(next_slot)->hdr.id = new_id; + slot(next_slot)->hdr.magic = SLOT_MAGIC; + slot(next_slot)->msg[0] = 0; + + s->hdr.id = new_id; + s->hdr.magic = SLOT_MAGIC; + + curr_slot = next_slot; + n_bytes = 0; + } } data->curr_slot = curr_slot; data->n_bytes = n_bytes; - k_spin_unlock(&data->lock, key); + k_spin_unlock((void *)&data->lock, key); } int arch_printk_char_out(int c)