soc/intel_adsp: Add unified "cavstool" loader/logger script

The existing scripting for these platforms has gotten a little stale.
The loader had bifurcated into a v15 and v25 variant, both of which
lived in the cavs15 board directory.  Building off Shao Ming's
excellent (if somewhat surprisingly committed) rework to unify
unchanged parts of the scripts, let's finish the job.

This adds a "cavstool.py" script with the following advantages:

+ It's just one script for everything, with a single unified load
  process that works reliably on both 1.5 and 1.8+ hardware.

+ It runs on all cAVS platforms (with a compatible kernel, those
  requirements haven't changed)

+ It automatically emits logging synchronously after loading,
  eliminating the race between adsplog.py and cavs-fw.py where you
  could see logging from a previous test run.

+ It automatically detects and unloads a linux kernel module managing
  the same device (even if SOF has renamed the module again, heh).

+ Timings have been tuned up in general, it's about 2 seconds faster
  to get to first log output now.

Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
This commit is contained in:
Andy Ross 2022-01-13 07:37:29 -08:00 committed by Anas Nashif
commit f581d59821

View file

@ -0,0 +1,300 @@
#!/usr/bin/env python3
# Copyright(c) 2022 Intel Corporation. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
import os
import sys
import struct
import logging
import asyncio
import time
import subprocess
import ctypes
import mmap
# Use the logging module's default configuration and report INFO and above
# through the "cavs-fw" logger used throughout this script.
logging.basicConfig()
log = logging.getLogger("cavs-fw")
log.setLevel(logging.INFO)
# Page size used to turn a pagemap page frame number into a physical address
PAGESZ = 4096
# Size of the single huge page mapped as the firmware DMA buffer
HUGEPAGESZ = 2 * 1024 * 1024
# Temporary hugetlbfs file backing that mapping (unlinked once it is mapped)
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp"
# Log is in the fourth window, they appear in 128k regions starting at 512k
WINSTREAM_OFFSET = (512 + (3 * 128)) * 1024
def map_regs():
    """Locate the audio DSP PCI device and build its register maps.

    Side effects: unloads any kernel module currently bound to the device,
    forces runtime power management to "on", rewrites the PCI command
    register, and stores the BAR4 mmap object in the global ``bar4_mmap``.

    Returns:
        A tuple ``(hda, sd, dsp, hda_ostream_id, cavs15)``: the HD Audio
        global registers, the stream descriptor registers of the first
        output stream, the DSP registers in BAR4, the index of that output
        stream, and whether the device ID is one of the cAVS 1.5 IDs.

    Raises:
        subprocess.CalledProcessError: if grep finds no matching device or
            rmmod fails.
    """
    # grep -l prints the path of the uevent file whose PCI class matches
    p = runx("grep -iPl 'PCI_CLASS=40(10|38)0' /sys/bus/pci/devices/*/uevent")
    pcidir = os.path.dirname(p)

    # Read the device ID inside a context manager so the sysfs handle is
    # closed promptly instead of being left to the garbage collector
    with open(f"{pcidir}/device") as devf:
        cavs15 = devf.read().rstrip() in ("0x5a98", "0x1a98", "0x3198")

    # Check sysfs for a loaded driver and remove it
    if os.path.exists(f"{pcidir}/driver"):
        mod = os.path.basename(os.readlink(f"{pcidir}/driver/module"))
        log.warning(f"Existing driver found! Unloading \"{mod}\" module")
        runx(f"rmmod {mod}")

    # Disengage runtime power management so the kernel doesn't put it to sleep
    with open(f"{pcidir}/power/control", "w") as ctrl:
        ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(f"{pcidir}/config", "wb+") as cfg:
        cfg.seek(4)  # offset 4 in config space
        cfg.write(b'\x06\x04')

    # Standard HD Audio Registers.  Before freeze(), assigning to a Regs
    # field defines the register's byte offset rather than writing to it.
    (hdamem, _) = bar_map(pcidir, 0)
    hda = Regs(hdamem)
    hda.GCAP = 0x0000
    hda.GCTL = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL = 0x0804

    # Find the ID of the first output stream: this GCAP field is the number
    # of input streams, and that count is used as the output stream's index
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)
    hda.freeze()

    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL = 0x00
    sd.CBL = 0x08
    sd.LVI = 0x0c
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers.  The mmap object itself is kept in a global
    # because win_read() indexes it directly to read the log window.
    global bar4_mmap
    (bar4_mem, bar4_mmap) = bar_map(pcidir, 4)
    dsp = Regs(bar4_mem)
    dsp.ADSPCS = 0x00004
    dsp.HIPCIDR = 0x00048 if cavs15 else 0x000d0
    dsp.SRAM_FW_STATUS = 0x80000  # Start of first SRAM window
    dsp.freeze()

    return (hda, sd, dsp, hda_ostream_id, cavs15)
def setup_dma_mem(fw_bytes):
    """Copy the firmware into a DMA-able huge page and build its BDL.

    Args:
        fw_bytes: firmware image to place at the start of the page.

    Returns:
        A tuple ``(bdl_phys_addr, num_bufs)``: the physical address of the
        buffer descriptor list and the number of buffers it describes (2).

    Raises:
        ValueError: if the image does not fit in front of the descriptor
            list, where it would otherwise be overwritten by it.
    """
    # HDA requires at least two buffers be defined, but we don't care about
    # boundaries because it's all a contiguous region.  Place a vestigial
    # 128-byte (minimum size and alignment) buffer after the main one, and put
    # the descriptor list (two entries, packed below as four 64-bit words)
    # into the final 128 bytes of the page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len

    # Refuse images that would reach into the descriptor list: the BDL write
    # below would silently corrupt the tail of the firmware.
    if len(fw_bytes) > bdl_off:
        raise ValueError("Firmware image is %d bytes, but only %d bytes fit "
                         "in the DMA buffer" % (len(fw_bytes), bdl_off))

    (mem, phys_addr) = map_phys_mem()
    mem[0:len(fw_bytes)] = fw_bytes
    log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware",
             phys_addr, len(fw_bytes))

    # Each entry is a 64-bit address followed by a 64-bit word holding the
    # buffer length, all little-endian.
    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    log.info("Filled the buffer descriptor list (BDL) for DMA.")
    return (phys_addr + bdl_off, 2)
global_mmaps = []  # protect mmap mappings from garbage collection!


# Maps 2M of contiguous memory using a single page from hugetlbfs,
# then locates its physical address for use as a DMA buffer.
def map_phys_mem():
    """Map one 2M huge page and return it with its physical address.

    Mounts hugetlbfs and grows the kernel's huge page pool if needed, so
    this must run with enough privilege to do both and to read page frame
    numbers from /proc/self/pagemap.

    Returns:
        A tuple ``(mem, paddr)``: the mmap object covering the page and the
        physical address of its first byte.

    Raises:
        RuntimeError: if the kernel reports no usable physical page for the
            mapping (a physical address of 0 must never be handed to DMA).
    """
    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + "  mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    # The mapping outlives both the file object and the directory entry, so
    # close the former and always remove the latter, even if mmap() fails.
    with open(HUGEPAGE_FILE, "w+") as hugef:
        try:
            hugef.truncate(HUGEPAGESZ)
            mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
        finally:
            os.unlink(HUGEPAGE_FILE)
    global_mmaps.append(mem)

    # Find the local process address of the mapping, then use that to extract
    # the physical address from the kernel's pagemap interface.  The physical
    # page frame number occupies the bottom bits of the entry.
    mem[0] = 0  # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr >> 12
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8)  # one 8-byte entry per virtual page
        pent = pagemap.read(8)
    entry = struct.unpack("Q", pent)[0]

    # Bit 63 is the "page present" flag and the low 55 bits are the frame
    # number; the kernel zeroes the frame number for unprivileged readers.
    pfn = entry & ((1 << 55) - 1)
    if not (entry >> 63) & 1 or pfn == 0:
        raise RuntimeError("Could not find the physical address of the DMA "
                           "page (pagemap entry 0x%x); are you root?" % entry)
    paddr = pfn * PAGESZ
    return (mem, paddr)
# Maps a PCI BAR and returns the in-process address
def bar_map(pcidir, barnum):
    """Map a PCI BAR through its sysfs resource file.

    Args:
        pcidir: sysfs directory of the PCI device.
        barnum: number of the BAR; selects the ``resource<barnum>`` file.

    Returns:
        A tuple ``(addr, mm)``: the in-process address of the first mapped
        byte and the mmap object itself.  The mapping is also appended to
        ``global_mmaps`` so it is never garbage collected.
    """
    # The mapping stays valid after the file is closed, so don't keep the
    # file object around; query the size once and reuse it for the log.
    with open(pcidir + "/resource" + str(barnum), "r+") as f:
        size = os.fstat(f.fileno()).st_size
        mm = mmap.mmap(f.fileno(), size)
    global_mmaps.append(mm)
    log.info("Mapped PCI bar %d of length %d bytes.", barnum, size)
    return (ctypes.addressof(ctypes.c_int.from_buffer(mm)), mm)
# Syntactic sugar to make register block definition & use look nice.
# Instantiate from a base address, assign offsets to (uint32) named registers as
# fields, call freeze(), then the field acts as a direct alias for the register!
class Regs:
    """Named 32-bit registers laid out relative to a base address.

    Until freeze() is called, the first assignment to a new field name
    defines that register: the assigned value is its byte offset from the
    base address.  Any other assignment writes the value to the register,
    and reading a field reads the register.
    """

    def __init__(self, base_addr):
        """Create an empty register block at in-process address base_addr."""
        # Go through vars() because __setattr__ is reserved for registers
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        """Stop accepting new register definitions."""
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        """Define register ``name`` at offset ``val``, or write ``val`` to it.

        Raises:
            AttributeError: if the block is frozen and ``name`` was never
                defined.
        """
        ptrs = self.ptrs
        if not self.frozen and name not in ptrs:
            addr = self.base_addr + val
            ptrs[name] = ctypes.c_uint32.from_address(addr)
        elif name in ptrs:
            ptrs[name].value = val
        else:
            raise AttributeError(f"no register named {name!r}")

    def __getattr__(self, name):
        """Read register ``name``.

        Raises:
            AttributeError: if no such register is defined, so hasattr() and
                getattr() with a default behave as they do for any object.
        """
        # Look in the instance dict directly: going through self.ptrs here
        # would recurse forever if "ptrs" itself were ever missing.
        try:
            return vars(self)["ptrs"][name].value
        except KeyError:
            raise AttributeError(f"no register named {name!r}") from None
def runx(cmd):
    """Run cmd through the shell and return its standard output.

    The output is decoded to text and trailing whitespace is stripped.
    A non-zero exit status raises subprocess.CalledProcessError.
    """
    raw_output = subprocess.check_output(cmd, shell=True)
    text = raw_output.decode()
    return text.rstrip()
def load_firmware(fw_file):
    """Load a firmware image into the DSP over an HDA output stream.

    Uses the module-level globals hda, sd, dsp, hda_ostream_id and cavs15.
    The hardware polling loops have no timeout, and a failed handoff is
    only reported as a warning.

    Args:
        fw_file: path of the firmware image.  A leading extended manifest
            (magic "XMan") is stripped before the image is transferred.
    """
    # Read the image inside a context manager so the file is closed promptly
    with open(fw_file, "rb") as fwf:
        fw_bytes = fwf.read()

    # Header is a 4-byte magic followed by a little-endian 32-bit size
    (magic, sz) = struct.unpack("<4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30)  # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1:
        pass
    hda.GCTL = 1
    while not hda.GCTL & 1:
        pass

    log.info("Powering down DSP cores")
    dsp.ADSPCS = 0xffff
    while dsp.ADSPCS & 0xff000000:
        pass

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    # Pulse bit 0 of the stream control register: set it, wait for it to
    # read back as set, then clear it and wait for it to read back clear
    sd.CTL = 1
    while (sd.CTL & 1) == 0:
        pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1:
        pass
    sd.CTL = (1 << 20)  # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Start DSP.  Just core 0 on 1.8+ (secondary core startup is handled
    # internally), but on 1.5 the host controls the power levers.
    log.info(f"Starting DSP, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = 0xff0000 if cavs15 else 0x01fefe
    while (dsp.ADSPCS & 0x1000000) == 0:
        pass

    # Wait for the ROM to boot and signal it's ready.  This short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    log.info("Wait for ROM startup")
    time.sleep(0.1)
    while (dsp.SRAM_FW_STATUS >> 24) != 5:
        pass

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).  But with 1.5 it's the HDA index,
    # and depends on the number of input streams on the device.
    stream_idx = hda_ostream_id if cavs15 else 0
    ipcval = ((1 << 31)              # BUSY bit
              | (0x01 << 24)         # type = PURGE_FW
              | (1 << 14)            # purge_fw = 1
              | (stream_idx << 9))   # dma_id
    # Label the value with the name of the register actually written below
    log.info(f"Sending IPC command, HIPCIDR = 0x{ipcval:x}")
    dsp.HIPCIDR = ipcval

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= 2  # START flag

    # Poll for up to about two seconds (200 tries, 10ms apart)
    log.info(f"Waiting for firmware handoff, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    for _ in range(200):
        alive = dsp.SRAM_FW_STATUS & ((1 << 28) - 1) == 5  # "FW_ENTERED"
        if alive:
            break
        time.sleep(0.01)
    if not alive:
        log.warning(f"Load failed? FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")

    # Turn DMA off and reset the stream.  Clearing START first is a noop
    # per the spec, but required for 1.5, and makes the load on 1.8
    # unstable.  Go figure.
    if cavs15:
        sd.CTL &= ~2  # clear START
    sd.CTL |= 1
    log.info("cAVS firmware load complete")
# This SHOULD be just "mem[start:start+length]", but slicing an mmap
# array seems to be unreliable on one of my machines (python 3.6.9 on
# Ubuntu 18.04).  Read out bytes individually.
def win_read(start, length):
    """Return length bytes of the log window, beginning at offset start.

    Offsets are relative to WINSTREAM_OFFSET within the BAR4 mapping, and
    the bytes are fetched one at a time in ascending order.
    """
    first = start + WINSTREAM_OFFSET
    return bytes(bar4_mmap[pos] for pos in range(first, first + length))
def win_hdr():
    """Read the first 16 bytes of the log window as a header.

    Returns a tuple of four little-endian unsigned 32-bit integers, which
    winstream_read() unpacks as (wlen, start, end, seq).
    """
    raw_header = win_read(0, 16)
    fields = struct.unpack("<IIII", raw_header)
    return fields
# Python implementation of the same algorithm in sys_winstream_read(),
# see there for details.
def winstream_read(last_seq):
    """Fetch the log text written since sequence number last_seq.

    Args:
        last_seq: sequence number returned by the previous call (0 at
            startup).

    Returns:
        A tuple ``(seq, text)``: the sequence number to pass to the next
        call, and the new output.  The text is empty when nothing new is
        available or when more was written than the window still holds.
    """
    while True:
        (wlen, start, end, seq) = win_hdr()
        if seq == last_seq or start == end:
            return (seq, "")

        # A zero-length window is not a valid header; the modulo arithmetic
        # below would divide by zero.  Keep last_seq and try again later.
        if wlen == 0:
            return (last_seq, "")

        # Sequence numbers are 32-bit counters, so take the difference
        # modulo 2**32 to stay correct across a wraparound.
        behind = (seq - last_seq) & 0xffffffff
        if behind > ((end - start) % wlen):
            return (seq, "")

        # The data area follows the 16-byte header and is circular: read up
        # to the end of it, then the remainder from its beginning.
        copy = (end - behind) % wlen
        suffix = min(behind, wlen - copy)
        result = win_read(16 + copy, suffix)
        if suffix < behind:
            result += win_read(16, behind - suffix)

        # Accept the data only if the header did not move while we were
        # reading; otherwise go around and read again.
        (_, start1, _, seq1) = win_hdr()
        if start1 == start and seq1 == seq:
            # Best-effort decode: a garbled byte or a multi-byte character
            # split between two reads must not kill the logger.
            return (seq, result.decode("utf-8", "replace"))
async def main():
    """Load the firmware named on the command line, then stream its log.

    Fills in the module-level globals hda, sd, dsp, hda_ostream_id and
    cavs15, loads the firmware, prints a "--" separator line and then polls
    the log window forever, echoing new output to stdout.
    """
    global hda, sd, dsp, hda_ostream_id, cavs15

    # Check the arguments BEFORE touching the hardware: map_regs() unloads
    # the kernel driver and reconfigures the device, which should not happen
    # when there is no firmware to load.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <firmware-file>")

    (hda, sd, dsp, hda_ostream_id, cavs15) = map_regs()
    log.info(f"Detected cAVS {'1.5' if cavs15 else '1.8+'} hardware")
    load_firmware(sys.argv[1])

    sys.stdout.write("--\n")
    last_seq = 0
    while True:
        await asyncio.sleep(0.03)
        (last_seq, output) = winstream_read(last_seq)
        if output:
            sys.stdout.write(output)
            sys.stdout.flush()
if __name__ == "__main__":
    # asyncio.run() is the supported entry point from Python 3.7 on; calling
    # get_event_loop() with no running loop is deprecated in newer releases.
    # Keep the old spelling as a fallback for Python 3.6.
    if hasattr(asyncio, "run"):
        asyncio.run(main())
    else:
        asyncio.get_event_loop().run_until_complete(main())