boards/intel_adsp_cavs15: Newer, much simpler firmware load tool
This is a smaller firmware loader with fewer dependencies and much faster operation: + No need for an externally built SOF diag_driver, it gets its DMA memory in userspace and works with any unmodified kernel (that has hugetlbfs anyway) + Does not leak kernel memory on failure (diag_driver was basically a front end for kmalloc(), and if the script exited early...) + Much smaller: 230 lines of Python in one file vs. 1600 in nine. + Much faster; no needless operations and sleeping steps. Completes load and launches hello_world in 0.2s of real time. + Correctly resets the stream state and can actually recover from the "wedged DSP" state that the previous loader would sometimes get stuck in. + Clearer structure, easier to use as a testbed for driver-side interaction. Signed-off-by: Andy Ross <andrew.j.ross@intel.com>
This commit is contained in:
parent
10ee29e947
commit
9073db4323
1 changed files with 242 additions and 0 deletions
242
boards/xtensa/intel_adsp_cavs15/tools/cavs-fw.py
Executable file
242
boards/xtensa/intel_adsp_cavs15/tools/cavs-fw.py
Executable file
|
@ -0,0 +1,242 @@
|
|||
#!/usr/bin/env python3
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# Copyright(c) 2021 Intel Corporation. All rights reserved.
|
||||
|
||||
import ctypes
|
||||
import mmap
|
||||
import os
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
# Intel Audio DSP firmware loader. No dependencies on anything
|
||||
# outside this file beyond Python3 builtins. Assumes the host system
|
||||
# has a hugetlbfs mounted at /dev/hugepages.  Confirmed to run out of
|
||||
# the box on Ubuntu 18.04 and 20.04. Run as root with the firmware
|
||||
# file as the single argument.
|
||||
|
||||
# Firmware image path taken from the command line, given either as
# "<file>" or as "-f <file>".  Parsed defensively so that a missing
# argument yields a usage message instead of a bare IndexError, and so
# that importing this file as a module never fails: FW_FILE is simply
# None when no path was supplied.
_cli_args = sys.argv[1:]
if _cli_args[:1] == ["-f"]:
    _cli_args = _cli_args[1:]
FW_FILE = _cli_args[0] if _cli_args else None
if FW_FILE is None and __name__ == "__main__":
    sys.exit(f"Usage: {sys.argv[0]} [-f] <firmware-file>")

PAGESZ = 4096                   # bytes per page frame in a pagemap entry
HUGEPAGESZ = 2 * 1024 * 1024    # size of the single huge page used for DMA
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp"

# Register bit definitions
HDA_PPCTL__GPROCEN = 1 << 30
HDA_SD_CTL__TRAFFIC_PRIO = 1 << 18
HDA_SD_CTL__START = 1 << 1
|
||||
|
||||
def _poll(cond, what, timeout=5.0):
    """Busy-wait until cond() returns a true value.

    Spins without sleeping so the register access pattern stays the
    same as a plain ``while`` loop, but gives up after ``timeout``
    seconds so that unresponsive hardware cannot hang the script
    forever.

    Raises:
        TimeoutError: naming ``what`` if the condition never came true.
    """
    deadline = time.monotonic() + timeout
    while not cond():
        if time.monotonic() > deadline:
            raise TimeoutError(f"Timed out waiting for {what}")


def main():
    """Load the firmware image named by FW_FILE into the DSP and start it.

    Sequence: power down the DSP cores, reset the HDA controller, power
    the cores back up, wait for the ROM, tell it over IPC which DMA
    stream to read, stream the image, then stop and reset the stream.

    Raises:
        TimeoutError: if any hardware handshake step does not complete.
    """
    with open(FW_FILE, "rb") as f:
        fw_bytes = f.read()

    (hda, sd, dsp) = map_regs()  # Device register mappings

    # Turn on HDA "global processing enable" first, which actually
    # means "enable access to the ADSP registers in PCI BAR 4" (!)
    hda.PPCTL |= HDA_PPCTL__GPROCEN

    # Turn off the DSP CPUs.  Each byte of ADSPCS is a bitmask for each
    # of 1-8 DSP cores: lowest byte controls "stall", the second byte
    # engages "reset", the third controls power, and the highest byte
    # is the output state for "powered" to be read after a state
    # change.  Set stall and reset, and turn off power for everything:
    dsp.ADSPCS = 0xffff
    _poll(lambda: (dsp.ADSPCS & 0xff000000) == 0,
          "DSP cores to power down")

    # Reset the HDA device
    hda.GCTL = 0
    _poll(lambda: (hda.GCTL & 1) == 0, "HDA controller to enter reset")
    hda.GCTL = 1
    _poll(lambda: (hda.GCTL & 1) != 0, "HDA controller to leave reset")

    # Power up (and clear stall and reset on) all the cores on the DSP
    # and wait for CPU0 to show that it has power
    dsp.ADSPCS = 0xff0000
    _poll(lambda: (dsp.ADSPCS & 0x1000000) != 0, "DSP core 0 to power up")

    # Wait for the ROM to boot and signal it's ready.  This short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    time.sleep(0.01)
    _poll(lambda: (dsp.SRAM_FW_STATUS >> 24) == 5, "DSP ROM ready status")

    # Send the DSP an IPC message to tell the device how to boot
    # ("PURGE_FW" means "load new code") and which DMA channel to use.
    # The high bit is the "BUSY" signal bit that latches a device
    # interrupt.
    dsp.HIPCI = ((1 << 31)                  # BUSY bit
                 | (0x01 << 24)             # type = PURGE_FW
                 | (1 << 14)                # purge_fw = 1
                 | (hda_ostream_id << 9))   # dma_id

    # Configure our DMA stream to transfer the firmware image
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1

    # Enable "processing" on the output stream (send DMA to the DSP
    # and not the audio output hardware)
    hda.PPCTL |= (HDA_PPCTL__GPROCEN | (1 << hda_ostream_id))

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SD_SPIB = len(fw_bytes)
    hda.SPBFCTL |= (1 << hda_ostream_id)

    # Uncork the stream
    sd.CTL |= HDA_SD_CTL__START

    # FIXME: The ROM sets a FW_ENTERED value of 5 into the bottom 28
    # bit "state" field of FW_STATUS on entry to the app.  But this is
    # actually ephemeral and racy, because Zephyr is free to write its
    # own data once the app launches and we might miss it.  There's no
    # standard "alive" signaling from the OS, which is really what we
    # want to wait for.  So give it one second and move on.
    state_mask = (1 << 28) - 1
    for _ in range(100):
        if (dsp.SRAM_FW_STATUS & state_mask) == 5:
            break
        time.sleep(0.01)
    else:
        print(f"Load failed? FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")

    # Turn DMA off and reset the stream.  If this doesn't happen the
    # hardware continues streaming out of our now-stale page and has
    # been observed to glitch the next boot.
    sd.CTL &= ~HDA_SD_CTL__START
    sd.CTL |= 1
|
||||
|
||||
def map_regs():
    """Locate the audio device and map its register blocks.

    Searches sysfs for the first PCI device whose ID is in the list
    below, maps BAR 0 (HD Audio) and BAR 4 (DSP), and builds the
    register accessors.  Also sets the module-level ``hda_ostream_id``
    to the index of the first output stream.

    Returns:
        Tuple ``(hda, sd, dsp)`` of frozen Regs objects: the HDA global
        registers, the stream descriptor of the first output stream,
        and the DSP registers.

    Raises:
        RuntimeError: if no device with a listed PCI ID is present.
    """
    pcidir = None
    # List cribbed from kernel SOF driver.  Not all tested!
    for dev_id in ["119a", "5a98", "1a98", "3198", "9dc8",
                   "a348", "34C8", "38c8", "4dc8", "02c8",
                   "06c8", "a3f0", "a0c8", "4b55", "4b58"]:
        found = runx(f"grep -il PCI_ID=8086:{dev_id} "
                     "/sys/bus/pci/devices/*/uevent")
        if found:
            # grep prints one path per line; use only the first so a
            # second match cannot corrupt the directory name.
            pcidir = os.path.dirname(found.splitlines()[0])
            break
    if pcidir is None:
        raise RuntimeError("No supported Intel audio DSP PCI device found")

    hdamem = bar_map(pcidir, 0)

    # Standard HD Audio Registers
    hda = Regs(hdamem)
    hda.GCAP = 0x0000
    hda.GCTL = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL = 0x0804

    # The first output stream's index is taken to be the number of
    # input streams reported in GCAP.
    global hda_ostream_id
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f  # number of input streams
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)

    hda.freeze()

    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL = 0x00
    sd.LPIB = 0x04
    sd.CBL = 0x08
    sd.LVI = 0x0c
    sd.FMT = 0x12
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    dsp = Regs(bar_map(pcidir, 4))
    dsp.ADSPCS = 0x00004
    dsp.HIPCI = 0x00048
    dsp.SRAM_FW_STATUS = 0x80000  # Start of first SRAM window
    dsp.freeze()

    return (hda, sd, dsp)
|
||||
|
||||
def setup_dma_mem(fw_bytes):
    """Copy the firmware into DMA memory and build its buffer list.

    Args:
        fw_bytes: the firmware image as a bytes-like object.

    Returns:
        Tuple ``(bdl_phys_addr, num_bufs)``: the physical address of
        the buffer descriptor list and the number of entries in it.

    Raises:
        ValueError: if the image does not fit below the descriptor
            list inside the single huge page.
    """
    # HDA requires at least two buffers be defined, but we don't care
    # about boundaries because it's all a contiguous region.  Place a
    # vestigial 128-byte (minimum size and alignment) buffer after the
    # main one, and put the two-entry BDL into the final 128 bytes of
    # the page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len

    # Refuse an image that would be overwritten by the BDL (or would
    # not fit in the page at all) before allocating anything.
    if len(fw_bytes) > bdl_off:
        raise ValueError(
            f"Firmware image is {len(fw_bytes)} bytes; "
            f"at most {bdl_off} bytes fit in the DMA page")

    (mem, phys_addr) = map_phys_mem()
    mem[0:len(fw_bytes)] = fw_bytes

    # Each entry is packed as two little-endian 64-bit words: buffer
    # physical address, then length.  The upper half of the length
    # word is left zero (presumably the descriptor's flags field --
    # confirm against the HDA specification).
    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    return (phys_addr + bdl_off, 2)
|
||||
|
||||
global_mmaps = []  # protect mmap mappings from garbage collection!


# Maps 2M of contiguous memory using a single page from hugetlbfs,
# then locates its physical address for use as a DMA buffer.
def map_phys_mem():
    """Map one huge page and return it with its physical address.

    Returns:
        Tuple ``(mem, paddr)``: the writable mmap object covering
        HUGEPAGESZ bytes and the physical address of its first byte.

    Raises:
        RuntimeError: if the kernel does not report a resident page
            with a usable frame number for the mapping.
    """
    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    # The mapping stays valid after the file is closed and unlinked,
    # so drop both as soon as the mapping exists.  Unlink even when
    # truncate/mmap fails so no stale temp file is left behind.
    with open(HUGEPAGE_FILE, "w+") as hugef:
        try:
            hugef.truncate(HUGEPAGESZ)
            mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
        finally:
            os.unlink(HUGEPAGE_FILE)
    global_mmaps.append(mem)

    mem[0] = 0  # Fault the page in so it occupies real memory!

    # Find the local process address of the mapping, then use that to
    # extract the physical address from the kernel's pagemap
    # interface (one 8-byte entry per PAGESZ-sized virtual page).
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr // PAGESZ
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8)
        pent = pagemap.read(8)
    if len(pent) != 8:
        raise RuntimeError("Short read from /proc/self/pagemap")
    entry = struct.unpack("Q", pent)[0]

    # Bit 63 is "page present"; the PFN occupies bits 0-54.  The
    # kernel reports a PFN of zero to callers without sufficient
    # privilege, which must never be handed to the DMA engine.
    present = (entry >> 63) & 1
    pfn = entry & ((1 << 55) - 1)
    if not present or pfn == 0:
        raise RuntimeError(
            "Cannot determine physical address of DMA page "
            f"(pagemap entry 0x{entry:x}); are you running as root?")

    return (mem, pfn * PAGESZ)
|
||||
|
||||
# Maps a PCI BAR and returns the in-process address
def bar_map(pcidir, barnum):
    """Map a PCI BAR through its sysfs resource file.

    Args:
        pcidir: the device's sysfs directory, as bytes or str.
        barnum: BAR index; the file mapped is ``resource<barnum>``.

    Returns:
        The integer address of the mapping within this process.  The
        mmap object is kept in ``global_mmaps`` so the address stays
        valid.
    """
    path = os.path.join(os.fsdecode(pcidir), "resource" + str(barnum))
    # The mapping outlives the file object, so it can be closed as
    # soon as mmap() has returned.
    with open(path, "r+") as f:
        mm = mmap.mmap(f.fileno(), os.fstat(f.fileno()).st_size)
    global_mmaps.append(mm)
    return ctypes.addressof(ctypes.c_int.from_buffer(mm))
|
||||
|
||||
# Syntactic sugar to make register block definition & use look nice.
# Instantiate from a base address, assign offsets to (uint32) named
# registers as fields, call freeze(), then the field acts as a direct
# alias for the register!
class Regs:
    """A block of named 32-bit registers at a raw memory address.

    Before freeze(), assigning to a new attribute name *defines* a
    register at ``base_addr + value``.  Assigning to an already-defined
    name, or any assignment after freeze(), writes the value to the
    register.  Reading an attribute reads the register.
    """

    def __init__(self, base_addr):
        # Go through vars() so that setting up our own bookkeeping does
        # not get routed through __setattr__.
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        """End the definition phase; later assignments are writes."""
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        if not self.frozen and name not in self.ptrs:
            # Definition: val is the register's byte offset.
            addr = self.base_addr + val
            self.ptrs[name] = ctypes.c_uint32.from_address(addr)
        elif name in self.ptrs:
            # Write: val is the new register content.
            self.ptrs[name].value = val
        else:
            raise AttributeError(
                f"{type(self).__name__} has no register {name!r}")

    def __getattr__(self, name):
        # Only reached for names not found in the instance dict.  Look
        # "ptrs" up via vars() so a half-constructed instance cannot
        # recurse, and raise AttributeError (not KeyError) so that
        # hasattr() and getattr() with a default behave normally.
        try:
            return vars(self)["ptrs"][name].value
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__} has no register {name!r}") from None
|
||||
|
||||
def runx(cmd):
    """Run ``cmd`` through ``sh -c`` and return its stdout as bytes.

    The child is waited for and its pipe closed before returning.  The
    exit status is deliberately ignored: callers treat empty output as
    "nothing found" (e.g. grep exits non-zero when it matches nothing).
    """
    return subprocess.run(["sh", "-c", cmd], stdout=subprocess.PIPE).stdout


if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue