#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# Copyright(c) 2021 Intel Corporation. All rights reserved.
import ctypes
import mmap
import os
import struct
import subprocess
import sys
import time
import logging

# Intel Audio DSP firmware loader. No dependencies on anything
# outside this file beyond Python3 builtins. Pass a signed rimage
# file as the single argument.
#
# (Introduced as boards/xtensa/intel_adsp_cavs15/tools/cavs-fw-v25.py;
# the older cavs-fw.py was renamed to cavs-fw-v15.py in the same change.)

logging.basicConfig()
log = logging.getLogger("cavs-fw")
log.setLevel(logging.INFO)

# Path of the firmware image to load. None when no argument was given,
# so that merely importing this file does not raise IndexError; main()
# reports the usage error instead.
FW_FILE = sys.argv[1] if len(sys.argv) > 1 else None

PAGESZ = 4096
HUGEPAGESZ = 2 * 1024 * 1024
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp"

HDA_PPCTL__GPROCEN = 1 << 30
HDA_SD_CTL__TRAFFIC_PRIO = 1 << 18
HDA_SD_CTL__START = 1 << 1


def main():
    """Load the firmware image named by FW_FILE onto the DSP and start it.

    Steps, in order: reset the HDA controller, enable global processing,
    power the DSP cores down, copy the image into a DMA buffer and
    program the first HDA output stream with it, power up core 0, send
    the PURGE_FW IPC, release core 0 from reset/stall, start the DMA and
    poll FW_STATUS for about one second.

    Exits through sys.exit() when no firmware path was supplied. The
    register polls are unbounded busy-waits.
    """
    if FW_FILE is None:
        sys.exit(f"usage: {sys.argv[0]} <signed rimage file>")

    if os.system("lsmod | grep -q snd_sof_pci") == 0:
        log.warning("The Linux snd-sof-pci kernel module is loaded. While this")
        log.warning(" loader will normally work in such circumstances, things")
        log.warning(" will get confused if the system tries to touch the hardware")
        log.warning(" simultaneously. Operation is most reliable if it is")
        log.warning(" unloaded first.")

    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + " mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    with open(FW_FILE, "rb") as f:
        fw_bytes = f.read()

    # An image may start with an "XMan" extended manifest whose second
    # word is its own length; that prefix is not sent to the device.
    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:]

    (hda, sd, dsp) = map_regs()  # Device register mappings

    # Reset the HDA device
    log.info("Reset HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1:
        pass
    hda.GCTL = 1
    while not hda.GCTL & 1:
        pass

    # Turn on HDA "global processing enable" first. As documented,
    # this enables the audio DSP (vs. hardware HDA emulation). But it
    # actually means "enable access to the ADSP registers in PCI BAR 4" (!)
    log.info("Enable HDA global processing")
    hda.PPCTL |= HDA_PPCTL__GPROCEN

    # Turn off the DSP CPUs (each byte of ADSPCS is a bitmask for each
    # of 1-8 DSP cores: lowest byte controls "stall", the second byte
    # engages "reset", the third controls power, and the highest byte
    # is the output state for "powered" to be read after a state
    # change). Set stall and reset, and turn off power for everything:
    log.info(f"Powering down, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = 0xffff
    while dsp.ADSPCS & 0xff000000:
        pass
    log.info(f"Powered down, ADSPCS = 0x{dsp.ADSPCS:x}")

    # Configure our DMA stream to transfer the firmware image
    log.info(f"Configuring DMA output stream {hda_ostream_id}...")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)

    # Reset stream
    sd.CTL = 1
    while (sd.CTL & 1) == 0:
        pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1:
        pass

    sd.CTL = (1 << 20)  # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1

    # Enable "processing" on the output stream (send DMA to the DSP
    # and not the audio output hardware)
    hda.PPCTL |= (HDA_PPCTL__GPROCEN | (1 << hda_ostream_id))

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read. The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Power up core 0 of the DSP (power byte = 0x01) and wait for it to
    # show that it has power. Leave stall and reset high for now
    log.info(f"Powering up DSP core #0, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = 0x01ffff
    while (dsp.ADSPCS & 0x01000000) == 0:
        pass
    log.info(f"Powered up {ncores(dsp)} cores, ADSPCS = 0x{dsp.ADSPCS:x}")

    # Send the DSP an IPC message to tell the device how to boot
    # ("PURGE_FW" means "load new code") and which DMA channel to use.
    # The high bit is the "BUSY" signal bit that latches a device
    # interrupt.
    #
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an index
    # within the array of output streams (and we always use the first
    # one by construction). But with 1.5 it's the HDA index, and
    # depends on the number of input streams on the device.
    stream_idx = hda_ostream_id if cavs15 else 0
    ipcval = ((1 << 31)             # BUSY bit
              | (0x01 << 24)        # type = PURGE_FW
              | (1 << 14)           # purge_fw = 1
              | (stream_idx << 9))  # dma_id
    log.info(f"Sending PURGE_FW IPC, HIPCR = 0x{ipcval:x}")
    dsp.HIPCI = ipcval

    # Now start CPU #0 by dropping stall and reset
    log.info(f"Starting {ncores(dsp)} cores, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = 0x01fffe  # Out of reset
    time.sleep(0.1)
    dsp.ADSPCS = 0x01fefe  # Un-stall
    log.info(f"Started {ncores(dsp)} cores, ADSPCS = 0x{dsp.ADSPCS:x}")

    # Experimentation shows that these steps aren't actually required,
    # the ROM just charges ahead and initializes itself correctly even
    # if we don't wait for it. Do them anyway for better visibility,
    # when requested. Potentially remove later once this code is
    # mature.
    #
    # NOTE(review): indentation was lost in the text this was recovered
    # from, so the extent of this "if" body is inferred from the comment
    # above. The logger is set to INFO at import, so the branch is taken
    # unless the level is changed.
    if log.level <= logging.INFO:
        # Wait for the ROM to boot and signal it's ready. NOTE: This
        # short sleep seems to be needed; if we're banging on the
        # memory window during initial boot (before/while the window
        # control registers are configured?) the DSP hardware will
        # hang fairly reliably.
        time.sleep(0.1)
        log.info(f"Waiting for ROM init, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
        while (dsp.SRAM_FW_STATUS >> 24) != 5:
            pass
        log.info(f"ROM ready, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")

        # Newer devices have an ACK bit we can check
        if not cavs15:
            log.info(f"Awaiting IPC acknowledgment, HIPCA 0x{dsp.HIPCA:x}")
            while not dsp.HIPCA & (1 << 31):
                pass
            # NOTE(review): this ORs in every bit *except* bit 31; with
            # bit 31 just observed set, the value written is all ones.
            # Presumably a write-1-to-clear acknowledge -- confirm
            # against the register documentation.
            dsp.HIPCA |= ~(1 << 31)

        # Wait for it to signal ROM_INIT_DONE
        log.info(f"Awaiting ROM init... FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
        while (dsp.SRAM_FW_STATUS & 0x00ffffff) != 1:
            pass

    # It's ready, uncork the stream
    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= HDA_SD_CTL__START

    # The ROM sets a FW_ENTERED value of 5 into the bottom 28 bit
    # "state" field of FW_STATUS on entry to the app. (Pedantry: this
    # is actually ephemeral and racy, because Zephyr is free to write
    # its own data once the app launches and we might miss it.
    # There's no standard "alive" signaling from the OS, which is
    # really what we want to wait for. So give it one second and move
    # on).
    log.info(f"Waiting for load, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    for _ in range(100):
        alive = dsp.SRAM_FW_STATUS & ((1 << 28) - 1) == 5
        if alive:
            break
        time.sleep(0.01)
    if alive:
        log.info("ROM reports firmware was entered")
    else:
        log.warning(f"Load failed? FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")

    # Turn DMA off and reset the stream. If this doesn't happen the
    # hardware continues streaming out of our now-stale page and has
    # been observed to glitch the next boot.
    sd.CTL = 1

    time.sleep(1)

    log.info(f"ADSPCS = 0x{dsp.ADSPCS:x}")
    log.info(f"Load complete, {ncores(dsp)} cores active")


# Count of active/running cores
def ncores(dsp):
    """Return the number of bits set in the top byte of dsp.ADSPCS."""
    return bin(dsp.ADSPCS >> 24).count("1")


def map_regs():
    """Locate the audio device in sysfs and map its register blocks.

    Sets the module globals ``cavs15`` (True for the three PCI IDs
    treated as cAVS 1.5) and ``hda_ostream_id`` (taken from GCAP bits
    8-11, i.e. the number of input streams).

    Returns:
        (hda, sd, dsp): frozen Regs objects for the HD Audio registers,
        the selected output stream descriptor, and the DSP registers
        in BAR 4.

    Raises:
        RuntimeError: no PCI device with a listed ID was found.
    """
    # List cribbed from kernel SOF driver. Not all tested!
    pci_ids = ["119a", "5a98", "1a98", "3198", "9dc8",
               "a348", "34C8", "38c8", "4dc8", "02c8",
               "06c8", "a3f0", "a0c8", "4b55", "4b58"]
    for dev_id in pci_ids:
        uevent = runx(f"grep -il PCI_ID=8086:{dev_id} /sys/bus/pci/devices/*/uevent")
        if uevent:
            pcidir = os.path.dirname(uevent)  # bytes, as returned by runx()
            break
    else:
        # Without this the code below would fail on an unbound "pcidir".
        raise RuntimeError("No supported Intel audio DSP PCI device found"
                           " under /sys/bus/pci/devices")

    # Detect hardware version, this matters in a few spots
    global cavs15
    cavs15 = dev_id in ["5a98", "1a98", "3198"]
    log.info(f"Detected cAVS {'1.5' if cavs15 else '1.8+'} hardware")

    # Disengage runtime power management so the kernel doesn't put it to sleep
    with open(pcidir + b"/power/control", "w") as ctrl:
        ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(pcidir + b"/config", "wb+") as cfg:
        cfg.seek(4)
        cfg.write(b'\x06\x04')

    time.sleep(0.1)

    hdamem = bar_map(pcidir, 0)

    # Standard HD Audio Registers
    hda = Regs(hdamem)
    hda.GCAP = 0x0000
    hda.GCTL = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL = 0x0804

    # Find the ID of the first output stream
    global hda_ostream_id
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f  # number of input streams
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)

    hda.freeze()

    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL = 0x00
    sd.LPIB = 0x04
    sd.CBL = 0x08
    sd.LVI = 0x0c
    sd.FMT = 0x12
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    dsp = Regs(bar_map(pcidir, 4))
    dsp.ADSPCS = 0x00004
    if cavs15:
        dsp.HIPCI = 0x00048  # original name of the register...
    else:
        dsp.HIPCI = 0x000d0  # ...now named "HIPCR" per 1.8+ docs
        dsp.HIPCA = 0x000d4
    dsp.SRAM_FW_STATUS = 0x80000  # Start of first SRAM window
    dsp.freeze()

    return (hda, sd, dsp)


def setup_dma_mem(fw_bytes):
    """Copy the firmware into a DMA buffer and build its descriptor list.

    Args:
        fw_bytes: the firmware image to transfer.

    Returns:
        (buf_list_addr, num_bufs): physical address of the buffer
        descriptor list and the number of entries in it.

    Raises:
        ValueError: the image does not fit in front of the descriptor
        list inside the single huge page.
    """
    # HDA requires at least two buffers be defined, but we don't care
    # about boundaries because it's all a contiguous region. Place a
    # vestigial 128-byte (minimum size and alignment) buffer after the
    # main one, and put the 4-entry BDL list into the final 128 bytes
    # of the page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len

    # Anything longer would be overwritten by the descriptor list below.
    if len(fw_bytes) > bdl_off:
        raise ValueError(f"Firmware image is {len(fw_bytes)} bytes,"
                         f" at most {bdl_off} fit in the DMA buffer")

    (mem, phys_addr) = map_phys_mem()
    mem[0:len(fw_bytes)] = fw_bytes

    log.info(f"Mapped 2M huge page at 0x{phys_addr:x}"
             f" to contain {len(fw_bytes)} bytes of firmware")

    # NOTE(review): the text this file was recovered from is missing
    # everything between 'struct.pack("' here and
    # ' /proc/sys/vm/nr_hugepages")' in map_phys_mem(). The pack
    # arguments and return value are reconstructed from the 32-byte
    # slice, the comment above and main()'s use of the result; confirm
    # against the upstream file.
    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    return (phys_addr + bdl_off, 2)


# Keeps every mmap object referenced for the life of the process; the
# register and DMA code holds only raw addresses into these mappings.
# NOTE(review): definition reconstructed (see setup_dma_mem); the name is
# fixed by its uses in map_phys_mem() and bar_map().
global_mmaps = []


def map_phys_mem():
    """Map one 2M huge page and find its physical address.

    Returns:
        (mem, paddr): the mmap object and the physical address derived
        from this process's /proc/self/pagemap entry for its first page.
    """
    # NOTE(review): this first stanza is reconstructed; only the tail
    # ' /proc/sys/vm/nr_hugepages")' survived. Presumably it grows the
    # kernel's huge page pool by one when none are free -- confirm.
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    # The mapping stays valid after the file object is closed and the
    # name is unlinked.
    with open(HUGEPAGE_FILE, "w+") as hugef:
        hugef.truncate(HUGEPAGESZ)
        mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
    global_mmaps.append(mem)
    os.unlink(HUGEPAGE_FILE)

    # Find the local process address of the mapping, then use that to
    # extract the physical address from the kernel's pagemap
    # interface. The physical page frame number occupies the bottom
    # bits of the entry.
    mem[0] = 0  # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    vpagenum = vaddr >> 12
    with open("/proc/self/pagemap", "rb") as pagemap:
        pagemap.seek(vpagenum * 8)  # one 8-byte entry per virtual page
        pent = pagemap.read(8)
    paddr = (struct.unpack("Q", pent)[0] & ((1 << 54) - 1)) * PAGESZ

    return (mem, paddr)


# Maps a PCI BAR and returns the in-process address
def bar_map(pcidir, barnum):
    """Map ``<pcidir>/resource<barnum>`` and return the mapping's address.

    Args:
        pcidir: sysfs directory of the PCI device, as bytes.
        barnum: index of the BAR to map.

    Returns:
        The integer virtual address of the start of the mapping. The
        mmap object itself is kept alive in ``global_mmaps``.
    """
    with open(pcidir.decode() + "/resource" + str(barnum), "r+") as f:
        mm = mmap.mmap(f.fileno(), os.fstat(f.fileno()).st_size)
    global_mmaps.append(mm)
    return ctypes.addressof(ctypes.c_int.from_buffer(mm))


# Syntactic sugar to make register block definition & use look nice.
# Instantiate from a base address, assign offsets to (uint32) named
# registers as fields, call freeze(), then the field acts as a direct
# alias for the register!
class Regs:
    """Named 32-bit registers at fixed offsets from a base address.

    Until freeze() is called, assigning to a new attribute name defines
    a register: the assigned value is its byte offset from base_addr.
    Assigning to an already-defined name writes that register, and
    reading a defined name reads it. After freeze(), names can no
    longer be defined.

    The bookkeeping fields (base_addr, ptrs, frozen) are stored through
    vars(self) so they bypass __setattr__.
    """

    def __init__(self, base_addr):
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        """Stop accepting new register definitions."""
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        ptrs = self.ptrs
        if name in ptrs:
            # Known register: store through to memory.
            ptrs[name].value = val
        elif not self.frozen:
            # New name before freeze(): val is the register's offset.
            ptrs[name] = ctypes.c_uint32.from_address(self.base_addr + val)
        else:
            # AttributeError (not KeyError) is what attribute assignment
            # is expected to raise for an unknown name.
            raise AttributeError(f"no register named {name!r}")

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails. Going through
        # vars() avoids recursing back into __getattr__ when "ptrs"
        # itself is absent, and KeyError is translated so that hasattr()
        # and getattr() with a default behave normally.
        try:
            return vars(self)["ptrs"][name].value
        except KeyError:
            raise AttributeError(name) from None


def runx(cmd):
    """Run *cmd* with ``sh -c`` and return everything it wrote to stdout.

    Returns bytes. The exit status is not checked; unlike a bare Popen,
    the child is waited for, so no zombie process or open pipe is left
    behind.
    """
    return subprocess.run(["sh", "-c", cmd],
                          stdout=subprocess.PIPE, check=False).stdout


if __name__ == "__main__":
    main()