#!/usr/bin/env python3 # SPDX-License-Identifier: Apache-2.0 # Copyright(c) 2021 Intel Corporation. All rights reserved. import ctypes import mmap import os import struct import subprocess import sys import time import logging # Intel Audio DSP firmware loader. No dependencies on anything # outside this file beyond Python3 builtins. Pass a signed rimage # file as the single argument. logging.basicConfig() log = logging.getLogger("cavs-fw") log.setLevel(logging.INFO) FW_FILE = sys.argv[1] PAGESZ = 4096 HUGEPAGESZ = 2 * 1024 * 1024 HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp" HDA_PPCTL__GPROCEN = 1 << 30 HDA_SD_CTL__TRAFFIC_PRIO = 1 << 18 HDA_SD_CTL__START = 1 << 1 def main(): if os.system("lsmod | grep -q snd_sof_pci") == 0: log.warning("The Linux snd-sof-pci kernel module is loaded. While this") log.warning(" loader will normally work in such circumstances, things") log.warning(" will get confused if the system tries to touch the hardware") log.warning(" simultaneously. Operation is most reliable if it is") log.warning(" unloaded first.") # Make sure hugetlbfs is mounted (not there on chromeos) os.system("mount | grep -q hugetlbfs ||" + " (mkdir -p /dev/hugepages; " + " mount -t hugetlbfs hugetlbfs /dev/hugepages)") with open(FW_FILE, "rb") as f: fw_bytes = f.read() (magic, sz) = struct.unpack("4sI", fw_bytes[0:8]) if magic == b'XMan': log.info(f"Trimming {sz} bytes of extended manifest") fw_bytes = fw_bytes[sz:len(fw_bytes)] (hda, sd, dsp) = map_regs() # Device register mappings # Reset the HDA device log.info("Reset HDA device") hda.GCTL = 0 while hda.GCTL & 1: pass hda.GCTL = 1 while not hda.GCTL & 1: pass # Turn on HDA "global processing enable" first. As documented, # this enables the audio DSP (vs. hardware HDA emulation). But it # actually means "enable access to the ADSP registers in PCI BAR 4" (!) log.info("Enable HDA global processing") hda.PPCTL |= HDA_PPCTL__GPROCEN # Turn off the DSP CPUs (each byte of ADSPCS is a bitmask for each # of 1-8 DSP cores: lowest byte controls "stall", the second byte # engages "reset", the third controls power, and the highest byte # is the output state for "powered" to be read after a state # change. Set stall and reset, and turn off power for everything: log.info(f"Powering down, ADSPCS = 0x{dsp.ADSPCS:x}") dsp.ADSPCS = 0xffff while dsp.ADSPCS & 0xff000000: pass log.info(f"Powered down, ADSPCS = 0x{dsp.ADSPCS:x}") # Configure our DMA stream to transfer the firmware image log.info(f"Configuring DMA output stream {hda_ostream_id}...") (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes) # Reset stream sd.CTL = 1 while (sd.CTL & 1) == 0: pass sd.CTL = 0 while (sd.CTL & 1) == 1: pass sd.CTL = (1 << 20) # Set stream ID to anything non-zero sd.BDPU = (buf_list_addr >> 32) & 0xffffffff sd.BDPL = buf_list_addr & 0xffffffff sd.CBL = len(fw_bytes) sd.LVI = num_bufs - 1 # Enable "processing" on the output stream (send DMA to the DSP # and not the audio output hardware) hda.PPCTL |= (HDA_PPCTL__GPROCEN | (1 << hda_ostream_id)) # SPIB ("Software Position In Buffer") is an Intel HDA extension # that puts a transfer boundary into the stream beyond which the # other side will not read. The ROM wants to poll on a "buffer # full" bit on the other side that only works with this enabled. hda.SPBFCTL |= (1 << hda_ostream_id) hda.SD_SPIB = len(fw_bytes) # Power up all the cores on the DSP and wait for CPU0 to show that # it has power. Leave stall and reset high for now log.info(f"Powering up DSP core #0, ADSPCS = 0x{dsp.ADSPCS:x}") dsp.ADSPCS = 0x01ffff while (dsp.ADSPCS & 0x01000000) == 0: pass log.info(f"Powered up {ncores(dsp)} cores, ADSPCS = 0x{dsp.ADSPCS:x}") # Send the DSP an IPC message to tell the device how to boot # ("PURGE_FW" means "load new code") and which DMA channel to use. # The high bit is the "BUSY" signal bit that latches a device # interrupt. # # Note: with cAVS 1.8+ the ROM receives the stream argument as an index # within the array of output streams (and we always use the first # one by construction). But with 1.5 it's the HDA index, and # depends on the number of input streams on the device. stream_idx = hda_ostream_id if cavs15 else 0 ipcval = ( (1 << 31) # BUSY bit | (0x01 << 24) # type = PURGE_FW | (1 << 14) # purge_fw = 1 | (stream_idx << 9)) # dma_id log.info(f"Sending PURGW_FW IPC, HIPCR = 0x{ipcval:x}") dsp.HIPCI = ipcval # Now start CPU #0 by dropping stall and reset log.info(f"Starting {ncores(dsp)} cores, ADSPCS = 0x{dsp.ADSPCS:x}") dsp.ADSPCS = 0x01fffe # Out of reset time.sleep(0.1) dsp.ADSPCS = 0x01fefe # Un-stall log.info(f"Started {ncores(dsp)} cores, ADSPCS = 0x{dsp.ADSPCS:x}") # Experimentation shows that these steps aren't actually required, # the ROM just charges ahead and initializes itself correctly even # if we don't wait for it. Do them anyway for better visibility, # when requested. Potentially remove later once this code is # mature. if log.level <= logging.INFO: # Wait for the ROM to boot and signal it's ready. NOTE: This # short sleep seems to be needed; if we're banging on the # memory window during initial boot (before/while the window # control registers are configured?) the DSP hardware will # hang fairly reliably. time.sleep(0.1) log.info(f"Waiting for ROM init, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}") while (dsp.SRAM_FW_STATUS >> 24) != 5: pass log.info(f"ROM ready, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}") # Newer devices have an ACK bit we can check if not cavs15: log.info(f"Awaiting IPC acknowledgment, HIPCA 0x{dsp.HIPCA:x}") while not dsp.HIPCA & (1 << 31): pass dsp.HIPCA |= ~(1 << 31) # Wait for it to signal ROM_INIT_DONE log.info(f"Awaiting ROM init... FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}") while (dsp.SRAM_FW_STATUS & 0x00ffffff) != 1: pass # It's ready, uncork the stream log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}") sd.CTL |= HDA_SD_CTL__START # The ROM sets a FW_ENTERED value of 5 into the bottom 28 bit # "state" field of FW_STATUS on entry to the app. (Pedantry: this # is actually ephemeral and racy, because Zephyr is free to write # its own data once the app launches and we might miss it. # There's no standard "alive" signaling from the OS, which is # really what we want to wait for. So give it one second and move # on). log.info(f"Waiting for load, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}") for _ in range(100): alive = dsp.SRAM_FW_STATUS & ((1 << 28) - 1) == 5 if alive: break time.sleep(0.01) if alive: log.info("ROM reports firmware was entered") else: log.warning(f"Load failed? FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}") # Turn DMA off and reset the stream. If this doesn't happen the # hardware continues streaming out of our now-stale page and has # been observed to glitch the next boot. sd.CTL = 1 time.sleep(1) log.info(f"ADSPCS = 0x{dsp.ADSPCS:x}") log.info(f"Load complete, {ncores(dsp)} cores active") # Count of active/running cores def ncores(dsp): return bin(dsp.ADSPCS >> 24).count("1") def map_regs(): # List cribbed from kernel SOF driver. Not all tested! for id in ["119a", "5a98", "1a98", "3198", "9dc8", "a348", "34C8", "38c8", "4dc8", "02c8", "06c8", "a3f0", "a0c8", "4b55", "4b58"]: p = runx(f"grep -il PCI_ID=8086:{id} /sys/bus/pci/devices/*/uevent") if p: pcidir = os.path.dirname(p) break # Detect hardware version, this matters in a few spots global cavs15 cavs15 = id in [ "5a98", "1a98", "3198" ] log.info(f"Detected cAVS {'1.5' if cavs15 else '1.8+'} hardware") # Disengage runtime power management so the kernel doesn't put it to sleep with open(pcidir + b"/power/control", "w") as ctrl: ctrl.write("on") # Make sure PCI memory space access and busmastering are enabled. # Also disable interrupts so as not to confuse the kernel. with open(pcidir + b"/config", "wb+") as cfg: cfg.seek(4) cfg.write(b'\x06\x04') time.sleep(0.1) hdamem = bar_map(pcidir, 0) # Standard HD Audio Registers hda = Regs(hdamem) hda.GCAP = 0x0000 hda.GCTL = 0x0008 hda.SPBFCTL = 0x0704 hda.PPCTL = 0x0804 # Find the ID of the first output stream global hda_ostream_id hda_ostream_id = (hda.GCAP >> 8) & 0x0f # number of input streams log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})") hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id) hda.freeze() # Standard HD Audio Stream Descriptor sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20)) sd.CTL = 0x00 sd.LPIB = 0x04 sd.CBL = 0x08 sd.LVI = 0x0c sd.FMT = 0x12 sd.BDPL = 0x18 sd.BDPU = 0x1c sd.freeze() # Intel Audio DSP Registers dsp = Regs(bar_map(pcidir, 4)) dsp.ADSPCS = 0x00004 if cavs15: dsp.HIPCI = 0x00048 # original name of the register... else: dsp.HIPCI = 0x000d0 # ...now named "HIPCR" per 1.8+ docs dsp.HIPCA = 0x000d4 dsp.SRAM_FW_STATUS = 0x80000 # Start of first SRAM window dsp.freeze() return (hda, sd, dsp) def setup_dma_mem(fw_bytes): (mem, phys_addr) = map_phys_mem() mem[0:len(fw_bytes)] = fw_bytes log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware" % (phys_addr, len(fw_bytes))) # HDA requires at least two buffers be defined, but we don't care # about boundaries because it's all a contiguous region. Place a # vestigial 128-byte (minimum size and alignment) buffer after the # main one, and put the 4-entry BDL list into the final 128 bytes # of the page. buf0_len = HUGEPAGESZ - 2 * 128 buf1_len = 128 bdl_off = buf0_len + buf1_len mem[bdl_off:bdl_off + 32] = struct.pack(" /proc/sys/vm/nr_hugepages") hugef = open(HUGEPAGE_FILE, "w+") hugef.truncate(HUGEPAGESZ) mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ) global_mmaps.append(mem) os.unlink(HUGEPAGE_FILE) # Find the local process address of the mapping, then use that to # extract the physical address from the kernel's pagemap # interface. The physical page frame number occupies the bottom # bits of the entry. mem[0] = 0 # Fault the page in so it has an address! vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem)) vpagenum = vaddr >> 12 pagemap = open("/proc/self/pagemap", "rb") pagemap.seek(vpagenum * 8) pent = pagemap.read(8) paddr = (struct.unpack("Q", pent)[0] & ((1 << 54) - 1)) * PAGESZ pagemap.close() return (mem, paddr) # Maps a PCI BAR and returns the in-process address def bar_map(pcidir, barnum): f = open(pcidir.decode() + "/resource" + str(barnum), "r+") mm = mmap.mmap(f.fileno(), os.fstat(f.fileno()).st_size) global_mmaps.append(mm) return ctypes.addressof(ctypes.c_int.from_buffer(mm)) # Syntactic sugar to make register block definition & use look nice. # Instantiate from a base address, assign offsets to (uint32) named # registers as fields, call freeze(), then the field acts as a direct # alias for the register! class Regs: def __init__(self, base_addr): vars(self)["base_addr"] = base_addr vars(self)["ptrs"] = {} vars(self)["frozen"] = False def freeze(self): vars(self)["frozen"] = True def __setattr__(self, name, val): if not self.frozen and name not in self.ptrs: addr = self.base_addr + val self.ptrs[name] = ctypes.c_uint32.from_address(addr) else: self.ptrs[name].value = val def __getattr__(self, name): return self.ptrs[name].value def runx(cmd): return subprocess.Popen(["sh", "-c", cmd], stdout=subprocess.PIPE).stdout.read() if __name__ == "__main__": main()