301 lines
11 KiB
Python
301 lines
11 KiB
Python
|
#!/usr/bin/env python3
|
||
|
# Copyright(c) 2022 Intel Corporation. All rights reserved.
|
||
|
# SPDX-License-Identifier: Apache-2.0
|
||
|
import os
|
||
|
import sys
|
||
|
import struct
|
||
|
import logging
|
||
|
import asyncio
|
||
|
import time
|
||
|
import subprocess
|
||
|
import ctypes
|
||
|
import mmap
|
||
|
|
||
|
# Install the default root handler, then report at INFO level through a
# logger named after this tool.
logging.basicConfig()
log = logging.getLogger("cavs-fw")
log.setLevel(logging.INFO)

# Size in bytes of a regular page; page frame numbers are scaled by this.
PAGESZ = 1 << 12

# Size in bytes of the huge page used as the DMA buffer (2M).
HUGEPAGESZ = 2 << 20

# File that backs the DMA buffer mapping.
HUGEPAGE_FILE = "/dev/hugepages/cavs-fw-dma.tmp"

# The log is in the fourth memory window; windows appear as 128k regions
# starting at 512k.
WINSTREAM_OFFSET = (512 + 3 * 128) << 10
|
||
|
|
||
|
def map_regs():
    """Find the audio DSP on the PCI bus and map its register blocks.

    Side effects, in order: unloads the kernel module bound to the device
    (if any), writes "on" to the device's runtime power control file,
    rewrites the PCI command register, and maps BAR0 and BAR4.  The BAR4
    mmap object is also stored in the module-level ``bar4_mmap``.

    Returns:
        A tuple ``(hda, sd, dsp, hda_ostream_id, cavs15)``: the frozen
        ``Regs`` blocks for the HD Audio registers, the selected stream
        descriptor and the DSP registers, the index of the first output
        stream, and whether the PCI device ID is one of the IDs treated
        as cAVS 1.5 below.

    Raises:
        subprocess.CalledProcessError: if ``grep`` or ``rmmod`` exits with
            a non-zero status.
    """
    global bar4_mmap

    # "grep -l" prints one uevent path per matching device.  Keep only the
    # first line so that a second match cannot corrupt the directory name.
    found = runx("grep -iPl 'PCI_CLASS=40(10|38)0' /sys/bus/pci/devices/*/uevent")
    pcidir = os.path.dirname(found.split("\n", 1)[0])

    with open(f"{pcidir}/device") as devf:
        cavs15 = devf.read().rstrip() in ("0x5a98", "0x1a98", "0x3198")

    # Check sysfs for a loaded driver and remove it
    if os.path.exists(f"{pcidir}/driver"):
        mod = os.path.basename(os.readlink(f"{pcidir}/driver/module"))
        log.warning(f"Existing driver found! Unloading \"{mod}\" module")
        runx(f"rmmod {mod}")

    # Disengage runtime power management so the kernel doesn't put it to sleep
    with open(f"{pcidir}/power/control", "w") as ctrl:
        ctrl.write("on")

    # Make sure PCI memory space access and busmastering are enabled.
    # Also disable interrupts so as not to confuse the kernel.
    with open(f"{pcidir}/config", "wb+") as cfg:
        cfg.seek(4)
        cfg.write(b'\x06\x04')

    # Standard HD Audio Registers
    (hdamem, _) = bar_map(pcidir, 0)
    hda = Regs(hdamem)
    hda.GCAP = 0x0000
    hda.GCTL = 0x0008
    hda.SPBFCTL = 0x0704
    hda.PPCTL = 0x0804

    # Find the ID of the first output stream
    hda_ostream_id = (hda.GCAP >> 8) & 0x0f  # number of input streams
    log.info(f"Selected output stream {hda_ostream_id} (GCAP = 0x{hda.GCAP:x})")
    hda.SD_SPIB = 0x0708 + (8 * hda_ostream_id)
    hda.freeze()

    # Standard HD Audio Stream Descriptor
    sd = Regs(hdamem + 0x0080 + (hda_ostream_id * 0x20))
    sd.CTL = 0x00
    sd.CBL = 0x08
    sd.LVI = 0x0c
    sd.BDPL = 0x18
    sd.BDPU = 0x1c
    sd.freeze()

    # Intel Audio DSP Registers
    (bar4_mem, bar4_mmap) = bar_map(pcidir, 4)
    dsp = Regs(bar4_mem)
    dsp.ADSPCS = 0x00004
    dsp.HIPCIDR = 0x00048 if cavs15 else 0x000d0
    dsp.SRAM_FW_STATUS = 0x80000  # Start of first SRAM window
    dsp.freeze()

    return (hda, sd, dsp, hda_ostream_id, cavs15)
|
||
|
|
||
|
def setup_dma_mem(fw_bytes):
    """Copy the firmware into the DMA huge page and build its BDL.

    The page is laid out as a main buffer, a 128-byte second buffer and,
    in the final 128 bytes, the buffer descriptor list (BDL) itself.

    Args:
        fw_bytes: Firmware image, copied to the start of the page.

    Returns:
        A tuple ``(bdl_phys_addr, num_bufs)``: the physical address of the
        BDL and the number of buffers it describes (always 2).

    Raises:
        ValueError: if the image does not fit in front of the BDL.
    """
    # HDA requires at least two buffers be defined, but we don't care about
    # boundaries because it's all a contiguous region.  Place a vestigial
    # 128-byte (minimum size and alignment) buffer after the main one, and
    # put the two-entry BDL into the final 128 bytes of the page.
    buf0_len = HUGEPAGESZ - 2 * 128
    buf1_len = 128
    bdl_off = buf0_len + buf1_len

    # Anything at or past bdl_off would be overwritten by the BDL below, so
    # reject oversized images before touching any memory.
    if len(fw_bytes) > bdl_off:
        raise ValueError(
            f"firmware image is {len(fw_bytes)} bytes, "
            f"but the DMA buffer holds at most {bdl_off}")

    (mem, phys_addr) = map_phys_mem()
    mem[0:len(fw_bytes)] = fw_bytes

    log.info("Mapped 2M huge page at 0x%x to contain %d bytes of firmware",
             phys_addr, len(fw_bytes))

    # Each entry is packed as two little-endian quadwords: the buffer's
    # physical address followed by its length.
    mem[bdl_off:bdl_off + 32] = struct.pack("<QQQQ",
                                            phys_addr, buf0_len,
                                            phys_addr + buf0_len, buf1_len)
    log.info("Filled the buffer descriptor list (BDL) for DMA.")
    return (phys_addr + bdl_off, 2)
|
||
|
|
||
|
# map_phys_mem() and bar_map() append every mmap object they create to this
# list; holding the references protects the mappings from garbage collection.
global_mmaps = []
|
||
|
|
||
|
def map_phys_mem():
    """Map one 2M huge page and find its physical address.

    Maps 2M of contiguous memory using a single page from hugetlbfs, then
    locates its physical address for use as a DMA buffer.  The mapping is
    appended to ``global_mmaps`` and the backing file is unlinked.

    Returns:
        A tuple ``(mem, paddr)``: the mmap object and the physical address
        of its first byte.

    Raises:
        RuntimeError: if the pagemap entry cannot be read, or reports a
            page frame number of zero.
    """
    # Make sure hugetlbfs is mounted (not there on chromeos)
    os.system("mount | grep -q hugetlbfs ||"
              + " (mkdir -p /dev/hugepages; "
              + " mount -t hugetlbfs hugetlbfs /dev/hugepages)")

    # Ensure the kernel has enough budget for one new page
    free = int(runx("awk '/HugePages_Free/ {print $2}' /proc/meminfo"))
    if free == 0:
        tot = 1 + int(runx("awk '/HugePages_Total/ {print $2}' /proc/meminfo"))
        os.system(f"echo {tot} > /proc/sys/vm/nr_hugepages")

    # The mapping outlives both the file object and the directory entry, so
    # close and unlink as soon as the mmap exists (or has failed).
    with open(HUGEPAGE_FILE, "w+") as hugef:
        try:
            hugef.truncate(HUGEPAGESZ)
            mem = mmap.mmap(hugef.fileno(), HUGEPAGESZ)
        finally:
            os.unlink(HUGEPAGE_FILE)
    global_mmaps.append(mem)

    # Find the local process address of the mapping, then use that to
    # extract the physical address from the kernel's pagemap interface.
    # The physical page frame number occupies the bottom bits of the entry.
    mem[0] = 0  # Fault the page in so it has an address!
    vaddr = ctypes.addressof(ctypes.c_int.from_buffer(mem))
    with open("/proc/self/pagemap", "rb") as pagemap:
        # One 8-byte entry per virtual page
        pagemap.seek((vaddr // PAGESZ) * 8)
        pent = pagemap.read(8)
    if len(pent) != 8:
        raise RuntimeError("short read from /proc/self/pagemap")

    pfn = struct.unpack("Q", pent)[0] & ((1 << 55) - 1)
    if pfn == 0:
        # NOTE(review): the kernel is documented to zero this field for
        # callers without sufficient privileges; either way, address 0
        # must not be handed to the DMA engine.
        raise RuntimeError("pagemap reported no physical page frame for the"
                           " DMA buffer (not running as root?)")
    return (mem, pfn * PAGESZ)
|
||
|
|
||
|
def bar_map(pcidir, barnum):
    """Map a PCI BAR and return its in-process address.

    The mapping is appended to ``global_mmaps`` so that it stays alive
    while the raw address is in use.

    Args:
        pcidir: sysfs directory of the PCI device.
        barnum: Number of the BAR; selects the ``resource<barnum>`` file.

    Returns:
        A tuple ``(addr, mm)``: the address of the first mapped byte and
        the mmap object itself.
    """
    # The file object is only needed to create the mapping; close it
    # explicitly instead of leaving it to the garbage collector.
    with open(pcidir + "/resource" + str(barnum), "r+") as f:
        size = os.fstat(f.fileno()).st_size
        mm = mmap.mmap(f.fileno(), size)
    global_mmaps.append(mm)
    log.info("Mapped PCI bar %d of length %d bytes.", barnum, size)
    return (ctypes.addressof(ctypes.c_int.from_buffer(mm)), mm)
|
||
|
|
||
|
class Regs:
    """Syntactic sugar to make register block definition & use look nice.

    Instantiate from a base address, assign offsets to (uint32) named
    registers as fields, call freeze(), then each field acts as a direct
    alias for the register: reading the attribute reads the 32-bit value
    at ``base_addr + offset`` and assigning to it writes that value.

    Unknown register names raise AttributeError, so ``hasattr()`` and
    ``getattr(obj, name, default)`` behave normally.
    """

    def __init__(self, base_addr):
        # Go through vars() so the bookkeeping fields bypass __setattr__.
        vars(self)["base_addr"] = base_addr
        vars(self)["ptrs"] = {}
        vars(self)["frozen"] = False

    def freeze(self):
        """End the definition phase; later assignments write registers."""
        vars(self)["frozen"] = True

    def __setattr__(self, name, val):
        """Define register *name* at offset *val*, or write *val* to it.

        Raises:
            AttributeError: if the block is frozen and *name* was never
                defined.
        """
        ptrs = self.ptrs
        if not self.frozen and name not in ptrs:
            # Definition phase: val is an offset from the base address.
            ptrs[name] = ctypes.c_uint32.from_address(self.base_addr + val)
        elif name in ptrs:
            ptrs[name].value = val
        else:
            raise AttributeError(
                f"{type(self).__name__!r} has no register {name!r}")

    def __getattr__(self, name):
        """Return the current value of register *name*.

        Raises:
            AttributeError: if no such register was defined.
        """
        # Only reached when normal lookup fails.  Read "ptrs" through
        # vars() so a missing table cannot recurse back into here.
        try:
            return vars(self)["ptrs"][name].value
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} has no register {name!r}") from None
|
||
|
|
||
|
def runx(cmd):
    """Run *cmd* through the shell and return what it printed.

    The captured standard output is decoded to text and trailing
    whitespace is stripped.
    """
    raw = subprocess.check_output(cmd, shell=True)
    text = raw.decode()
    return text.rstrip()
|
||
|
|
||
|
def load_firmware(fw_file):
    """Load a firmware image into the DSP through an HDA output stream.

    Uses the module-level ``hda``, ``sd``, ``dsp``, ``hda_ostream_id`` and
    ``cavs15`` values, which main() assigns from map_regs() before calling
    this function.  The register accesses below are kept in a fixed order;
    note that the polling loops have no timeout.

    Args:
        fw_file: Path of the firmware image.  A leading extended manifest
            (magic ``XMan``) is trimmed before the transfer.
    """
    with open(fw_file, "rb") as fwf:
        fw_bytes = fwf.read()

    (magic, sz) = struct.unpack("4sI", fw_bytes[0:8])
    if magic == b'XMan':
        log.info(f"Trimming {sz} bytes of extended manifest")
        fw_bytes = fw_bytes[sz:]

    # This actually means "enable access to BAR4 registers"!
    hda.PPCTL |= (1 << 30)  # GPROCEN, "global processing enable"

    log.info("Resetting HDA device")
    hda.GCTL = 0
    while hda.GCTL & 1:
        pass
    hda.GCTL = 1
    while not hda.GCTL & 1:
        pass

    log.info("Powering down DSP cores")
    dsp.ADSPCS = 0xffff
    while dsp.ADSPCS & 0xff000000:
        pass

    log.info(f"Configuring HDA stream {hda_ostream_id} to transfer firmware image")
    (buf_list_addr, num_bufs) = setup_dma_mem(fw_bytes)
    # Pulse bit 0 of the stream control register, waiting for each change
    # to read back, before programming the stream.
    sd.CTL = 1
    while (sd.CTL & 1) == 0:
        pass
    sd.CTL = 0
    while (sd.CTL & 1) == 1:
        pass
    sd.CTL = (1 << 20)  # Set stream ID to anything non-zero
    sd.BDPU = (buf_list_addr >> 32) & 0xffffffff
    sd.BDPL = buf_list_addr & 0xffffffff
    sd.CBL = len(fw_bytes)
    sd.LVI = num_bufs - 1
    hda.PPCTL |= (1 << hda_ostream_id)

    # SPIB ("Software Position In Buffer") is an Intel HDA extension
    # that puts a transfer boundary into the stream beyond which the
    # other side will not read.  The ROM wants to poll on a "buffer
    # full" bit on the other side that only works with this enabled.
    hda.SPBFCTL |= (1 << hda_ostream_id)
    hda.SD_SPIB = len(fw_bytes)

    # Start DSP.  Just core 0 on 1.8+ (secondary core startup is handled
    # internally), but on 1.5 the host controls the power levers.
    log.info(f"Starting DSP, ADSPCS = 0x{dsp.ADSPCS:x}")
    dsp.ADSPCS = 0xff0000 if cavs15 else 0x01fefe
    while (dsp.ADSPCS & 0x1000000) == 0:
        pass

    # Wait for the ROM to boot and signal it's ready.  This short
    # sleep seems to be needed; if we're banging on the memory window
    # during initial boot (before/while the window control registers
    # are configured?) the DSP hardware will hang fairly reliably.
    log.info("Wait for ROM startup")
    time.sleep(0.1)
    while (dsp.SRAM_FW_STATUS >> 24) != 5:
        pass

    # Send the DSP an IPC message to tell the device how to boot.
    # Note: with cAVS 1.8+ the ROM receives the stream argument as an
    # index within the array of output streams (and we always use the
    # first one by construction).  But with 1.5 it's the HDA index,
    # and depends on the number of input streams on the device.
    stream_idx = hda_ostream_id if cavs15 else 0
    ipcval = ((1 << 31)            # BUSY bit
              | (0x01 << 24)       # type = PURGE_FW
              | (1 << 14)          # purge_fw = 1
              | (stream_idx << 9))  # dma_id
    log.info(f"Sending IPC command, HIPCR = 0x{ipcval:x}")
    dsp.HIPCIDR = ipcval

    log.info(f"Starting DMA, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    sd.CTL |= 2  # START flag

    # Poll for up to 200 * 10ms; a failed load is only reported, and the
    # stream is still shut down below.
    log.info(f"Waiting for firmware handoff, FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")
    for _ in range(200):
        alive = dsp.SRAM_FW_STATUS & ((1 << 28) - 1) == 5  # "FW_ENTERED"
        if alive:
            break
        time.sleep(0.01)
    if not alive:
        log.warning(f"Load failed? FW_STATUS = 0x{dsp.SRAM_FW_STATUS:x}")

    # Turn DMA off and reset the stream.  Clearing START first is a noop
    # per the spec, but required for 1.5, and makes the load on 1.8
    # unstable.  Go figure.
    if cavs15:
        sd.CTL &= ~2  # clear START
    sd.CTL |= 1
    log.info("cAVS firmware load complete")
|
||
|
|
||
|
def win_read(start, length):
    """Return *length* bytes of the log window, beginning at *start*.

    This SHOULD be just a slice of the mapping, but slicing an mmap array
    was seen to be unreliable on one machine (python 3.6.9 on Ubuntu
    18.04), so the bytes are read out individually.
    """
    first = WINSTREAM_OFFSET + start
    return bytes(bar4_mmap[first + i] for i in range(length))
|
||
|
|
||
|
def win_hdr():
    """Read the first 16 bytes of the log window as four uint32 values.

    The values are decoded little-endian and returned as a 4-tuple.
    """
    header = win_read(0, 16)
    return struct.unpack("<4I", header)
|
||
|
|
||
|
def winstream_read(last_seq):
    """Return the log text written since sequence number *last_seq*.

    Python implementation of the same algorithm in sys_winstream_read(),
    see there for details.

    Args:
        last_seq: Sequence value returned by the previous call (0 at
            startup).

    Returns:
        A tuple ``(seq, text)``: the sequence value to pass to the next
        call and the newly read text (possibly empty).  Bytes that are
        not valid UTF-8 are replaced rather than raising.
    """
    while True:
        (wlen, start, end, seq) = win_hdr()
        if seq == last_seq or start == end:
            return (seq, "")
        if wlen == 0:
            # A zero-length window with start != end is not a usable
            # header; every modulo below would divide by zero.  Leave
            # the caller's position alone and report nothing.
            return (last_seq, "")
        behind = seq - last_seq
        if behind > ((end - start) % wlen):
            return (seq, "")
        copy = (end - behind) % wlen
        suffix = min(behind, wlen - copy)
        result = win_read(16 + copy, suffix)
        if suffix < behind:
            # The data wraps around; the rest is at the start of the buffer
            result += win_read(16, behind - suffix)
        # Re-read the header and accept the data only if it was not
        # modified during the copy; otherwise go around again.
        (wlen, start1, end, seq1) = win_hdr()
        if start1 == start and seq1 == seq:
            # Best effort: one bad byte must not kill the log loop
            return (seq, result.decode("utf-8", "replace"))
|
||
|
|
||
|
async def main():
    """Load the firmware named on the command line, then stream its log.

    Publishes the objects returned by map_regs() as module-level globals
    (load_firmware() reads them), loads ``sys.argv[1]``, writes a ``--``
    separator line and then polls the log window forever, copying new
    output to stdout.
    """
    global hda, sd, dsp, hda_ostream_id, cavs15

    # Check the command line first: map_regs() unloads the kernel driver
    # and reconfigures the device, which must not happen for a run that
    # cannot proceed.
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else "cavs-fw"
        sys.exit(f"usage: {prog} FIRMWARE_FILE")

    (hda, sd, dsp, hda_ostream_id, cavs15) = map_regs()
    log.info(f"Detected cAVS {'1.5' if cavs15 else '1.8+'} hardware")

    load_firmware(sys.argv[1])

    sys.stdout.write("--\n")
    last_seq = 0
    while True:
        await asyncio.sleep(0.03)
        (last_seq, output) = winstream_read(last_seq)
        if output:
            sys.stdout.write(output)
            sys.stdout.flush()
|
||
|
|
||
|
if __name__ == "__main__":
    # asyncio.run() (Python 3.7+) creates and closes its own event loop.
    # Calling get_event_loop() with no running loop is deprecated on newer
    # interpreters, so it is kept only as the fallback for Python 3.6.
    if hasattr(asyncio, "run"):
        asyncio.run(main())
    else:
        asyncio.get_event_loop().run_until_complete(main())
|